From a727783abdfa4542401bcfb2dec4ec44db6ca7b3 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Wed, 21 Jun 2023 16:51:41 -0700 Subject: [PATCH 001/253] chore: remove 'dummy_root' in test samples (#9640) * remove 'dummy_root' in test samples * more cleanup --- .../server/test/xgboost_sample_pipeline.yaml | 2 +- ...ight_python_functions_v2_pipeline_rev.yaml | 2 +- .../pipeline_with_various_io_types.yaml | 2 +- .../v2/pipeline/xgboost_sample_pipeline.yaml | 2 +- ...ight_python_functions_v2_pipeline_rev.yaml | 2 +- .../data/test/xgboost_sample_pipeline.yaml | 2 +- frontend/src/pages/NewRunV2.test.tsx | 2 +- samples/test/after.py | 4 +-- samples/v2/pipeline_with_env.py | 5 ++-- sdk/python/kfp/compiler/compiler_test.py | 10 +++---- .../component_with_optional_inputs.yaml | 4 +-- .../component_with_pip_index_urls.yaml | 4 +-- .../components_with_optional_artifacts.yaml | 6 ++-- .../container_component_with_no_inputs.yaml | 2 +- .../lightweight_python_functions_pipeline.py | 2 +- ...lightweight_python_functions_pipeline.yaml | 7 ++--- ...ghtweight_python_functions_with_outputs.py | 2 +- ...tweight_python_functions_with_outputs.yaml | 11 ++++---- .../pipelines/pipeline_as_exit_task.yaml | 10 +++---- .../pipelines/pipeline_in_pipeline.yaml | 6 ++-- .../pipeline_in_pipeline_complex.yaml | 6 ++-- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 8 +++--- .../pipelines/pipeline_with_after.py | 2 +- .../pipelines/pipeline_with_after.yaml | 3 +- .../pipeline_with_concat_placeholder.py | 4 +-- .../pipeline_with_concat_placeholder.yaml | 3 +- .../pipelines/pipeline_with_condition.py | 2 +- .../pipelines/pipeline_with_condition.yaml | 13 ++++----- ...pipeline_with_dynamic_importer_metadata.py | 2 +- ...peline_with_dynamic_importer_metadata.yaml | 5 ++-- .../test_data/pipelines/pipeline_with_env.py | 2 +- .../pipelines/pipeline_with_env.yaml | 5 ++-- .../pipelines/pipeline_with_exit_handler.yaml | 8 +++--- .../pipeline_with_google_artifact_type.yaml | 6 ++-- .../pipelines/pipeline_with_if_placeholder.py | 3 +- .../pipeline_with_if_placeholder.yaml | 3 +- .../pipelines/pipeline_with_importer.py | 2 +- .../pipelines/pipeline_with_importer.yaml | 7 ++--- .../pipeline_with_importer_and_gcpc_types.py | 3 +- ...pipeline_with_importer_and_gcpc_types.yaml | 3 +- .../pipelines/pipeline_with_loops.yaml | 18 ++++++------ .../pipeline_with_loops_and_conditions.yaml | 28 +++++++++---------- .../pipeline_with_metadata_fields.yaml | 6 ++-- .../pipeline_with_metrics_outputs.py | 2 +- .../pipeline_with_metrics_outputs.yaml | 7 ++--- .../pipeline_with_multiple_exit_handlers.yaml | 16 +++++------ .../pipeline_with_nested_conditions.yaml | 18 ++++++------ .../pipeline_with_nested_conditions_yaml.py | 1 - .../pipeline_with_nested_conditions_yaml.yaml | 3 +- .../pipelines/pipeline_with_nested_loops.yaml | 10 +++---- .../pipelines/pipeline_with_ontology.py | 1 - .../pipelines/pipeline_with_ontology.yaml | 3 +- .../pipelines/pipeline_with_outputs.yaml | 6 ++-- ...pipeline_with_parallelfor_parallelism.yaml | 14 +++++----- ...ipeline_with_params_containing_format.yaml | 8 +++--- .../pipelines/pipeline_with_placeholders.yaml | 12 ++++---- .../pipelines/pipeline_with_resource_spec.py | 1 - .../pipeline_with_resource_spec.yaml | 3 +- .../pipelines/pipeline_with_retry.yaml | 4 +-- .../pipeline_with_reused_component.py | 2 +- .../pipeline_with_reused_component.yaml | 3 +- .../pipeline_with_task_final_status.yaml | 8 +++--- .../pipeline_with_task_final_status_yaml.yaml | 2 +- ...th_task_using_ignore_upstream_failure.yaml | 6 ++-- 
.../pipeline_with_various_io_types.py | 2 +- .../pipeline_with_various_io_types.yaml | 3 +- .../test_data/pipelines/two_step_pipeline.py | 2 +- .../pipelines/two_step_pipeline.yaml | 3 +- .../two_step_pipeline_containerized.yaml | 2 +- .../pipelines/xgboost_sample_pipeline.py | 2 +- .../pipelines/xgboost_sample_pipeline.yaml | 3 +- 71 files changed, 174 insertions(+), 202 deletions(-) diff --git a/backend/src/apiserver/server/test/xgboost_sample_pipeline.yaml b/backend/src/apiserver/server/test/xgboost_sample_pipeline.yaml index b9ba567619..e7d2be1cc3 100644 --- a/backend/src/apiserver/server/test/xgboost_sample_pipeline.yaml +++ b/backend/src/apiserver/server/test/xgboost_sample_pipeline.yaml @@ -237,7 +237,7 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root +defaultPipelineRoot: minio://dummy_root deploymentSpec: executors: exec-chicago-taxi-trips-dataset: diff --git a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml index 3d074610df..886f6141c4 100644 --- a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml +++ b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml @@ -233,4 +233,4 @@ root: enableCache: true componentRef: name: comp-preprocess -defaultPipelineRoot: dummy_root +defaultPipelineRoot: minio://dummy_root diff --git a/frontend/mock-backend/data/v2/pipeline/pipeline_with_various_io_types.yaml b/frontend/mock-backend/data/v2/pipeline/pipeline_with_various_io_types.yaml index ddaee4c210..8ede2e2f9b 100644 --- a/frontend/mock-backend/data/v2/pipeline/pipeline_with_various_io_types.yaml +++ b/frontend/mock-backend/data/v2/pipeline/pipeline_with_various_io_types.yaml @@ -87,7 +87,7 @@ components: parameters: output_1: parameterType: NUMBER_INTEGER -defaultPipelineRoot: dummy_root +defaultPipelineRoot: minio://dummy_root deploymentSpec: executors: exec-downstream: diff --git a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml index 982676b2de..2c3dbf0be6 100644 --- a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml +++ b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml @@ -198,7 +198,7 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root +defaultPipelineRoot: minio://dummy_root deploymentSpec: executors: exec-chicago-taxi-trips-dataset: diff --git a/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml b/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml index 3e569eb2c5..5e62958d6e 100644 --- a/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml +++ b/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml @@ -233,4 +233,4 @@ root: enableCache: true componentRef: name: comp-preprocess -defaultPipelineRoot: dummy_root +defaultPipelineRoot: minio://dummy_root diff --git a/frontend/src/data/test/xgboost_sample_pipeline.yaml b/frontend/src/data/test/xgboost_sample_pipeline.yaml index 982676b2de..bd9c668a10 100644 --- a/frontend/src/data/test/xgboost_sample_pipeline.yaml +++ b/frontend/src/data/test/xgboost_sample_pipeline.yaml @@ -198,7 +198,7 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root
+defaultPipelineRoot: minio://dummy_root deploymentSpec: executors: exec-chicago-taxi-trips-dataset: diff --git a/frontend/src/pages/NewRunV2.test.tsx b/frontend/src/pages/NewRunV2.test.tsx index 1354d0ac25..906b625554 100644 --- a/frontend/src/pages/NewRunV2.test.tsx +++ b/frontend/src/pages/NewRunV2.test.tsx @@ -554,7 +554,7 @@ describe('NewRunV2', () => { pipeline_id: ORIGINAL_TEST_PIPELINE_ID, pipeline_version_id: ORIGINAL_TEST_PIPELINE_VERSION_ID, }, - runtime_config: { parameters: {}, pipeline_root: 'dummy_root' }, + runtime_config: { parameters: {}, pipeline_root: 'minio://dummy_root' }, service_account: '', }), ); diff --git a/samples/test/after.py b/samples/test/after.py index 5c036c42b8..5a776a3910 100644 --- a/samples/test/after.py +++ b/samples/test/after.py @@ -42,6 +42,4 @@ def my_pipeline(): if __name__ == '__main__': compiler.Compiler().compile( - pipeline_func=my_pipeline, - pipeline_root='dummy_root', - output_path=__file__ + '.json') + pipeline_func=my_pipeline, output_path=__file__ + '.json') diff --git a/samples/v2/pipeline_with_env.py b/samples/v2/pipeline_with_env.py index c790615758..6204094470 100644 --- a/samples/v2/pipeline_with_env.py +++ b/samples/v2/pipeline_with_env.py @@ -47,7 +47,7 @@ def print_env_op(): """) -@dsl.pipeline(name='pipeline-with-env', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-env') def pipeline_with_env(): print_env_op().set_env_variable(name='ENV1', value='val1') print_env_2_op().set_env_variable( @@ -57,5 +57,4 @@ def pipeline_with_env(): if __name__ == '__main__': compiler.Compiler().compile( - pipeline_func=pipeline_with_env, - package_path='pipeline_with_env.yaml') \ No newline at end of file + pipeline_func=pipeline_with_env, package_path='pipeline_with_env.yaml') diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index a9d5f97c16..7071ca7688 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -192,7 +192,7 @@ def test_compile_pipeline_with_misused_inputvalue_should_raise_error(self): ' type "system.Model@0.0.1" cannot be paired with InputValuePlaceholder.' 
): - @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root') + @dsl.pipeline(name='test-pipeline') def my_pipeline(): downstream_op(model=upstream_op().output) @@ -213,7 +213,7 @@ def test_compile_pipeline_with_misused_inputpath_should_raise_error(self): TypeError, ' type "String" cannot be paired with InputPathPlaceholder.'): - @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root') + @dsl.pipeline(name='test-pipeline') def my_pipeline(text: str): component_op(text=text) @@ -222,7 +222,7 @@ def test_compile_pipeline_with_missing_task_should_raise_error(self): with self.assertRaisesRegex(ValueError, 'Task is missing from pipeline.'): - @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root') + @dsl.pipeline(name='test-pipeline') def my_pipeline(text: str): pass @@ -243,7 +243,7 @@ def test_compile_pipeline_with_misused_inputuri_should_raise_error(self): TypeError, ' type "Float" cannot be paired with InputUriPlaceholder.'): - @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root') + @dsl.pipeline(name='test-pipeline') def my_pipeline(value: float): component_op(value=value) @@ -264,7 +264,7 @@ def test_compile_pipeline_with_misused_outputuri_should_raise_error(self): TypeError, ' type "Integer" cannot be paired with OutputUriPlaceholder.'): - @dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root') + @dsl.pipeline(name='test-pipeline') def my_pipeline(): component_op() diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index 05ad4c2d2a..aa8dd25973 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -68,4 +68,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index ddb43d85d3..e8611f4712 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -19,7 +19,7 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.0-beta.16' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -45,4 +45,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index 25f4a0767e..6294901944 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -155,7 +155,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -237,4 +237,4 @@ root: schemaVersion: 0.0.1 isOptional: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/container_component_with_no_inputs.yaml b/sdk/python/test_data/pipelines/container_component_with_no_inputs.yaml index c27d712d8c..01daa9fe71 100644 --- a/sdk/python/test_data/pipelines/container_component_with_no_inputs.yaml +++ b/sdk/python/test_data/pipelines/container_component_with_no_inputs.yaml @@ -24,4 +24,4 @@ root: taskInfo: name: hello-world-container schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.py b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.py index fc0866d480..e0dc235e37 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.py +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.py @@ -120,7 +120,7 @@ def train( model.metadata['accuracy'] = 0.9 -@dsl.pipeline(pipeline_root='dummy_root', name='my-test-pipeline-beta') +@dsl.pipeline(name='my-test-pipeline-beta') def pipeline(message: str, input_dict: Dict[str, int] = {'A': 1, 'B': 2}): preprocess_task = preprocess( diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index 8d262365f4..a57f218574 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -64,7 +64,6 @@ components: artifactType: schemaTitle: system.Model schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-preprocess: @@ -79,7 +78,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -131,7 +130,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -239,4 +238,4 @@ root: message: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.py b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.py index 9681c48868..42fb9c87eb 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.py +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.py @@ -67,7 +67,7 @@ def output_named_tuple( return output(scalar, metrics, model) -@dsl.pipeline(pipeline_root='dummy_root', name='functions-with-outputs') +@dsl.pipeline(name='functions-with-outputs') def pipeline(first_message: str, second_message: str, first_number: int, second_number: int): concat_op = concat_message(first=first_message, second=second_message) diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index f1146dff2b..f933eb76b2 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -67,7 +67,6 @@ components: parameters: scalar: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-add-numbers: @@ -82,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -109,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -136,7 +135,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -163,7 +162,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -274,4 +273,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index fdf3197f80..15e7fa8601 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,7 +129,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -156,7 +156,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -183,7 +183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -210,7 +210,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -262,4 +262,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index d6f24930c1..c899cb25c1 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -152,4 +152,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index 248066037f..b52048b78d 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -188,7 +188,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -241,4 +241,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index 9758383cad..b90af91c22 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -264,4 +264,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_after.py b/sdk/python/test_data/pipelines/pipeline_with_after.py index a1093a4449..7ffd8015e3 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_after.py +++ b/sdk/python/test_data/pipelines/pipeline_with_after.py @@ -33,7 +33,7 @@ """) -@dsl.pipeline(name='pipeline-with-after', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-after') def my_pipeline(): task1 = component_op(text='1st task') task2 = component_op(text='2nd task').after(task1) diff --git a/sdk/python/test_data/pipelines/pipeline_with_after.yaml b/sdk/python/test_data/pipelines/pipeline_with_after.yaml index 84e4b213a3..8ddd829b36 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_after.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_after.yaml @@ -19,7 +19,6 @@ components: parameters: text: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-print-text: @@ -105,4 +104,4 @@ root: taskInfo: name: print-text-3 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.py b/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.py index 13bf9170b2..04b0723695 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.py +++ b/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.py @@ -23,9 +23,7 @@ str(test_data_dir / 'concat_placeholder_component.yaml')) -@dsl.pipeline( - name='one-step-pipeline-with-concat-placeholder', - pipeline_root='dummy_root') +@dsl.pipeline(name='one-step-pipeline-with-concat-placeholder') def my_pipeline(): component = component_op(input_prefix='some prefix:') diff --git a/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.yaml b/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.yaml index 124248a170..27fbeaf92c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_concat_placeholder.yaml @@ -7,7 +7,6 @@ components: parameters: input_prefix: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-component-with-concat-placeholder: @@ -34,4 +33,4 @@ root: taskInfo: name: component-with-concat-placeholder schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.py b/sdk/python/test_data/pipelines/pipeline_with_condition.py index a2c55c023f..da292a8261 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.py +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.py @@ -31,7 +31,7 @@ def print_op(msg: str): print(msg) -@dsl.pipeline(name='single-condition-pipeline', pipeline_root='dummy_root') +@dsl.pipeline(name='single-condition-pipeline') def my_pipeline(text: str = 'condition test'): flip1 = flip_coin_op().set_caching_options(False) print_op(msg=flip1.output) diff --git 
a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index 14c0fef350..436a60bf0d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -74,7 +74,6 @@ components: parameters: msg: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-flip-coin-op: @@ -89,7 +88,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -117,7 +116,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -145,7 +144,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -172,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -199,7 +198,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -265,4 +264,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.py b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.py index 099bd2d8dc..c988833f24 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.py +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.py @@ -27,7 +27,7 @@ def make_name(name: str) -> str: return name -@dsl.pipeline(name='pipeline-with-importer', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-importer') def my_pipeline(name: str = 'default-name', int_input: int = 1, pipeline_input_artifact_uri: str = DEFAULT_ARTIFACT_URI, diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 1af374bad0..29753c24ec 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -50,7 +50,6 @@ components: parameters: Output: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-importer: @@ -95,7 +94,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -182,4 +181,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.py b/sdk/python/test_data/pipelines/pipeline_with_env.py index 98ebd6d0cb..3b2ddf5de6 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.py +++ b/sdk/python/test_data/pipelines/pipeline_with_env.py @@ -44,7 +44,7 @@ def print_env_op(): """) -@dsl.pipeline(name='pipeline-with-env', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-env') def my_pipeline(): print_env_op().set_env_variable(name='ENV1', value='val1') print_env_2_op().set_env_variable( diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index 9e35173671..8e9f5d74e9 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -5,7 +5,6 @@ components: executorLabel: exec-print-env comp-print-env-op: executorLabel: exec-print-env-op -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-print-env: @@ -42,7 +41,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -80,4 +79,4 @@ root: taskInfo: name: print-env-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index e3517d29d3..60f7602c31 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,7 +65,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -92,7 +92,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -119,7 +119,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -171,4 +171,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index dfeab06f1f..646b956e21 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -57,7 +57,7 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.0-beta.16' 'kfp==2.0.0-beta.16' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.0-rc.2' 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -90,7 +90,7 @@ deploymentSpec: - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.0-beta.16' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -150,4 +150,4 @@ root: taskInfo: name: model-producer schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.py b/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.py index eb221c6aff..4cc355c544 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.py +++ b/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.py @@ -23,8 +23,7 @@ str(test_data_dir / 'if_placeholder_component.yaml')) -@dsl.pipeline( - name='one-step-pipeline-with-if-placeholder', pipeline_root='dummy_root') +@dsl.pipeline(name='one-step-pipeline-with-if-placeholder') def my_pipeline(input0: str, input1: str, input2: str): # supply only optional_input_1 but not optional_input_2 component = component_op(required_input=input0, optional_input_1=input1) diff --git a/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.yaml b/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.yaml index 9a1285027b..ccf75514de 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_if_placeholder.yaml @@ -17,7 +17,6 @@ components: parameterType: STRING required_input: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-component-with-optional-inputs: @@ -56,4 +55,4 @@ root: input2: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.py b/sdk/python/test_data/pipelines/pipeline_with_importer.py index 39bc647cd6..4ded772c76 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.py +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.py @@ -49,7 +49,7 @@ def pass_through_op(value: str) -> str: return value -@dsl.pipeline(name='pipeline-with-importer', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-importer') def my_pipeline(dataset2: str = 'gs://ml-pipeline-playground/shakespeare2.txt'): importer1 = importer( diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index dc34793aa9..b9ff81a39d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -96,7 +96,6 @@ components: parameters: scalar: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-importer: @@ -128,7 +127,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -160,7 +159,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -236,4 +235,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.py b/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.py index 98431d362c..3cbe9a488a 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.py +++ b/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.py @@ -38,8 +38,7 @@ class VertexDataset(dsl.Artifact): """) -@dsl.pipeline( - name='pipeline-with-importer-and-gcpc-type', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-importer-and-gcpc-type') def my_pipeline(): importer1 = importer( diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.yaml index aa47e4eef3..00dce2d86f 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer_and_gcpc_types.yaml @@ -21,7 +21,6 @@ components: artifactType: schemaTitle: google.VertexDataset schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-consumer-op: @@ -73,4 +72,4 @@ root: taskInfo: name: importer schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index e4ea602990..37bf1b9547 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -250,7 +250,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -276,7 +276,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -302,7 +302,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -328,7 +328,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -354,7 +354,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -424,4 +424,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index 31dc82fac3..fc214ebafe 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,7 +602,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -631,7 +631,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -660,7 +660,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -688,7 +688,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -714,7 +714,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -741,7 +741,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -768,7 +768,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -795,7 +795,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -822,7 +822,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -849,7 +849,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -876,7 +876,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -903,7 +903,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -930,7 +930,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1022,4 +1022,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 203ee18371..5f0da89e46 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -95,7 +95,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -172,4 +172,4 @@ root: schemaVersion: 0.0.1 description: The final concatenated dataset. 
schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.py b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.py index 887f97c003..7b9992d36e 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.py +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.py @@ -28,7 +28,7 @@ def output_metrics(metrics: Output[Metrics]): metrics.log_metric('accuracy', result) -@dsl.pipeline(name='pipeline-with-metrics-outputs', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-metrics-outputs') def my_pipeline(): output_metrics() diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index db913faaf2..355a257b52 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -46,7 +46,6 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-output-metrics: @@ -61,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -90,7 +89,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -149,4 +148,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index 0006354da2..db39ef1192 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,7 +125,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -260,7 +260,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -287,7 +287,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -389,4 +389,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index ca51913681..2ffa5012bb 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,7 +147,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -175,7 +175,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -313,7 +313,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -340,7 +340,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -426,4 +426,4 @@ root: taskInfo: name: print-op-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.py b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.py index 732aad76b0..89e22ed117 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.py +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.py @@ -68,7 +68,6 @@ def random_num_op(low, high): @dsl.pipeline( name='conditional-execution-pipeline', display_name='Conditional execution pipeline.', - pipeline_root='dummy_root', description='Shows how to use dsl.Condition().') def my_pipeline(): flip = flip_coin_op() diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.yaml index 153ccdb04d..d473abcd75 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions_yaml.yaml @@ -240,7 +240,6 @@ components: parameters: msg: parameterType: STRING -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-flip-coin: @@ -348,4 +347,4 @@ root: taskInfo: name: flip-coin schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index 4bfb2efc39..f062c20860 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -1,7 +1,7 @@ # PIPELINE DEFINITION # Name: pipeline-with-nested-loops # Inputs: -# loop_parameter: list [Default: [{'p_b': 'hello', 'p_a': [{'q_a': '1'}, {'q_a': '2'}]}, {'p_b': 'halo', 'p_a': [{'q_a': '11'}, {'q_a': '22'}]}]] +# loop_parameter: list [Default: [{'p_a': [{'q_a': '1'}, {'q_a': '2'}], 'p_b': 'hello'}, {'p_a': [{'q_a': '11'}, {'q_a': '22'}], 'p_b': 'halo'}]] components: comp-for-loop-1: dag: @@ -145,7 +145,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -172,7 +172,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -199,7 +199,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -256,4 +256,4 @@ root: isOptional: true parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_ontology.py b/sdk/python/test_data/pipelines/pipeline_with_ontology.py index e85c3335cf..63dc89f214 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_ontology.py +++ b/sdk/python/test_data/pipelines/pipeline_with_ontology.py @@ -29,7 +29,6 @@ @dsl.pipeline( name='two-step-pipeline-with-ontology', - pipeline_root='dummy_root', description='A linear two-step pipeline with artifact ontology types.') def my_pipeline(input_location: str = 'gs://test-bucket/pipeline_root', optimizer: str = 'sgd', diff --git a/sdk/python/test_data/pipelines/pipeline_with_ontology.yaml b/sdk/python/test_data/pipelines/pipeline_with_ontology.yaml index 14760c3ee4..c738ba3515 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_ontology.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_ontology.yaml @@ -37,7 +37,6 @@ components: artifactType: schemaTitle: system.Model schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-ingestion: @@ -112,4 +111,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index 99baf1b5f7..b5f0cbb1c9 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,7 +104,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -131,7 +131,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,4 +203,4 @@ root: schemaTitle: system.Artifact schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index bd931cabb9..48acb3b4ef 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -205,7 +205,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -257,7 +257,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -283,7 +283,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -357,4 +357,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index dfafe54771..3445124b9c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -128,7 +128,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -201,4 +201,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index ba86ea06ba..9541824312 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,7 +55,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -107,7 +107,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -133,7 +133,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -254,4 +254,4 @@ root: taskInfo: name: print-op-5 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_resource_spec.py b/sdk/python/test_data/pipelines/pipeline_with_resource_spec.py index c4a42119c6..29e1cc72ec 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_resource_spec.py +++ b/sdk/python/test_data/pipelines/pipeline_with_resource_spec.py @@ -29,7 +29,6 @@ @dsl.pipeline( name='two-step-pipeline-with-resource-spec', - pipeline_root='dummy_root', description='A linear two-step pipeline with resource specification.') def my_pipeline(input_location: str = 'gs://test-bucket/pipeline_root', optimizer: str = 'sgd', diff --git a/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml b/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml index 6448199c4f..e045b3c36b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_resource_spec.yaml @@ -37,7 +37,6 @@ components: artifactType: schemaTitle: system.Model schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-ingestion: @@ -120,4 +119,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index 3f6a05fc11..bfff3d14c6 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,7 +30,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -78,4 +78,4 @@ root: isOptional: true parameterType: NUMBER_DOUBLE schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_reused_component.py b/sdk/python/test_data/pipelines/pipeline_with_reused_component.py index b824bff2c6..d52985bbe7 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_reused_component.py +++ b/sdk/python/test_data/pipelines/pipeline_with_reused_component.py @@ -23,7 +23,7 @@ str(test_data_dir / 'add_component.yaml')) -@dsl.pipeline(name='add-pipeline', pipeline_root='dummy_root') +@dsl.pipeline(name='add-pipeline') def my_pipeline( a: int = 2, b: int = 5, diff --git a/sdk/python/test_data/pipelines/pipeline_with_reused_component.yaml b/sdk/python/test_data/pipelines/pipeline_with_reused_component.yaml index 69d4234620..d709e5d20a 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_reused_component.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_reused_component.yaml @@ -40,7 +40,6 @@ components: parameters: sum: parameterType: NUMBER_INTEGER -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-add: @@ -150,4 +149,4 @@ root: isOptional: true parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index 67a1d2ab17..729de262d8 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -99,7 +99,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -180,4 +180,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status_yaml.yaml index 1bfddfd250..5065d84754 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status_yaml.yaml @@ -92,4 +92,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index d6754e2a11..cba9c92beb 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,7 +35,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -62,7 +62,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -117,4 +117,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_various_io_types.py b/sdk/python/test_data/pipelines/pipeline_with_various_io_types.py index 96fdc5b329..934921ee76 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_various_io_types.py +++ b/sdk/python/test_data/pipelines/pipeline_with_various_io_types.py @@ -80,7 +80,7 @@ """) -@dsl.pipeline(name='pipeline-with-various-types', pipeline_root='dummy_root') +@dsl.pipeline(name='pipeline-with-various-types') def my_pipeline(input1: str, input3: Input[Artifact], input4: str = ''): component_1 = component_op_1( input_1=input1, diff --git a/sdk/python/test_data/pipelines/pipeline_with_various_io_types.yaml b/sdk/python/test_data/pipelines/pipeline_with_various_io_types.yaml index 44314129c6..2aae338e84 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_various_io_types.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_various_io_types.yaml @@ -96,7 +96,6 @@ components: parameters: output_1: parameterType: NUMBER_INTEGER -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-downstream: @@ -213,4 +212,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/two_step_pipeline.py b/sdk/python/test_data/pipelines/two_step_pipeline.py index 68e3bd701c..16869e2922 100644 --- a/sdk/python/test_data/pipelines/two_step_pipeline.py +++ b/sdk/python/test_data/pipelines/two_step_pipeline.py @@ -52,7 +52,7 @@ """) -@dsl.pipeline(name='simple-two-step-pipeline', pipeline_root='dummy_root') +@dsl.pipeline(name='simple-two-step-pipeline') def my_pipeline(text: str = 'Hello world!'): component_1 = component_op_1(text=text).set_display_name('Producer') component_2 = component_op_2( diff --git a/sdk/python/test_data/pipelines/two_step_pipeline.yaml b/sdk/python/test_data/pipelines/two_step_pipeline.yaml index 0cb2907ca1..b8ebfdd969 100644 --- a/sdk/python/test_data/pipelines/two_step_pipeline.yaml +++ b/sdk/python/test_data/pipelines/two_step_pipeline.yaml @@ -23,7 +23,6 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-read-from-gcs: @@ -89,4 +88,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/two_step_pipeline_containerized.yaml b/sdk/python/test_data/pipelines/two_step_pipeline_containerized.yaml index 7f581b4c5c..1ddd43a1c5 100644 --- a/sdk/python/test_data/pipelines/two_step_pipeline_containerized.yaml +++ b/sdk/python/test_data/pipelines/two_step_pipeline_containerized.yaml @@ -78,4 +78,4 @@ root: text: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.py b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.py index 8b51fc0280..cb40d4905d 100644 --- 
a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.py +++ b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.py @@ -36,7 +36,7 @@ ) -@dsl.pipeline(name='xgboost-sample-pipeline', pipeline_root='dummy_root') +@dsl.pipeline(name='xgboost-sample-pipeline') def xgboost_pipeline(): training_data_csv = chicago_taxi_dataset_op( where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"', diff --git a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml index b9ba567619..6555229532 100644 --- a/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml +++ b/sdk/python/test_data/pipelines/xgboost_sample_pipeline.yaml @@ -237,7 +237,6 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 -defaultPipelineRoot: dummy_root deploymentSpec: executors: exec-chicago-taxi-trips-dataset: @@ -924,4 +923,4 @@ root: taskInfo: name: xgboost-train-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.0-rc.2 From 45608d02bd72be87257d1f8c9f920f3650ad3b69 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 21 Jun 2023 17:49:29 -0700 Subject: [PATCH 002/253] docs(components): point to v2 docs in GCPC readme PiperOrigin-RevId: 542411539 --- components/google-cloud/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/components/google-cloud/README.md b/components/google-cloud/README.md index 58795e172a..b7297b7ab0 100644 --- a/components/google-cloud/README.md +++ b/components/google-cloud/README.md @@ -15,7 +15,6 @@ Please see the [Google Cloud Pipeline Components user guide](https://cloud.googl Please see the [Google Cloud Pipeline Components API reference documentation](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.41/). ### Release details - For details about previous and upcoming releases, please see the [release notes](https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/RELEASE.md). ## Examples From 536d93a1bffe035bf5222e7a48faf59d3b053800 Mon Sep 17 00:00:00 2001 From: axel7083 <42176370+axel7083@users.noreply.github.com> Date: Fri, 23 Jun 2023 01:48:08 +0200 Subject: [PATCH 003/253] fix(frontend): Splitting logsDetails into lines based on CR and LF. Fixes #9593 (#9594) --- frontend/src/components/tabs/RuntimeNodeDetailsV2.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/components/tabs/RuntimeNodeDetailsV2.tsx b/frontend/src/components/tabs/RuntimeNodeDetailsV2.tsx index 2bcc956992..4b1aa618e8 100644 --- a/frontend/src/components/tabs/RuntimeNodeDetailsV2.tsx +++ b/frontend/src/components/tabs/RuntimeNodeDetailsV2.tsx @@ -202,7 +202,7 @@ function TaskNodeDetail({ )} {!logsBannerMessage && (
- +
)} From f9dda53f89c16374916a8ae8ecee06a5461a0dcb Mon Sep 17 00:00:00 2001 From: Tommy Li Date: Mon, 26 Jun 2023 10:46:14 -0700 Subject: [PATCH 004/253] chore(components): Update KServe component with active owners (#9684) --- components/kserve/OWNERS | 3 +-- components/kserve/README.md | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/kserve/OWNERS b/components/kserve/OWNERS index 39b3c59a1c..41cc4556df 100644 --- a/components/kserve/OWNERS +++ b/components/kserve/OWNERS @@ -1,7 +1,6 @@ approvers: - - animeshsingh - Tomcli + - yhwang reviewers: - - animeshsingh - Tomcli - yhwang diff --git a/components/kserve/README.md b/components/kserve/README.md index 7ada670447..52775aa5c5 100644 --- a/components/kserve/README.md +++ b/components/kserve/README.md @@ -14,7 +14,8 @@ https://raw.githubusercontent.com/kubeflow/pipelines/1.8.1/components/kserve/com Test status: Currently manual tests Owners information: - - Animesh Singh (animeshsingh) - IBM, singhan@us.ibm.com + - Tommy Li (Tomcli) - IBM, tommy.chaoping.li@ibm.com + - Yi-Hong Wang (yhwang) - IBM, yh.wang@ibm.com ## Usage From ea7a5efb97e5aa14dbf58ab55aa2f68b1ddc5941 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 26 Jun 2023 11:27:15 -0700 Subject: [PATCH 005/253] feat(components): Review and update batch_predict_job GCPC docstrings PiperOrigin-RevId: 543494203 --- .../v1/batch_predict_job/component.py | 156 ++++++++---------- 1 file changed, 71 insertions(+), 85 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py index 5d2fdb2c0b..75c86ab906 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py @@ -64,109 +64,95 @@ def model_batch_predict( encryption_spec_key_name: str = '', ): # fmt: off - """Creates a Google Cloud Vertex BatchPredictionJob and waits for it to - complete. + """Creates a Google Cloud Vertex `BatchPredictionJob `_ and waits for it to complete. - For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/create. + For more details, see `BatchPredictionJob.Create `_. Args: project: Project to create the BatchPredictionJob. job_display_name: The user-defined name of this BatchPredictionJob. location: Location for creating the BatchPredictionJob. - If not set, default to us-central1. instances_format: The format in which instances are - given, must be one of the Model's supportedInputStorageFormats. If not - set, default to "jsonl". For more details about this input config, - see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + given, must be one of the `Model `_'s supportedInputStorageFormats. + For more details about this input config, see + `InputConfig `_ predictions_format: The format in which Vertex AI gives the predictions. Must be one of the - Model's supportedOutputStorageFormats. If not set, default to "jsonl". - For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + Model's supportedOutputStorageFormats. + For more details about this output config, see `OutputConfig `_. model: The Model used to get predictions via this job. 
Must share the same ancestor Location. Starting this job has no impact on any existing deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified. + ``unmanaged_container_model`` must be specified. unmanaged_container_model: The unmanaged container model used to get predictions via this job. This should be used for models that are not uploaded to Vertex. Either this or model must be specified. gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction - on. They must match `instances_format`. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + on. They must match ``instances_format``. May contain wildcards. For more + information on wildcards, see `WildcardNames `_. + For more details about this input config, see `InputConfig `_. bigquery_source_input_uri: BigQuery URI to a table, up to 2000 characters long. For example: - `projectId.bqDatasetId.bqTableId` For more details about this input + ``projectId.bqDatasetId.bqTableId`` For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. model_parameters: The parameters that govern the predictions. The schema of the parameters - instance_type (Optional[str]): - The format of the instance that the Model accepts. Vertex AI will - convert compatible - [batch prediction input instance formats][InputConfig.instances_format] - to the specified format. - Supported values are: - ** `object`: Each input is converted to JSON object format. - * For `bigquery`, each row is converted to an object. - * For `jsonl`, each line of the JSONL input must be an object. - * Does not apply to `csv`, `file-list`, `tf-record`, or - `tf-record-gzip`. - ** `array`: Each input is converted to JSON array format. - * For `bigquery`, each row is converted to an array. The order - of columns is determined by the BigQuery column order, unless - [included_fields][] is populated. - [included_fields][] must be populated for specifying field orders. - * For `jsonl`, if each line of the JSONL input is an object, - [included_fields][] must be populated for specifying field orders. - * Does not apply to `csv`, `file-list`, `tf-record`, or - `tf-record-gzip`. + instance_type: The format of the instance that the Model + accepts. Vertex AI will convert compatible + `InstancesFormat `_ + to the specified format. Supported values are: + ``object``: Each input is converted to JSON object format. + * For ``bigquery``, each row is converted to an object. + * For ``jsonl``, each line of the JSONL input must be an object. + * Does not apply to ``csv``, ``file-list``, ``tf-record``, or ``tf-record-gzip``. + ``array``: Each input is converted to JSON array format. + * For ``bigquery``, each row is converted to an array. The order + of columns is determined by the BigQuery column order, unless + `included_fields `_ is populated. + ``included_fields`` must be populated for specifying field orders. + * For ``jsonl``, if each line of the JSONL input is an object, + ``included_fields`` must be populated for specifying field orders. + * Does not apply to `csv`, ``file-list``, ``tf-record``, or + ``tf-record-gzip``. 
If not specified, Vertex AI converts the batch prediction input as follows: - * For `bigquery` and `csv`, the behavior is the same as `array`. The + * For ``bigquery`` and ``csv``, the behavior is the same as ``array`. The order of columns is the same as defined in the file or table, unless - [included_fields][] is populated. - * For `jsonl`, the prediction instance format is determined by + included_fields is populated. + * For ``jsonl``, the prediction instance format is determined by each line of the input. - * For `tf-record`/`tf-record-gzip`, each record will be converted to - an object in the format of `{"b64": }`, where `` is + * For ``tf-record``/``tf-record-gzip``, each record will be converted to + an object in the format of ``{"b64": }``, where ```` is the Base64-encoded string of the content of the record. - * For `file-list`, each file in the list will be converted to an - object in the format of `{"b64": }`, where `` is + * For ``file-list``, each file in the list will be converted to an + object in the format of ``{"b64": }``, where ```` is the Base64-encoded string of the content of the file. - (-- api-linter: core::0140::base64=disabled - aip.dev/not-precedent: Base64 is not for this field. --) - key_field (Optional[str]): - The name of the field that is considered as a key. + key_field: The name of the field that is considered as a key. The values identified by the key field is not included in the transformed instances that is sent to the Model. This is similar to - specifying this name of the field in [excluded_fields][]. In addition, + specifying this name of the field in `excluded_fields `_. In addition, the batch prediction output will not include the instances. Instead the output will only include the value of the key field, in a field named - `key` in the output: - * For `jsonl` output format, the output will have a `key` field - instead of the `instance` field. - * For `csv`/`bigquery` output format, the output will have have a `key` + ``key`` in the output: + * For ``jsonl`` output format, the output will have a ``key`` field + instead of the ``instance`` field. + * For ``csv``/``bigquery`` output format, the output will have have a ``key`` column instead of the instance feature columns. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. - included_fields (Optional[Sequence[str]]): - Fields that will be included in the prediction instance that is + included_fields: Fields that will be included in the prediction instance that is sent to the Model. - If [instance_type][] is `array`, the order of field names in - included_fields also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + If ``instance_type`` is ``array``, the order of field names in + ``included_fields`` also determines the order of the values in the array. + When ``included_fields`` is populated, ``excluded_fields`` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. - excluded_fields (Optional[Sequence[str]]): - Fields that will be excluded in the prediction instance that is + excluded_fields: Fields that will be excluded in the prediction instance that is sent to the Model. Excluded will be attached to the batch prediction output if - [key_field][] is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + key_field is not specified. + When ``excluded_fields`` is populated, ``included_fields`` must be empty. 
The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. - may be specified via the Model's `parameters_schema_uri`. + may be specified via the Model's ``parameters_schema_uri``. gcs_destination_output_uri_prefix: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is @@ -200,7 +186,7 @@ def model_batch_predict( Model's instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status `_ represented as a STRUCT, and containing only ``code`` and ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. @@ -214,53 +200,53 @@ def model_batch_predict( For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `accelerator_count`. Only used if - `machine_type` is set. For more details about the machine spec, see + attached to the machine as per ``accelerator_count``. Only used if + ``machine_type`` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec accelerator_count: The number of accelerators to attach - to the `machine_type`. Only used if `machine_type` is set. For more + to the ``machine_type``. Only used if ``machine_type`` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI - decides starting number, not greater than `max_replica_count`. Only - used if `machine_type` is set. + decides starting number, not greater than ``max_replica_count``. Only + used if ``machine_type`` is set. max_replica_count: The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10. + to. Only used if ``machine_type`` is set. manual_batch_tuning_parameters_batch_size: The number of the records (e.g. instances) of the operation given in each batch to a machine replica. Machine type, and size of a single record should be considered when setting this parameter, higher value speeds up the batch operation's execution, but too high value will result in a whole batch not fitting in a machine's memory, and the whole operation will - fail. The default value is 4. + fail. generate_explanation: Generate explanation along with the batch prediction results. This will cause the batch prediction - output to include explanations based on the `prediction_format`: - - `bigquery`: output includes a column named `explanation`. The value is + output to include explanations based on the ``prediction_format``: - + ``bigquery``: output includes a column named ``explanation``. The value is a struct that conforms to the [aiplatform.gapic.Explanation] object. - - `jsonl`: The JSON objects on each line include an additional entry - keyed `explanation`. The value of the entry is a JSON object that - conforms to the [aiplatform.gapic.Explanation] object. 
- `csv`: + ``jsonl``: The JSON objects on each line include an additional entry + keyed ``explanation``. The value of the entry is a JSON object that + conforms to the [aiplatform.gapic.Explanation] object. - ``csv``: Generating explanations for CSV format is not supported. If this field is set to true, either the Model.explanation_spec or explanation_metadata and explanation_parameters must be populated. explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if - `generate_explanation` is set to `True`. This value overrides the - value of `Model.explanation_metadata`. All fields of - `explanation_metadata` are optional in the request. If a field of the - `explanation_metadata` object is not populated, the corresponding + ``generate_explanation`` is set to `True`. This value overrides the + value of ``Model.explanation_metadata``. All fields of + ``explanation_metadata`` are optional in the request. If a field of the + ``explanation_metadata`` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if - `generate_explanation` is set to `True`. This value overrides the + ``generate_explanation`` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of - `explanation_parameters` are optional in the request. If a field of - the `explanation_parameters` object is not populated, the - corresponding field of the `Model.explanation_parameters` object is + ``explanation_parameters`` are optional in the request. If a field of + the ``explanation_parameters`` object is not populated, the + corresponding field of the ``Model.explanation_parameters`` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. labels: The labels with user-defined metadata to From 35f750703dcf4d1073a90708375459d1491085dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Jun 2023 19:22:14 +0000 Subject: [PATCH 006/253] chore(deps): bump fast-xml-parser and @aws-sdk/credential-providers in /frontend/server (#9592) Bumps [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) to 4.2.4 and updates ancestor dependency [@aws-sdk/credential-providers](https://github.com/aws/aws-sdk-js-v3/tree/HEAD/packages/credential-providers). These dependencies need to be updated together. Updates `fast-xml-parser` from 4.0.11 to 4.2.4 - [Release notes](https://github.com/NaturalIntelligence/fast-xml-parser/releases) - [Changelog](https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/CHANGELOG.md) - [Commits](https://github.com/NaturalIntelligence/fast-xml-parser/compare/v4.0.11...v4.2.4) Updates `@aws-sdk/credential-providers` from 3.241.0 to 3.348.0 - [Release notes](https://github.com/aws/aws-sdk-js-v3/releases) - [Changelog](https://github.com/aws/aws-sdk-js-v3/blob/main/packages/credential-providers/CHANGELOG.md) - [Commits](https://github.com/aws/aws-sdk-js-v3/commits/v3.348.0/packages/credential-providers) --- updated-dependencies: - dependency-name: fast-xml-parser dependency-type: indirect - dependency-name: "@aws-sdk/credential-providers" dependency-type: direct:production ... 
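As an illustration of the batch-predict docstrings revised in the GCPC patch above, the following is a minimal sketch of how that component is commonly wired into a KFP v2 pipeline. It is an editor's example, not part of any patch in this series; it assumes the kfp SDK and google-cloud-pipeline-components v2 are installed, and the project ID, model resource name, bucket paths, and machine type are placeholders rather than values taken from the patches.

from kfp import compiler, dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp

# Placeholder Vertex AI Model resource name (illustrative only).
MODEL_RESOURCE_NAME = 'projects/my-project/locations/us-central1/models/1234567890'


@dsl.pipeline(name='batch-predict-example')
def batch_predict_pipeline():
    # Import an existing Vertex AI Model as a google.VertexModel artifact so it
    # can be passed to the component's `model` input.
    get_model = dsl.importer(
        artifact_uri=f'https://us-central1-aiplatform.googleapis.com/v1/{MODEL_RESOURCE_NAME}',
        artifact_class=artifact_types.VertexModel,
        reimport=False,
        metadata={'resourceName': MODEL_RESOURCE_NAME},
    )

    # Run batch prediction over JSONL instances in GCS and write JSONL
    # predictions back to GCS, using a dedicated machine spec as described in
    # the docstring above.
    ModelBatchPredictOp(
        project='my-project',
        location='us-central1',
        job_display_name='example-batch-predict',
        model=get_model.output,
        instances_format='jsonl',
        gcs_source_uris=['gs://my-bucket/instances/*.jsonl'],
        predictions_format='jsonl',
        gcs_destination_output_uri_prefix='gs://my-bucket/predictions',
        machine_type='n1-standard-4',
        starting_replica_count=1,
        max_replica_count=2,
    )


compiler.Compiler().compile(batch_predict_pipeline, package_path='batch_predict_example.yaml')

Compiling the sketch produces a pipeline spec YAML of the same general form as the test-data pipelines updated elsewhere in this series.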
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- frontend/server/package-lock.json | 1528 +++++++++++++++-------------- frontend/server/package.json | 2 +- 2 files changed, 796 insertions(+), 734 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index 8a63202565..fcddfb4b4d 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -2,10 +2,27 @@ "requires": true, "lockfileVersion": 1, "dependencies": { + "@aws-crypto/crc32": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-3.0.0.tgz", + "integrity": "sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==", + "requires": { + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^1.11.1" + }, + "dependencies": { + "tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==" + } + } + }, "@aws-crypto/ie11-detection": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@aws-crypto/ie11-detection/-/ie11-detection-2.0.2.tgz", - "integrity": "sha512-5XDMQY98gMAf/WRTic5G++jfmS/VLM0rwpiOpaainKi4L0nqWMSB1SzsrEG5rjFZGYN6ZAefO+/Yta2dFM0kMw==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/ie11-detection/-/ie11-detection-3.0.0.tgz", + "integrity": "sha512-341lBBkiY1DfDNKai/wXM3aujNBkXR7tq1URPQDL9wi3AUbI80NR74uF1TXHMm7po1AcnFk8iu2S2IeU/+/A+Q==", "requires": { "tslib": "^1.11.1" }, @@ -18,15 +35,15 @@ } }, "@aws-crypto/sha256-browser": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-browser/-/sha256-browser-2.0.0.tgz", - "integrity": "sha512-rYXOQ8BFOaqMEHJrLHul/25ckWH6GTJtdLSajhlqGMx0PmSueAuvboCuZCTqEKlxR8CQOwRarxYMZZSYlhRA1A==", - "requires": { - "@aws-crypto/ie11-detection": "^2.0.0", - "@aws-crypto/sha256-js": "^2.0.0", - "@aws-crypto/supports-web-crypto": "^2.0.0", - "@aws-crypto/util": "^2.0.0", - "@aws-sdk/types": "^3.1.0", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-browser/-/sha256-browser-3.0.0.tgz", + "integrity": "sha512-8VLmW2B+gjFbU5uMeqtQM6Nj0/F1bro80xQXCW6CQBWgosFWXTx77aeOF5CAIAmbOK64SdMBJdNr6J41yP5mvQ==", + "requires": { + "@aws-crypto/ie11-detection": "^3.0.0", + "@aws-crypto/sha256-js": "^3.0.0", + "@aws-crypto/supports-web-crypto": "^3.0.0", + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", "@aws-sdk/util-locate-window": "^3.0.0", "@aws-sdk/util-utf8-browser": "^3.0.0", "tslib": "^1.11.1" @@ -40,12 +57,12 @@ } }, "@aws-crypto/sha256-js": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-js/-/sha256-js-2.0.0.tgz", - "integrity": "sha512-VZY+mCY4Nmrs5WGfitmNqXzaE873fcIZDu54cbaDaaamsaTOP1DBImV9F4pICc3EHjQXujyE8jig+PFCaew9ig==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-js/-/sha256-js-3.0.0.tgz", + "integrity": "sha512-PnNN7os0+yd1XvXAy23CFOmTbMaDxgxXtTKHybrJ39Y8kGzBATgBFibWJKH6BhytLI/Zyszs87xCOBNyBig6vQ==", "requires": { - "@aws-crypto/util": "^2.0.0", - "@aws-sdk/types": "^3.1.0", + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", "tslib": "^1.11.1" }, "dependencies": { @@ -57,9 +74,9 @@ } }, "@aws-crypto/supports-web-crypto": { - "version": "2.0.2", - "resolved": 
"https://registry.npmjs.org/@aws-crypto/supports-web-crypto/-/supports-web-crypto-2.0.2.tgz", - "integrity": "sha512-6mbSsLHwZ99CTOOswvCRP3C+VCWnzBf+1SnbWxzzJ9lR0mA0JnY2JEAhp8rqmTE0GPFy88rrM27ffgp62oErMQ==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/supports-web-crypto/-/supports-web-crypto-3.0.0.tgz", + "integrity": "sha512-06hBdMwUAb2WFTuGG73LSC0wfPu93xWwo5vL2et9eymgmu3Id5vFAHBbajVWiGhPO37qcsdCap/FqXvJGJWPIg==", "requires": { "tslib": "^1.11.1" }, @@ -72,11 +89,11 @@ } }, "@aws-crypto/util": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-2.0.2.tgz", - "integrity": "sha512-Lgu5v/0e/BcrZ5m/IWqzPUf3UYFTy/PpeED+uc9SWUR1iZQL8XXbGQg10UfllwwBryO3hFF5dizK+78aoXC1eA==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-3.0.0.tgz", + "integrity": "sha512-2OJlpeJpCR48CC8r+uKVChzs9Iungj9wkZrl8Z041DWEWvyIHILYKCPNzJghKsivj+S3mLo6BVc7mBNzdxA46w==", "requires": { - "@aws-sdk/types": "^3.110.0", + "@aws-sdk/types": "^3.222.0", "@aws-sdk/util-utf8-browser": "^3.0.0", "tslib": "^1.11.1" }, @@ -89,594 +106,608 @@ } }, "@aws-sdk/abort-controller": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/abort-controller/-/abort-controller-3.226.0.tgz", - "integrity": "sha512-cJVzr1xxPBd08voknXvR0RLgtZKGKt6WyDpH/BaPCu3rfSqWCDZKzwqe940eqosjmKrxC6pUZNKASIqHOQ8xxQ==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/abort-controller/-/abort-controller-3.347.0.tgz", + "integrity": "sha512-P/2qE6ntYEmYG4Ez535nJWZbXqgbkJx8CMz7ChEuEg3Gp3dvVYEKg+iEUEvlqQ2U5dWP5J3ehw5po9t86IsVPQ==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/client-cognito-identity": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-cognito-identity/-/client-cognito-identity-3.241.0.tgz", - "integrity": "sha512-9X/MwcnSwWfB0ijggFjyBWa4gtlUAyI39eBaVSE0AxMcgLlHKedEK6w5F1RrtvWqb7KyJDsyAysVecU4E9zQQQ==", - "requires": { - "@aws-crypto/sha256-browser": "2.0.0", - "@aws-crypto/sha256-js": "2.0.0", - "@aws-sdk/client-sts": "3.241.0", - "@aws-sdk/config-resolver": "3.234.0", - "@aws-sdk/credential-provider-node": "3.241.0", - "@aws-sdk/fetch-http-handler": "3.226.0", - "@aws-sdk/hash-node": "3.226.0", - "@aws-sdk/invalid-dependency": "3.226.0", - "@aws-sdk/middleware-content-length": "3.226.0", - "@aws-sdk/middleware-endpoint": "3.226.0", - "@aws-sdk/middleware-host-header": "3.226.0", - "@aws-sdk/middleware-logger": "3.226.0", - "@aws-sdk/middleware-recursion-detection": "3.226.0", - "@aws-sdk/middleware-retry": "3.235.0", - "@aws-sdk/middleware-serde": "3.226.0", - "@aws-sdk/middleware-signing": "3.226.0", - "@aws-sdk/middleware-stack": "3.226.0", - "@aws-sdk/middleware-user-agent": "3.226.0", - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/node-http-handler": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/smithy-client": "3.234.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "@aws-sdk/util-base64": 
"3.208.0", - "@aws-sdk/util-body-length-browser": "3.188.0", - "@aws-sdk/util-body-length-node": "3.208.0", - "@aws-sdk/util-defaults-mode-browser": "3.234.0", - "@aws-sdk/util-defaults-mode-node": "3.234.0", - "@aws-sdk/util-endpoints": "3.241.0", - "@aws-sdk/util-retry": "3.229.0", - "@aws-sdk/util-user-agent-browser": "3.226.0", - "@aws-sdk/util-user-agent-node": "3.226.0", - "@aws-sdk/util-utf8-browser": "3.188.0", - "@aws-sdk/util-utf8-node": "3.208.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-cognito-identity/-/client-cognito-identity-3.348.0.tgz", + "integrity": "sha512-1fcJFUQTsAXjkaAn/kn9ty790uHbCpukkuqJ/0QNPFYaa6vu93xx7FnzOvRK4XvaojwZ/C+yxp0fNQ+GjXG0vg==", + "requires": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/client-sts": "3.348.0", + "@aws-sdk/config-resolver": "3.347.0", + "@aws-sdk/credential-provider-node": "3.348.0", + "@aws-sdk/fetch-http-handler": "3.347.0", + "@aws-sdk/hash-node": "3.347.0", + "@aws-sdk/invalid-dependency": "3.347.0", + "@aws-sdk/middleware-content-length": "3.347.0", + "@aws-sdk/middleware-endpoint": "3.347.0", + "@aws-sdk/middleware-host-header": "3.347.0", + "@aws-sdk/middleware-logger": "3.347.0", + "@aws-sdk/middleware-recursion-detection": "3.347.0", + "@aws-sdk/middleware-retry": "3.347.0", + "@aws-sdk/middleware-serde": "3.347.0", + "@aws-sdk/middleware-signing": "3.347.0", + "@aws-sdk/middleware-stack": "3.347.0", + "@aws-sdk/middleware-user-agent": "3.347.0", + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/node-http-handler": "3.348.0", + "@aws-sdk/smithy-client": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/util-base64": "3.310.0", + "@aws-sdk/util-body-length-browser": "3.310.0", + "@aws-sdk/util-body-length-node": "3.310.0", + "@aws-sdk/util-defaults-mode-browser": "3.347.0", + "@aws-sdk/util-defaults-mode-node": "3.347.0", + "@aws-sdk/util-endpoints": "3.347.0", + "@aws-sdk/util-retry": "3.347.0", + "@aws-sdk/util-user-agent-browser": "3.347.0", + "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-utf8": "3.310.0", + "@smithy/protocol-http": "^1.0.1", + "@smithy/types": "^1.0.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/client-sso": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.241.0.tgz", - "integrity": "sha512-Jm4HR+RYAqKMEYZvvWaq0NYUKKonyInOeubObXH4BLXZpmUBSdYCSjjLdNJY3jkQoxbDVPVMIurVNh5zT5SMRw==", - "requires": { - "@aws-crypto/sha256-browser": "2.0.0", - "@aws-crypto/sha256-js": "2.0.0", - "@aws-sdk/config-resolver": "3.234.0", - "@aws-sdk/fetch-http-handler": "3.226.0", - "@aws-sdk/hash-node": "3.226.0", - "@aws-sdk/invalid-dependency": "3.226.0", - "@aws-sdk/middleware-content-length": "3.226.0", - "@aws-sdk/middleware-endpoint": "3.226.0", - "@aws-sdk/middleware-host-header": "3.226.0", - "@aws-sdk/middleware-logger": "3.226.0", - "@aws-sdk/middleware-recursion-detection": "3.226.0", - "@aws-sdk/middleware-retry": "3.235.0", - "@aws-sdk/middleware-serde": "3.226.0", - 
"@aws-sdk/middleware-stack": "3.226.0", - "@aws-sdk/middleware-user-agent": "3.226.0", - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/node-http-handler": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/smithy-client": "3.234.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "@aws-sdk/util-base64": "3.208.0", - "@aws-sdk/util-body-length-browser": "3.188.0", - "@aws-sdk/util-body-length-node": "3.208.0", - "@aws-sdk/util-defaults-mode-browser": "3.234.0", - "@aws-sdk/util-defaults-mode-node": "3.234.0", - "@aws-sdk/util-endpoints": "3.241.0", - "@aws-sdk/util-retry": "3.229.0", - "@aws-sdk/util-user-agent-browser": "3.226.0", - "@aws-sdk/util-user-agent-node": "3.226.0", - "@aws-sdk/util-utf8-browser": "3.188.0", - "@aws-sdk/util-utf8-node": "3.208.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.348.0.tgz", + "integrity": "sha512-5S23gVKBl0fhZ96RD8LdPhMKeh8E5fmebyZxMNZuWliSXz++Q9ZCrwPwQbkks3duPOTcKKobs3IoqP82HoXMvQ==", + "requires": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/config-resolver": "3.347.0", + "@aws-sdk/fetch-http-handler": "3.347.0", + "@aws-sdk/hash-node": "3.347.0", + "@aws-sdk/invalid-dependency": "3.347.0", + "@aws-sdk/middleware-content-length": "3.347.0", + "@aws-sdk/middleware-endpoint": "3.347.0", + "@aws-sdk/middleware-host-header": "3.347.0", + "@aws-sdk/middleware-logger": "3.347.0", + "@aws-sdk/middleware-recursion-detection": "3.347.0", + "@aws-sdk/middleware-retry": "3.347.0", + "@aws-sdk/middleware-serde": "3.347.0", + "@aws-sdk/middleware-stack": "3.347.0", + "@aws-sdk/middleware-user-agent": "3.347.0", + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/node-http-handler": "3.348.0", + "@aws-sdk/smithy-client": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/util-base64": "3.310.0", + "@aws-sdk/util-body-length-browser": "3.310.0", + "@aws-sdk/util-body-length-node": "3.310.0", + "@aws-sdk/util-defaults-mode-browser": "3.347.0", + "@aws-sdk/util-defaults-mode-node": "3.347.0", + "@aws-sdk/util-endpoints": "3.347.0", + "@aws-sdk/util-retry": "3.347.0", + "@aws-sdk/util-user-agent-browser": "3.347.0", + "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-utf8": "3.310.0", + "@smithy/protocol-http": "^1.0.1", + "@smithy/types": "^1.0.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/client-sso-oidc": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.241.0.tgz", - "integrity": "sha512-/Ml2QBGpGfUEeBrPzBZhSTBkHuXFD2EAZEIHGCBH4tKaURDI6/FoGI8P1Rl4BzoFt+II/Cr91Eox6YT9EwChsQ==", - "requires": { - "@aws-crypto/sha256-browser": "2.0.0", - "@aws-crypto/sha256-js": "2.0.0", - "@aws-sdk/config-resolver": "3.234.0", - "@aws-sdk/fetch-http-handler": "3.226.0", - "@aws-sdk/hash-node": "3.226.0", - "@aws-sdk/invalid-dependency": "3.226.0", - "@aws-sdk/middleware-content-length": "3.226.0", - "@aws-sdk/middleware-endpoint": "3.226.0", - "@aws-sdk/middleware-host-header": 
"3.226.0", - "@aws-sdk/middleware-logger": "3.226.0", - "@aws-sdk/middleware-recursion-detection": "3.226.0", - "@aws-sdk/middleware-retry": "3.235.0", - "@aws-sdk/middleware-serde": "3.226.0", - "@aws-sdk/middleware-stack": "3.226.0", - "@aws-sdk/middleware-user-agent": "3.226.0", - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/node-http-handler": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/smithy-client": "3.234.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "@aws-sdk/util-base64": "3.208.0", - "@aws-sdk/util-body-length-browser": "3.188.0", - "@aws-sdk/util-body-length-node": "3.208.0", - "@aws-sdk/util-defaults-mode-browser": "3.234.0", - "@aws-sdk/util-defaults-mode-node": "3.234.0", - "@aws-sdk/util-endpoints": "3.241.0", - "@aws-sdk/util-retry": "3.229.0", - "@aws-sdk/util-user-agent-browser": "3.226.0", - "@aws-sdk/util-user-agent-node": "3.226.0", - "@aws-sdk/util-utf8-browser": "3.188.0", - "@aws-sdk/util-utf8-node": "3.208.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.348.0.tgz", + "integrity": "sha512-tvHpcycx4EALvk38I9rAOdPeHvBDezqIB4lrE7AvnOJljlvCcdQ2gXa9GDrwrM7zuYBIZMBRE/njTMrCwoOdAA==", + "requires": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/config-resolver": "3.347.0", + "@aws-sdk/fetch-http-handler": "3.347.0", + "@aws-sdk/hash-node": "3.347.0", + "@aws-sdk/invalid-dependency": "3.347.0", + "@aws-sdk/middleware-content-length": "3.347.0", + "@aws-sdk/middleware-endpoint": "3.347.0", + "@aws-sdk/middleware-host-header": "3.347.0", + "@aws-sdk/middleware-logger": "3.347.0", + "@aws-sdk/middleware-recursion-detection": "3.347.0", + "@aws-sdk/middleware-retry": "3.347.0", + "@aws-sdk/middleware-serde": "3.347.0", + "@aws-sdk/middleware-stack": "3.347.0", + "@aws-sdk/middleware-user-agent": "3.347.0", + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/node-http-handler": "3.348.0", + "@aws-sdk/smithy-client": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/util-base64": "3.310.0", + "@aws-sdk/util-body-length-browser": "3.310.0", + "@aws-sdk/util-body-length-node": "3.310.0", + "@aws-sdk/util-defaults-mode-browser": "3.347.0", + "@aws-sdk/util-defaults-mode-node": "3.347.0", + "@aws-sdk/util-endpoints": "3.347.0", + "@aws-sdk/util-retry": "3.347.0", + "@aws-sdk/util-user-agent-browser": "3.347.0", + "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-utf8": "3.310.0", + "@smithy/protocol-http": "^1.0.1", + "@smithy/types": "^1.0.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/client-sts": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.241.0.tgz", - "integrity": "sha512-vmlG8cJzRf8skCtTJbA2wBvD2c3NQ5gZryzJvTKDS06KzBzcEpnjlLseuTekcnOiRNekbFUX5hRu5Zj3N2ReLg==", - "requires": { - "@aws-crypto/sha256-browser": "2.0.0", - "@aws-crypto/sha256-js": "2.0.0", - "@aws-sdk/config-resolver": "3.234.0", - "@aws-sdk/credential-provider-node": "3.241.0", - 
"@aws-sdk/fetch-http-handler": "3.226.0", - "@aws-sdk/hash-node": "3.226.0", - "@aws-sdk/invalid-dependency": "3.226.0", - "@aws-sdk/middleware-content-length": "3.226.0", - "@aws-sdk/middleware-endpoint": "3.226.0", - "@aws-sdk/middleware-host-header": "3.226.0", - "@aws-sdk/middleware-logger": "3.226.0", - "@aws-sdk/middleware-recursion-detection": "3.226.0", - "@aws-sdk/middleware-retry": "3.235.0", - "@aws-sdk/middleware-sdk-sts": "3.226.0", - "@aws-sdk/middleware-serde": "3.226.0", - "@aws-sdk/middleware-signing": "3.226.0", - "@aws-sdk/middleware-stack": "3.226.0", - "@aws-sdk/middleware-user-agent": "3.226.0", - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/node-http-handler": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/smithy-client": "3.234.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "@aws-sdk/util-base64": "3.208.0", - "@aws-sdk/util-body-length-browser": "3.188.0", - "@aws-sdk/util-body-length-node": "3.208.0", - "@aws-sdk/util-defaults-mode-browser": "3.234.0", - "@aws-sdk/util-defaults-mode-node": "3.234.0", - "@aws-sdk/util-endpoints": "3.241.0", - "@aws-sdk/util-retry": "3.229.0", - "@aws-sdk/util-user-agent-browser": "3.226.0", - "@aws-sdk/util-user-agent-node": "3.226.0", - "@aws-sdk/util-utf8-browser": "3.188.0", - "@aws-sdk/util-utf8-node": "3.208.0", - "fast-xml-parser": "4.0.11", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.348.0.tgz", + "integrity": "sha512-4iaQlWAOHMEF4xjR/FB/ws3aUjXjJHwbsIcqbdYAxsKijDYYTZYCPc/gM0NE1yi28qlNYNhMzHipe5xTYbU2Eg==", + "requires": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/config-resolver": "3.347.0", + "@aws-sdk/credential-provider-node": "3.348.0", + "@aws-sdk/fetch-http-handler": "3.347.0", + "@aws-sdk/hash-node": "3.347.0", + "@aws-sdk/invalid-dependency": "3.347.0", + "@aws-sdk/middleware-content-length": "3.347.0", + "@aws-sdk/middleware-endpoint": "3.347.0", + "@aws-sdk/middleware-host-header": "3.347.0", + "@aws-sdk/middleware-logger": "3.347.0", + "@aws-sdk/middleware-recursion-detection": "3.347.0", + "@aws-sdk/middleware-retry": "3.347.0", + "@aws-sdk/middleware-sdk-sts": "3.347.0", + "@aws-sdk/middleware-serde": "3.347.0", + "@aws-sdk/middleware-signing": "3.347.0", + "@aws-sdk/middleware-stack": "3.347.0", + "@aws-sdk/middleware-user-agent": "3.347.0", + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/node-http-handler": "3.348.0", + "@aws-sdk/smithy-client": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/util-base64": "3.310.0", + "@aws-sdk/util-body-length-browser": "3.310.0", + "@aws-sdk/util-body-length-node": "3.310.0", + "@aws-sdk/util-defaults-mode-browser": "3.347.0", + "@aws-sdk/util-defaults-mode-node": "3.347.0", + "@aws-sdk/util-endpoints": "3.347.0", + "@aws-sdk/util-retry": "3.347.0", + "@aws-sdk/util-user-agent-browser": "3.347.0", + "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-utf8": "3.310.0", + "@smithy/protocol-http": "^1.0.1", + "@smithy/types": "^1.0.0", + "fast-xml-parser": "4.2.4", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": 
"sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/config-resolver": { - "version": "3.234.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/config-resolver/-/config-resolver-3.234.0.tgz", - "integrity": "sha512-uZxy4wzllfvgCQxVc+Iqhde0NGAnfmV2hWR6ejadJaAFTuYNvQiRg9IqJy3pkyDPqXySiJ8Bom5PoJfgn55J/A==", - "requires": { - "@aws-sdk/signature-v4": "3.226.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-config-provider": "3.208.0", - "@aws-sdk/util-middleware": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/config-resolver/-/config-resolver-3.347.0.tgz", + "integrity": "sha512-2ja+Sf/VnUO7IQ3nKbDQ5aumYKKJUaTm/BuVJ29wNho8wYHfuf7wHZV0pDTkB8RF5SH7IpHap7zpZAj39Iq+EA==", + "requires": { + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-config-provider": "3.310.0", + "@aws-sdk/util-middleware": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-cognito-identity": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-cognito-identity/-/credential-provider-cognito-identity-3.241.0.tgz", - "integrity": "sha512-e2hlXWG9DH93uVe2wHIUrUOrgZTLzCV3gBd10D3/usSzS4FvVVU7OmidnRPYCLLnt3EvnL5b4REOedO1q8hv8g==", + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-cognito-identity/-/credential-provider-cognito-identity-3.348.0.tgz", + "integrity": "sha512-VQQVEP844mAwn5iEIzc/hBOuSzMGBL61sqEGqqgxhe6Sjnd8NfGNlOjV6fOxlUHhOelumqBMXgn6liIZbfcqFQ==", "requires": { - "@aws-sdk/client-cognito-identity": "3.241.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/client-cognito-identity": "3.348.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-env": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.226.0.tgz", - "integrity": "sha512-sd8uK1ojbXxaZXlthzw/VXZwCPUtU3PjObOfr3Evj7MPIM2IH8h29foOlggx939MdLQGboJf9gKvLlvKDWtJRA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.347.0.tgz", + "integrity": "sha512-UnEM+LKGpXKzw/1WvYEQsC6Wj9PupYZdQOE+e2Dgy2dqk/pVFy4WueRtFXYDT2B41ppv3drdXUuKZRIDVqIgNQ==", "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", 
- "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-imds": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-imds/-/credential-provider-imds-3.226.0.tgz", - "integrity": "sha512-//z/COQm2AjYFI1Lb0wKHTQSrvLFTyuKLFQGPJsKS7DPoxGOCKB7hmYerlbl01IDoCxTdyL//TyyPxbZEOQD5Q==", - "requires": { - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-imds/-/credential-provider-imds-3.347.0.tgz", + "integrity": "sha512-7scCy/DCDRLIhlqTxff97LQWDnRwRXji3bxxMg+xWOTTaJe7PWx+etGSbBWaL42vsBHFShQjSLvJryEgoBktpw==", + "requires": { + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-ini": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.241.0.tgz", - "integrity": "sha512-CI+mu6h74Kzmscw35TvNkc/wYHsHPGAwP7humSHoWw53H9mVw21Ggft/dT1iFQQZWQ8BNXkzuXlNo1IlqwMgOA==", - "requires": { - "@aws-sdk/credential-provider-env": "3.226.0", - "@aws-sdk/credential-provider-imds": "3.226.0", - "@aws-sdk/credential-provider-process": "3.226.0", - "@aws-sdk/credential-provider-sso": "3.241.0", - "@aws-sdk/credential-provider-web-identity": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.348.0.tgz", + "integrity": "sha512-0IEH5mH/cz2iLyr/+pSa3sCsQcGADiLSEn6yivsXdfz1zDqBiv+ffDoL0+Pvnp+TKf8sA6OlX8PgoMoEBvBdKw==", + "requires": { + "@aws-sdk/credential-provider-env": "3.347.0", + "@aws-sdk/credential-provider-imds": "3.347.0", + "@aws-sdk/credential-provider-process": "3.347.0", + "@aws-sdk/credential-provider-sso": "3.348.0", + "@aws-sdk/credential-provider-web-identity": "3.347.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": 
"sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-node": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.241.0.tgz", - "integrity": "sha512-08zPQcD5o9brQmzEipWHeHgU85aQcEF8MWLfpeyjO6e1/l7ysQ35NsS+PYtv77nLpGCx/X+ZuW/KXWoRrbw77w==", - "requires": { - "@aws-sdk/credential-provider-env": "3.226.0", - "@aws-sdk/credential-provider-imds": "3.226.0", - "@aws-sdk/credential-provider-ini": "3.241.0", - "@aws-sdk/credential-provider-process": "3.226.0", - "@aws-sdk/credential-provider-sso": "3.241.0", - "@aws-sdk/credential-provider-web-identity": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.348.0.tgz", + "integrity": "sha512-ngRWphm9e36i58KqVi7Z8WOub+k0cSl+JZaAmgfFm0+dsfBG5uheo598OeiwWV0DqlilvaQZFaMVQgG2SX/tHg==", + "requires": { + "@aws-sdk/credential-provider-env": "3.347.0", + "@aws-sdk/credential-provider-imds": "3.347.0", + "@aws-sdk/credential-provider-ini": "3.348.0", + "@aws-sdk/credential-provider-process": "3.347.0", + "@aws-sdk/credential-provider-sso": "3.348.0", + "@aws-sdk/credential-provider-web-identity": "3.347.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-process": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.226.0.tgz", - "integrity": "sha512-iUDMdnrTvbvaCFhWwqyXrhvQ9+ojPqPqXhwZtY1X/Qaz+73S9gXBPJHZaZb2Ke0yKE1Ql3bJbKvmmxC/qLQMng==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.347.0.tgz", + "integrity": "sha512-yl1z4MsaBdXd4GQ2halIvYds23S67kElyOwz7g8kaQ4kHj+UoYWxz3JVW/DGusM6XmQ9/F67utBrUVA0uhQYyw==", "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-sso": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.241.0.tgz", - "integrity": 
"sha512-6Bjd6eEIrVomRTrPrM4dlxusQm+KMJ9hLYKECCpFkwDKIK+pTgZNLRtQdalHyzwneHJPdimrm8cOv1kUQ8hPoA==", - "requires": { - "@aws-sdk/client-sso": "3.241.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/token-providers": "3.241.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.348.0.tgz", + "integrity": "sha512-5cQao705376KgGkLv9xgkQ3T5H7KdNddWuyoH2wDcrHd1BA2Lnrell3Yyh7R6jQeV7uCQE/z0ugUOKhDqNKIqQ==", + "requires": { + "@aws-sdk/client-sso": "3.348.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/token-providers": "3.348.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-provider-web-identity": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.226.0.tgz", - "integrity": "sha512-CCpv847rLB0SFOHz2igvUMFAzeT2fD3YnY4C8jltuJoEkn0ITn1Hlgt13nTJ5BUuvyti2mvyXZHmNzhMIMrIlw==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.347.0.tgz", + "integrity": "sha512-DxoTlVK8lXjS1zVphtz/Ab+jkN/IZor9d6pP2GjJHNoAIIzXfRwwj5C8vr4eTayx/5VJ7GRP91J8GJ2cKly8Qw==", "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/credential-providers": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.241.0.tgz", - "integrity": "sha512-J3Q45t1o35OhUI6gWks7rmosPT+mFWXiaHl2LST509Ovjwx6SFs2PvbGP6n7xqUzxyq5Rk6FzZBwB8ItuAa6Qw==", - "requires": { - "@aws-sdk/client-cognito-identity": "3.241.0", - "@aws-sdk/client-sso": "3.241.0", - "@aws-sdk/client-sts": "3.241.0", - "@aws-sdk/credential-provider-cognito-identity": "3.241.0", - "@aws-sdk/credential-provider-env": "3.226.0", - "@aws-sdk/credential-provider-imds": "3.226.0", - "@aws-sdk/credential-provider-ini": "3.241.0", - "@aws-sdk/credential-provider-node": "3.241.0", - "@aws-sdk/credential-provider-process": "3.226.0", - "@aws-sdk/credential-provider-sso": "3.241.0", - "@aws-sdk/credential-provider-web-identity": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.348.0.tgz", + "integrity": "sha512-lpq1aHjFyExqD/6L8BK0OaROpCJuhnexGrABYljGI6yaLsyHbQpdE2+Y/WaxuRAK9wyP5s+7KNJ1ZK1ktrk5uQ==", + "requires": { + "@aws-sdk/client-cognito-identity": "3.348.0", + "@aws-sdk/client-sso": "3.348.0", + "@aws-sdk/client-sts": "3.348.0", + "@aws-sdk/credential-provider-cognito-identity": "3.348.0", + "@aws-sdk/credential-provider-env": "3.347.0", + "@aws-sdk/credential-provider-imds": "3.347.0", + "@aws-sdk/credential-provider-ini": "3.348.0", + "@aws-sdk/credential-provider-node": "3.348.0", + "@aws-sdk/credential-provider-process": "3.347.0", + "@aws-sdk/credential-provider-sso": "3.348.0", + "@aws-sdk/credential-provider-web-identity": "3.347.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" + } + } + }, + "@aws-sdk/eventstream-codec": { + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/eventstream-codec/-/eventstream-codec-3.347.0.tgz", + "integrity": "sha512-61q+SyspjsaQ4sdgjizMyRgVph2CiW4aAtfpoH69EJFJfTxTR/OqnZ9Jx/3YiYi0ksrvDenJddYodfWWJqD8/w==", + "requires": { + "@aws-crypto/crc32": "3.0.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-hex-encoding": "3.310.0", + "tslib": "^2.5.0" + }, + "dependencies": { + "tslib": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/fetch-http-handler": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/fetch-http-handler/-/fetch-http-handler-3.226.0.tgz", - "integrity": "sha512-JewZPMNEBXfi1xVnRa7pVtK/zgZD8/lQ/YnD8pq79WuMa2cwyhDtr8oqCoqsPW+WJT5ScXoMtuHxN78l8eKWgg==", - "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/querystring-builder": "3.226.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-base64": "3.208.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/fetch-http-handler/-/fetch-http-handler-3.347.0.tgz", + "integrity": "sha512-sQ5P7ivY8//7wdxfA76LT1sF6V2Tyyz1qF6xXf9sihPN5Q1Y65c+SKpMzXyFSPqWZ82+SQQuDliYZouVyS6kQQ==", + "requires": { + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/querystring-builder": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-base64": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/hash-node": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/hash-node/-/hash-node-3.226.0.tgz", - "integrity": 
"sha512-MdlJhJ9/Espwd0+gUXdZRsHuostB2WxEVAszWxobP0FTT9PnicqnfK7ExmW+DUAc0ywxtEbR3e0UND65rlSTVw==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/hash-node/-/hash-node-3.347.0.tgz", + "integrity": "sha512-96+ml/4EaUaVpzBdOLGOxdoXOjkPgkoJp/0i1fxOJEvl8wdAQSwc3IugVK9wZkCxy2DlENtgOe6DfIOhfffm/g==", "requires": { - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-buffer-from": "3.208.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-buffer-from": "3.310.0", + "@aws-sdk/util-utf8": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/invalid-dependency": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/invalid-dependency/-/invalid-dependency-3.226.0.tgz", - "integrity": "sha512-QXOYFmap8g9QzRjumcRCIo2GEZkdCwd7ePQW0OABWPhKHzlJ74vvBxywjU3s39EEBEluWXtZ7Iufg6GxZM4ifw==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/invalid-dependency/-/invalid-dependency-3.347.0.tgz", + "integrity": "sha512-8imQcwLwqZ/wTJXZqzXT9pGLIksTRckhGLZaXT60tiBOPKuerTsus2L59UstLs5LP8TKaVZKFFSsjRIn9dQdmQ==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/is-array-buffer": { - "version": "3.201.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/is-array-buffer/-/is-array-buffer-3.201.0.tgz", - "integrity": "sha512-UPez5qLh3dNgt0DYnPD/q0mVJY84rA17QE26hVNOW3fAji8W2wrwrxdacWOxyXvlxWsVRcKmr+lay1MDqpAMfg==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/is-array-buffer/-/is-array-buffer-3.310.0.tgz", + "integrity": "sha512-urnbcCR+h9NWUnmOtet/s4ghvzsidFmspfhYaHAmSRdy9yDjdjBJMFjjsn85A1ODUktztm+cVncXjQ38WCMjMQ==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-content-length": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-content-length/-/middleware-content-length-3.226.0.tgz", - "integrity": "sha512-ksUzlHJN2JMuyavjA46a4sctvnrnITqt2tbGGWWrAuXY1mel2j+VbgnmJUiwHKUO6bTFBBeft5Vd1TSOb4JmiA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-content-length/-/middleware-content-length-3.347.0.tgz", + "integrity": 
"sha512-i4qtWTDImMaDUtwKQPbaZpXsReiwiBomM1cWymCU4bhz81HL01oIxOxOBuiM+3NlDoCSPr3KI6txZSz/8cqXCQ==", "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-endpoint": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-endpoint/-/middleware-endpoint-3.226.0.tgz", - "integrity": "sha512-EvLFafjtUxTT0AC9p3aBQu1/fjhWdIeK58jIXaNFONfZ3F8QbEYUPuF/SqZvJM6cWfOO9qwYKkRDbCSTYhprIg==", - "requires": { - "@aws-sdk/middleware-serde": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/signature-v4": "3.226.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/url-parser": "3.226.0", - "@aws-sdk/util-config-provider": "3.208.0", - "@aws-sdk/util-middleware": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-endpoint/-/middleware-endpoint-3.347.0.tgz", + "integrity": "sha512-unF0c6dMaUL1ffU+37Ugty43DgMnzPWXr/Jup/8GbK5fzzWT5NQq6dj9KHPubMbWeEjQbmczvhv25JuJdK8gNQ==", + "requires": { + "@aws-sdk/middleware-serde": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/util-middleware": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-host-header": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.226.0.tgz", - "integrity": "sha512-haVkWVh6BUPwKgWwkL6sDvTkcZWvJjv8AgC8jiQuSl8GLZdzHTB8Qhi3IsfFta9HAuoLjxheWBE5Z/L0UrfhLA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.347.0.tgz", + "integrity": "sha512-kpKmR9OvMlnReqp5sKcJkozbj1wmlblbVSbnQAIkzeQj2xD5dnVR3Nn2ogQKxSmU1Fv7dEroBtrruJ1o3fY38A==", "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-logger": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.226.0.tgz", - "integrity": 
"sha512-m9gtLrrYnpN6yckcQ09rV7ExWOLMuq8mMPF/K3DbL/YL0TuILu9i2T1W+JuxSX+K9FMG2HrLAKivE/kMLr55xA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.347.0.tgz", + "integrity": "sha512-NYC+Id5UCkVn+3P1t/YtmHt75uED06vwaKyxDy0UmB2K66PZLVtwWbLpVWrhbroaw1bvUHYcRyQ9NIfnVcXQjA==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-recursion-detection": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.226.0.tgz", - "integrity": "sha512-mwRbdKEUeuNH5TEkyZ5FWxp6bL2UC1WbY+LDv6YjHxmSMKpAoOueEdtU34PqDOLrpXXxIGHDFmjeGeMfktyEcA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.347.0.tgz", + "integrity": "sha512-qfnSvkFKCAMjMHR31NdsT0gv5Sq/ZHTUD4yQsSLpbVQ6iYAS834lrzXt41iyEHt57Y514uG7F/Xfvude3u4icQ==", "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-retry": { - "version": "3.235.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-retry/-/middleware-retry-3.235.0.tgz", - "integrity": "sha512-50WHbJGpD3SNp9763MAlHqIhXil++JdQbKejNpHg7HsJne/ao3ub+fDOfx//mMBjpzBV25BGd5UlfL6blrClSg==", - "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/service-error-classification": "3.229.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-middleware": "3.226.0", - "@aws-sdk/util-retry": "3.229.0", - "tslib": "^2.3.1", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-retry/-/middleware-retry-3.347.0.tgz", + "integrity": "sha512-CpdM+8dCSbX96agy4FCzOfzDmhNnGBM/pxrgIVLm5nkYTLuXp/d7ubpFEUHULr+4hCd5wakHotMt7yO29NFaVw==", + "requires": { + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/service-error-classification": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-middleware": "3.347.0", + "@aws-sdk/util-retry": "3.347.0", + "tslib": "^2.5.0", "uuid": "^8.3.2" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": 
"sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" }, "uuid": { "version": "8.3.2", @@ -686,573 +717,573 @@ } }, "@aws-sdk/middleware-sdk-sts": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-sts/-/middleware-sdk-sts-3.226.0.tgz", - "integrity": "sha512-NN9T/qoSD1kZvAT+VLny3NnlqgylYQcsgV3rvi/8lYzw/G/2s8VS6sm/VTWGGZhx08wZRv20MWzYu3bftcyqUg==", - "requires": { - "@aws-sdk/middleware-signing": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/signature-v4": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-sts/-/middleware-sdk-sts-3.347.0.tgz", + "integrity": "sha512-38LJ0bkIoVF3W97x6Jyyou72YV9Cfbml4OaDEdnrCOo0EssNZM5d7RhjMvQDwww7/3OBY/BzeOcZKfJlkYUXGw==", + "requires": { + "@aws-sdk/middleware-signing": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-serde": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-serde/-/middleware-serde-3.226.0.tgz", - "integrity": "sha512-nPuOOAkSfx9TxzdKFx0X2bDlinOxGrqD7iof926K/AEflxGD1DBdcaDdjlYlPDW2CVE8LV/rAgbYuLxh/E/1VA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-serde/-/middleware-serde-3.347.0.tgz", + "integrity": "sha512-x5Foi7jRbVJXDu9bHfyCbhYDH5pKK+31MmsSJ3k8rY8keXLBxm2XEEg/AIoV9/TUF9EeVvZ7F1/RmMpJnWQsEg==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-signing": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-signing/-/middleware-signing-3.226.0.tgz", - "integrity": "sha512-E6HmtPcl+IjYDDzi1xI2HpCbBq2avNWcjvCriMZWuTAtRVpnA6XDDGW5GY85IfS3A8G8vuWqEVPr8JcYUcjfew==", - "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/signature-v4": "3.226.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-middleware": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-signing/-/middleware-signing-3.347.0.tgz", + "integrity": "sha512-zVBF/4MGKnvhAE/J+oAL/VAehiyv+trs2dqSQXwHou9j8eA8Vm8HS2NdOwpkZQchIxTuwFlqSusDuPEdYFbvGw==", + "requires": { + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/signature-v4": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-middleware": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - 
"version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-stack": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-stack/-/middleware-stack-3.226.0.tgz", - "integrity": "sha512-85wF29LvPvpoed60fZGDYLwv1Zpd/cM0C22WSSFPw1SSJeqO4gtFYyCg2squfT3KI6kF43IIkOCJ+L7GtryPug==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-stack/-/middleware-stack-3.347.0.tgz", + "integrity": "sha512-Izidg4rqtYMcKuvn2UzgEpPLSmyd8ub9+LQ2oIzG3mpIzCBITq7wp40jN1iNkMg+X6KEnX9vdMJIYZsPYMCYuQ==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/middleware-user-agent": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.226.0.tgz", - "integrity": "sha512-N1WnfzCW1Y5yWhVAphf8OPGTe8Df3vmV7/LdsoQfmpkCZgLZeK2o0xITkUQhRj1mbw7yp8tVFLFV3R2lMurdAQ==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.347.0.tgz", + "integrity": "sha512-wJbGN3OE1/daVCrwk49whhIr9E0j1N4gWwN/wi4WuyYIA+5lMUfVp0aGIOvZR+878DxuFz2hQ4XcZVT4K2WvQw==", "requires": { - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-endpoints": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/node-config-provider": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/node-config-provider/-/node-config-provider-3.226.0.tgz", - "integrity": "sha512-B8lQDqiRk7X5izFEUMXmi8CZLOKCTWQJU9HQf3ako+sF0gexo4nHN3jhoRWyLtcgC5S3on/2jxpAcqtm7kuY3w==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/node-config-provider/-/node-config-provider-3.347.0.tgz", + "integrity": "sha512-faU93d3+5uTTUcotGgMXF+sJVFjrKh+ufW+CzYKT4yUHammyaIab/IbTPWy2hIolcEGtuPeVoxXw8TXbkh/tuw==", "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": 
"https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/node-http-handler": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/node-http-handler/-/node-http-handler-3.226.0.tgz", - "integrity": "sha512-xQCddnZNMiPmjr3W7HYM+f5ir4VfxgJh37eqZwX6EZmyItFpNNeVzKUgA920ka1VPz/ZUYB+2OFGiX3LCLkkaA==", - "requires": { - "@aws-sdk/abort-controller": "3.226.0", - "@aws-sdk/protocol-http": "3.226.0", - "@aws-sdk/querystring-builder": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/node-http-handler/-/node-http-handler-3.348.0.tgz", + "integrity": "sha512-wxdgc4tO5F6lN4wHr0CZ4TyIjDW/ORp4SJZdWYNs2L5J7+/SwqgJY2lxRlGi0i7Md+apAdE3sT3ukVQ/9pVfPg==", + "requires": { + "@aws-sdk/abort-controller": "3.347.0", + "@aws-sdk/protocol-http": "3.347.0", + "@aws-sdk/querystring-builder": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/property-provider": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/property-provider/-/property-provider-3.226.0.tgz", - "integrity": "sha512-TsljjG+Sg0LmdgfiAlWohluWKnxB/k8xenjeozZfzOr5bHmNHtdbWv6BtNvD/R83hw7SFXxbJHlD5H4u9p2NFg==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/property-provider/-/property-provider-3.347.0.tgz", + "integrity": "sha512-t3nJ8CYPLKAF2v9nIHOHOlF0CviQbTvbFc2L4a+A+EVd/rM4PzL3+3n8ZJsr0h7f6uD04+b5YRFgKgnaqLXlEg==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/protocol-http": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/protocol-http/-/protocol-http-3.226.0.tgz", - "integrity": "sha512-zWkVqiTA9RXL6y0hhfZc9bcU4DX2NI6Hw9IhQmSPeM59mdbPjJlY4bLlMr5YxywqO3yQ/ylNoAfrEzrDjlOSRg==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/protocol-http/-/protocol-http-3.347.0.tgz", + "integrity": "sha512-2YdBhc02Wvy03YjhGwUxF0UQgrPWEy8Iq75pfS42N+/0B/+eWX1aQgfjFxIpLg7YSjT5eKtYOQGlYd4MFTgj9g==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": 
"sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/querystring-builder": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-builder/-/querystring-builder-3.226.0.tgz", - "integrity": "sha512-LVurypuNeotO4lmirKXRC4NYrZRAyMJXuwO0f2a5ZAUJCjauwYrifKue6yCfU7bls7gut7nfcR6B99WBYpHs3g==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-builder/-/querystring-builder-3.347.0.tgz", + "integrity": "sha512-phtKTe6FXoV02MoPkIVV6owXI8Mwr5IBN3bPoxhcPvJG2AjEmnetSIrhb8kwc4oNhlwfZwH6Jo5ARW/VEWbZtg==", "requires": { - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-uri-escape": "3.201.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-uri-escape": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/querystring-parser": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-parser/-/querystring-parser-3.226.0.tgz", - "integrity": "sha512-FzB+VrQ47KAFxiPt2YXrKZ8AOLZQqGTLCKHzx4bjxGmwgsjV8yIbtJiJhZLMcUQV4LtGeIY9ixIqQhGvnZHE4A==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-parser/-/querystring-parser-3.347.0.tgz", + "integrity": "sha512-5VXOhfZz78T2W7SuXf2avfjKglx1VZgZgp9Zfhrt/Rq+MTu2D+PZc5zmJHhYigD7x83jLSLogpuInQpFMA9LgA==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/service-error-classification": { - "version": "3.229.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/service-error-classification/-/service-error-classification-3.229.0.tgz", - "integrity": "sha512-dnzWWQ0/NoWMUZ5C0DW3dPm0wC1O76Y/SpKbuJzWPkx1EYy6r8p32Ly4D9vUzrKDbRGf48YHIF2kOkBmu21CLg==" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/service-error-classification/-/service-error-classification-3.347.0.tgz", + "integrity": "sha512-xZ3MqSY81Oy2gh5g0fCtooAbahqh9VhsF8vcKjVX8+XPbGC8y+kej82+MsMg4gYL8gRFB9u4hgYbNgIS6JTAvg==" }, "@aws-sdk/shared-ini-file-loader": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/shared-ini-file-loader/-/shared-ini-file-loader-3.226.0.tgz", - "integrity": "sha512-661VQefsARxVyyV2FX9V61V+nNgImk7aN2hYlFKla6BCwZfMng+dEtD0xVGyg1PfRw0qvEv5LQyxMVgHcUSevA==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/shared-ini-file-loader/-/shared-ini-file-loader-3.347.0.tgz", + 
"integrity": "sha512-Xw+zAZQVLb+xMNHChXQ29tzzLqm3AEHsD8JJnlkeFjeMnWQtXdUfOARl5s8NzAppcKQNlVe2gPzjaKjoy2jz1Q==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/signature-v4": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4/-/signature-v4-3.226.0.tgz", - "integrity": "sha512-/R5q5agdPd7HJB68XMzpxrNPk158EHUvkFkuRu5Qf3kkkHebEzWEBlWoVpUe6ss4rP9Tqcue6xPuaftEmhjpYw==", - "requires": { - "@aws-sdk/is-array-buffer": "3.201.0", - "@aws-sdk/types": "3.226.0", - "@aws-sdk/util-hex-encoding": "3.201.0", - "@aws-sdk/util-middleware": "3.226.0", - "@aws-sdk/util-uri-escape": "3.201.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4/-/signature-v4-3.347.0.tgz", + "integrity": "sha512-58Uq1do+VsTHYkP11dTK+DF53fguoNNJL9rHRWhzP+OcYv3/mBMLoS2WPz/x9FO5mBg4ESFsug0I6mXbd36tjw==", + "requires": { + "@aws-sdk/eventstream-codec": "3.347.0", + "@aws-sdk/is-array-buffer": "3.310.0", + "@aws-sdk/types": "3.347.0", + "@aws-sdk/util-hex-encoding": "3.310.0", + "@aws-sdk/util-middleware": "3.347.0", + "@aws-sdk/util-uri-escape": "3.310.0", + "@aws-sdk/util-utf8": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/smithy-client": { - "version": "3.234.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/smithy-client/-/smithy-client-3.234.0.tgz", - "integrity": "sha512-8AtR/k4vsFvjXeQbIzq/Wy7Nbk48Ou0wUEeVYPHWHPSU8QamFWORkOwmKtKMfHAyZvmqiAPeQqHFkq+UJhWyyQ==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/smithy-client/-/smithy-client-3.347.0.tgz", + "integrity": "sha512-PaGTDsJLGK0sTjA6YdYQzILRlPRN3uVFyqeBUkfltXssvUzkm8z2t1lz2H4VyJLAhwnG5ZuZTNEV/2mcWrU7JQ==", "requires": { - "@aws-sdk/middleware-stack": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/middleware-stack": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/token-providers": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.241.0.tgz", - "integrity": 
"sha512-79okvuOS7V559OIL/RalIPG98wzmWxeFOChFnbEjn2pKOyGQ6FJRwLPYZaVRtNdAtnkBNgRpmFq9dX843QxhtQ==", - "requires": { - "@aws-sdk/client-sso-oidc": "3.241.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/shared-ini-file-loader": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.348.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.348.0.tgz", + "integrity": "sha512-nTjoJkUsJUrJTZuqaeMD9PW2//Rdg2HgfDjiyC4jmAXtayWYCi11mqauurMaUHJ3p5qJ8f5xzxm6vBTbrftPag==", + "requires": { + "@aws-sdk/client-sso-oidc": "3.348.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/shared-ini-file-loader": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/types": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.226.0.tgz", - "integrity": "sha512-MmmNHrWeO4man7wpOwrAhXlevqtOV9ZLcH4RhnG5LmRce0RFOApx24HoKENfFCcOyCm5LQBlsXCqi0dZWDWU0A==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.347.0.tgz", + "integrity": "sha512-GkCMy79mdjU9OTIe5KT58fI/6uqdf8UmMdWqVHmFJ+UpEzOci7L/uw4sOXWo7xpPzLs6cJ7s5ouGZW4GRPmHFA==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/url-parser": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/url-parser/-/url-parser-3.226.0.tgz", - "integrity": "sha512-p5RLE0QWyP0OcTOLmFcLdVgUcUEzmEfmdrnOxyNzomcYb0p3vUagA5zfa1HVK2azsQJFBv28GfvMnba9bGhObg==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/url-parser/-/url-parser-3.347.0.tgz", + "integrity": "sha512-lhrnVjxdV7hl+yCnJfDZOaVLSqKjxN20MIOiijRiqaWGLGEAiSqBreMhL89X1WKCifxAs4zZf9YB9SbdziRpAA==", "requires": { - "@aws-sdk/querystring-parser": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/querystring-parser": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-base64": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-base64/-/util-base64-3.208.0.tgz", - "integrity": "sha512-PQniZph5A6N7uuEOQi+1hnMz/FSOK/8kMFyFO+4DgA1dZ5pcKcn5wiFwHkcTb/BsgVqQa3Jx0VHNnvhlS8JyTg==", + "version": "3.310.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/util-base64/-/util-base64-3.310.0.tgz", + "integrity": "sha512-v3+HBKQvqgdzcbL+pFswlx5HQsd9L6ZTlyPVL2LS9nNXnCcR3XgGz9jRskikRUuUvUXtkSG1J88GAOnJ/apTPg==", "requires": { - "@aws-sdk/util-buffer-from": "3.208.0", - "tslib": "^2.3.1" + "@aws-sdk/util-buffer-from": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-body-length-browser": { - "version": "3.188.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-body-length-browser/-/util-body-length-browser-3.188.0.tgz", - "integrity": "sha512-8VpnwFWXhnZ/iRSl9mTf+VKOX9wDE8QtN4bj9pBfxwf90H1X7E8T6NkiZD3k+HubYf2J94e7DbeHs7fuCPW5Qg==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-body-length-browser/-/util-body-length-browser-3.310.0.tgz", + "integrity": "sha512-sxsC3lPBGfpHtNTUoGXMQXLwjmR0zVpx0rSvzTPAuoVILVsp5AU/w5FphNPxD5OVIjNbZv9KsKTuvNTiZjDp9g==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-body-length-node": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-body-length-node/-/util-body-length-node-3.208.0.tgz", - "integrity": "sha512-3zj50e5g7t/MQf53SsuuSf0hEELzMtD8RX8C76f12OSRo2Bca4FLLYHe0TZbxcfQHom8/hOaeZEyTyMogMglqg==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-body-length-node/-/util-body-length-node-3.310.0.tgz", + "integrity": "sha512-2tqGXdyKhyA6w4zz7UPoS8Ip+7sayOg9BwHNidiGm2ikbDxm1YrCfYXvCBdwaJxa4hJfRVz+aL9e+d3GqPI9pQ==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-buffer-from": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-buffer-from/-/util-buffer-from-3.208.0.tgz", - "integrity": "sha512-7L0XUixNEFcLUGPeBF35enCvB9Xl+K6SQsmbrPk1P3mlV9mguWSDQqbOBwY1Ir0OVbD6H/ZOQU7hI/9RtRI0Zw==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-buffer-from/-/util-buffer-from-3.310.0.tgz", + "integrity": "sha512-i6LVeXFtGih5Zs8enLrt+ExXY92QV25jtEnTKHsmlFqFAuL3VBeod6boeMXkN2p9lbSVVQ1sAOOYZOHYbYkntw==", "requires": { - "@aws-sdk/is-array-buffer": "3.201.0", - "tslib": "^2.3.1" + "@aws-sdk/is-array-buffer": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", 
- "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-config-provider": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-config-provider/-/util-config-provider-3.208.0.tgz", - "integrity": "sha512-DSRqwrERUsT34ug+anlMBIFooBEGwM8GejC7q00Y/9IPrQy50KnG5PW2NiTjuLKNi7pdEOlwTSEocJE15eDZIg==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-config-provider/-/util-config-provider-3.310.0.tgz", + "integrity": "sha512-xIBaYo8dwiojCw8vnUcIL4Z5tyfb1v3yjqyJKJWV/dqKUFOOS0U591plmXbM+M/QkXyML3ypon1f8+BoaDExrg==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-defaults-mode-browser": { - "version": "3.234.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-browser/-/util-defaults-mode-browser-3.234.0.tgz", - "integrity": "sha512-IHMKXjTbOD8XMz5+2oCOsVP94BYb9YyjXdns0aAXr2NAo7k2+RCzXQ2DebJXppGda1F6opFutoKwyVSN0cmbMw==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-browser/-/util-defaults-mode-browser-3.347.0.tgz", + "integrity": "sha512-+JHFA4reWnW/nMWwrLKqL2Lm/biw/Dzi/Ix54DAkRZ08C462jMKVnUlzAI+TfxQE3YLm99EIa0G7jiEA+p81Qw==", "requires": { - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", "bowser": "^2.11.0", - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-defaults-mode-node": { - "version": "3.234.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-node/-/util-defaults-mode-node-3.234.0.tgz", - "integrity": "sha512-UGjQ+OjBYYhxFVtUY+jtr0ZZgzZh6OHtYwRhFt8IHewJXFCfZTyfsbX20szBj5y1S4HRIUJ7cwBLIytTqMbI5w==", - "requires": { - "@aws-sdk/config-resolver": "3.234.0", - "@aws-sdk/credential-provider-imds": "3.226.0", - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/property-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-node/-/util-defaults-mode-node-3.347.0.tgz", + "integrity": "sha512-A8BzIVhAAZE5WEukoAN2kYebzTc99ZgncbwOmgCCbvdaYlk5tzguR/s+uoT4G0JgQGol/4hAMuJEl7elNgU6RQ==", + "requires": { + "@aws-sdk/config-resolver": "3.347.0", + "@aws-sdk/credential-provider-imds": "3.347.0", + 
"@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/property-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-endpoints": { - "version": "3.241.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.241.0.tgz", - "integrity": "sha512-jVf8bKrN22Ey0xLmj75sL7EUvm5HFpuOMkXsZkuXycKhCwLBcEUWlvtJYtRjOU1zScPQv9GMJd2QXQglp34iOQ==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.347.0.tgz", + "integrity": "sha512-/WUkirizeNAqwVj0zkcrqdQ9pUm1HY5kU+qy7xTR0OebkuJauglkmSTMD+56L1JPunWqHhlwCMVRaz5eaJdSEQ==", "requires": { - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-hex-encoding": { - "version": "3.201.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-hex-encoding/-/util-hex-encoding-3.201.0.tgz", - "integrity": "sha512-7t1vR1pVxKx0motd3X9rI3m/xNp78p3sHtP5yo4NP4ARpxyJ0fokBomY8ScaH2D/B+U5o9ARxldJUdMqyBlJcA==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-hex-encoding/-/util-hex-encoding-3.310.0.tgz", + "integrity": "sha512-sVN7mcCCDSJ67pI1ZMtk84SKGqyix6/0A1Ab163YKn+lFBQRMKexleZzpYzNGxYzmQS6VanP/cfU7NiLQOaSfA==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-locate-window": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.208.0.tgz", - "integrity": "sha512-iua1A2+P7JJEDHVgvXrRJSvsnzG7stYSGQnBVphIUlemwl6nN5D+QrgbjECtrbxRz8asYFHSzhdhECqN+tFiBg==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.310.0.tgz", + "integrity": "sha512-qo2t/vBTnoXpjKxlsC2e1gBrRm80M3bId27r0BRB2VniSSe7bL1mmzM+/HFtujm0iAxtPM+aLEflLJlJeDPg0w==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", 
+ "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-middleware": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-middleware/-/util-middleware-3.226.0.tgz", - "integrity": "sha512-B96CQnwX4gRvQdaQkdUtqvDPkrptV5+va6FVeJOocU/DbSYMAScLxtR3peMS8cnlOT6nL1Eoa42OI9AfZz1VwQ==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-middleware/-/util-middleware-3.347.0.tgz", + "integrity": "sha512-8owqUA3ePufeYTUvlzdJ7Z0miLorTwx+rNol5lourGQZ9JXsVMo23+yGA7nOlFuXSGkoKpMOtn6S0BT2bcfeiw==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-retry": { - "version": "3.229.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-retry/-/util-retry-3.229.0.tgz", - "integrity": "sha512-0zKTqi0P1inD0LzIMuXRIYYQ/8c1lWMg/cfiqUcIAF1TpatlpZuN7umU0ierpBFud7S+zDgg0oemh+Nj8xliJw==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-retry/-/util-retry-3.347.0.tgz", + "integrity": "sha512-NxnQA0/FHFxriQAeEgBonA43Q9/VPFQa8cfJDuT2A1YZruMasgjcltoZszi1dvoIRWSZsFTW42eY2gdOd0nffQ==", "requires": { - "@aws-sdk/service-error-classification": "3.229.0", - "tslib": "^2.3.1" + "@aws-sdk/service-error-classification": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-uri-escape": { - "version": "3.201.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-uri-escape/-/util-uri-escape-3.201.0.tgz", - "integrity": "sha512-TeTWbGx4LU2c5rx0obHeDFeO9HvwYwQtMh1yniBz00pQb6Qt6YVOETVQikRZ+XRQwEyCg/dA375UplIpiy54mA==", + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-uri-escape/-/util-uri-escape-3.310.0.tgz", + "integrity": "sha512-drzt+aB2qo2LgtDoiy/3sVG8w63cgLkqFIa2NFlGpUgHFWTXkqtbgf4L5QdjRGKWhmZsnqkbtL7vkSWEcYDJ4Q==", "requires": { - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-user-agent-browser": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.226.0.tgz", - "integrity": "sha512-PhBIu2h6sPJPcv2I7ELfFizdl5pNiL4LfxrasMCYXQkJvVnoXztHA1x+CQbXIdtZOIlpjC+6BjDcE0uhnpvfcA==", + "version": "3.347.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.347.0.tgz", + "integrity": "sha512-ydxtsKVtQefgbk1Dku1q7pMkjDYThauG9/8mQkZUAVik55OUZw71Zzr3XO8J8RKvQG8lmhPXuAQ0FKAyycc0RA==", "requires": { - "@aws-sdk/types": "3.226.0", + "@aws-sdk/types": "3.347.0", "bowser": "^2.11.0", - "tslib": "^2.3.1" + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, "@aws-sdk/util-user-agent-node": { - "version": "3.226.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.226.0.tgz", - "integrity": "sha512-othPc5Dz/pkYkxH+nZPhc1Al0HndQT8zHD4e9h+EZ+8lkd8n+IsnLfTS/mSJWrfiC6UlNRVw55cItstmJyMe/A==", + "version": "3.347.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.347.0.tgz", + "integrity": "sha512-6X0b9qGsbD1s80PmbaB6v1/ZtLfSx6fjRX8caM7NN0y/ObuLoX8LhYnW6WlB2f1+xb4EjaCNgpP/zCf98MXosw==", "requires": { - "@aws-sdk/node-config-provider": "3.226.0", - "@aws-sdk/types": "3.226.0", - "tslib": "^2.3.1" + "@aws-sdk/node-config-provider": "3.347.0", + "@aws-sdk/types": "3.347.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, - "@aws-sdk/util-utf8-browser": { - "version": "3.188.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8-browser/-/util-utf8-browser-3.188.0.tgz", - "integrity": "sha512-jt627x0+jE+Ydr9NwkFstg3cUvgWh56qdaqAMDsqgRlKD21md/6G226z/Qxl7lb1VEW2LlmCx43ai/37Qwcj2Q==", + "@aws-sdk/util-utf8": { + "version": "3.310.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8/-/util-utf8-3.310.0.tgz", + "integrity": "sha512-DnLfFT8uCO22uOJc0pt0DsSNau1GTisngBCDw8jQuWT5CqogMJu4b/uXmwEqfj8B3GX6Xsz8zOd6JpRlPftQoA==", "requires": { - "tslib": "^2.3.1" + "@aws-sdk/util-buffer-from": "3.310.0", + "tslib": "^2.5.0" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, - "@aws-sdk/util-utf8-node": { - "version": "3.208.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8-node/-/util-utf8-node-3.208.0.tgz", - "integrity": "sha512-jKY87Acv0yWBdFxx6bveagy5FYjz+dtV8IPT7ay1E2WPWH1czoIdMAkc8tSInK31T6CRnHWkLZ1qYwCbgRfERQ==", + "@aws-sdk/util-utf8-browser": { + "version": "3.259.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8-browser/-/util-utf8-browser-3.259.0.tgz", + "integrity": 
"sha512-UvFa/vR+e19XookZF8RzFZBrw2EUkQWxiBW0yYQAhvk3C+QVGl0H3ouca8LDBlBfQKXwmW3huo/59H8rwb1wJw==", "requires": { - "@aws-sdk/util-buffer-from": "3.208.0", "tslib": "^2.3.1" }, "dependencies": { "tslib": { - "version": "2.4.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.1.tgz", - "integrity": "sha512-tGyy4dAjRIEwI7BzsB0lynWgOpfqjUdq91XXAlIWD2OwKBH7oCl/GZG/HT4BOHrTlPMOASlMQ7veyTqpmRcrNA==" + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" } } }, @@ -2506,6 +2537,37 @@ "type-detect": "4.0.8" } }, + "@smithy/protocol-http": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-1.0.1.tgz", + "integrity": "sha512-9OrEn0WfOVtBNYJUjUAn9AOiJ4lzERCJJ/JeZs8E6yajTGxBaFRxUnNBHiNqoDJVg076hY36UmEnPx7xXrvUSg==", + "requires": { + "@smithy/types": "^1.0.0", + "tslib": "^2.5.0" + }, + "dependencies": { + "tslib": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" + } + } + }, + "@smithy/types": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-1.0.0.tgz", + "integrity": "sha512-kc1m5wPBHQCTixwuaOh9vnak/iJm21DrSf9UK6yDE5S3mQQ4u11pqAUiKWnlrZnYkeLfAI9UEHj9OaMT1v5Umg==", + "requires": { + "tslib": "^2.5.0" + }, + "dependencies": { + "tslib": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": "sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" + } + } + }, "@types/babel__core": { "version": "7.1.7", "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.1.7.tgz", @@ -4408,9 +4470,9 @@ "integrity": "sha512-R9bHCvweUxxwkDwhjav5vxpFvdPGlVngtqmx4pIZfSUhM/Q4NiIUHB456BAf+Q1Nwu3HEZYONtu+Rya+af4jiQ==" }, "fast-xml-parser": { - "version": "4.0.11", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.0.11.tgz", - "integrity": "sha512-4aUg3aNRR/WjQAcpceODG1C3x3lFANXRo8+1biqfieHmg9pyMt7qB4lQV/Ta6sJCTbA5vfD8fnA8S54JATiFUA==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.2.4.tgz", + "integrity": "sha512-fbfMDvgBNIdDJLdLOwacjFAPYt67tr31H9ZhWSm45CDAxvd0I6WTlSOUo7K2P/K5sA5JgMKG64PI3DMcaFdWpQ==", "requires": { "strnum": "^1.0.5" } diff --git a/frontend/server/package.json b/frontend/server/package.json index 7bac63c809..d36e6b2225 100644 --- a/frontend/server/package.json +++ b/frontend/server/package.json @@ -2,7 +2,7 @@ "description": "Frontend webserver package for Kubeflow Pipelines", "main": "server.js", "dependencies": { - "@aws-sdk/credential-providers": "^3.241.0", + "@aws-sdk/credential-providers": "^3.348.0", "@google-cloud/storage": "^2.5.0", "@kubernetes/client-node": "^0.8.2", "axios": ">=0.21.1", From e1708d3792daf9b8e41424f8b7c30052144ce7e6 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Thu, 29 Jun 2023 08:30:03 -0700 Subject: [PATCH 007/253] chore(components): Add automl directory under preview PiperOrigin-RevId: 544364157 --- .../preview/automl/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/__init__.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/__init__.py 
b/components/google-cloud/google_cloud_pipeline_components/preview/automl/__init__.py new file mode 100644 index 0000000000..aa8704bef8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From 7f23bfc88813e54927e46f1b019ea86c0f03ce70 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Thu, 29 Jun 2023 10:09:49 -0700 Subject: [PATCH 008/253] fix(components): Move model eval version.py to _implementation folder PiperOrigin-RevId: 544390055 --- .../model_evaluation/data_sampler/component.py | 4 ++-- .../model_evaluation/dataset_preprocessor/component.py | 4 ++-- .../model_evaluation/evaluated_annotation/component.py | 4 ++-- .../model_evaluation/target_field_data_remover/component.py | 4 ++-- .../{v1 => _implementation}/model_evaluation/version.py | 0 .../preview/model_evaluation/data_bias_component.py | 4 ++-- .../preview/model_evaluation/feature_attribution_component.py | 4 ++-- .../preview/model_evaluation/model_bias_component.py | 4 ++-- .../v1/model_evaluation/classification_component.py | 4 ++-- .../v1/model_evaluation/forecasting_component.py | 4 ++-- .../v1/model_evaluation/regression_component.py | 4 ++-- 11 files changed, 20 insertions(+), 20 deletions(-) rename components/google-cloud/google_cloud_pipeline_components/{v1 => _implementation}/model_evaluation/version.py (100%) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py index 0ec8555d56..7634caa091 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import ContainerSpec from kfp.dsl import OutputPath @@ -86,7 +86,7 @@ def evaluation_data_sampler( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py index a4a47e28f5..6d15b53003 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import VertexDataset -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp.dsl import container_component from kfp.dsl import ContainerSpec from kfp.dsl import IfPresentPlaceholder @@ -107,7 +107,7 @@ def dataset_preprocessor_error_analysis( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=['python3', '/main.py'], args=[ '--task', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/evaluated_annotation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/evaluated_annotation/component.py index 8eccb13aa4..7dedf79c21 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/evaluated_annotation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/evaluated_annotation/component.py @@ -15,7 +15,7 @@ from typing import Optional -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp import dsl from kfp.dsl import Artifact from kfp.dsl import Input @@ -84,7 +84,7 @@ def evaluated_annotation( """ # fmt: on return dsl.ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=['python', '/main.py'], args=[ '--task', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py index cbfcfef14d..0673671a79 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import ContainerSpec from kfp.dsl import OutputPath @@ -89,7 +89,7 @@ def target_field_data_remover( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/version.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py similarity index 100% rename from components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/version.py rename to components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py index b2bcf8991a..9ddad08757 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import VertexDataset -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp.dsl import Artifact from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -100,7 +100,7 @@ def detect_data_bias( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py index d9e72f7fd3..4510ff8abf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -107,7 +107,7 @@ def feature_attribution( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py index 2295ee1480..513cc7363d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -105,7 +105,7 @@ def detect_model_bias( """ # fmt: on return ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py index 1c5f24ba35..9f865cbb7b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics from google_cloud_pipeline_components.types.artifact_types import VertexModel -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp import dsl from kfp.dsl import container_component @@ -172,7 +172,7 @@ def model_evaluation_classification( """ # fmt: on return dsl.ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py index 6a32a60586..66cfac91aa 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ForecastingMetrics from google_cloud_pipeline_components.types.artifact_types import VertexModel -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp import dsl from kfp.dsl import container_component @@ -137,7 +137,7 @@ def model_evaluation_forecasting( """ # fmt: off return dsl.ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py index 8cc52948bf..13a69ef64f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.types.artifact_types import VertexModel -from google_cloud_pipeline_components.v1.model_evaluation.version import EVAL_IMAGE_TAG from kfp import dsl from kfp.dsl import container_component @@ -126,7 +126,7 @@ def model_evaluation_regression( """ # fmt: on return dsl.ContainerSpec( - image=EVAL_IMAGE_TAG, + image=version.EVAL_IMAGE_TAG, command=[ 'python3', '/main.py', From a634eef3ec541ee64eb0220d5db12b82f682479e Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 30 Jun 2023 16:36:50 -0700 Subject: [PATCH 009/253] feat(components): define new GCPC Model Eval component for LLM Text Generation PiperOrigin-RevId: 544776887 --- .../model_evaluation/__init__.py | 2 + .../llm_evaluation/__init__.py | 14 ++ .../llm_evaluation/component.py | 145 ++++++++++++++++++ .../_implementation/model_evaluation/utils.py | 86 +++++++++++ .../model_evaluation/version.py | 11 +- 5 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 11bc746456..798f3596bc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -22,6 +22,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as ModelEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -31,6 +32,7 @@ 'ErrorAnalysisAnnotationOp', 'EvaluatedAnnotationOp', 'FeatureExtractorOp', + 'ModelEvaluationTextGenerationOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', 'TargetFieldDataRemoverOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/__init__.py new file mode 100644 index 0000000000..07f468a2ae --- /dev/null +++ 
b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Evaluation Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py new file mode 100644 index 0000000000..203bbf00c4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -0,0 +1,145 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Text Generation LLM Evaluation component.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp import dsl +from kfp.dsl import container_component + + +@container_component +def model_evaluation_text_generation( + gcp_resources: dsl.OutputPath(str), + evaluation_metrics: dsl.Output[dsl.Metrics], + project: str, + location: str = 'us-central1', + evaluation_task: str = 'text-generation', + target_field_name: str = 'instance.ground_truth', + prediction_field_name: str = 'predictions.content', + predictions_format: str = 'jsonl', + joined_predictions_gcs_source: str = '', + predictions_gcs_source: str = '', + ground_truth_gcs_source: str = '', + display_name: str = 'model-evaluation-text-generation', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + enable_web_access: bool = True, + network: str = '', + reserved_ip_ranges: list = [], + encryption_spec_key_name: str = '', +): + """Computes evaluation metrics of a text generation model. + + Supports evaluating large language models performing the following generative + tasks: + `summarization`,`question-answering`,`text-generation` + + Args: + project (str): Required. Project to run the component. + location (Optional[str]): Location for running the component. If not set, + defaulted to `us-central1`. + evaluation_task (Optional[str]): The task that the large language model + will be evaluated on. 
The evaluation component computes a set of metrics + relevant to that specific task. Currently supported tasks are: + `summarization`,`question-answering`,`text-generation`. + target_field_name (Optional[str]): The full name path of the features + target field in the predictions file. Formatted to be able to find + nested columns, delimited by `.`. Alternatively referred to as the + ground truth (or ground_truth_column) field. If not set, defaulted to + `inputs.ground_truth`. + prediction_field_name (Optional[str]): The full name path of the + prediction field in the prediction file. Formatted to be able to find + nested columns, delimited by `.`. If not set, defaulted to + `predictions.content`. + predictions_format (Optional[str]): The file format for the LLM Batch + Prediction results. `jsonl` is currently the only allowed format. If not + set, defaulted to `jsonl`. + joined_predictions_gcs_source (Optional[str]): A storage URI pointing + toward a GCS directory or a GCS file with joined prediction & ground + truth files to be used for this evaluation. + predictions_gcs_source (Optional[str]): A storage URI pointing toward a + GCS directory with only prediction files to be used for this evaluation. + ground_truth_gcs_source (Optional[str]): A storage URI pointing toward a + GCS directory with only ground truth files to be used for this + evaluation. + display_name (Optional[str]): The name of the Evaluation job. + machine_type (Optional[str]): The machine type of this custom job. If not + set, defaulted to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account (Optional[str]): Sets the default service account for + workload run-as account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + enable_web_access (Optional[bool]): Whether you want Vertex AI to enable + [interactive shell + access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) + to training containers. If set to `true`, you can access interactive + shells at the URIs given by [CustomJob.web_access_uris][]. + network (Optional[str]): The full name of the Compute Engine network to + which the job should be peered. For example, + projects/12345/global/networks/myVPC. Format is of the form + projects/{project}/global/networks/{network}. Where {project} is a + project number, as in 12345, and {network} is a network name. Private + services access must already be configured for the network. If left + unspecified, the job is not peered with any network. + reserved_ip_ranges (Optional[Sequence[str]]): A list of names for the + reserved ip ranges under the VPC network that can be used for this job. + If set, we will deploy the job within the provided ip ranges. Otherwise, + the job will be deployed to any ip ranges under the provided VPC + network. + encryption_spec_key_name (Optional[str]): Customer-managed encryption key + options for the CustomJob. If this is set, then all resources created by + the CustomJob will be encrypted with the provided encryption key. + + Returns: + evaluation_metrics (system.Metrics): + A Metrics artifact representing the language model evaluation metrics. 
+ gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=version.LLM_EVAL_IMAGE_TAG, + args=[ + f'--evaluation_task={evaluation_task}', + f'--target_field_name={target_field_name}', + f'--prediction_field_name={prediction_field_name}', + f'--predictions_format={predictions_format}', + f'--joined_predictions_gcs_source={joined_predictions_gcs_source}', + f'--predictions_gcs_source={predictions_gcs_source}', + f'--ground_truth_gcs_source={ground_truth_gcs_source}', + f'--evaluation_metrics_output_path={evaluation_metrics.path}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + reserved_ip_ranges=reserved_ip_ranges, + enable_web_access=enable_web_access, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py new file mode 100644 index 0000000000..d1cbcb32d4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py @@ -0,0 +1,86 @@ +"""Utility functions used to create custom Kubeflow components.""" + +from typing import Any + +from google_cloud_pipeline_components import _image + + +def build_custom_job_payload( + *, + display_name: str, + image_uri: str, + args: list[str], + machine_type: str = 'n1-standard-4', + service_account: str = '', + network: str = '', + reserved_ip_ranges: list[str] = [], + enable_web_access: bool = False, + encryption_spec_key_name: str = '', + accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED', + accelerator_count: int = 0, +) -> dict[str, Any]: + """Generates payload for a CustomJob in a Sec4 horizontal compliant way. + + Args: + display_name: CustomJob display name. Can contain up to 128 UTF-8 + characters. + machine_type: The type of the machine. See the list of machine types + supported for custom training: + https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types + accelerator_type: The type of accelerator(s) that may be attached to the + machine as per acceleratorCount. + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType + accelerator_count: The number of accelerators to attach to the machine. + image_uri: Docker image URI to use for the CustomJob. + args: Arguments to pass to the Docker image. + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should + be peered. For example, projects/12345/global/networks/myVPC. 
Format is of + the form projects/{project}/global/networks/{network}. Where {project} is + a project number, as in 12345, and {network} is a network name. Private + services access must already be configured for the network. If left + unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC + network that can be used for this job. If set, we will deploy the job + within the provided ip ranges. Otherwise, the job will be deployed to any + ip ranges under the provided VPC network. + enable_web_access: Whether you want Vertex AI to enable [interactive shell + access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) + to training containers. If set to `true`, you can access interactive + shells at the URIs given by [CustomJob.web_access_uris][]. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + CustomJob payload dictionary. + """ + payload = { + 'display_name': str(display_name), + 'job_spec': { + 'worker_pool_specs': [{ + 'replica_count': '1', + 'machine_spec': { + 'machine_type': str(machine_type), + 'accelerator_type': str(accelerator_type), + 'accelerator_count': int(accelerator_count), + }, + 'container_spec': { + 'image_uri': image_uri, + 'args': args, + }, + }], + 'service_account': str(service_account), + 'network': str(network), + 'reserved_ip_ranges': reserved_ip_ranges, + 'enable_web_access': bool(enable_web_access), + }, + 'encryption_spec': {'kms_key_name': str(encryption_spec_key_name)}, + } + return payload diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py index 0880702fba..3c40e9ddeb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py @@ -13,7 +13,12 @@ # limitations under the License. """Version constants for model evaluation components.""" -EVAL_VERSION = 'v0.9.2' +_EVAL_VERSION = 'v0.9.2' +_LLM_EVAL_VERSION = 'v0.1' -EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/model-evaluation' -EVAL_IMAGE_TAG = f'{EVAL_IMAGE_NAME}:{EVAL_VERSION}' +_EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/model-evaluation' +_LLM_EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/llm-model-evaluation' + + +EVAL_IMAGE_TAG = f'{_EVAL_IMAGE_NAME}:{_EVAL_VERSION}' +LLM_EVAL_IMAGE_TAG = f'{_LLM_EVAL_IMAGE_NAME}:{_LLM_EVAL_VERSION}' From 4a57ab69ddc0abf08daa73473646492bad7ab38d Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Wed, 5 Jul 2023 09:55:42 -0700 Subject: [PATCH 010/253] chore(components): Move openvino components and samples to the right locations. 
(#9498) --- .../contrib}/openvino/model_convert/README.md | 0 .../openvino/model_convert/containers/Dockerfile | 0 .../model_convert/containers/convert_model.py | 0 .../contrib}/openvino/ovms-deployer/README.md | 0 .../openvino/ovms-deployer/containers/Dockerfile | 0 .../ovms-deployer/containers/apply_template.py | 0 .../openvino/ovms-deployer/containers/classes.py | 0 .../openvino/ovms-deployer/containers/deploy.sh | 0 .../openvino/ovms-deployer/containers/evaluate.py | 0 .../openvino/ovms-deployer/containers/ovms.j2 | 0 .../ovms-deployer/containers/requirements.txt | 0 .../contrib}/openvino/predict/README.md | 0 .../contrib}/openvino/predict/containers/Dockerfile | 0 .../contrib}/openvino/predict/containers/classes.py | 0 .../contrib}/openvino/predict/containers/predict.py | 0 .../openvino/predict/containers/requirements.txt | 0 .../contrib}/openvino/tf-slim/README.md | 0 .../contrib}/openvino/tf-slim/containers/Dockerfile | 0 .../openvino/tf-slim/containers/slim_model.py | 0 .../contrib}/openvino/deployer/README.md | 0 .../contrib}/openvino/deployer/component.yaml | 0 .../contrib}/openvino/deployer/deployer.png | Bin .../contrib}/openvino/deployer/deployer.py | 0 .../contrib}/openvino/model_optimizer/README.md | 0 .../model_optimizer/convert_model_pipeline.py | 0 .../contrib}/openvino/predict/README.md | 0 .../contrib}/openvino/predict/numpy_predict.py | 0 .../contrib}/openvino/tf-slim/README.md | 0 .../contrib}/openvino/tf-slim/demo_pipeline1.png | Bin .../contrib}/openvino/tf-slim/demo_pipeline2.png | Bin .../contrib}/openvino/tf-slim/demo_pipeline3.png | Bin .../contrib}/openvino/tf-slim/tf-slim.py | 0 32 files changed, 0 insertions(+), 0 deletions(-) rename {contrib/components => components/contrib}/openvino/model_convert/README.md (100%) rename {contrib/components => components/contrib}/openvino/model_convert/containers/Dockerfile (100%) rename {contrib/components => components/contrib}/openvino/model_convert/containers/convert_model.py (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/README.md (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/Dockerfile (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/apply_template.py (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/classes.py (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/deploy.sh (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/evaluate.py (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/ovms.j2 (100%) rename {contrib/components => components/contrib}/openvino/ovms-deployer/containers/requirements.txt (100%) rename {contrib/components => components/contrib}/openvino/predict/README.md (100%) rename {contrib/components => components/contrib}/openvino/predict/containers/Dockerfile (100%) rename {contrib/components => components/contrib}/openvino/predict/containers/classes.py (100%) rename {contrib/components => components/contrib}/openvino/predict/containers/predict.py (100%) rename {contrib/components => components/contrib}/openvino/predict/containers/requirements.txt (100%) rename {contrib/components => components/contrib}/openvino/tf-slim/README.md (100%) rename {contrib/components => components/contrib}/openvino/tf-slim/containers/Dockerfile (100%) rename {contrib/components => components/contrib}/openvino/tf-slim/containers/slim_model.py (100%) 
rename {contrib/samples => samples/contrib}/openvino/deployer/README.md (100%) rename {contrib/samples => samples/contrib}/openvino/deployer/component.yaml (100%) rename {contrib/samples => samples/contrib}/openvino/deployer/deployer.png (100%) rename {contrib/samples => samples/contrib}/openvino/deployer/deployer.py (100%) rename {contrib/samples => samples/contrib}/openvino/model_optimizer/README.md (100%) rename {contrib/samples => samples/contrib}/openvino/model_optimizer/convert_model_pipeline.py (100%) rename {contrib/samples => samples/contrib}/openvino/predict/README.md (100%) rename {contrib/samples => samples/contrib}/openvino/predict/numpy_predict.py (100%) rename {contrib/samples => samples/contrib}/openvino/tf-slim/README.md (100%) rename {contrib/samples => samples/contrib}/openvino/tf-slim/demo_pipeline1.png (100%) rename {contrib/samples => samples/contrib}/openvino/tf-slim/demo_pipeline2.png (100%) rename {contrib/samples => samples/contrib}/openvino/tf-slim/demo_pipeline3.png (100%) rename {contrib/samples => samples/contrib}/openvino/tf-slim/tf-slim.py (100%) diff --git a/contrib/components/openvino/model_convert/README.md b/components/contrib/openvino/model_convert/README.md similarity index 100% rename from contrib/components/openvino/model_convert/README.md rename to components/contrib/openvino/model_convert/README.md diff --git a/contrib/components/openvino/model_convert/containers/Dockerfile b/components/contrib/openvino/model_convert/containers/Dockerfile similarity index 100% rename from contrib/components/openvino/model_convert/containers/Dockerfile rename to components/contrib/openvino/model_convert/containers/Dockerfile diff --git a/contrib/components/openvino/model_convert/containers/convert_model.py b/components/contrib/openvino/model_convert/containers/convert_model.py similarity index 100% rename from contrib/components/openvino/model_convert/containers/convert_model.py rename to components/contrib/openvino/model_convert/containers/convert_model.py diff --git a/contrib/components/openvino/ovms-deployer/README.md b/components/contrib/openvino/ovms-deployer/README.md similarity index 100% rename from contrib/components/openvino/ovms-deployer/README.md rename to components/contrib/openvino/ovms-deployer/README.md diff --git a/contrib/components/openvino/ovms-deployer/containers/Dockerfile b/components/contrib/openvino/ovms-deployer/containers/Dockerfile similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/Dockerfile rename to components/contrib/openvino/ovms-deployer/containers/Dockerfile diff --git a/contrib/components/openvino/ovms-deployer/containers/apply_template.py b/components/contrib/openvino/ovms-deployer/containers/apply_template.py similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/apply_template.py rename to components/contrib/openvino/ovms-deployer/containers/apply_template.py diff --git a/contrib/components/openvino/ovms-deployer/containers/classes.py b/components/contrib/openvino/ovms-deployer/containers/classes.py similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/classes.py rename to components/contrib/openvino/ovms-deployer/containers/classes.py diff --git a/contrib/components/openvino/ovms-deployer/containers/deploy.sh b/components/contrib/openvino/ovms-deployer/containers/deploy.sh similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/deploy.sh rename to 
components/contrib/openvino/ovms-deployer/containers/deploy.sh diff --git a/contrib/components/openvino/ovms-deployer/containers/evaluate.py b/components/contrib/openvino/ovms-deployer/containers/evaluate.py similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/evaluate.py rename to components/contrib/openvino/ovms-deployer/containers/evaluate.py diff --git a/contrib/components/openvino/ovms-deployer/containers/ovms.j2 b/components/contrib/openvino/ovms-deployer/containers/ovms.j2 similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/ovms.j2 rename to components/contrib/openvino/ovms-deployer/containers/ovms.j2 diff --git a/contrib/components/openvino/ovms-deployer/containers/requirements.txt b/components/contrib/openvino/ovms-deployer/containers/requirements.txt similarity index 100% rename from contrib/components/openvino/ovms-deployer/containers/requirements.txt rename to components/contrib/openvino/ovms-deployer/containers/requirements.txt diff --git a/contrib/components/openvino/predict/README.md b/components/contrib/openvino/predict/README.md similarity index 100% rename from contrib/components/openvino/predict/README.md rename to components/contrib/openvino/predict/README.md diff --git a/contrib/components/openvino/predict/containers/Dockerfile b/components/contrib/openvino/predict/containers/Dockerfile similarity index 100% rename from contrib/components/openvino/predict/containers/Dockerfile rename to components/contrib/openvino/predict/containers/Dockerfile diff --git a/contrib/components/openvino/predict/containers/classes.py b/components/contrib/openvino/predict/containers/classes.py similarity index 100% rename from contrib/components/openvino/predict/containers/classes.py rename to components/contrib/openvino/predict/containers/classes.py diff --git a/contrib/components/openvino/predict/containers/predict.py b/components/contrib/openvino/predict/containers/predict.py similarity index 100% rename from contrib/components/openvino/predict/containers/predict.py rename to components/contrib/openvino/predict/containers/predict.py diff --git a/contrib/components/openvino/predict/containers/requirements.txt b/components/contrib/openvino/predict/containers/requirements.txt similarity index 100% rename from contrib/components/openvino/predict/containers/requirements.txt rename to components/contrib/openvino/predict/containers/requirements.txt diff --git a/contrib/components/openvino/tf-slim/README.md b/components/contrib/openvino/tf-slim/README.md similarity index 100% rename from contrib/components/openvino/tf-slim/README.md rename to components/contrib/openvino/tf-slim/README.md diff --git a/contrib/components/openvino/tf-slim/containers/Dockerfile b/components/contrib/openvino/tf-slim/containers/Dockerfile similarity index 100% rename from contrib/components/openvino/tf-slim/containers/Dockerfile rename to components/contrib/openvino/tf-slim/containers/Dockerfile diff --git a/contrib/components/openvino/tf-slim/containers/slim_model.py b/components/contrib/openvino/tf-slim/containers/slim_model.py similarity index 100% rename from contrib/components/openvino/tf-slim/containers/slim_model.py rename to components/contrib/openvino/tf-slim/containers/slim_model.py diff --git a/contrib/samples/openvino/deployer/README.md b/samples/contrib/openvino/deployer/README.md similarity index 100% rename from contrib/samples/openvino/deployer/README.md rename to samples/contrib/openvino/deployer/README.md diff --git 
a/contrib/samples/openvino/deployer/component.yaml b/samples/contrib/openvino/deployer/component.yaml similarity index 100% rename from contrib/samples/openvino/deployer/component.yaml rename to samples/contrib/openvino/deployer/component.yaml diff --git a/contrib/samples/openvino/deployer/deployer.png b/samples/contrib/openvino/deployer/deployer.png similarity index 100% rename from contrib/samples/openvino/deployer/deployer.png rename to samples/contrib/openvino/deployer/deployer.png diff --git a/contrib/samples/openvino/deployer/deployer.py b/samples/contrib/openvino/deployer/deployer.py similarity index 100% rename from contrib/samples/openvino/deployer/deployer.py rename to samples/contrib/openvino/deployer/deployer.py diff --git a/contrib/samples/openvino/model_optimizer/README.md b/samples/contrib/openvino/model_optimizer/README.md similarity index 100% rename from contrib/samples/openvino/model_optimizer/README.md rename to samples/contrib/openvino/model_optimizer/README.md diff --git a/contrib/samples/openvino/model_optimizer/convert_model_pipeline.py b/samples/contrib/openvino/model_optimizer/convert_model_pipeline.py similarity index 100% rename from contrib/samples/openvino/model_optimizer/convert_model_pipeline.py rename to samples/contrib/openvino/model_optimizer/convert_model_pipeline.py diff --git a/contrib/samples/openvino/predict/README.md b/samples/contrib/openvino/predict/README.md similarity index 100% rename from contrib/samples/openvino/predict/README.md rename to samples/contrib/openvino/predict/README.md diff --git a/contrib/samples/openvino/predict/numpy_predict.py b/samples/contrib/openvino/predict/numpy_predict.py similarity index 100% rename from contrib/samples/openvino/predict/numpy_predict.py rename to samples/contrib/openvino/predict/numpy_predict.py diff --git a/contrib/samples/openvino/tf-slim/README.md b/samples/contrib/openvino/tf-slim/README.md similarity index 100% rename from contrib/samples/openvino/tf-slim/README.md rename to samples/contrib/openvino/tf-slim/README.md diff --git a/contrib/samples/openvino/tf-slim/demo_pipeline1.png b/samples/contrib/openvino/tf-slim/demo_pipeline1.png similarity index 100% rename from contrib/samples/openvino/tf-slim/demo_pipeline1.png rename to samples/contrib/openvino/tf-slim/demo_pipeline1.png diff --git a/contrib/samples/openvino/tf-slim/demo_pipeline2.png b/samples/contrib/openvino/tf-slim/demo_pipeline2.png similarity index 100% rename from contrib/samples/openvino/tf-slim/demo_pipeline2.png rename to samples/contrib/openvino/tf-slim/demo_pipeline2.png diff --git a/contrib/samples/openvino/tf-slim/demo_pipeline3.png b/samples/contrib/openvino/tf-slim/demo_pipeline3.png similarity index 100% rename from contrib/samples/openvino/tf-slim/demo_pipeline3.png rename to samples/contrib/openvino/tf-slim/demo_pipeline3.png diff --git a/contrib/samples/openvino/tf-slim/tf-slim.py b/samples/contrib/openvino/tf-slim/tf-slim.py similarity index 100% rename from contrib/samples/openvino/tf-slim/tf-slim.py rename to samples/contrib/openvino/tf-slim/tf-slim.py From 95901c88302c61e6cdc33ddd2cd96ab65663e881 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 5 Jul 2023 13:32:05 -0700 Subject: [PATCH 011/253] feat(components): Add helper functions to create slice_specs and bias_configs PiperOrigin-RevId: 545771527 --- .../preview/model_evaluation/utils.py | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py diff 
--git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py new file mode 100644 index 0000000000..8157976ebe --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py @@ -0,0 +1,184 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Dict, List, Optional, Tuple, Union + +from google.cloud.aiplatform_v1.types.model_evaluation_slice import ModelEvaluationSlice +from google.protobuf import json_format +from google.protobuf import wrappers_pb2 + + +def create_slice_specs_list( + list_of_feature_and_value: List[ + Dict[str, Union[float, int, str, List[float], bool]] + ] +) -> List[ModelEvaluationSlice.Slice.SliceSpec]: + """Creates a list of ModelEvaluationSlice.Slice.SliceSpec from a list of dictionary inputs. + + Args: + list_of_feature_and_value: A list of feature_and_value. Each + feature_and_value is a dictionary of feature names to values. The feature + value can be a float, int, or str for + ModelEvaluationSlice.Slice.SliceSpec.Value; a bool for `all_values` or a + list for ModelEvaluationSlice.Slice.SliceSpec.Range. + + Returns: + A list of ModelEvaluationSlice.Slice.SliceSpec proto. + + Raises: + ValueError: if the format of a feature's value is invalid. + """ + slice_specs_list = [] + for feature_and_value in list_of_feature_and_value: + configs = {} + for feature, value in feature_and_value.items(): + if isinstance(value, bool): + # Bool must be checked first, bool is a child of int in Python. + configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + all_values=wrappers_pb2.BoolValue(value=value) + ) + elif isinstance(value, int) or isinstance(value, float): + configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value( + float_value=float(value) + ) + ) + elif isinstance(value, str): + configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value(string_value=value) + ) + elif isinstance(value, list): + configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + range=ModelEvaluationSlice.Slice.SliceSpec.Range( + low=value[0], high=value[1] + ) + ) + else: + raise ValueError( + 'Please provide a valid format of value for feature: {}. The' + ' accepted formats are: bool, float, int, str and list.'.format( + feature + ) + ) + slice_spec = ModelEvaluationSlice.Slice.SliceSpec(configs=configs) + slice_specs_list.append(json_format.MessageToDict(slice_spec._pb)) + return slice_specs_list + + +def create_bias_configs_list( + list_of_slice_a_and_slice_b: List[ + List[Dict[str, Union[float, int, str, List[float]]]] + ], +) -> List[Any]: + """Creates a list of BiasConfig from a list of tuple inputs. + + Args: + list_of_slice_a_and_slice_b: A list of slice_a_and_slice_b. 
Each + slice_a_and_slice_b is a list which contains 1 or two elelments. Each + element in the list is a dictionary of feature names to values that + represents the slice config for 'slice_a' or 'slice_b'. 'slice_b' is + optional. The feature value can be a float, int, or str for + ModelEvaluationSlice.Slice.SliceSpec.Value; a list for + ModelEvaluationSlice.Slice.SliceSpec.Range. Following are example inputs: + Ex 1. Only provide the config of slice_a: `list_of_slice_a_and_slice_b = + [[{'education': 'low'}]]`. Ex 2. Provide both configs of slice_a and + slice_b: `list_of_slice_a_and_slice_b = [[{'education': 'low'}, + {'education': 'high'}]]`. + + Returns: + A list of BiasConfig. + + Raises: + ValueError: if a feature's value is `all_values` or the format of the + feature's value is invalid. + """ + bias_configs_list = [] + for slice_a_and_slice_b in list_of_slice_a_and_slice_b: + slice_a = slice_a_and_slice_b[0] + if len(slice_a_and_slice_b) > 1: + slice_b = slice_a_and_slice_b[1] + else: + slice_b = None + bias_config = {} + configs_a = {} + for feature, value in slice_a.items(): + if isinstance(value, bool): + # Bool must be checked first, bool is a child of int in Python. + raise ValueError( + '`all_values` SliceConfig is not allowed for bias detection.' + ) + elif isinstance(value, int) or isinstance(value, float): + configs_a[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value( + float_value=float(value) + ) + ) + elif isinstance(value, str): + configs_a[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value(string_value=value) + ) + elif isinstance(value, list): + configs_a[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + range=ModelEvaluationSlice.Slice.SliceSpec.Range( + low=value[0], high=value[1] + ) + ) + else: + raise ValueError( + 'Please provide a valid format of value for feature: {}. The' + ' accepted formats are: bool, float, int, str and list.'.format( + feature + ) + ) + slice_spec_a = ModelEvaluationSlice.Slice.SliceSpec(configs=configs_a) + slice_spec_a_dict = json_format.MessageToDict(slice_spec_a._pb) + bias_config['slices'] = [slice_spec_a_dict] + if slice_b is not None: + configs_b = {} + for feature, value in slice_b.items(): + if isinstance(value, bool): + # Bool must be checked first, bool is a child of int in Python. + raise ValueError( + '`all_values` SliceConfig is not allowed for bias detection.' + ) + elif isinstance(value, int) or isinstance(value, float): + configs_b[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value( + float_value=float(value) + ) + ) + elif isinstance(value, str): + configs_b[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + value=ModelEvaluationSlice.Slice.SliceSpec.Value( + string_value=value + ) + ) + elif isinstance(value, list): + configs_b[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( + range=ModelEvaluationSlice.Slice.SliceSpec.Range( + low=value[0], high=value[1] + ) + ) + else: + raise ValueError( + 'Please provide a valid format of value for feature: {}. 
The' + ' accepted formats are: bool, float, int, str and list.'.format( + feature + ) + ) + slice_spec_b = ModelEvaluationSlice.Slice.SliceSpec(configs=configs_b) + slice_spec_b_dict = json_format.MessageToDict(slice_spec_b._pb) + bias_config['slices'].append(slice_spec_b_dict) + bias_configs_list.append(bias_config) + return bias_configs_list From 7e89d05b19a85d8749a47ca2b7ecf040cd32e58b Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Wed, 5 Jul 2023 15:28:51 -0700 Subject: [PATCH 012/253] test: Use GKE stable release channel to unblock presubmit tests. Fixes #9704. Part of #9706. (#9705) * Use GKE stable release channel to unblock presubmit tests * update TODO comment --- test/deploy-cluster.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/deploy-cluster.sh b/test/deploy-cluster.sh index 4949b0e455..50cbdab53e 100755 --- a/test/deploy-cluster.sh +++ b/test/deploy-cluster.sh @@ -87,9 +87,8 @@ else SCOPE_ARG="--scopes=storage-rw,cloud-platform" fi # Use regular release channel to keep up with newly created clusters in Google Cloud Marketplace. - # gcloud container clusters create ${TEST_CLUSTER} --release-channel regular ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} - # Temporarily use cos as image type until docker dependencies gets removed. reference: https://github.com/kubeflow/pipelines/issues/6696 - gcloud container clusters create ${TEST_CLUSTER} --image-type cos_containerd --release-channel regular ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} + # TODO(#9706): Switch back to regular channel once we stop building test images via dind. + gcloud container clusters create ${TEST_CLUSTER} --release-channel stable ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} fi gcloud container clusters get-credentials ${TEST_CLUSTER} From 5ea193c2a89321efcd6b0631fd153676253c567e Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:39:50 -0700 Subject: [PATCH 013/253] chore(frontend): Remove redundant handleVersionSelected helper function (#9694) * Remove redundant handleVersionSelected helper. * Unify variable name in handleVersionSelected() helper. 
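A usage sketch for the model-evaluation helpers added in PATCH 011 above (preview/model_evaluation/utils.py). This is illustrative only and not taken from any patch in the series: it assumes google-cloud-pipeline-components (and its google-cloud-aiplatform dependency) is installed, and it imports the module by its file path because the series does not show whether the helpers are re-exported from the package __init__. The feature names and values follow the docstring examples.

from google_cloud_pipeline_components.preview.model_evaluation import utils

# One slice spec per input dict: examples with education == 'low', and
# examples whose 'age' feature falls in the numeric range 18 to 30.
slice_specs = utils.create_slice_specs_list([
    {'education': 'low'},
    {'age': [18.0, 30.0]},
])

# One bias config comparing slice_a (education == 'low') against
# slice_b (education == 'high'), mirroring "Ex 2" in the docstring.
bias_configs = utils.create_bias_configs_list([
    [{'education': 'low'}, {'education': 'high'}],
])

# Both helpers return plain JSON-style dicts (json_format.MessageToDict
# output), so the results can be passed directly as pipeline parameters.
print(slice_specs)
print(bias_configs)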
--- frontend/src/pages/PipelineDetails.tsx | 55 +++----------------------- 1 file changed, 6 insertions(+), 49 deletions(-) diff --git a/frontend/src/pages/PipelineDetails.tsx b/frontend/src/pages/PipelineDetails.tsx index 8109b0fe77..412f2b1551 100644 --- a/frontend/src/pages/PipelineDetails.tsx +++ b/frontend/src/pages/PipelineDetails.tsx @@ -200,7 +200,7 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { pipeline={v2Pipeline} selectedVersion={v2SelectedVersion} versions={v2Versions} - handleVersionSelected={this.handleVersionSelectedV2.bind(this)} + handleVersionSelected={this.handleVersionSelected.bind(this)} /> )} {!this.state.graphIsLoading && !showV2Pipeline && ( @@ -572,57 +572,14 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { } public async handleVersionSelected(versionId: string): Promise { - if (this.state.v1Pipeline) { - const selectedVersionV1 = (this.state.v1Versions || []).find(v => v.id === versionId); - const selectedVersionV2 = (this.state.v2Versions || []).find( - v => v.pipeline_version_id === versionId, - ); - const pageTitle = this.state.v1Pipeline.name?.concat(' (', selectedVersionV1?.name!, ')'); - - const selectedVersionPipelineTemplate = await this._getTemplateString( - this.state.v1Pipeline.id!, - versionId, - ); - this.props.history.replace({ - pathname: `/pipelines/details/${this.state.v1Pipeline.id}/version/${versionId}`, - }); - this.props.updateToolbar(this.getInitialToolbarState()); - this.props.updateToolbar({ pageTitle }); - - const [graph, reducedGraph, graphV2] = await this._createGraph( - selectedVersionPipelineTemplate, - ); - if (isFeatureEnabled(FeatureKey.V2_ALPHA) && graphV2.length > 0) { - this.setStateSafe({ - graph: undefined, - reducedGraph: undefined, - graphV2, - graphIsLoading: false, - v2SelectedVersion: selectedVersionV2, - templateString: selectedVersionPipelineTemplate, - }); - } else { - this.setStateSafe({ - graph, - reducedGraph, - graphV2: undefined, - graphIsLoading: false, - v1SelectedVersion: selectedVersionV1, - templateString: selectedVersionPipelineTemplate, - }); - } - } - } - - public async handleVersionSelectedV2(versionId: string): Promise { if (this.state.v2Pipeline) { - const selectedVersionV1 = (this.state.v1Versions || []).find(v => v.id === versionId); - const selectedVersionV2 = (this.state.v2Versions || []).find( + const v1SelectedVersion = (this.state.v1Versions || []).find(v => v.id === versionId); + const v2SelectedVersion = (this.state.v2Versions || []).find( v => v.pipeline_version_id === versionId, ); const pageTitle = this.state.v2Pipeline.display_name?.concat( ' (', - selectedVersionV2?.display_name!, + v2SelectedVersion?.display_name!, ')', ); @@ -645,7 +602,7 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { reducedGraph: undefined, graphV2, graphIsLoading: false, - v2SelectedVersion: selectedVersionV2, + v2SelectedVersion, templateString: selectedVersionPipelineTemplate, }); } else { @@ -654,7 +611,7 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { reducedGraph, graphV2: undefined, graphIsLoading: false, - v1SelectedVersion: selectedVersionV1, + v1SelectedVersion, templateString: selectedVersionPipelineTemplate, }); } From d1be1d9ffdbf4a8bcf9aa8df36ec22ac182ceb7e Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:39:57 -0700 Subject: [PATCH 014/253] fix(frontend): Recurring run card in experiment details page (#9697) --- frontend/src/pages/ExperimentDetails.tsx | 23 +++--- 
.../ExperimentDetails.test.tsx.snap | 80 ++++++++++--------- 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/frontend/src/pages/ExperimentDetails.tsx b/frontend/src/pages/ExperimentDetails.tsx index 85f500e8b9..29e3787a24 100644 --- a/frontend/src/pages/ExperimentDetails.tsx +++ b/frontend/src/pages/ExperimentDetails.tsx @@ -59,6 +59,7 @@ const css = stylesheet({ fontSize: 12, minHeight: 16, paddingLeft: 0, + marginRight: 0, }, cardContent: { color: color.secondaryText, @@ -90,7 +91,7 @@ const css = stylesheet({ color: '#0d652d', }, recurringRunsCard: { - width: 158, + width: 270, }, recurringRunsDialog: { minWidth: 600, @@ -170,7 +171,17 @@ export class ExperimentDetails extends Page<{}, ExperimentDetailsState> { elevation={0} >
-
Recurring run configs
+
+ Recurring run configs + +
{ > {activeRecurringRunsCount + ' active'}
-
- Recurring run configs + + Recurring run configs + + + Manage +
1 active
- - Manage - - Recurring run configs + + Recurring run configs + + + Manage +
0 active
- - Manage -
- Recurring run configs + + Recurring run configs + + + Manage +
0 active
- - Manage -
- Recurring run configs + + Recurring run configs + + + Manage +
0 active
- - Manage -
Date: Thu, 6 Jul 2023 12:31:39 -0700 Subject: [PATCH 015/253] chore(components): Migrate AutoML components to preview and v1 as needed docs(components): Revert doc changes from preview sync PiperOrigin-RevId: 546062400 --- .../preview/automl/forecasting/__init__.py | 25 + .../forecasting/forecasting_ensemble.py | 139 + .../forecasting/forecasting_stage_1_tuner.py | 159 + .../forecasting/forecasting_stage_2_tuner.py | 157 + .../learn_to_learn_forecasting_pipeline.yaml | 7790 +++++++++++ ...ence_to_sequence_forecasting_pipeline.yaml | 7749 +++++++++++ ...sion_transformer_forecasting_pipeline.yaml | 7735 +++++++++++ ...es_dense_encoder_forecasting_pipeline.yaml | 7790 +++++++++++ .../preview/automl/forecasting/utils.py | 1023 ++ .../preview/automl/tabular/__init__.py | 35 + ...ml_tabular_feature_selection_pipeline.yaml | 11427 ++++++++++++++++ .../tabular/automl_tabular_v2_pipeline.yaml | 8327 +++++++++++ ..._params_large_data_large_search_space.json | 158 + ...params_large_data_medium_search_space.json | 158 + ..._params_large_data_small_search_space.json | 146 + ...params_medium_data_large_search_space.json | 158 + ...arams_medium_data_medium_search_space.json | 158 + ...params_medium_data_small_search_space.json | 146 + ..._params_small_data_large_search_space.json | 158 + ...params_small_data_medium_search_space.json | 158 + ..._params_small_data_small_search_space.json | 146 + .../tabular/configs/wide_and_deep_params.json | 132 + .../tabular/configs/xgboost_params.json | 309 + .../automl/tabular/feature_selection.py | 179 + .../tabular/feature_transform_engine.py | 976 ++ .../tabnet_hyperparameter_tuning_job.py | 236 + ...et_hyperparameter_tuning_job_pipeline.yaml | 4661 +++++++ .../preview/automl/tabular/tabnet_trainer.py | 300 + .../tabular/tabnet_trainer_pipeline.yaml | 4302 ++++++ .../preview/automl/tabular/utils.py | 3360 +++++ ...wide_and_deep_hyperparameter_tuning_job.py | 236 + ...ep_hyperparameter_tuning_job_pipeline.yaml | 4018 ++++++ .../automl/tabular/wide_and_deep_trainer.py | 281 + .../wide_and_deep_trainer_pipeline.yaml | 4048 ++++++ .../xgboost_hyperparameter_tuning_job.py | 124 + ...st_hyperparameter_tuning_job_pipeline.yaml | 4332 ++++++ .../preview/automl/tabular/xgboost_trainer.py | 77 + .../tabular/xgboost_trainer_pipeline.yaml | 4396 ++++++ .../v1/automl/forecasting/__init__.py | 21 + .../bqml_arima_predict_pipeline.yaml | 1159 ++ .../bqml_arima_train_pipeline.yaml | 5085 +++++++ .../forecasting/prophet_predict_pipeline.yaml | 2150 +++ .../v1/automl/forecasting/prophet_trainer.py | 211 + .../forecasting/prophet_trainer_pipeline.yaml | 2958 ++++ .../v1/automl/forecasting/utils.py | 341 + .../v1/automl/tabular/__init__.py | 37 + .../tabular/automl_tabular_pipeline.yaml | 11149 +++++++++++++++ .../v1/automl/tabular/cv_trainer.py | 166 + .../tabular/deprecated/default_pipeline.json | 7974 +++++++++++ .../v1/automl/tabular/ensemble.py | 167 + .../v1/automl/tabular/finalizer.py | 88 + .../v1/automl/tabular/infra_validator.py | 39 + .../automl/tabular/split_materialized_data.py | 119 + .../v1/automl/tabular/stage_1_tuner.py | 189 + .../automl/tabular/stats_and_example_gen.py | 304 + .../training_configurator_and_validator.py | 285 + .../v1/automl/tabular/transform.py | 200 + .../v1/automl/tabular/utils.py | 1435 ++ 58 files changed, 119786 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py create mode 100644 
components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py create mode 100644 
components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py 
create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py new file mode 100644 index 0000000000..befa20f9ad --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Experimental AutoML forecasting components.""" + +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_1_tuner import automl_forecasting_stage_1_tuner as ForecastingStage1TunerOp +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_2_tuner import automl_forecasting_stage_2_tuner as ForecastingStage2TunerOp + +__all__ = [ + 'ForecastingStage1TunerOp', + 'ForecastingEnsembleOp', + 'ForecastingStage2TunerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py new file mode 100644 index 0000000000..b7e0580c4e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -0,0 +1,139 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Ensemble component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_ensemble( + project: str, + location: str, + root_dir: str, + transform_output: Input[Artifact], + metadata: Input[Artifact], + tuning_result_input: Input[Artifact], + instance_baseline: Input[Artifact], + instance_schema_path: Input[Artifact], + prediction_image_uri: str, + gcp_resources: dsl.OutputPath(str), + model_architecture: Output[Artifact], + unmanaged_container_model: Output[UnmanagedContainerModel], + explanation_metadata: dsl.OutputPath(dict), + explanation_metadata_artifact: Output[Artifact], + explanation_parameters: dsl.OutputPath(dict), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Ensembles AutoML Forecasting models. + + Args: + project: Project to run the job in. + location: Region to run the job in. + root_dir: The Cloud Storage path to store the output. + transform_output: The transform output artifact. + metadata: The tabular example gen metadata. + tuning_result_input: AutoML Tabular tuning + result. + instance_baseline: The instance baseline + used to calculate explanations. + instance_schema_path: The path to the instance schema, + describing the input data for the tf_model at serving time. + encryption_spec_key_name: Customer-managed encryption key. + prediction_image_uri: URI of the Docker image to be used as the + container for serving predictions. This URI must identify an image in + Artifact Registry or Container Registry. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + model_architecture: The architecture of the output model. + unmanaged_container_model: Model information needed to perform batch prediction. + explanation_metadata: The explanation metadata used by Vertex online and batch explanations. + explanation_metadata_artifact: The explanation metadata used by Vertex online and batch explanations in the format of a KFP Artifact. + explanation_parameters: The explanation parameters used by Vertex online and batch explanations. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_ensemble', + '", "--transform_output_path=', + transform_output.uri, + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--instance_schema_path=', + instance_schema_path.uri, + '", "--prediction_docker_uri=', + prediction_image_uri, + '", "--model_relative_output_path=', + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model', + '", "--explanation_metadata_path=', + explanation_metadata, + ',', + explanation_metadata_artifact.uri, + '", "--explanation_parameters_path=', + explanation_parameters, + '", "--model_architecture_path=', + model_architecture.uri, + '", "--use_json=true', + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py new file mode 100644 index 0000000000..e82e55708b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -0,0 +1,159 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Stage 1 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_stage_1_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + study_spec_parameters_override: Optional[list] = [], + worker_pool_specs_override_json: Optional[list] = [], + reduce_search_space_mode: Optional[str] = 'regular', + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Searches AutoML Forecasting architectures and selects the top trials. + + Args: + project: Project to run hyperparameter tuning. + location: Location for running the hyperparameter tuning. + root_dir: The Cloud Storage location to store the output. + study_spec_parameters_override: JSON study spec. E.g., + [{"parameter_id": "activation","categorical_value_spec": {"values": + ["tanh"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + reduce_search_space_mode: The reduce search space mode. Possible + values: "regular" (default), "minimal", "full". + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + deadline_hours: Number of hours the hyperparameter tuning should + run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The tabular example gen metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained model and architectures. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-forecasting-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_l2l_stage_1_tuner', + '", "--region=', + location, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "--reduce_search_space_mode=', + reduce_search_space_mode, + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + '", "--training_base_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', + '", "--num_parallel_trial=', + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--lro_job_info=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--use_json=true', + '", "--log_level=ERROR', + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py new file mode 100644 index 0000000000..5375f61955 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -0,0 +1,157 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Stage 2 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_stage_2_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + tuning_result_input_path: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + worker_pool_specs_override_json: Optional[list] = [], + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes AutoML Forecasting models and selects top trials. + + Args: + project: Project to run stage 2 tuner. + location: Cloud region for running the component: us-central1). + root_dir: The Cloud Storage location to store the output. + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + num_selected_trials: Number of selected trials. The number of weak + learners in the final model. + deadline_hours: Number of hours the cross-validation trainer + should run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The forecasting example gen + metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + tuning_result_input_path: Path to the json of hyperparameter + tuning results to use when evaluating models. + + Returns: + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained (private) model artifact paths and their hyperparameters. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-forecasting-stage-2-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_l2l_stage_2_tuner', + '", "--region=', + location, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + '", "--training_base_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', + '", "--num_parallel_trial=', + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--lro_job_info=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--tuning_result_input_path=', + tuning_result_input_path.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + ( + '", "--use_json=true", "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml new file mode 100644 index 0000000000..3d28c0a17f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -0,0 +1,7790 @@ +# PIPELINE DEFINITION +# Name: learn-to-learn-forecasting +# Description: The AutoML Forecasting pipeline. 
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# quantiles: list +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. 
+ parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. 
E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the JSON of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component (e.g., us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If the component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget for creating this model, + + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If the component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget for creating this model, + + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: l2l + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + 
schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: 
gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: 
+ name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + 
model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: l2l + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + 
parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: 
pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + 
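The two condition branches above compile down to triggerPolicy expressions over the string-not-empty output: 'true' routes to the branch that reuses a supplied stage-1 tuning result, 'false' to the branch that runs tuning. Below is a minimal sketch of how this pattern is typically authored with the KFP v2 SDK before compilation; the three components are hypothetical stand-ins, not the GCPC implementations.

```python
# Sketch only: how a compiled triggerPolicy such as
#   inputs.parameter_values['pipelinechannel--string-not-empty-Output'] == 'true'
# is typically produced at authoring time. The components are hypothetical.
from kfp import dsl


@dsl.component
def check_uri_not_empty(value: str) -> str:
    # Mirrors the string-not-empty check gating the two branches.
    return 'true' if value else 'false'


@dsl.component
def reuse_tuning_result(uri: str):
    print(f'Reusing stage 1 tuning result at {uri}')


@dsl.component
def run_stage_1_tuning():
    print('Running stage 1 tuning from scratch')


@dsl.pipeline(name='trigger-policy-sketch')
def pipeline(stage_1_tuning_result_artifact_uri: str = ''):
    check = check_uri_not_empty(value=stage_1_tuning_result_artifact_uri)
    # Compiles to a sub-DAG whose triggerPolicy.condition compares the
    # upstream output against 'true'.
    with dsl.Condition(check.output == 'true'):
        reuse_tuning_result(uri=stage_1_tuning_result_artifact_uri)
    # The mutually exclusive branch, compiled with the == 'false' condition.
    with dsl.Condition(check.output == 'false'):
        run_stage_1_tuning()
```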
group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: l2l + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: l2l + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + componentInputParameter: pipelinechannel--quantiles + split_example_counts: + 
taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + 
parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. 
If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' 
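The defaultValue/isOptional entries in the feature-attribution component above are what the KFP compiler emits for Python default arguments. A minimal sketch follows, assuming a hypothetical lightweight component of similar shape (not the real container-based GCPC component), to show how optional parameters and artifact inputs/outputs map onto this inputDefinitions/outputDefinitions layout.

```python
# Hypothetical stand-in (not the real GCPC container component) showing how
# defaultValue / isOptional in the compiled spec correspond to Python defaults.
from typing import Optional

from kfp import dsl
from kfp.dsl import Artifact, Input, Metrics, Output, OutputPath


@dsl.component
def feature_attribution_sketch(
    project: str,                                      # required: no defaultValue
    feature_attributions: Output[Metrics],             # -> outputDefinitions.artifacts
    gcp_resources: OutputPath(str),                    # -> outputDefinitions.parameters
    predictions_gcs_source: Optional[Input[Artifact]] = None,  # optional artifact input
    location: str = 'us-central1',                     # defaultValue: us-central1
    dataflow_machine_type: str = 'n1-standard-4',
    dataflow_disk_size: int = 50,                      # NUMBER_INTEGER, defaultValue: 50
    force_direct_runner: bool = False,                 # BOOLEAN, defaultValue: false
    predictions_format: str = 'jsonl',
):
    # A real implementation would launch the evaluation job and record the
    # created resources; this stub only writes a placeholder proto JSON.
    with open(gcp_resources, 'w') as f:
        f.write('{}')
    feature_attributions.log_metric('example_metric', 0.0)
    print(f'Would run {predictions_format} feature attribution in '
          f'{project}/{location} on {dataflow_machine_type}')
```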
+ isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' 
+ isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." 
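For reference, the dataset-level transformation shapes documented above (Join, TimeAggregate, PartitionByMax) can be collected into a single list for the feature-transform-engine. The snippet below restates the documented examples as a concrete Python value; table URIs and column names are placeholders.

```python
# Sketch of dataset-level transformation configs in the shape documented
# above. Table URIs, column names, and values are placeholders.
dataset_level_transformations = [
    {
        # Join features from another BigQuery table; for each join key the
        # left-table column is kept and the right-table column is dropped.
        "transformation": "Join",
        "right_table_uri": "bq://test-project.dataset_test.table",
        "join_keys": [["join_key_col", "join_key_col"]],
    },
    {
        # New feature: the value of target_col 40 days in the past for each
        # store_id time series.
        "transformation": "TimeAggregate",
        "time_difference": 40,
        "time_difference_units": "DAY",
        "time_series_identifier_columns": ["store_id"],
        "time_column": "time_col",
        "time_difference_target_column": "target_col",
        "output_column": "output_col",
    },
    {
        # Partition-by reduce: max sell_price per (store_id, state_id) over
        # the previous week.
        "transformation": "PartitionByMax",
        "reduce_column": "sell_price",
        "partition_by_columns": ["store_id", "state_id"],
        "time_column": "date",
        "time_ago": 1,
        "time_ago_units": "WEEK",
        "output_column": "partition_by_reduce_max_output",
    },
]
```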
+ isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
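A short illustration of how the feature-selection and forecasting-window parameters described here combine when configuring the feature-transform-engine; the values are placeholders, and run_feature_selection is the related flag documented just below.

```python
# Illustrative keyword arguments combining the feature-selection and
# forecasting parameters documented above; values are placeholders and the
# names follow this component's inputDefinitions.
fte_kwargs = {
    # Rank features with Adjusted Mutual Information ("AMI", the default
    # algorithm) and keep at most the top 1000 (the documented default cap).
    "run_feature_selection": True,
    "feature_selection_algorithm": "AMI",
    "max_selected_features": 1000,
    # Forecasting windowing: look back 30 steps and predict 7 steps ahead,
    # generating one window per stride of 1 row (one of the window strategies).
    "forecasting_apply_windowing": True,
    "forecasting_context_window": 30,
    "forecasting_forecast_horizon": 7,
    "forecasting_window_stride_length": 1,
    # Holiday effects are only applied for regions in the documented list.
    "forecasting_holiday_regions": ["GLOBAL"],
}
```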
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
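For orientation, the transformations documented above are plain JSON objects whose keys follow the argument lists in that description (Categorical, Reduce, SplitString, NGram, Clip, MultiHotEncoding, MaxAbsScale, Custom). The sketch below shows how such a list might be assembled in Python; the column names are hypothetical, and whether the value is passed JSON-encoded or as a structured parameter depends on the component's signature.

.. code-block:: python

    import json

    # Illustrative only: hypothetical feature columns, arguments as documented above.
    transformations = [
        {"transformation": "Categorical", "input_columns": ["store_id"], "top_k": 10},
        {"transformation": "NGram", "input_columns": ["title"],
         "min_ngram_size": 1, "max_ngram_size": 2, "separator": " "},
        {"transformation": "Clip", "input_columns": ["price"],
         "output_columns": ["price_clipped"], "min_value": 0.0, "max_value": 1000.0},
        {"transformation": "MaxAbsScale", "input_columns": ["price_clipped"],
         "output_columns": ["price_scaled"]},
    ]

    # String-typed pipeline parameters typically receive the JSON-encoded form.
    transformations_json = json.dumps(transformations)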
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\
+ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\
+ \n"
+ image: python:3.7-slim
+ exec-split-materialized-data:
+ container:
+ args:
+ - --executor_input
+ - '{{$}}'
+ - --function_to_execute
+ - _split_materialized_data
+ command:
+ - sh
+ - -ec
+ - 'program_path=$(mktemp -d)
+
+ printf "%s" "$0" > "$program_path/ephemeral_component.py"
+
+ python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
+
+ '
+ - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
+ \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\
+ \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\
+ \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\
+ \ \"\"\"Splits materialized_data into materialized_data test, train, and\
+ \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\
+ \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\
+ \ materialized_train_split: Path pattern to materialized_train_split.\n\
+ \ materialized_eval_split: Path pattern to materialized_eval_split.\n\
+ \ materialized_test_split: Path pattern to materialized_test_split.\n\
+ \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
+ \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
+ \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\
+ \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\
+ \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\
+ \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['tf_record_data_source'][\n\
+ \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['avro_data_source'][\n \
+ \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['parquet_data_source'][\n \
+ \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\
+ \ data source: {materialized_data_json}')\n\n # we map indices to file\
+ \ patterns based on the ordering of insertion order\n # in our transform_data\
+ \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\
+ \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\
+ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\
+ \ 'w') as f:\n f.write(file_patterns[2])\n\n"
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325
+ exec-string-not-empty:
+ container:
+ args:
+ - --executor_input
+ - '{{$}}'
+ - --function_to_execute
+ - _string_not_empty
+ command:
+ - sh
+ - -c
+ - "\nif !
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The AutoML Forecasting pipeline. + name: learn-to-learn-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + 
pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + 
componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' 
+ isOptional: true
+ parameterType: STRING
+ feature_transform_engine_dataflow_disk_size_gb:
+ defaultValue: 40.0
+ description: 'The disk size of the
+
+ dataflow workers of the feature transform engine.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ feature_transform_engine_dataflow_machine_type:
+ defaultValue: n1-standard-16
+ description: 'The dataflow machine type of
+
+ the feature transform engine.'
+ isOptional: true
+ parameterType: STRING
+ feature_transform_engine_dataflow_max_num_workers:
+ defaultValue: 10.0
+ description: 'The max number of
+
+ dataflow workers of the feature transform engine.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ forecast_horizon:
+ defaultValue: 0.0
+ description: The length of the horizon.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ group_columns:
+ description: 'A list of time series attribute column names that define the
+
+ time series hierarchy.'
+ isOptional: true
+ parameterType: LIST
+ group_temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for predictions
+
+ aggregated over both the horizon and time series in the same hierarchy
+
+ group.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ group_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for predictions aggregated over
+
+ time series in the same group.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ holiday_regions:
+ description: 'The geographical regions where the holiday effect is
+
+ applied in modeling.'
+ isOptional: true
+ parameterType: LIST
+ location:
+ description: The GCP region that runs the pipeline components.
+ parameterType: STRING
+ model_description:
+ defaultValue: ''
+ description: Optional description.
+ isOptional: true
+ parameterType: STRING
+ model_display_name:
+ defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
+ description: Optional display name for model.
+ isOptional: true
+ parameterType: STRING
+ num_selected_trials:
+ defaultValue: 10.0
+ description: Number of selected trials.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ optimization_objective:
+ description: '"minimize-rmse", "minimize-mae", "minimize-rmsle",
+
+ "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or
+
+ "minimize-quantile-loss".'
+ parameterType: STRING
+ predefined_split_key:
+ defaultValue: ''
+ description: The predefined_split column name.
+ isOptional: true
+ parameterType: STRING
+ project:
+ description: The GCP project that runs the pipeline components.
+ parameterType: STRING
+ quantiles:
+ description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles
+
+ are allowed of values between 0 and 1, exclusive. Represents the quantiles
+
+ to use for that objective. Quantiles must be unique.'
+ isOptional: true
+ parameterType: LIST
+ root_dir:
+ description: The root GCS directory for the pipeline components.
+ parameterType: STRING
+ run_evaluation:
+ defaultValue: false
+ description: '`True` to evaluate the ensembled model on the test split.'
+ isOptional: true
+ parameterType: BOOLEAN
+ stage_1_num_parallel_trials:
+ defaultValue: 35.0
+ description: Number of parallel trials for stage 1.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ stage_1_tuner_worker_pool_specs_override:
+ description: 'The dictionary for overriding
+
+ stage 1 tuner worker pool spec.'
+ isOptional: true
+ parameterType: LIST
+ stage_1_tuning_result_artifact_uri:
+ defaultValue: ''
+ description: 'The stage 1 tuning result artifact GCS
+
+ URI.'
+ isOptional: true
+ parameterType: STRING
+ stage_2_num_parallel_trials:
+ defaultValue: 35.0
+ description: Number of parallel trials for stage 2.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ stage_2_trainer_worker_pool_specs_override:
+ description: 'The dictionary for overriding
+
+ stage 2 trainer worker pool spec.'
+ isOptional: true
+ parameterType: LIST
+ study_spec_parameters_override:
+ description: The list for overriding study spec.
+ isOptional: true
+ parameterType: LIST
+ target_column:
+ description: The target column name.
+ parameterType: STRING
+ temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for predictions aggregated
+
+ over the horizon for a single time series.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ test_fraction:
+ defaultValue: -1.0
+ description: The test fraction.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ time_column:
+ description: The column that indicates the time.
+ parameterType: STRING
+ time_series_attribute_columns:
+ description: 'The columns that are invariant across the
+
+ same time series.'
+ isOptional: true
+ parameterType: LIST
+ time_series_identifier_column:
+ description: 'The column that distinguishes the different
+
+ time series.'
+ parameterType: STRING
+ timestamp_split_key:
+ defaultValue: ''
+ description: The timestamp_split column name.
+ isOptional: true
+ parameterType: STRING
+ train_budget_milli_node_hours:
+ description: 'The train budget of creating this model,
+
+ expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+ hour.'
+ parameterType: NUMBER_DOUBLE
+ training_fraction:
+ defaultValue: -1.0
+ description: The training fraction.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ transformations:
+ description: 'Dict mapping auto and/or type-resolutions to feature
+
+ columns. The supported types are: auto, categorical, numeric, text, and
+
+ timestamp.'
+ parameterType: STRUCT
+ unavailable_at_forecast_columns:
+ description: 'The columns that are unavailable at the
+
+ forecast time.'
+ isOptional: true
+ parameterType: LIST
+ validation_fraction:
+ defaultValue: -1.0
+ description: The validation fraction.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ weight_column:
+ defaultValue: ''
+ description: The weight column name.
+ isOptional: true
+ parameterType: STRING
+ window_max_count:
+ defaultValue: 0.0
+ description: The maximum number of windows that will be generated.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ window_predefined_column:
+ defaultValue: ''
+ description: The column that indicates the start of each window.
+ isOptional: true
+ parameterType: STRING
+ window_stride_length:
+ defaultValue: 0.0
+ description: The stride length to generate the window.
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml new file mode 100644 index 0000000000..4f656e1b99 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -0,0 +1,7749 @@ +# PIPELINE DEFINITION +# Name: sequence-to-sequence-forecasting +# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. +# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: 
''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. 
+ parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. 
+ parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + 
taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: seq2seq + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: 
pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: 
STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + 
taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + 
pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: seq2seq + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + 
parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - 
feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + 
feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + 
outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + 
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: seq2seq + predefined_split_key: + componentInputParameter: 
pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + runtimeValue: + constant: 0.0 + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: seq2seq + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + runtimeValue: + constant: [] + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: 
pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + 
parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. 
If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... 
] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. 
+ isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specify\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n    Datetime:\ + \ Extracts datetime features from a column containing\n        timestamp\ + \ strings.\n        Example: .. code-block:: python { \"transformation\"\ + :\n        \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n        \"%Y-%m-%d\" }\n        Arguments:\n            input_columns:\ + \ A list with a single column to\n                perform the datetime\ + \ transformation on.\n            output_columns: Names of output\n\ + \                columns, one for each datetime_features element.\n \ + \           time_format: Datetime format string. Time format is\n \ + \               a combination of Date + Time Delimiter (optional) + Time\n\ + \               (optional) directives.
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n            specified, a word must\ + \ satisfy both conditions to be\n            included. Defaults to\ + \ None.\n            separator: Separator to split input string\n \ + \               into tokens. Defaults to ' '.\n            missing_token:\ + \ Missing token to use when\n                no string is included. Defaults\ + \ to ' _MISSING_ '.\n    Clip: Given a numeric column, clips elements\ + \ such that elements <\n        min_value are assigned min_value, and\ + \ elements > max_value are\n        assigned max_value.\n        Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n        \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n        [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n        Arguments:\n\ + \          input_columns: A list with a single column to\n            \ + \ perform the clip transformation on.\n          output_columns:\ + \ A list with a single\n            output column name, corresponding\ + \ to the output of our\n            transformation.\n          \ + \ min_value: Number where all values below\n            min_value\ + \ are set to min_value. If no min_value is\n            provided,\ + \ min clipping will not occur. Defaults to None.\n          max_value:\ + \ Number where all values above\n            max_value are set to\ + \ max_value. If no max_value is\n            provided, max clipping\ + \ will not occur. Defaults to None.\n    MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n        array column.\n        \ + \ Example: .. code-block:: python { \"transformation\":\n        \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \        of classes is determined by the largest number included in\n\ + \        the input if it is numeric or the total number of unique\n\ + \        values of the input if it is type str. If the input has\n\ + \        type str and an element contains separator tokens, the input\n\ + \        will be split at separator indices, and each element\ + \ of\n        the split list will be considered a separate class.\ + \ For\n        example,\n        Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n        0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n        2 [\"bar\"\ + ], # Example 3 ]\n        Output (with default separator=\"\ + \ \"): .. code-block:: python [\n        [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n        [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n        Arguments:\n          \ + \ input_columns: A list with a single column to\n            perform\ + \ the multi-hot-encoding on.\n          output_columns: A list with\ + \ a single\n            output column name, corresponding to the output\ + \ of our\n            transformation.\n          top_k: Number\ + \ of the most frequent words\n            in the vocabulary to use\ + \ for generating dictionary\n            lookup indices. If not specified,\ + \ all words in the\n            vocabulary will be used. Defaults\ + \ to None.\n          frequency_threshold: Limit the\n          \ + \ dictionary's vocabulary only to words whose number of\n          \ + \ occurrences in the input exceeds frequency_threshold. If\n          \ + \ not specified, all words in the vocabulary will be\n          \ + \ included. If both top_k and frequency_threshold are\n\ + \            specified, a word must satisfy both conditions to be\n\ + \            included. Defaults to None.\n          separator:\ + \ Separator to split input string\n            into tokens. Defaults\ + \ to ' '.\n    MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \        column.\n        Example: ..
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.'
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series identifier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata.
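+# NOTE: the comment lines below are illustrative annotations for readers of this spec;
+# they are not emitted by the KFP compiler. Every `executorLabel` declared in the
+# components above resolves to an entry under `deploymentSpec.executors` below
+# (e.g. comp-training-configurator-and-validator -> exec-training-configurator-and-validator).
+# In the calculate-training-parameters executor further down, when architecture search
+# is not skipped, the stage-1 budget is derived as:
+#   stage_1_deadline_hours      = train_budget_milli_node_hours / 1000.0
+#   rounds                      = ceil(100 / stage_1_num_parallel_trials)
+#   stage_1_single_run_max_secs = int(stage_1_deadline_hours * 3600.0 / 1.3 / rounds)
+# For example (hypothetical inputs), a budget of 1000 milli node hours with 35 parallel
+# trials gives a 1.0 hour deadline, rounds = 3, and single_run_max_secs = 923.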
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. + name: sequence-to-sequence-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: 
evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: 
stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. 
+ isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' 
+        isOptional: true
+        parameterType: STRING
+      feature_transform_engine_dataflow_max_num_workers:
+        defaultValue: 10.0
+        description: 'The max number of
+
+          dataflow workers of the feature transform engine.'
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      forecast_horizon:
+        defaultValue: 0.0
+        description: The length of the horizon.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      group_columns:
+        description: 'A list of time series attribute column names that define the
+
+          time series hierarchy.'
+        isOptional: true
+        parameterType: LIST
+      group_temporal_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions
+
+          aggregated over both the horizon and time series in the same hierarchy
+
+          group.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      group_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions aggregated over
+
+          time series in the same group.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      holiday_regions:
+        description: 'The geographical regions where the holiday effect is
+
+          applied in modeling.'
+        isOptional: true
+        parameterType: LIST
+      location:
+        description: The GCP region that runs the pipeline components.
+        parameterType: STRING
+      model_description:
+        defaultValue: ''
+        description: Optional description.
+        isOptional: true
+        parameterType: STRING
+      model_display_name:
+        defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
+        description: Optional display name for model.
+        isOptional: true
+        parameterType: STRING
+      num_selected_trials:
+        defaultValue: 10.0
+        description: Number of selected trials.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      optimization_objective:
+        description: '"minimize-rmse", "minimize-mae", "minimize-rmsle",
+
+          "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or
+
+          "minimize-quantile-loss".'
+        parameterType: STRING
+      predefined_split_key:
+        defaultValue: ''
+        description: The predefined_split column name.
+        isOptional: true
+        parameterType: STRING
+      project:
+        description: The GCP project that runs the pipeline components.
+        parameterType: STRING
+      root_dir:
+        description: The root GCS directory for the pipeline components.
+        parameterType: STRING
+      run_evaluation:
+        defaultValue: false
+        description: '`True` to evaluate the ensembled model on the test split.'
+        isOptional: true
+        parameterType: BOOLEAN
+      stage_1_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 1.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_1_tuner_worker_pool_specs_override:
+        description: 'The dictionary for overriding
+
+          stage 1 tuner worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      stage_1_tuning_result_artifact_uri:
+        defaultValue: ''
+        description: 'The stage 1 tuning result artifact GCS
+
+          URI.'
+        isOptional: true
+        parameterType: STRING
+      stage_2_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 2.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_2_trainer_worker_pool_specs_override:
+        description: 'The dictionary for overriding
+
+          stage 2 trainer worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      study_spec_parameters_override:
+        description: The list for overriding study spec.
+        isOptional: true
+        parameterType: LIST
+      target_column:
+        description: The target column name.
+        parameterType: STRING
+      temporal_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions aggregated
+
+          over the horizon for a single time series.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      test_fraction:
+        defaultValue: -1.0
+        description: The test fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      time_column:
+        description: The column that indicates the time.
+        parameterType: STRING
+      time_series_attribute_columns:
+        description: 'The columns that are invariant across the
+
+          same time series.'
+        isOptional: true
+        parameterType: LIST
+      time_series_identifier_column:
+        description: 'The column that distinguishes the different
+
+          time series.'
+        parameterType: STRING
+      timestamp_split_key:
+        defaultValue: ''
+        description: The timestamp_split column name.
+        isOptional: true
+        parameterType: STRING
+      train_budget_milli_node_hours:
+        description: 'The train budget of creating this model,
+
+          expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+          hour.'
+        parameterType: NUMBER_DOUBLE
+      training_fraction:
+        defaultValue: -1.0
+        description: The training fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      transformations:
+        description: 'Dict mapping auto and/or type-resolutions to feature
+
+          columns. The supported types are: auto, categorical, numeric, text, and
+
+          timestamp.'
+        parameterType: STRUCT
+      unavailable_at_forecast_columns:
+        description: 'The columns that are unavailable at the
+
+          forecast time.'
+        isOptional: true
+        parameterType: LIST
+      validation_fraction:
+        defaultValue: -1.0
+        description: The validation fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      weight_column:
+        defaultValue: ''
+        description: The weight column name.
+        isOptional: true
+        parameterType: STRING
+      window_max_count:
+        defaultValue: 0.0
+        description: The maximum number of windows that will be generated.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      window_predefined_column:
+        defaultValue: ''
+        description: The column that indicates the start of each window.
+        isOptional: true
+        parameterType: STRING
+      window_stride_length:
+        defaultValue: 0.0
+        description: The stride length to generate the window.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+  outputDefinitions:
+    artifacts:
+      feature-attribution-2-feature_attributions:
+        artifactType:
+          schemaTitle: system.Metrics
+          schemaVersion: 0.0.1
+      feature-attribution-feature_attributions:
+        artifactType:
+          schemaTitle: system.Metrics
+          schemaVersion: 0.0.1
+schemaVersion: 2.1.0
+sdkVersion: kfp-2.0.0-beta.17
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml
new file mode 100644
index 0000000000..6bad578312
--- /dev/null
+++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml
@@ -0,0 +1,7735 @@
+# PIPELINE DEFINITION
+# Name: temporal-fusion-transformer-forecasting
+# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline.
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' 
+ metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' 
+ parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. 
E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the JSON of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component (e.g., us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run the stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the cross-validation trainer.
+ parameterType: STRING + project: + description: Project to run the cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If the component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget for creating this model, + + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If the component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget for creating this model, + + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + + hour.'
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + runtimeValue: + constant: 1.0 + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: tft + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + 
artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + 
bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + 
artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + 
comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + runtimeValue: + constant: 1.0 + stage_1_num_parallel_trials: + 
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: tft + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + 
pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: 
+ constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + 
tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: 
feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + 
componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 
'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: tft + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: 
pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + runtimeValue: + constant: 0.0 + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: tft + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + runtimeValue: + constant: [] + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + 
pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + 
parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. 
If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' 
+ isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
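Editor's note: the transformation entries documented in the descriptions above (Categorical, Reduce, SplitString, NGram, Clip, MultiHotEncoding, MaxAbsScale, Custom) are plain JSON objects keyed by "transformation", "input_columns", and per-type arguments. The sketch below only restates those documented examples as a Python list and serializes it; the variable names and the json.dumps step are illustrative assumptions, not part of the compiled spec.

.. code-block:: python

    import json

    # Each entry mirrors one of the transformation examples documented above.
    tf_auto_transform_features_sketch = [
        {"transformation": "Categorical", "input_columns": ["feature_1"], "top_k": 10},
        {"transformation": "Reduce", "input_columns": ["feature_1"],
         "reduce_mode": "MEAN", "output_columns": ["feature_1_mean"]},
        {"transformation": "SplitString", "input_columns": ["feature_1"], "separator": "$"},
        {"transformation": "NGram", "input_columns": ["feature_1"],
         "min_ngram_size": 1, "max_ngram_size": 2, "separator": " "},
        {"transformation": "Clip", "input_columns": ["col1"],
         "output_columns": ["col1_clipped"], "min_value": 1.0, "max_value": 10.0},
        {"transformation": "MultiHotEncoding", "input_columns": ["col1"]},
        {"transformation": "MaxAbsScale", "input_columns": ["col1"],
         "output_columns": ["col1_max_abs_scaled"]},
    ]

    # Serialized form, e.g. for passing through a STRING pipeline parameter
    # (hypothetical usage; the exact parameter wiring is not shown in this file).
    transformations_json = json.dumps(tf_auto_transform_features_sketch)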
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + 
inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: 
STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
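Editor's note: the comp-model-batch-predict inputs above spell out two coupling rules, that either model or unmanaged_container_model must be specified, and that explanation_metadata/explanation_parameters only apply when generate_explanation is true (which in turn needs either the Model's explanation_spec or both overrides). A hedged pre-flight check along those lines is sketched below; it is not code from the component itself, and treating the two model inputs as mutually exclusive is an assumption.

.. code-block:: python

    def validate_batch_predict_inputs(
        model=None,
        unmanaged_container_model=None,
        generate_explanation=False,
        explanation_metadata=None,
        explanation_parameters=None,
        model_has_explanation_spec=False,
    ):
        """Pre-flight check mirroring the documented input constraints (sketch)."""
        # Assumption: exactly one of `model` / `unmanaged_container_model` is given.
        if (model is None) == (unmanaged_container_model is None):
            raise ValueError("Specify exactly one of model or unmanaged_container_model.")
        if generate_explanation:
            # Explanations require Model.explanation_spec or both override fields.
            if not model_has_explanation_spec and not (
                explanation_metadata and explanation_parameters
            ):
                raise ValueError(
                    "generate_explanation=True requires the model's explanation_spec "
                    "or both explanation_metadata and explanation_parameters."
                )
        elif explanation_metadata or explanation_parameters:
            raise ValueError(
                "explanation_metadata/explanation_parameters can only be set "
                "when generate_explanation is True."
            )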
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
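Editor's note: comp-finalize-eval-quantile-parameters (defined earlier in this spec) and the forecasting evaluation component above share the same two knobs, forecasting_type (defaulting to "point") and a quantiles list (defaulting to [0.5]). One plausible reading of how the finalize step derives its outputs from an optional quantiles input is sketched below; the real executor's logic is not visible in this file, so treat this purely as an illustration.

.. code-block:: python

    def finalize_eval_quantile_parameters(quantiles=None):
        """Derive (forecasting_type, quantiles) for evaluation (illustrative sketch)."""
        # Assumption: a non-empty quantile list switches evaluation to quantile
        # mode; otherwise fall back to point evaluation at the median.
        if quantiles:
            return "quantile", list(quantiles)
        return "point", [0.5]

    forecasting_type, eval_quantiles = finalize_eval_quantile_parameters([0.1, 0.5, 0.9])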
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
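Editor's note: comp-set-optional-inputs takes three mutually exclusive data sources (a Vertex dataset artifact, a BigQuery table path, or CSV filenames) plus an optional model display name, and echoes the resolved values back as parameters. A hedged sketch of the resolution this implies is given below; how the real component extracts a source from the Vertex dataset is not visible in this spec, and the fallback display name is a hypothetical placeholder.

.. code-block:: python

    def set_optional_inputs_sketch(
        data_source_csv_filenames="",
        data_source_bigquery_table_path="",
        vertex_dataset_uri="",
        model_display_name="",
    ):
        """Resolve exactly one data source and a display name (illustrative sketch)."""
        provided = [
            bool(data_source_csv_filenames),
            bool(data_source_bigquery_table_path),
            bool(vertex_dataset_uri),
        ]
        if sum(provided) != 1:
            raise ValueError("Exactly one data source must be provided.")
        # Assumption: an empty display name is replaced with a generated default.
        if not model_display_name:
            model_display_name = "tabular-workflow-model"  # hypothetical default
        return data_source_bigquery_table_path, data_source_csv_filenames, model_display_name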
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
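+# Editorial aside (illustrative sketch only; not produced by the KFP compiler and
+# not part of this change): lightweight components above such as
+# comp-string-not-empty and comp-split-materialized-data are the compiled IR of
+# Python functions decorated with @dsl.component; parameters with defaults become
+# inputDefinitions entries carrying defaultValue/isOptional, and return values
+# become outputDefinitions. A minimal, hypothetical example, assuming kfp 2.x:
+#
+#   from kfp import compiler, dsl
+#
+#   @dsl.component(base_image='python:3.7')
+#   def string_not_empty(value: str = '') -> str:
+#       """Return 'true' if `value` is non-empty, else 'false'."""
+#       return 'true' if value else 'false'
+#
+#   # Compiling writes a YAML spec whose component block mirrors the
+#   # inputDefinitions/outputDefinitions layout seen throughout this file.
+#   compiler.Compiler().compile(string_not_empty, package_path='component.yaml')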
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. + name: temporal-fusion-transformer-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: 
evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: 
stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' 
+ isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. 
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      group_columns:
+        description: 'A list of time series attribute column names that define the
+
+          time series hierarchy.'
+        isOptional: true
+        parameterType: LIST
+      group_temporal_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions
+
+          aggregated over both the horizon and time series in the same hierarchy
+
+          group.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      group_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions aggregated over
+
+          time series in the same group.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      holiday_regions:
+        description: 'The geographical regions where the holiday effect is
+
+          applied in modeling.'
+        isOptional: true
+        parameterType: LIST
+      location:
+        description: The GCP region that runs the pipeline components.
+        parameterType: STRING
+      model_description:
+        defaultValue: ''
+        description: Optional description.
+        isOptional: true
+        parameterType: STRING
+      model_display_name:
+        defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
+        description: Optional display name for model.
+        isOptional: true
+        parameterType: STRING
+      optimization_objective:
+        description: '"minimize-rmse", "minimize-mae", "minimize-rmsle",
+
+          "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or
+
+          "minimize-quantile-loss".'
+        parameterType: STRING
+      predefined_split_key:
+        defaultValue: ''
+        description: The predefined_split column name.
+        isOptional: true
+        parameterType: STRING
+      project:
+        description: The GCP project that runs the pipeline components.
+        parameterType: STRING
+      root_dir:
+        description: The root GCS directory for the pipeline components.
+        parameterType: STRING
+      run_evaluation:
+        defaultValue: false
+        description: '`True` to evaluate the ensembled model on the test split.'
+        isOptional: true
+        parameterType: BOOLEAN
+      stage_1_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 1.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_1_tuner_worker_pool_specs_override:
+        description: 'The dictionary for overriding
+
+          stage 1 tuner worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      stage_1_tuning_result_artifact_uri:
+        defaultValue: ''
+        description: 'The stage 1 tuning result artifact GCS
+
+          URI.'
+        isOptional: true
+        parameterType: STRING
+      stage_2_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 2.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_2_trainer_worker_pool_specs_override:
+        description: 'The dictionary for overriding
+
+          stage 2 trainer worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      study_spec_parameters_override:
+        description: The list for overriding study spec.
+        isOptional: true
+        parameterType: LIST
+      target_column:
+        description: The target column name.
+        parameterType: STRING
+      temporal_total_weight:
+        defaultValue: 0.0
+        description: 'The weight of the loss for predictions aggregated
+
+          over the horizon for a single time series.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      test_fraction:
+        defaultValue: -1.0
+        description: The test fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      time_column:
+        description: The column that indicates the time.
+        parameterType: STRING
+      time_series_attribute_columns:
+        description: 'The columns that are invariant across the
+
+          same time series.'
+        isOptional: true
+        parameterType: LIST
+      time_series_identifier_column:
+        description: 'The column that distinguishes the different
+
+          time series.'
+        parameterType: STRING
+      timestamp_split_key:
+        defaultValue: ''
+        description: The timestamp_split column name.
+        isOptional: true
+        parameterType: STRING
+      train_budget_milli_node_hours:
+        description: 'The train budget of creating this model,
+
+          expressed in milli node hours, i.e. a value of 1,000 in this field means
+
+          1 node hour.'
+        parameterType: NUMBER_DOUBLE
+      training_fraction:
+        defaultValue: -1.0
+        description: The training fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      transformations:
+        description: 'Dict mapping auto and/or type-resolutions to feature
+
+          columns. The supported types are: auto, categorical, numeric, text, and
+
+          timestamp.'
+        parameterType: STRUCT
+      unavailable_at_forecast_columns:
+        description: 'The columns that are unavailable at the
+
+          forecast time.'
+        isOptional: true
+        parameterType: LIST
+      validation_fraction:
+        defaultValue: -1.0
+        description: The validation fraction.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      weight_column:
+        defaultValue: ''
+        description: The weight column name.
+        isOptional: true
+        parameterType: STRING
+      window_max_count:
+        defaultValue: 0.0
+        description: The maximum number of windows that will be generated.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      window_predefined_column:
+        defaultValue: ''
+        description: The column that indicates the start of each window.
+        isOptional: true
+        parameterType: STRING
+      window_stride_length:
+        defaultValue: 0.0
+        description: The stride length to generate the window.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+  outputDefinitions:
+    artifacts:
+      feature-attribution-2-feature_attributions:
+        artifactType:
+          schemaTitle: system.Metrics
+          schemaVersion: 0.0.1
+      feature-attribution-feature_attributions:
+        artifactType:
+          schemaTitle: system.Metrics
+          schemaVersion: 0.0.1
+schemaVersion: 2.1.0
+sdkVersion: kfp-2.0.0-beta.17
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml
new file mode 100644
index 0000000000..afbf67ec9e
--- /dev/null
+++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml
@@ -0,0 +1,7790 @@
+# PIPELINE DEFINITION
+# Name: time-series-dense-encoder-forecasting
+# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline.
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# quantiles: list +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. 
+ parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. 
E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
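The `study_spec_parameters_override` and `worker_pool_specs_override_json` inputs above embed their expected JSON shapes directly in the descriptions. A minimal sketch of assembling those values in Python before passing them as pipeline parameters; the machine types and the `activation` parameter are illustrative only.

```python
# Hedged sketch: building the two override lists documented above.
study_spec_parameters_override = [
    {"parameter_id": "activation", "categorical_value_spec": {"values": ["tanh"]}},
]

# One spec per worker pool; empty dicts leave the corresponding pool unchanged.
stage_1_tuner_worker_pool_specs_override = [
    {"machine_spec": {"machine_type": "n1-standard-16"}},  # first pool
    {},
    {},
    {"machine_spec": {"machine_type": "n1-standard-16"}},  # last pool
]

parameter_values = {
    "study_spec_parameters_override": study_spec_parameters_override,
    "stage_1_tuner_worker_pool_specs_override": stage_1_tuner_worker_pool_specs_override,
}
```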
+ parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: tide + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + 
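The `importer` task above (`get-hyperparameter-tuning-results`) materializes a precomputed stage-1 tuning result from a URI, and the `should_run_model_evaluation` sub-DAG is gated by a `triggerPolicy` on `run_evaluation`. A hedged sketch of how that pattern is typically expressed with the KFP SDK; the `evaluate` component and the URI are placeholders.

```python
# Hypothetical sketch of the importer + conditional-evaluation pattern seen above.
from kfp import dsl


@dsl.component
def evaluate(tuning_result: dsl.Input[dsl.Artifact]):
    print("evaluating with", tuning_result.uri)


@dsl.pipeline(name="importer-and-condition-demo")
def demo(stage_1_tuning_result_artifact_uri: str, run_evaluation: bool):
    # Compiles to the `importer` task: an existing URI becomes an input artifact.
    imported = dsl.importer(
        artifact_uri=stage_1_tuning_result_artifact_uri,
        artifact_class=dsl.Artifact,
        reimport=False,
    )
    # Compiles to a sub-DAG whose triggerPolicy tests `run_evaluation`,
    # like `should_run_model_evaluation` in this spec.
    with dsl.Condition(run_evaluation == True, name="should_run_model_evaluation"):
        evaluate(tuning_result=imported.output)
```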
schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: 
gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: 
+ name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + 
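The `runtimeValue` constants above such as `batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}` are, to the best of my understanding, the compiled form of the KFP placeholder constants interpolated into an argument at authoring time; a minimal illustration, assuming those constants carry these string values:

```python
# Hedged illustration of how the {{$.pipeline_job_uuid}} / {{$.pipeline_task_uuid}}
# strings in the runtimeValue constants above are typically produced.
from kfp import dsl

job_display_name = (
    "batch-predict-forecasting-evaluation-"
    f"{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}"
)
# Expected: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
print(job_display_name)
```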
model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: tide + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + 
parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: 
pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + 
group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: tide + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: tide + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + componentInputParameter: pipelinechannel--quantiles + split_example_counts: + 
taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + 
parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. 
If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' 
+ isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' 
+ isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." 
+ isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ \ List of datetime\n features to be extracted. Each entry\ \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ \ Log: Performs the natural log on a numeric column.\n Example:\ \ .. code-block:: python { \"transformation\": \"Log\",\n \ \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ \ input_columns: A list with a single column to\n \ \ perform the log transformation on.\n output_columns:\ \ A list with a single\n output column name, corresponding\ \ to the output of our\n transformation.\n ZScale:\ \ Performs Z-scale normalization on a numeric column.\n Example:\ \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ \ input_columns: A list with a single column to\n \ \ perform the z-scale transformation on.\n output_columns:\ \ A list with a single\n output column name, corresponding\ \ to the output of our\n transformation.\n Vocabulary:\ \ Converts strings to integers, where each unique string\n gets\ \ a unique integer representation.\n Example: .. code-block::\ \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ : [\"feature_1\"] }\n Arguments:\n input_columns:\ \ A list with a single column to\n perform the vocabulary\ \ transformation on.\n output_columns: A list with a single\n\ \ output column name, corresponding to the output of our\n\ \ transformation.\n top_k: Number of the most\ \ frequent words\n in the vocabulary to use for generating\ \ dictionary\n lookup indices. If not specified, all words\ \ in the\n vocabulary will be used. Defaults to None.\n\ \ frequency_threshold: Limit the vocabulary\n \ \ only to words whose number of occurrences in the input\n \ \ exceeds frequency_threshold. If not specified, all words\n \ \ in the vocabulary will be included. If both top_k and\n\ \ frequency_threshold are specified, a word must satisfy\n\ \ both conditions to be included. Defaults to None.\n \ \ Categorical: Transforms categorical columns to integer columns.\n\ \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n          specified, a word must\ + \ satisfy both conditions to be\n          included. Defaults to\ + \ None.\n      separator: Separator to split input string\n        \ + \ into tokens. Defaults to ' '.\n      missing_token:\ + \ Missing token to use when\n        no string is included. Defaults\ + \ to ' _MISSING_ '.\n  Clip: Given a numeric column, clips elements\ + \ such that elements <\n      min_value are assigned min_value, and\ + \ elements > max_value are\n      assigned max_value.\n      Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n        \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n        [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n      Arguments:\n\ + \          input_columns: A list with a single column to\n          \ + \ perform the clip transformation on.\n          output_columns:\ + \ A list with a single\n          output column name, corresponding\ + \ to the output of our\n          transformation.\n          \ + \ min_value: Number where all values below\n          min_value\ + \ are set to min_value. If no min_value is\n          provided,\ + \ min clipping will not occur. Defaults to None.\n          max_value:\ + \ Number where all values above\n          max_value are set to\ + \ max_value. If no max_value is\n          provided, max clipping\ + \ will not occur. Defaults to None.\n  MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n      array column.\n      \ + \ Example: .. code-block:: python { \"transformation\":\n        \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \      of classes is determined by the largest number included in\n\ + \      the input if it is numeric or the total number of unique\n\ + \      values of the input if it is type str. If the input has\n\ + \      type str and an element contains separator tokens, the input\n\ + \      will be split at separator indices, and each element\ + \ of\n      the split list will be considered a separate class.\ + \ For\n      example,\n      Input: .. code-block:: python\ + \ [ [\"foo bar\"],   # Example\n      0 [\"foo\", \"bar\"],\ + \   # Example 1 [\"foo\"],   # Example\n      2 [\"bar\"\ + ],   # Example 3 ]\n      Output (with default separator=\"\ + \ \"): .. code-block:: python [\n      [1, 1],   # Example\ + \ 0 [1, 1],   # Example 1\n      [1, 0],   # Example\ + \ 2 [0, 1],   # Example 3 ]\n      Arguments:\n          \ + \ input_columns: A list with a single column to\n          perform\ + \ the multi-hot-encoding on.\n          output_columns: A list with\ + \ a single\n          output column name, corresponding to the output\ + \ of our\n          transformation.\n          top_k: Number\ + \ of the most frequent words\n          in the vocabulary to use\ + \ for generating dictionary\n          lookup indices. If not specified,\ + \ all words in the\n          vocabulary will be used. Defaults\ + \ to None.\n          frequency_threshold: Limit the\n          \ + \ dictionary's vocabulary only to words whose number of\n          \ + \ occurrences in the input exceeds frequency_threshold. If\n          \ + \ not specified, all words in the vocabulary will be\n          \ + \ included. If both top_k and frequency_threshold are\n          \ + \ specified, a word must satisfy both conditions to be\n\ + \          included. Defaults to None.\n          separator:\ + \ Separator to split input string\n          into tokens. Defaults\ + \ to ' '.\n  MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \      column.\n      Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.'
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series identifier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata.
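+ # Note (illustrative comment, not emitted by the KFP compiler): the component
+ # definitions above only declare inputs and outputs; the deploymentSpec below
+ # binds each component to a container executor. The container args use two
+ # structured placeholders that the pipeline backend resolves at runtime:
+ # {"Concat": [...]} joins its resolved parts into one string, and
+ # {"IfPresent": {"InputName": ..., "Then": ..., "Else": ...}} emits the Then
+ # branch only when the named input was actually provided. A minimal Python
+ # sketch of that resolution (hypothetical helper, simplified relative to the
+ # real backend, which also substitutes the {{$...}} expressions):
+ #
+ #   def resolve(arg, inputs):
+ #       if isinstance(arg, str):   # plain strings pass through unchanged
+ #           return arg
+ #       if 'Concat' in arg:        # concatenate the resolved parts
+ #           return ''.join(resolve(p, inputs) for p in arg['Concat'])
+ #       spec = arg['IfPresent']    # include Then only if the input was given
+ #       branch = spec['Then'] if spec['InputName'] in inputs else spec.get('Else', '')
+ #       parts = branch if isinstance(branch, list) else [branch]
+ #       return ' '.join(resolve(p, inputs) for p in parts)
+ #
+ #   # e.g. a "--group_columns=<value>" flag is only added to the command line
+ #   # when the optional group_columns input is set on the run.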
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. + name: time-series-dense-encoder-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: 
evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + 
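The long run of pipelinechannel--* inputs threaded into exit-handler-1 in this block is what the KFP v2 compiler emits for a dsl.ExitHandler group: every pipeline input consumed inside the handler is re-exposed on the group component with a pipelinechannel-- prefix, and the exit task (automl-tabular-finalizer above) is compiled with triggerPolicy ALL_UPSTREAM_TASKS_COMPLETED so it still runs when the guarded tasks fail. A minimal sketch that compiles to the same shape; the component names and bodies below are invented for illustration:

from kfp import compiler, dsl


@dsl.component(base_image='python:3.7')
def cleanup(root_dir: str):
    # Stand-in for the finalizer: runs once the guarded tasks finish or fail.
    print(f'cleaning up {root_dir}')


@dsl.component(base_image='python:3.7')
def train(project: str, root_dir: str):
    # Stand-in for the forecasting work guarded by the exit handler.
    print(f'training in {project} under {root_dir}')


@dsl.pipeline(name='exit-handler-demo')
def demo_pipeline(project: str, root_dir: str):
    exit_task = cleanup(root_dir=root_dir)
    # Everything under this block becomes a comp-exit-handler-1 sub-DAG in the
    # compiled YAML; `project` and `root_dir` reappear on that component as
    # pipelinechannel--project and pipelinechannel--root_dir inputs.
    with dsl.ExitHandler(exit_task):
        train(project=project, root_dir=root_dir)


if __name__ == '__main__':
    compiler.Compiler().compile(demo_pipeline, package_path='exit_handler_demo.yaml')

Compiling this and opening exit_handler_demo.yaml shows the same exit-handler/finalizer structure as this spec, just with far fewer channels.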
pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. 
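data_source_csv_filenames and data_source_bigquery_table_path, defined just above, are both optional because the _set_optional_inputs component shown earlier resolves them at runtime: a vertex_dataset, when supplied, wins and its gcsSource or bigquerySource is copied into the matching string; otherwise whichever string is non-empty is used, and an error is raised when nothing is provided. A dependency-free restatement of that precedence (the function name and sample values are illustrative, not part of the package):

from typing import Optional, Tuple


def resolve_data_source(
    vertex_input_config: Optional[dict],
    data_source_csv_filenames: str = '',
    data_source_bigquery_table_path: str = '',
) -> Tuple[str, str]:
    """Mirrors the precedence in _set_optional_inputs, minus the Vertex SDK calls."""
    if vertex_input_config is not None:
        # A Vertex dataset overrides both plain strings, mirroring how the
        # component reads gcsSource / bigquerySource from the dataset's inputConfig.
        if 'gcsSource' in vertex_input_config:
            data_source_csv_filenames = ','.join(vertex_input_config['gcsSource']['uri'])
        elif 'bigquerySource' in vertex_input_config:
            data_source_bigquery_table_path = vertex_input_config['bigquerySource']['uri']
    elif not data_source_csv_filenames and not data_source_bigquery_table_path:
        raise ValueError(
            'One of vertex_dataset, data_source_csv_filenames,'
            ' data_source_bigquery_table_path must be specified')
    return data_source_csv_filenames, data_source_bigquery_table_path


# A dataset backed by BigQuery resolves to its bq:// table path.
print(resolve_data_source({'bigquerySource': {'uri': 'bq://my-project.sales.history'}}))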
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' 
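When run_evaluation (defined further down) is set to True, the evaluation_* knobs above size two separate stages: the batch explain / batch predict jobs (machine type plus starting and max replica counts) and the Dataflow job that computes metrics (machine type, worker counts, disk). An illustrative override block; every concrete value here is an assumption for a mid-sized dataset, not a recommendation:

# Keys mirror the inputDefinitions in this spec; merge the dict into the
# parameter_values of a run that also sets run_evaluation=True.
evaluation_sizing = {
    'run_evaluation': True,
    'evaluated_examples_bigquery_path': 'bq://my-project.forecast_eval',
    # Batch explain / batch predict jobs.
    'evaluation_batch_explain_machine_type': 'n1-highmem-8',
    'evaluation_batch_explain_starting_replica_count': 10,
    'evaluation_batch_explain_max_replica_count': 22,
    'evaluation_batch_predict_machine_type': 'n1-standard-16',
    'evaluation_batch_predict_starting_replica_count': 10,
    'evaluation_batch_predict_max_replica_count': 25,
    # Dataflow-based metrics computation.
    'evaluation_dataflow_machine_type': 'n1-standard-16',
    'evaluation_dataflow_starting_num_workers': 5,
    'evaluation_dataflow_max_num_workers': 25,
    'evaluation_dataflow_disk_size_gb': 50,
}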
+ isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + description: 'A list of time series attribute column names that define the + + time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions + + aggregated over both the horizon and time series in the same hierarchy + + group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated over + + time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + holiday_regions: + description: 'The geographical regions where the holiday effect is + + applied in modeling.' + isOptional: true + parameterType: LIST + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_description: + defaultValue: '' + description: Optional description. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + description: Optional display name for model. + isOptional: true + parameterType: STRING + num_selected_trials: + defaultValue: 10.0 + description: Number of selected trails. + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", + + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + + "minimize-quantile-loss".' + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: '`True` to evaluate the ensembled model on the test split.' + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' 
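optimization_objective and quantiles, both defined just above, carry constraints that would otherwise only surface once a job is running: the objective must be one of the seven listed strings, and at most five unique quantiles, each strictly between 0 and 1, may be supplied. A small client-side check that restates those documented rules (this helper is not part of the package):

from typing import List, Optional

_ALLOWED_OBJECTIVES = frozenset([
    'minimize-rmse', 'minimize-mae', 'minimize-rmsle', 'minimize-rmspe',
    'minimize-wape-mae', 'minimize-mape', 'minimize-quantile-loss',
])


def check_objective_and_quantiles(
    optimization_objective: str,
    quantiles: Optional[List[float]] = None,
) -> None:
    """Raises ValueError when the documented constraints are violated."""
    if optimization_objective not in _ALLOWED_OBJECTIVES:
        raise ValueError(
            f'Unsupported optimization_objective: {optimization_objective!r}. '
            f'Expected one of {sorted(_ALLOWED_OBJECTIVES)}.')
    quantiles = quantiles or []
    if len(quantiles) > 5:
        raise ValueError('At most 5 quantiles are allowed.')
    if len(set(quantiles)) != len(quantiles):
        raise ValueError('Quantiles must be unique.')
    if any(not 0.0 < q < 1.0 for q in quantiles):
        raise ValueError('Each quantile must lie strictly between 0 and 1.')


check_objective_and_quantiles('minimize-quantile-loss', [0.1, 0.5, 0.9])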
+ isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 2 trainer worker pool spec.' + isOptional: true + parameterType: LIST + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated + + over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: The column that indicates the time. + parameterType: STRING + time_series_attribute_columns: + description: 'The columns that are invariant across the + + same time series.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + description: 'The column that distinguish the different + + time series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Dict mapping auto and/or type-resolutions to feature + + columns. The supported types are: auto, categorical, numeric, text, and + + timestamp.' + parameterType: STRUCT + unavailable_at_forecast_columns: + description: 'The columns that are unavailable at the + + forecast time.' + isOptional: true + parameterType: LIST + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: 0.0 + description: The maximum number of windows that will be generated. + isOptional: true + parameterType: NUMBER_INTEGER + window_predefined_column: + defaultValue: '' + description: The column that indicate the start of each window. + isOptional: true + parameterType: STRING + window_stride_length: + defaultValue: 0.0 + description: The stride length to generate the window. 
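Several parameters in this block have no default and must always be supplied: target_column, time_column, time_series_identifier_column, transformations, and train_budget_milli_node_hours (plus project, location, and root_dir a little earlier). An illustrative set of values for a made-up retail dataset, using only the transformation types the spec above allows (auto, categorical, numeric, text, timestamp); every column name is invented:

# Column roles, transformations, and split for a hypothetical sales table.
forecasting_inputs = {
    'target_column': 'sales',
    'time_column': 'date',
    'time_series_identifier_column': 'store_id',
    'time_series_attribute_columns': ['region', 'store_format'],
    'available_at_forecast_columns': ['date', 'promo_planned'],
    'unavailable_at_forecast_columns': ['sales', 'foot_traffic'],
    'transformations': {
        'timestamp': ['date'],
        'categorical': ['region', 'store_format', 'promo_planned'],
        'numeric': ['foot_traffic'],
        'auto': ['sales'],
    },
    # 1,000 milli node hours == 1 node hour.
    'train_budget_milli_node_hours': 1000.0,
    # Optional explicit split, overriding the -1.0 defaults in the spec above.
    'training_fraction': 0.8,
    'validation_fraction': 0.1,
    'test_fraction': 0.1,
}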
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py new file mode 100644 index 0000000000..2cf4444e5a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py @@ -0,0 +1,1023 @@ +"""Util functions for Vertex Forecasting pipelines.""" + +import os +import pathlib +from typing import Any, Dict, FrozenSet, List, Optional, Tuple + +_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() + +_RETAIL_MODEL_DISABLED_OPTIONS = frozenset([ + 'quantiles', + 'enable_probabilistic_inference', +]) + + +def _get_base_forecasting_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = 
None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, + group_columns: Optional[List[str]] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, + fields_to_exclude: FrozenSet[str] = frozenset(), +) -> Dict[str, Any]: + """Formats a set of parameters common across Vertex forecasting pipelines.""" + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not stage_2_trainer_worker_pool_specs_override: + stage_2_trainer_worker_pool_specs_override = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'dataflow_service_account': dataflow_service_account, + 'evaluated_examples_bigquery_path': evaluated_examples_bigquery_path, + 'target_column': target_column, + 'optimization_objective': optimization_objective, + 'transformations': transformations, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'time_series_attribute_columns': time_series_attribute_columns, + 'available_at_forecast_columns': available_at_forecast_columns, + 'unavailable_at_forecast_columns': unavailable_at_forecast_columns, + 'forecast_horizon': forecast_horizon, + 'context_window': context_window, + 'window_predefined_column': window_predefined_column, + 'window_stride_length': window_stride_length, + 'window_max_count': window_max_count, + 'holiday_regions': holiday_regions, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'num_selected_trials': num_selected_trials, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'weight_column': weight_column, + 'dataflow_subnetwork': dataflow_subnetwork, + 'feature_transform_engine_dataflow_machine_type': ( + feature_transform_engine_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + feature_transform_engine_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + feature_transform_engine_dataflow_disk_size_gb + ), + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'feature_transform_engine_bigquery_staging_full_dataset_id': ( + feature_transform_engine_bigquery_staging_full_dataset_id + ), + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + 
stage_1_tuner_worker_pool_specs_override + ), + 'stage_2_trainer_worker_pool_specs_override': ( + stage_2_trainer_worker_pool_specs_override + ), + 'quantiles': quantiles, + 'encryption_spec_key_name': encryption_spec_key_name, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + 'run_evaluation': run_evaluation, + 'group_columns': group_columns, + 'group_total_weight': group_total_weight, + 'temporal_total_weight': temporal_total_weight, + 'group_temporal_total_weight': group_temporal_total_weight, + } + + # Filter out empty values and those excluded from the particular pipeline. + # (example: TFT and Seq2Seq don't support `quantiles`.) + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None and param not in fields_to_exclude + } + ) + return parameter_values + + +def get_learn_to_learn_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = 
True, + group_columns: Optional[List[str]] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +) -> Tuple[str, Dict[str, Any]]: + """Returns l2l_forecasting pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e., a value of 1,000 in this field means 1 node + hour. + time_column: The column that indicates the time. + time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicates the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + num_selected_trials: Number of selected trials. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'.
+ evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. + evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. If quantiles are specified, then the quantiles of the + distribution are also returned. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed, with values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + group_columns: A list of time series attribute column names that define the + time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group.
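+
+  Example:
+    An illustrative call using placeholder values (the project, bucket,
+    table, and column names below are examples only, not values defined in
+    this module):
+
+      template_path, parameter_values = (
+          get_learn_to_learn_forecasting_pipeline_and_parameters(
+              project='example-project',
+              location='us-central1',
+              root_dir='gs://example-bucket/pipeline_root',
+              target_column='sales',
+              optimization_objective='minimize-rmse',
+              transformations={'auto': ['sales', 'date', 'store_id']},
+              train_budget_milli_node_hours=1000,
+              time_column='date',
+              time_series_identifier_column='store_id',
+              data_source_bigquery_table_path=(
+                  'bq://example-project.example_dataset.sales'
+              ),
+              forecast_horizon=30,
+              context_window=30,
+          )
+      )
+
+    The returned pipeline definition path and parameter values can then be
+    submitted to Vertex AI Pipelines, for example via
+    google.cloud.aiplatform.PipelineJob.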
+ """ + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + quantiles=quantiles, + encryption_spec_key_name=encryption_spec_key_name, + enable_probabilistic_inference=enable_probabilistic_inference, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + group_columns=group_columns, + group_total_weight=group_total_weight, + temporal_total_weight=temporal_total_weight, + group_temporal_total_weight=group_temporal_total_weight, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'learn_to_learn_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: 
str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, + group_columns: Optional[List[str]] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +) -> Tuple[str, Dict[str, Any]]: + """Returns timeseries_dense_encoder_forecasting pipeline and parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e., a value of 1,000 in this field means 1 node + hour. + time_column: The column that indicates the time.
+ time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicates the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + num_selected_trials: Number of selected trials. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'. + evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. + evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. If quantiles are specified, then the quantiles of the + distribution are also returned.
+ quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed, with values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + group_columns: A list of time series attribute column names that define the + time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group. + """ + + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override,
stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + quantiles=quantiles, + encryption_spec_key_name=encryption_spec_key_name, + enable_probabilistic_inference=enable_probabilistic_inference, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + group_columns=group_columns, + group_total_weight=group_total_weight, + temporal_total_weight=temporal_total_weight, + group_temporal_total_weight=group_temporal_total_weight, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'time_series_dense_encoder_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, +): + """Returns tft_forecasting pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. 
+ root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e., a value of 1,000 in this field means 1 node + hour. + time_column: The column that indicates the time. + time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicates the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'. + evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers.
+ evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + """ + # TFT should only have 1 selected trial to freeze the ensemble size at 1. + excluded_parameters = _RETAIL_MODEL_DISABLED_OPTIONS.union({ + 'num_selected_trials', + }) + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + encryption_spec_key_name=encryption_spec_key_name, + 
model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + fields_to_exclude=excluded_parameters, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'temporal_fusion_transformer_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_sequence_to_sequence_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, +): + """Returns seq2seq forecasting pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. 
+ train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e., a value of 1,000 in this field means 1 node + hour. + time_column: The column that indicates the time. + time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicates the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + num_selected_trials: Number of selected trials. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'. + evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. + evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + encryption_spec_key_name: The KMS key name.
+ model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + """ + + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + encryption_spec_key_name=encryption_spec_key_name, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + fields_to_exclude=_RETAIL_MODEL_DISABLED_OPTIONS, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'sequence_to_sequence_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py new file mode 
100644 index 0000000000..764539056a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -0,0 +1,35 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Preview AutoML tabular components.""" + +from google_cloud_pipeline_components.preview.automl.tabular.feature_selection import tabular_feature_ranking_and_selection as FeatureSelectionOp +from google_cloud_pipeline_components.preview.automl.tabular.feature_transform_engine import feature_transform_engine as FeatureTransformEngineOp +from google_cloud_pipeline_components.preview.automl.tabular.tabnet_hyperparameter_tuning_job import tabnet_hyperparameter_tuning_job as TabNetHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.tabnet_trainer import tabnet_trainer as TabNetTrainerOp +from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_hyperparameter_tuning_job import wide_and_deep_hyperparameter_tuning_job as WideAndDeepHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_trainer import wide_and_deep_trainer as WideAndDeepTrainerOp +from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import xgboost_hyperparameter_tuning_job as XGBoostHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.xgboost_trainer import xgboost_trainer as XGBoostTrainerOp + +__all__ = [ + 'FeatureSelectionOp', + 'WideAndDeepHyperparameterTuningJobOp', + 'WideAndDeepTrainerOp', + 'TabNetHyperparameterTuningJobOp', + 'TabNetTrainerOp', + 'FeatureTransformEngineOp', + 'XGBoostHyperparameterTuningJobOp', + 'XGBoostTrainerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml new file mode 100644 index 0000000000..b10b4b421a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -0,0 +1,11427 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-feature-selection-pipeline +# Description: The AutoML Tabular pipeline. 
+# Inputs: +# additional_experiments: dict +# apply_feature_selection_tuning: bool [Default: False] +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# disable_early_stopping: bool [Default: False] +# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# distill_batch_predict_max_replica_count: int [Default: 25.0] +# distill_batch_predict_starting_replica_count: int [Default: 25.0] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# location: str +# max_selected_features: int [Default: 1000.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] +# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] +# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# transformations: str +# validation_fraction: float [Default: -1.0] +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-3-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-3-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + 
schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-3: + executorLabel: exec-automl-tabular-ensemble-3 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. 
+ parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-3: + executorLabel: exec-automl-tabular-infra-validator-3 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-stage-1-tuner-2: + executorLabel: exec-automl-tabular-stage-1-tuner-2 + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform: + executorLabel: exec-automl-tabular-transform + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform-2: + executorLabel: exec-automl-tabular-transform-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. 
If not set, default to 25.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        dataflow_service_account:
+          defaultValue: ''
+          description: 'Custom service account to run
+
+            dataflow jobs.'
+          isOptional: true
+          parameterType: STRING
+        dataflow_subnetwork:
+          defaultValue: ''
+          description: 'Dataflow''s fully qualified subnetwork
+
+            name, when empty the default subnetwork will be used. More
+
+            details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
+          isOptional: true
+          parameterType: STRING
+        dataflow_use_public_ips:
+          defaultValue: true
+          description: 'Specifies whether Dataflow
+
+            workers use public IP addresses.'
+          isOptional: true
+          parameterType: BOOLEAN
+        encryption_spec_key_name:
+          defaultValue: ''
+          description: Customer-managed encryption key.
+          isOptional: true
+          parameterType: STRING
+        location:
+          description: Location for running the Cross-validation trainer.
+          parameterType: STRING
+        project:
+          description: Project to run Cross-validation trainer.
+          parameterType: STRING
+        root_dir:
+          description: The Cloud Storage location to store the output.
+          parameterType: STRING
+    outputDefinitions:
+      artifacts:
+        materialized_eval_split:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The materialized test split.
+        materialized_test_split:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+        materialized_train_split:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The materialized train split.
+        training_schema_uri:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The training schema.
+        transform_output:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The transform output artifact.
+      parameters:
+        gcp_resources:
+          description: 'GCP resources created by this component. For more details,
+            see
+
+            https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
+          parameterType: STRING
+  comp-bool-identity:
+    executorLabel: exec-bool-identity
+    inputDefinitions:
+      parameters:
+        value:
+          description: Boolean value to return
+          parameterType: BOOLEAN
+    outputDefinitions:
+      parameters:
+        Output:
+          parameterType: STRING
+  comp-bool-identity-2:
+    executorLabel: exec-bool-identity-2
+    inputDefinitions:
+      parameters:
+        value:
+          description: Boolean value to return
+          parameterType: BOOLEAN
+    outputDefinitions:
+      parameters:
+        Output:
+          parameterType: STRING
+  comp-bool-identity-3:
+    executorLabel: exec-bool-identity-3
+    inputDefinitions:
+      parameters:
+        value:
+          description: Boolean value to return
+          parameterType: BOOLEAN
+    outputDefinitions:
+      parameters:
+        Output:
+          parameterType: STRING
+  comp-calculate-training-parameters:
+    executorLabel: exec-calculate-training-parameters
+    inputDefinitions:
+      parameters:
+        fast_testing:
+          defaultValue: false
+          description: Internal flag used for presubmit tests.
+          isOptional: true
+          parameterType: BOOLEAN
+        is_skip_architecture_search:
+          defaultValue: false
+          description: 'If component is being called in the
+
+            skip_architecture_search pipeline.'
+          isOptional: true
+          parameterType: BOOLEAN
+        run_distillation:
+          description: Whether to run distill in the training pipeline.
+          parameterType: BOOLEAN
+        stage_1_num_parallel_trials:
+          description: Number of parallel trials for stage 1.
+          parameterType: NUMBER_INTEGER
+        stage_2_num_parallel_trials:
+          description: Number of parallel trials for stage 2.
+          parameterType: NUMBER_INTEGER
+        train_budget_milli_node_hours:
+          description: 'The train budget of creating this model,
+
+            expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+            hour.'
+          parameterType: NUMBER_DOUBLE
+    outputDefinitions:
+      parameters:
+        distill_stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        reduce_search_space_mode:
+          parameterType: STRING
+        stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_1_num_selected_trials:
+          parameterType: NUMBER_INTEGER
+        stage_1_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+        stage_2_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_2_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+  comp-calculate-training-parameters-2:
+    executorLabel: exec-calculate-training-parameters-2
+    inputDefinitions:
+      parameters:
+        fast_testing:
+          defaultValue: false
+          description: Internal flag used for presubmit tests.
+          isOptional: true
+          parameterType: BOOLEAN
+        is_skip_architecture_search:
+          defaultValue: false
+          description: 'If component is being called in the
+
+            skip_architecture_search pipeline.'
+          isOptional: true
+          parameterType: BOOLEAN
+        run_distillation:
+          description: Whether to run distill in the training pipeline.
+          parameterType: BOOLEAN
+        stage_1_num_parallel_trials:
+          description: Number of parallel trials for stage 1.
+          parameterType: NUMBER_INTEGER
+        stage_2_num_parallel_trials:
+          description: Number of parallel trials for stage 2.
+          parameterType: NUMBER_INTEGER
+        train_budget_milli_node_hours:
+          description: 'The train budget of creating this model,
+
+            expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+            hour.'
+          parameterType: NUMBER_DOUBLE
+    outputDefinitions:
+      parameters:
+        distill_stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        reduce_search_space_mode:
+          parameterType: STRING
+        stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_1_num_selected_trials:
+          parameterType: NUMBER_INTEGER
+        stage_1_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+        stage_2_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_2_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+  comp-check-if-binary-classification:
+    executorLabel: exec-check-if-binary-classification
+    inputDefinitions:
+      artifacts:
+        example_gen_metadata:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: metadata generated by example gen.
+ outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + 
outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + runtimeValue: + constant: '' + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + 
schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + 
predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + 
pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-7 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-7 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: 
pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + feature_ranking: + componentInputArtifact: pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking + materialized_eval_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + 
num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + tune_feature_selection_rate: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity-2 + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_distillation + taskInfo: + name: bool-identity-3 + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: 
pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + condition-7: + componentRef: + name: comp-condition-7 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + - calculate-training-parameters-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + pipelinechannel--purge-unused-features-output_metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + 
pipelinechannel--tabular-stats-and-example-gen-eval_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + pipelinechannel--tabular-stats-and-example-gen-test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + pipelinechannel--tabular-stats-and-example-gen-train_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + parameters: + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: distill_stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: is-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'true' + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 
+ pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + 
componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: 
pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + 
parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-7: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-8 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-8 + tasks: + automl-tabular-ensemble-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-3 + dependentTasks: + - automl-tabular-stage-1-tuner-2 + - automl-tabular-transform-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-3 + automl-tabular-infra-validator-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-3 + dependentTasks: + - automl-tabular-ensemble-3 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + taskInfo: + name: automl-tabular-infra-validator-3 + automl-tabular-stage-1-tuner-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner-2 + dependentTasks: + - automl-tabular-transform-2 + inputs: + artifacts: + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform-2 + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform-2 + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + parameters: + deadline_hours: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + 
componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + runtimeValue: + constant: 1.0 + single_run_max_secs: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner-2 + automl-tabular-transform-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform-2 + dependentTasks: + - write-bp-result-path + - write-bp-result-path-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + eval_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path-2 + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + train_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform-2 + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - automl-tabular-ensemble-3 + - model-upload-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + pipelinechannel--model-upload-3-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-3 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + 
componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-batch-predict-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-3 + dependentTasks: + - read-input-uri + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + 
runtimeValue: + constant: batch-predict-train-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-3 + model-batch-predict-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-4 + dependentTasks: + - read-input-uri-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri-2 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-eval-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-4 + model-upload-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-3 + dependentTasks: + - automl-tabular-ensemble-3 + - automl-tabular-infra-validator-3 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + parameters: + display_name: + runtimeValue: + constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-3 + read-input-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + taskInfo: + name: read-input-uri + read-input-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri-2 + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + taskInfo: + name: read-input-uri-2 + write-bp-result-path: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path + 
dependentTasks: + - model-batch-predict-3 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-3 + taskInfo: + name: write-bp-result-path + write-bp-result-path-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path-2 + dependentTasks: + - model-batch-predict-4 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-4 + taskInfo: + name: write-bp-result-path-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + 
pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-8: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-3 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-3 + tasks: + feature-attribution-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-3 + dependentTasks: + - model-batch-explanation-3 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-3 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-3 + model-batch-explanation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-3 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + 
componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-3 + model-batch-predict-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-5 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-5 + model-evaluation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-3 + dependentTasks: + - model-batch-predict-5 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-5 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: 
pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-3 + model-evaluation-import-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-3 + dependentTasks: + - feature-attribution-3 + - model-evaluation-3 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-3 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-3 + model: + componentInputArtifact: pipelinechannel--model-upload-3-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-3 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-3-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-transform: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform + dependentTasks: + - purge-unused-features + - tabular-stats-and-example-gen + inputs: + artifacts: + dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform + check-if-binary-classification: + cachingOptions: + enableCache: true + componentRef: + name: comp-check-if-binary-classification + dependentTasks: + - tabular-stats-and-example-gen + inputs: + artifacts: + example_gen_metadata: + 
taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + taskInfo: + name: check-if-binary-classification + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - purge-unused-features + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--purge-unused-features-output_metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - purge-unused-features + - string-not-empty + - tabular-feature-ranking-and-selection + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--purge-unused-features-output_metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: + taskOutputArtifact: + outputArtifactKey: feature_ranking + producerTask: tabular-feature-ranking-and-selection + 
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - automl-tabular-transform + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + taskInfo: + name: merge-materialized-splits + purge-unused-features: + cachingOptions: + enableCache: true + componentRef: + name: comp-purge-unused-features + dependentTasks: + - tabular-feature-ranking-and-selection + - 
tabular-stats-and-example-gen + inputs: + artifacts: + selected_features: + taskOutputArtifact: + outputArtifactKey: selected_features + producerTask: tabular-feature-ranking-and-selection + unpurged_metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + taskInfo: + name: purge-unused-features + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + runtimeValue: + constant: '' + taskInfo: + name: string-not-empty + tabular-feature-ranking-and-selection: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-feature-ranking-and-selection + dependentTasks: + - check-if-binary-classification + - tabular-stats-and-example-gen + inputs: + artifacts: + data_source: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + binary_classification: + taskOutputParameter: + outputParameterKey: Output + producerTask: check-if-binary-classification + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column_name: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: tabular-feature-ranking-and-selection + tabular-stats-and-example-gen: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-stats-and-example-gen + inputs: + parameters: + additional_experiments_json: + componentInputParameter: pipelinechannel--additional_experiments + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + encryption_spec_key_name: + componentInputParameter: 
pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + quantiles: + componentInputParameter: pipelinechannel--quantiles + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column_name: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + transformations: + runtimeValue: + constant: '[]' + transformations_path: + componentInputParameter: pipelinechannel--transformations + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column_name: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabular-stats-and-example-gen + inputDefinitions: + parameters: + pipelinechannel--additional_experiments: + parameterType: STRUCT + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + parameterType: STRING + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--transformations: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + 
schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. 
+ parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' 
+          isOptional: true
+          parameterType: BOOLEAN
+        location:
+          defaultValue: us-central1
+          description: 'Location running feature attribution. If not
+
+            set, defaulted to `us-central1`.'
+          isOptional: true
+          parameterType: STRING
+        predictions_format:
+          defaultValue: jsonl
+          description: 'The file format for the batch
+
+            prediction results. `jsonl`, `csv`, and `bigquery` are the allowed
+
+            formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.'
+          isOptional: true
+          parameterType: STRING
+        project:
+          description: Project to run feature attribution container.
+          parameterType: STRING
+    outputDefinitions:
+      artifacts:
+        feature_attributions:
+          artifactType:
+            schemaTitle: system.Metrics
+            schemaVersion: 0.0.1
+      parameters:
+        gcp_resources:
+          description: 'Serialized gcp_resources proto tracking the dataflow
+
+            job. For more details, see
+
+            https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
+          parameterType: STRING
+  comp-feature-attribution-3:
+    executorLabel: exec-feature-attribution-3
+    inputDefinitions:
+      artifacts:
+        predictions_bigquery_source:
+          artifactType:
+            schemaTitle: google.BQTable
+            schemaVersion: 0.0.1
+          description: 'BigQuery table
+
+            with prediction or explanation data to be used for this evaluation. For
+
+            prediction results, the table column should be named "predicted_*".'
+          isOptional: true
+        predictions_gcs_source:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: 'An artifact with its
+
+            URI pointing toward a GCS directory with prediction or explanation files
+
+            to be used for this evaluation. For prediction results, the files should
+
+            be named "prediction.results-*" or "predictions_". For explanation
+
+            results, the files should be named "explanation.results-*".'
+          isOptional: true
+      parameters:
+        dataflow_disk_size:
+          defaultValue: 50.0
+          description: 'The disk size (in GB) of the machine
+
+            executing the evaluation run. If not set, defaulted to `50`.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        dataflow_machine_type:
+          defaultValue: n1-standard-4
+          description: 'The machine type executing the
+
+            evaluation run. If not set, defaulted to `n1-standard-4`.'
+          isOptional: true
+          parameterType: STRING
+        dataflow_max_workers_num:
+          defaultValue: 5.0
+          description: 'The max number of workers
+
+            executing the evaluation run. If not set, defaulted to `25`.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        dataflow_service_account:
+          defaultValue: ''
+          description: 'Service account to run the
+
+            dataflow job. If not set, dataflow will use the default worker service
+
+            account. For more details, see
+
+            https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account'
+          isOptional: true
+          parameterType: STRING
+        dataflow_subnetwork:
+          defaultValue: ''
+          description: 'Dataflow''s fully qualified subnetwork
+
+            name, when empty the default subnetwork will be used. More details:
+
+            https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
+          isOptional: true
+          parameterType: STRING
+        dataflow_use_public_ips:
+          defaultValue: true
+          description: 'Specifies whether Dataflow
+
+            workers use public IP addresses.'
+          isOptional: true
+          parameterType: BOOLEAN
+        dataflow_workers_num:
+          defaultValue: 1.0
+          description: 'The number of workers executing the
+
+            evaluation run. If not set, defaulted to `10`.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        encryption_spec_key_name:
+          defaultValue: ''
+          description: 'Customer-managed encryption key
+
+            for the Dataflow job. If this is set, then all resources created by the
+
+            Dataflow job will be encrypted with the provided encryption key.'
+          isOptional: true
+          parameterType: STRING
+        force_direct_runner:
+          defaultValue: false
+          description: 'Flag to use Beam DirectRunner. If set to true,
+
+            use Apache Beam DirectRunner to execute the task locally instead of
+
+            launching a Dataflow job.'
+          isOptional: true
+          parameterType: BOOLEAN
+        location:
+          defaultValue: us-central1
+          description: 'Location running feature attribution. If not
+
+            set, defaulted to `us-central1`.'
+          isOptional: true
+          parameterType: STRING
+        predictions_format:
+          defaultValue: jsonl
+          description: 'The file format for the batch
+
+            prediction results. `jsonl`, `csv`, and `bigquery` are the allowed
+
+            formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.'
+          isOptional: true
+          parameterType: STRING
+        project:
+          description: Project to run feature attribution container.
+          parameterType: STRING
+    outputDefinitions:
+      artifacts:
+        feature_attributions:
+          artifactType:
+            schemaTitle: system.Metrics
+            schemaVersion: 0.0.1
+      parameters:
+        gcp_resources:
+          description: 'Serialized gcp_resources proto tracking the dataflow
+
+            job. For more details, see
+
+            https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
+          parameterType: STRING
+  comp-importer:
+    executorLabel: exec-importer
+    inputDefinitions:
+      parameters:
+        uri:
+          parameterType: STRING
+    outputDefinitions:
+      artifacts:
+        artifact:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+  comp-merge-materialized-splits:
+    executorLabel: exec-merge-materialized-splits
+    inputDefinitions:
+      artifacts:
+        split_0:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The first materialized split.
+        split_1:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The second materialized split.
+ outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-3: + executorLabel: exec-model-batch-explanation-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-3: + executorLabel: exec-model-batch-predict-3 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. 
In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. 
If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' 
+ isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-4: + executorLabel: exec-model-batch-predict-4 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-5: + executorLabel: exec-model-batch-predict-5 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true
+ parameterType: STRING
+ job_display_name:
+ description: The user-defined name of this BatchPredictionJob.
+ parameterType: STRING
+ key_field:
+ defaultValue: ''
+ description: "The name of the field that is considered as a key.\nThe values\
+ \ identified by the key field is not included in the\ntransformed instances\
+ \ that is sent to the Model. This is similar to\nspecifying this name\
+ \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
+ \ output will not include the instances. Instead the\noutput will only\
+ \ include the value of the key field, in a field named\n`key` in the output:\n\
+ \ * For `jsonl` output format, the output will have a `key` field\n \
+ \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
+ \ the output will have a `key`\n column instead of the instance\
+ \ feature columns.\nThe input must be JSONL with objects at each line,\
+ \ CSV, BigQuery\nor TfRecord."
+ isOptional: true
+ parameterType: STRING
+ labels:
+ defaultValue: {}
+ description: 'The labels with user-defined metadata to
+
+ organize your BatchPredictionJobs. Label keys and values can be no
+
+ longer than 64 characters (Unicode codepoints), can only contain
+
+ lowercase letters, numeric characters, underscores and dashes.
+
+ International characters are allowed. See https://goo.gl/xmQnxf for
+
+ more information and examples of labels.'
+ isOptional: true
+ parameterType: STRUCT
+ location:
+ defaultValue: us-central1
+ description: 'Location for creating the BatchPredictionJob.
+
+ If not set, default to us-central1.'
+ isOptional: true
+ parameterType: STRING
+ machine_type:
+ defaultValue: ''
+ description: 'The type of machine for running batch
+
+ prediction on dedicated resources. If the Model supports
+
+ DEDICATED_RESOURCES this config may be provided (and the job will use
+
+ these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
+
+ this config must be provided. For more details about the
+
+ BatchDedicatedResources, see
+
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
+
+ For more details about the machine spec, see
+
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
+ isOptional: true
+ parameterType: STRING
+ manual_batch_tuning_parameters_batch_size:
+ defaultValue: 0.0
+ description: 'The number of
+
+ the records (e.g. instances) of the operation given in each batch to a
+
+ machine replica. Machine type, and size of a single record should be
+
+ considered when setting this parameter, higher value speeds up the
+
+ batch operation''s execution, but too high value will result in a whole
+
+ batch not fitting in a machine''s memory, and the whole operation will
+
+ fail. The default value is 4.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_replica_count:
+ defaultValue: 0.0
+ description: 'The maximum number of machine replicas the batch operation
+ may be scaled
+
+ to. Only used if `machine_type` is set. Default is 10.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ model_parameters:
+ defaultValue: {}
+ description: The parameters that govern the predictions. The schema of the
+ parameters
+ isOptional: true
+ parameterType: STRUCT
+ predictions_format:
+ defaultValue: jsonl
+ description: "The format in which Vertex AI gives the predictions. Must\
+ \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-3: + executorLabel: exec-model-evaluation-3 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + 
parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' 
+ isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-3: + executorLabel: exec-model-evaluation-import-3 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. 
+ isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + 
parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ model:
+ artifactType:
+ schemaTitle: google.VertexModel
+ schemaVersion: 0.0.1
+ parameters:
+ gcp_resources:
+ parameterType: STRING
+ comp-model-upload-3:
+ executorLabel: exec-model-upload-3
+ inputDefinitions:
+ artifacts:
+ explanation_metadata_artifact:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ isOptional: true
+ unmanaged_container_model:
+ artifactType:
+ schemaTitle: google.UnmanagedContainerModel
+ schemaVersion: 0.0.1
+ isOptional: true
+ parameters:
+ description:
+ defaultValue: ''
+ isOptional: true
+ parameterType: STRING
+ display_name:
+ parameterType: STRING
+ encryption_spec_key_name:
+ defaultValue: ''
+ isOptional: true
+ parameterType: STRING
+ explanation_metadata:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ explanation_parameters:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ labels:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ location:
+ defaultValue: us-central1
+ isOptional: true
+ parameterType: STRING
+ project:
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ model:
+ artifactType:
+ schemaTitle: google.VertexModel
+ schemaVersion: 0.0.1
+ parameters:
+ gcp_resources:
+ parameterType: STRING
+ comp-purge-unused-features:
+ executorLabel: exec-purge-unused-features
+ inputDefinitions:
+ artifacts:
+ selected_features:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: selected feature names separated by comma.
+ unpurged_metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: metadata generated by example gen.
+ outputDefinitions:
+ artifacts:
+ output_metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ comp-read-input-uri:
+ executorLabel: exec-read-input-uri
+ inputDefinitions:
+ artifacts:
+ split_uri:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: The path to the file that contains Dataset data.
+ outputDefinitions:
+ parameters:
+ Output:
+ parameterType: LIST
+ comp-read-input-uri-2:
+ executorLabel: exec-read-input-uri-2
+ inputDefinitions:
+ artifacts:
+ split_uri:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: The path to the file that contains Dataset data.
+ outputDefinitions:
+ parameters:
+ Output:
+ parameterType: LIST
+ comp-string-not-empty:
+ executorLabel: exec-string-not-empty
+ inputDefinitions:
+ parameters:
+ value:
+ description: String value to be checked.
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ Output:
+ parameterType: STRING
+ comp-tabular-feature-ranking-and-selection:
+ executorLabel: exec-tabular-feature-ranking-and-selection
+ inputDefinitions:
+ artifacts:
+ data_source:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ parameters:
+ algorithm:
+ defaultValue: AMI
+ isOptional: true
+ parameterType: STRING
+ binary_classification:
+ defaultValue: 'false'
+ isOptional: true
+ parameterType: STRING
+ dataflow_disk_size_gb:
+ defaultValue: 40.0
+ description: 'The disk size, in gigabytes, to use
+
+ on each Dataflow worker instance. If not set, default to 40.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ dataflow_machine_type:
+ defaultValue: n1-standard-16
+ description: 'The machine type used for dataflow
+
+ jobs. If not set, default to n1-standard-16.'
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key. + + If this is set, then all resources will be encrypted with the provided + + encryption key. data_source(Dataset): The input dataset artifact which + + references csv, BigQuery, or TF Records. target_column_name(str): Target + + column name of the input dataset.' + isOptional: true + parameterType: STRING + location: + description: 'Location for running the feature selection. If not set, + + default to us-central1.' + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'number of features to select by the + + algorithm. If not set, default to 1000.' + isOptional: true + parameterType: NUMBER_INTEGER + prediction_type: + defaultValue: unknown + isOptional: true + parameterType: STRING + project: + description: Project to run feature selection. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + target_column_name: + parameterType: STRING + outputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: the dictionary of feature names and feature ranking values. + selected_features: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A json array of selected feature names. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-tabular-stats-and-example-gen: + executorLabel: exec-tabular-stats-and-example-gen + inputDefinitions: + parameters: + additional_experiments: + defaultValue: '' + isOptional: true + parameterType: STRING + additional_experiments_json: + defaultValue: {} + isOptional: true + parameterType: STRUCT + data_source_bigquery_table_path: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Location for running dataset statistics and example + + generation.' + parameterType: STRING + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_type: + description: 'The prediction type. Supported values: + + "classification", "regression".' + parameterType: STRING + project: + description: 'Project to run dataset statistics and example + + generation.' + parameterType: STRING + quantiles: + defaultValue: [] + isOptional: true + parameterType: LIST + request_type: + defaultValue: COLUMN_STATS_ONLY + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + target_column_name: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Quote escaped JSON string for transformations. Each + + transformation will apply transform function to given input column. And + + the result will be used for training. When creating transformation for + + BigQuery Struct column, the column should be flattened using "." as the + + delimiter.' + parameterType: STRING + transformations_path: + defaultValue: '' + description: 'Path to a GCS file containing JSON + + string for transformations.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column_name: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The instance baseline used to calculate explanations. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + downsampled_test_split_json: + description: The downsampled test split JSON object. + parameterType: LIST + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + test_split_json: + description: The test split JSON object. + parameterType: LIST + comp-write-bp-result-path: + executorLabel: exec-write-bp-result-path + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-write-bp-result-path-2: + executorLabel: exec-write-bp-result-path-2 + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. 
+ outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + 
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-3: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-3: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", 
"{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-stage-1-tuner-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. 
Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-check-if-binary-classification: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _check_if_binary_classification + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _check_if_binary_classification(\n example_gen_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + ) -> str:\n \"\"\"Construct Dataset based on the batch prediction job.\n\ + \n Args:\n example_gen_metadata: metadata generated by example gen.\n\ + \n Returns:\n \"true\" if binary classification, \"false\" otherwise.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(example_gen_metadata, 'r') as f:\n metadata_path = f.read()\n\ + \ metadata = json.loads(metadata_path)\n return str(metadata['objective']\ + \ == 'binary_classification').lower()\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + 
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + 
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-3: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-importer: + importer: + artifactUri: + constant: '' + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", 
"{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", 
"{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-4: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - 
google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-5: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - 
'{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-3: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": 
["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - 
--dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-3: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", 
"{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-3: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-purge-unused-features: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _purge_unused_features + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _purge_unused_features(\n unpurged_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + \ selected_features: dsl.InputPath('SelectedFeatures'),\n output_metadata:\ + \ dsl.OutputPath('TabularExampleGenMetadata'),\n):\n \"\"\"Purge features\ + \ from metadata if not included in selected features.\n\n Args:\n unpurged_metadata:\ + \ metadata generated by example gen.\n selected_features: selected feature\ + \ names separated by comma.\n output_metadata: purged metadata.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(unpurged_metadata, 'r') as f:\n metadata_path = f.read()\n\ + \ metadata = json.loads(metadata_path)\n\n with open(selected_features,\ + \ 'r') as f:\n selected_features_path = f.read()\n features = json.loads(selected_features_path)\n\ + \n train_spec = metadata['train_spec']\n\n features_set = set(features)\n\ + \n purged_transformation_list = []\n for transformation in train_spec['transformations']:\n\ + \ if 'numeric' in transformation:\n if transformation['numeric']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'categorical' in transformation:\n if transformation['categorical']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'timestamp' in transformation:\n if transformation['timestamp']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'text' in transformation:\n if transformation['text']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_numeric' in transformation:\n if transformation['repeated_numeric']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_categorical' in transformation:\n if transformation['repeated_categorical']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_text' in transformation:\n if transformation['repeated_text']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ else:\n raise ValueError(f'unsupported transformation: {transformation}')\n\ + \n train_spec['transformations'] = purged_transformation_list\n metadata['train_spec']\ + \ = train_spec\n\n with open(output_metadata, 'w') as f:\n f.write(json.dumps(metadata))\n\ + \n" + image: python:3.7 + exec-read-input-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-read-input-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-tabular-feature-ranking-and-selection: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", + "\", \"--binary_classification=", "{{$.inputs.parameters[''binary_classification'']}}", + "\", \"--algorithm=", "{{$.inputs.parameters[''algorithm'']}}", "\", \"--feature_selection_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection/\", + \"--job_name=tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", 
"{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}", + "\", \"--feature_selection_result_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}", + "\", \"--selected_features_path=", "{{$.outputs.artifacts[''selected_features''].uri}}", + "\", \"--parse_json=true\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-tabular-stats-and-example-gen: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": + \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": + \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": + \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": + \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": + ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": + ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": + ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", + \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", + "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", + "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", + "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", + "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", + "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", + "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", + "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", + "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", + \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", + "\", \"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", + "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", + \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", + \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", + "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", + "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", + "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", + "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", + \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", + \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", + "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", + "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-write-bp-result-path: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 + exec-write-bp-result-path-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 +pipelineInfo: + description: The AutoML Tabular pipeline. 
+ name: automl-tabular-feature-selection-pipeline +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--additional_experiments: + componentInputParameter: additional_experiments + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: distill_batch_predict_starting_replica_count + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: 
evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + componentInputParameter: stats_and_example_gen_dataflow_machine_type + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + componentInputParameter: stats_and_example_gen_dataflow_max_num_workers + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: 
train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + additional_experiments: + description: Use this field to config private preview features. + isOptional: true + parameterType: STRUCT + apply_feature_selection_tuning: + defaultValue: false + description: tuning feature selection rate if true. + isOptional: true + parameterType: BOOLEAN + cv_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding stage + + cv trainer worker pool spec.' + isOptional: true + parameterType: LIST + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + disable_early_stopping: + defaultValue: false + description: If disable easly stopping. + isOptional: true + parameterType: BOOLEAN + distill_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'The prediction server machine type for + + batch predict component in the model distillation.' + isOptional: true + parameterType: STRING + distill_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The max number of prediction server + + for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + distill_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'The initial number of + + prediction server for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. 
If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: number of features to select for training. + isOptional: true + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + optimization_objective: + description: 'For binary classification, "maximize-au-roc", + + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", + or + + "maximize-recall-at-precision". For multi class classification, + + "minimize-log-loss". 
For regression, "minimize-rmse", "minimize-mae", or + + "minimize-rmsle".' + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when optimization_objective + + is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when optimization_objective is + + ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_num_selected_trials: + defaultValue: 5.0 + description: Number of selected trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in + + GB for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for + + stats_and_example_gen component.' + isOptional: true + parameterType: STRING + stats_and_example_gen_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow + + workers for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: The stratified_split column name. + isOptional: true + parameterType: STRING + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. 
+ isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transformations: + description: 'The path to a GCS file containing the transformations to + + apply.' + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml new file mode 100644 index 0000000000..c625e042bc --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -0,0 +1,8327 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-v2 +# Description: The AutoML Tabular pipeline v2. 
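A minimal usage sketch for a compiled spec such as this one, assuming it is submitted to Vertex AI Pipelines with the google-cloud-aiplatform client: the project, region, bucket, and parameter values below are placeholders, only a few of the inputs listed next are filled in, and artifact inputs such as vertex_dataset are omitted.

from google.cloud import aiplatform

# Placeholder project/region/bucket; swap in real values before running.
aiplatform.init(project='my-project', location='us-central1')

job = aiplatform.PipelineJob(
    display_name='automl-tabular-v2',
    template_path='automl_tabular_v2_pipeline.yaml',
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values={
        # A small subset of the pipeline inputs listed below.
        'project': 'my-project',
        'location': 'us-central1',
        'root_dir': 'gs://my-bucket/automl_tabular_v2',
        'target_column': 'label',
        'prediction_type': 'classification',
        'optimization_objective': 'maximize-au-roc',
        'train_budget_milli_node_hours': 1000.0,
    },
)
job.submit()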
+# Inputs: +# apply_feature_selection_tuning: bool [Default: False] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_early_stopping: bool [Default: False] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# feature_selection_algorithm: str [Default: 'AMI'] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 25.0] +# legacy_transformations_path: str [Default: ''] +# location: str +# max_selected_features: int [Default: 1000.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# num_selected_features: int [Default: 0.0] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact 
+ schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. 
+ isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-2: + executorLabel: exec-bool-identity-2 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-3: + executorLabel: exec-bool-identity-3 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. 
+ isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + - training-configurator-and-validator + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + - training-configurator-and-validator + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + 
componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: check-if-is-eval + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - calculate-training-parameters + inputs: + artifacts: + dataset_stats: + componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats + instance_schema: + 
componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + training_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema + parameters: + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + quantiles: + componentInputParameter: pipelinechannel--quantiles + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts + stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters + stage_2_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: 
NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-split_example_counts: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + 
componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + 
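The comp-condition-* sub-DAGs above, with their long lists of pipelinechannel--* inputs, are the compiled form of conditional branches: every value a branch consumes is forwarded from the parent DAG as a pipelinechannel input. Below is a rough sketch of the authoring pattern that yields this shape, assuming the current kfp DSL; the component and parameter names are invented for illustration and only loosely mirror the tasks above.

from kfp import compiler, dsl


@dsl.component
def bool_identity(value: bool) -> str:
    # Mirrors the bool-identity helper: expose a bool as 'true'/'false' so a
    # triggerPolicy string condition can test it.
    return 'true' if value else 'false'


@dsl.pipeline(name='condition-demo')
def condition_demo(run_evaluation: bool,
                   stage_1_tuning_result_artifact_uri: str = ''):
    check = bool_identity(value=run_evaluation)
    # Compiles to a comp-condition-* sub-DAG guarded by a triggerPolicy like
    # inputs.parameter_values['pipelinechannel--bool-identity-Output'] == 'true'.
    with dsl.Condition(check.output == 'true'):
        # The parent parameter is forwarded into the branch as
        # 'pipelinechannel--stage_1_tuning_result_artifact_uri'; the importer
        # turns that URI string into an artifact, much like the importer task
        # in condition-2 above.
        dsl.importer(
            artifact_uri=stage_1_tuning_result_artifact_uri,
            artifact_class=dsl.Artifact,
            reimport=False,
        )


if __name__ == '__main__':
    compiler.Compiler().compile(condition_demo, 'condition_demo.yaml')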
pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + - training-configurator-and-validator-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + - 
training-configurator-and-validator-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator-2 + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + - training-configurator-and-validator-2 + inputs: + artifacts: + feature_ranking: + componentInputArtifact: pipelinechannel--feature-transform-engine-feature_ranking + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + 
componentInputParameter: pipelinechannel--study_spec_parameters_override + tune_feature_selection_rate: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: check-if-is-eval + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + runtimeValue: + constant: 0.0 + taskInfo: + name: check-if-is-distillation + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + 
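Note on the bool-identity tasks above ("check-if-is-eval", "check-if-is-distillation"): they exist only to surface a boolean pipeline input as a task output string that a nested condition can test. A minimal sketch, assuming illustrative component and pipeline names (not the actual AutoML Tabular components), of the authoring pattern that compiles into this kind of gated comp-condition-* block:

from kfp import dsl

@dsl.component
def bool_identity(value: bool) -> str:
    # The returned string becomes the task output parameter 'Output',
    # referenced downstream as pipelinechannel--bool-identity-Output.
    return 'true' if value else 'false'

@dsl.component
def evaluate_model(note: str):
    print(note)

@dsl.pipeline(name='condition-sketch')
def pipeline(run_evaluation: bool = True):
    flag = bool_identity(value=run_evaluation)
    # Compiles to a nested comp-condition-* whose trigger checks
    # the bool-identity output against the string 'true'.
    with dsl.Condition(flag.output == 'true'):
        evaluate_model(note='runs only when evaluation is enabled')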
pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + training-configurator-and-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator-2 + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + dataset_stats: + componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats + instance_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + training_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema + parameters: + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + quantiles: + componentInputParameter: pipelinechannel--quantiles + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts + stage_1_deadline_hours: + 
taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + stage_2_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-split_example_counts: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + 
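For reference, each parameterType in the inputDefinitions above is derived from the Python annotation on the corresponding pipeline or component parameter. A small, self-contained sketch of that mapping, with made-up parameter names:

from kfp import compiler, dsl

@dsl.component
def consume(text: str, count: int, ratio: float, flag: bool, items: list, cfg: dict):
    print(text, count, ratio, flag, items, cfg)

@dsl.pipeline(name='parameter-type-sketch')
def pipeline(
    text: str = 'abc',     # compiles to parameterType: STRING
    count: int = 1,        # NUMBER_INTEGER
    ratio: float = 0.5,    # NUMBER_DOUBLE
    flag: bool = False,    # BOOLEAN
    items: list = [],      # LIST
    cfg: dict = {},        # STRUCT
):
    consume(text=text, count=count, ratio=ratio, flag=flag, items=items, cfg=cfg)

if __name__ == '__main__':
    compiler.Compiler().compile(pipeline, 'parameter_type_sketch.yaml')

Inputs consumed inside a nested DAG additionally pick up the pipelinechannel-- prefix, as seen throughout this spec.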
pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + 
pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + 
schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: 
pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + 
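In the model-batch-predict-2 / model-evaluation-2 / model-evaluation-import-2 tasks above, literal arguments such as predictions_format: jsonl are emitted as runtimeValue constants, while anything threaded through a pipeline input becomes a componentInputParameter reference. A hedged sketch of the difference (the consume component is hypothetical):

from kfp import dsl

@dsl.component
def consume(fmt: str, project: str):
    print(fmt, project)

@dsl.pipeline(name='constants-sketch')
def pipeline(project: str):
    consume(
        fmt='jsonl',      # literal -> runtimeValue: {constant: jsonl}
        project=project,  # pipeline input -> componentInputParameter: pipelinechannel--project
    )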
pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + 
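comp-exit-handler-1 above wraps the main training DAG, and artifact handoffs such as feature-transform-engine's dataset_stats or merge-materialized-splits' splits show up as taskOutputArtifact references inside it. A minimal sketch of the ExitHandler shape, with illustrative task names rather than the real components:

from kfp import dsl
from kfp.dsl import Dataset, Input, Output

@dsl.component
def produce(stats: Output[Dataset]):
    with open(stats.path, 'w') as f:
        f.write('{}')

@dsl.component
def consume(stats: Input[Dataset]):
    print(stats.path)

@dsl.component
def notify():
    print('pipeline finished')

@dsl.pipeline(name='exit-handler-sketch')
def pipeline():
    exit_task = notify()
    # Tasks inside the ExitHandler are grouped into a comp-exit-handler-* DAG;
    # the artifact handoff below compiles to a taskOutputArtifact reference.
    with dsl.ExitHandler(exit_task):
        stats = produce()
        consume(stats=stats.outputs['stats'])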
parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + 
pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - split-materialized-data + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-feature_ranking: + taskOutputArtifact: + outputArtifactKey: feature_ranking + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: 
pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + 
pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + 
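condition-2 ("stage_1_tuning_result_artifact_uri_not_empty") and condition-4 ("stage_1_tuning_result_artifact_uri_empty") above are the two branches of one decision: whether a stage-1 tuning result URI was supplied. The string-not-empty helper reduces that to 'true'/'false', and each branch gets the complementary triggerPolicy. A small sketch of the same two-branch pattern, with stand-in component names:

from kfp import dsl

@dsl.component
def string_not_empty(value: str) -> str:
    return 'true' if value else 'false'

@dsl.component
def reuse_tuning_result(uri: str):
    print('skipping architecture search, reusing', uri)

@dsl.component
def run_architecture_search():
    print('running stage 1 tuner')

@dsl.pipeline(name='branch-sketch')
def pipeline(stage_1_tuning_result_artifact_uri: str = ''):
    check = string_not_empty(value=stage_1_tuning_result_artifact_uri)
    with dsl.Condition(check.output == 'true'):   # -> triggerPolicy ... == 'true'
        reuse_tuning_result(uri=stage_1_tuning_result_artifact_uri)
    with dsl.Condition(check.output == 'false'):  # -> triggerPolicy ... == 'false'
        run_architecture_search()

Because the two conditions test complementary values of the same output, exactly one branch executes at runtime.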
feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + legacy_transformations_path: + componentInputParameter: pipelinechannel--legacy_transformations_path + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - split-materialized-data + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + taskInfo: + name: merge-materialized-splits + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-is-stage-1-tuning-result-artifact-uri-not-empty + inputDefinitions: + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + 
parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--legacy_transformations_path: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + 
pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' 
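The two feature-attribution components above (comp-feature-attribution and comp-feature-attribution-2) expose an identical set of Dataflow execution parameters, so a caller typically collects them once and reuses them for both tasks. A minimal sketch, using only the parameter names and defaults listed above with a placeholder project ID; the component constructor itself is left as a commented placeholder because its import path depends on the installed google-cloud-pipeline-components release:

.. code-block:: python

    # Shared Dataflow settings mirroring the feature-attribution parameters above.
    feature_attribution_kwargs = {
        "project": "example-project",       # required; no default in the spec
        "location": "us-central1",          # spec default
        "dataflow_machine_type": "n1-standard-4",
        "dataflow_workers_num": 1,
        "dataflow_max_workers_num": 5,
        "dataflow_disk_size": 50,           # GB
        "dataflow_use_public_ips": True,
        "dataflow_service_account": "",     # empty -> default worker service account
        "dataflow_subnetwork": "",          # empty -> default subnetwork
        "encryption_spec_key_name": "",     # empty -> no CMEK
        "force_direct_runner": False,       # True runs Beam's DirectRunner locally
        "predictions_format": "jsonl",
    }

    # Hypothetical wiring inside a pipeline; the real op name/import path is
    # not taken from this file:
    # feature_attribution_task = feature_attribution_op(**feature_attribution_kwargs, ...)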
+ isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. 
For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' 
+ isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. 
+ isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. 
For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. + outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
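Stepping back to the feature-transform-engine component above, its three transformation inputs (tf_auto_transform_features, tf_custom_transformation_definitions, tf_transformations_path) are easiest to read as Python literals. The sketch below only restates the inline examples from those parameter descriptions; the bucket path and feature names are the same placeholders used there, not values from this pipeline:

.. code-block:: python

    # Auto / pre-resolved type transformations (tf_auto_transform_features).
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    # Bring-your-own TF transform functions (tf_custom_transformation_definitions).
    tf_custom_transformation_definitions = [
        {
            "transformation": "PlusOne",
            "module_path": "gs://bucket/custom_transform_fn.py",
            "function_name": "plus_one_transform",
        },
        {
            "transformation": "MultiplyTwo",
            "module_path": "gs://bucket/custom_transform_fn.py",
            "function_name": "multiply_two_transform",
        },
    ]

    # Per the tf_transform_execution_engine description above, the "bigquery"
    # engine only supports tf_auto_transform_features and raises an error when
    # tf_custom_transformation_definitions or tf_transformations_path is set.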
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. 
Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. 
+ + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
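For the Cloud Storage variant just described, the job reads every object matching gcs_source_uris (wildcards allowed) and writes timestamped prediction and error files under gcs_destination_output_uri_prefix. A minimal sketch of how these parameters line up in a pipeline; the ModelBatchPredictOp import path is assumed from google-cloud-pipeline-components v1 (module layout varies by release) and the bucket names are placeholders:

.. code-block:: python

    from kfp import dsl
    # Assumed import path; check the installed google-cloud-pipeline-components release.
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


    @dsl.pipeline(name="batch-predict-gcs-sketch")
    def batch_predict_gcs(project: str, location: str = "us-central1"):
        ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name="batch-predict-gcs-sketch",
            # Input: JSONL shards; wildcards are allowed per the description above.
            gcs_source_uris=["gs://example-bucket/instances/prediction-input-*.jsonl"],
            instances_format="jsonl",
            # Output: a timestamped directory is created under this prefix holding
            # predictions_*.jsonl and, on partial failure, errors_*.jsonl files.
            gcs_destination_output_uri_prefix="gs://example-bucket/predictions",
            predictions_format="jsonl",
        )
        # A real run must also wire in model= or unmanaged_container_model=;
        # the spec above requires one of the two.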
+ isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. 
The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. 
For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
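Inputs such as `excluded_fields` (parameterType: LIST, defaultValue: []) and `explanation_metadata` (parameterType: STRUCT, defaultValue: {}) above are how the compiler represents Python list and dict arguments that carry empty defaults. A minimal sketch of a component that compiles to the same parameter types with the KFP v2 SDK; the component name and body are illustrative only, not taken from this spec:

from typing import Dict, List

from kfp import dsl


@dsl.component(base_image='python:3.7')
def describe_inputs(
    labels: Dict[str, str] = {},      # compiles to parameterType: STRUCT, defaultValue: {}
    excluded_fields: List[str] = [],  # compiles to parameterType: LIST, defaultValue: []
) -> str:
    # Both inputs are optional because they carry defaults; the return value
    # surfaces as a STRING output parameter named `Output`.
    return f'{len(labels)} labels, {len(excluded_fields)} excluded fields'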
+ isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' 
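The `comp-model-batch-predict` and `comp-model-batch-predict-2` definitions correspond to the Google Cloud Pipeline Components batch prediction operator referenced by this spec; in pipeline code the same inputs are supplied as keyword arguments. A sketch assuming the GCPC v1 import path, with a placeholder display name and GCS URIs, and with the upstream `model` input omitted:

from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
from kfp import dsl


@dsl.pipeline(name='batch-predict-example')
def batch_predict_pipeline(project: str, location: str = 'us-central1'):
    # `model` (or `unmanaged_container_model`) would normally come from an
    # upstream task such as a model upload step; it is omitted in this sketch.
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name='example-batch-predict',                     # placeholder
        instances_format='jsonl',
        gcs_source_uris=['gs://example-bucket/instances.jsonl'],      # placeholder
        predictions_format='jsonl',
        gcs_destination_output_uri_prefix='gs://example-bucket/out',  # placeholder
        machine_type='n1-standard-4',
        starting_replica_count=1,
        max_replica_count=2,
    )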
+ isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. 
Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. 
Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + 
isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' 
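A required input such as `value` on `comp-string-not-empty` above (no default, no `isOptional`) and the implicit `Output` parameter map directly onto a plain Python function signature. A minimal sketch with the same interface; the body is illustrative rather than the pipeline's actual implementation:

from kfp import dsl


@dsl.component(base_image='python:3.7')
def string_not_empty(value: str) -> str:
    # A single unnamed return value appears as the `Output` STRING parameter
    # in the compiled component spec.
    return 'true' if value else 'false'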
+ isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-training-configurator-and-validator-2: + executorLabel: exec-training-configurator-and-validator-2 + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' 
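Each `comp-*` definition above is paired with a container command in the `deploymentSpec.executors` section further below; both sections are emitted together when a pipeline function is compiled. A small self-contained sketch that produces a spec with the same `components` and `deploymentSpec` layout; names and the output path are placeholders:

from kfp import compiler, dsl


@dsl.component(base_image='python:3.7')
def say_hello(name: str) -> str:
    return f'hello, {name}'


@dsl.pipeline(name='tiny-example')
def tiny_pipeline(name: str = 'world'):
    say_hello(name=name)


compiler.Compiler().compile(
    pipeline_func=tiny_pipeline,
    package_path='tiny_pipeline.yaml',  # placeholder output path
)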
+ isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", 
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", 
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. 
Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - 
/main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", 
"{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": 
["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", 
"{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": 
+ \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - 
'{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - 
'{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + 
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": 
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' 
data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": 
["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-training-configurator-and-validator-2: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": 
["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The AutoML Tabular pipeline v2. + name: automl-tabular-v2 +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: apply_feature_selection_tuning + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: 
dataset_level_transformations + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--legacy_transformations_path: + componentInputParameter: legacy_transformations_path + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--num_selected_features: + componentInputParameter: num_selected_features + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + 
pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + apply_feature_selection_tuning: + defaultValue: false + description: tuning feature selection rate if true. + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + cv_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding stage + + cv trainer worker pool spec.' 
+ isOptional: true + parameterType: LIST + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_early_stopping: + defaultValue: false + description: If disable easly stopping. + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size + + in GB for feature transform engine component.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type + + for feature transform engine component.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of + + Dataflow workers for feature transform engine component.' + isOptional: true + parameterType: NUMBER_INTEGER + legacy_transformations_path: + defaultValue: '' + description: Path to train spec transformations json. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features for feature selection, + + defaults to None, in which case all features are used.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: 'For binary classification, "maximize-au-roc", + + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", + or + + "maximize-recall-at-precision". For multi class classification, + + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + + "minimize-rmsle".' + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when optimization_objective + + is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when optimization_objective is + + ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to apply feature selection or not. + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_num_selected_trials: + defaultValue: 5.0 + description: Number of selected trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: The stratified_split column name. + isOptional: true + parameterType: STRING + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: 'List of auto transform features in the + + comma-separated string format.' + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json new file mode 100644 index 0000000000..65e64d953d --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 70000, 90000, 110000, 130000, 150000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [4096, 8192, 16384, 32768, 65536] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 700 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + 
"parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json new file mode 100644 index 0000000000..e7346ea9ae --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [4096, 8192, 16384, 32768] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 200, + "max_value": 500 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + 
"double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json new file mode 100644 index 0000000000..90ed01db8f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [8192, 16384, 32768] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0002, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 400 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + 
"scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 10.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json new file mode 100644 index 0000000000..b9350f33b6 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 500 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", 
"focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json new file mode 100644 index 0000000000..e7143fae84 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 400 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": 
"alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json new file mode 100644 index 0000000000..46968c00c8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 300 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] 
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json new file mode 100644 index 0000000000..40d2e7f85b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 300 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 10000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json new file mode 100644 index 0000000000..3a75145edf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 200 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 10000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json new file mode 100644 index 0000000000..eb7a4c99f7 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 15000, 20000, 25000, 30000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 200 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 5000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 4 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json new file mode 100644 index 0000000000..6458b992d0 --- /dev/null +++ 
b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json @@ -0,0 +1,132 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.0005 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "optimizer_type", + "categorical_value_spec": { + "values": ["adam", "ftrl", "sgd"] + } + }, + { + "parameter_id": "l1_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "l2_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "l2_shrinkage_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "beta_1", + "discrete_value_spec": { + "values": [0.7, 0.8, 0.9] + } + }, + { + "parameter_id": "beta_2", + "discrete_value_spec": { + "values": [0.8, 0.9, 0.999] + } + }, + { + "parameter_id": "hidden_units", + "categorical_value_spec": { + "values": ["30,30,30"] + } + }, + { + "parameter_id": "use_wide", + "categorical_value_spec": { + "values": ["true", "false"] + } + }, + { + "parameter_id": "embed_categories", + "categorical_value_spec": { + "values": ["true", "false"] + } + }, + { + "parameter_id": "dnn_dropout", + "discrete_value_spec": { + "values": [0, 0.1, 0.2] + } + }, + { + "parameter_id": "dnn_learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.0005 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "dnn_optimizer_type", + "categorical_value_spec": { + "values": ["adam", "ftrl", "sgd"] + } + }, + { + "parameter_id": "dnn_l1_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_l2_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_l2_shrinkage_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_beta_1", + "discrete_value_spec": { + "values": [0.7, 0.8, 0.9] + } + }, + { + "parameter_id": "dnn_beta_2", + "discrete_value_spec": { + "values": [0.8, 0.9, 0.999] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json new file mode 100644 index 0000000000..245a738beb --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json @@ -0,0 +1,309 @@ +[{ + "parameter_id": "num_boost_round", + "discrete_value_spec": { + "values": [1, 5, 10, 15, 20] + } +}, { + "parameter_id": "early_stopping_rounds", + "discrete_value_spec": { + "values": [3, 5, 10] + } +}, { + "parameter_id": "base_score", + "discrete_value_spec": { + "values": [0.5] + } +}, { + "parameter_id": "booster", + "categorical_value_spec": { + "values": ["gbtree", "gblinear", "dart"] + }, + "conditional_parameter_specs": [{ + "parameter_spec": { + "parameter_id": "eta", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + 
"scale_type": "UNIT_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "gamma", + "discrete_value_spec": { + "values": [0, 10, 50, 100, 500, 1000] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_depth", + "integer_value_spec": { + "min_value": 6, + "max_value": 10 + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "min_child_weight", + "double_value_spec": { + "min_value": 0.0, + "max_value": 10.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_delta_step", + "discrete_value_spec": { + "values": [0.0, 1.0, 3.0, 5.0, 7.0, 9.0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "subsample", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bytree", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bylevel", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bynode", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "lambda", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_REVERSE_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart", "gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "alpha", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart", "gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "tree_method", + "categorical_value_spec": { + "values": ["auto"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "scale_pos_weight", + "discrete_value_spec": { + "values": [1.0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "refresh_leaf", + "discrete_value_spec": { + "values": [1] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "process_type", + "categorical_value_spec": { + "values": ["default"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "grow_policy", + "categorical_value_spec": { + "values": ["depthwise"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "sampling_method", + "categorical_value_spec": { + "values": ["uniform"] + } + }, + 
"parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "sample_type", + "categorical_value_spec": { + "values": ["uniform"] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "normalize_type", + "categorical_value_spec": { + "values": ["tree"] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "rate_drop", + "discrete_value_spec": { + "values": [0.0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "one_drop", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "skip_drop", + "discrete_value_spec": { + "values": [0.0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "num_parallel_tree", + "discrete_value_spec": { + "values": [1] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "feature_selector", + "categorical_value_spec": { + "values": ["cyclic"] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "top_k", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "max_leaves", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_bin", + "discrete_value_spec": { + "values": [256] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }] +}] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py new file mode 100644 index 0000000000..c1f753bd03 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -0,0 +1,179 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Feature Ranking and Selection component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def tabular_feature_ranking_and_selection( + project: str, + location: str, + root_dir: str, + data_source: Input[Dataset], + target_column_name: str, + feature_ranking: Output[Artifact], + selected_features: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', + algorithm: Optional[str] = 'AMI', + prediction_type: Optional[str] = 'unknown', + binary_classification: Optional[str] = 'false', + max_selected_features: Optional[int] = 1000, +): + # fmt: off + """Launches a feature selection task to pick top features. + + Args: + project: Project to run feature selection. + location: Location for running the feature selection. If not set, + default to us-central1. + root_dir: The Cloud Storage location to store the output. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More + details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + If this is set, then all resources will be encrypted with the provided + encryption key. data_source(Dataset): The input dataset artifact which + references csv, BigQuery, or TF Records. target_column_name(str): Target + column name of the input dataset. + max_selected_features: number of features to select by the + algorithm. If not set, default to 1000. + + Returns: + feature_ranking: the dictionary of feature names and feature ranking values. + selected_features: A json array of selected feature names. + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["feature_selection", "--data_source=', + data_source.uri, + '", "--target_column=', + target_column_name, + '", "--prediction_type=', + prediction_type, + '", "--binary_classification=', + binary_classification, + '", "--algorithm=', + algorithm, + '", "--feature_selection_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection/",' + f' "--job_name=tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", "--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--max_selected_features=', + max_selected_features, + '", "--feature_selection_result_path=', + feature_ranking.uri, + '", "--selected_features_path=', + selected_features.uri, + '", "--parse_json=true"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py new file mode 100644 index 0000000000..4f93bbf285 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -0,0 +1,976 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Feature Transform Engine component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Output + + +@dsl.container_component +def feature_transform_engine( + root_dir: str, + project: str, + location: str, + dataset_stats: Output[Artifact], + materialized_data: Output[Dataset], + transform_output: Output[Artifact], + split_example_counts: dsl.OutputPath(str), + instance_schema: Output[Artifact], + training_schema: Output[Artifact], + bigquery_train_split_uri: dsl.OutputPath(str), + bigquery_validation_split_uri: dsl.OutputPath(str), + bigquery_test_split_uri: dsl.OutputPath(str), + bigquery_downsampled_test_split_uri: dsl.OutputPath(str), + feature_ranking: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + dataset_level_custom_transformation_definitions: Optional[list] = [], + dataset_level_transformations: Optional[list] = [], + forecasting_time_column: Optional[str] = '', + forecasting_time_series_identifier_column: Optional[str] = '', + forecasting_time_series_attribute_columns: Optional[list] = [], + forecasting_unavailable_at_forecast_columns: Optional[list] = [], + forecasting_available_at_forecast_columns: Optional[list] = [], + forecasting_forecast_horizon: Optional[int] = -1, + forecasting_context_window: Optional[int] = -1, + forecasting_predefined_window_column: Optional[str] = '', + forecasting_window_stride_length: Optional[int] = -1, + forecasting_window_max_count: Optional[int] = -1, + forecasting_holiday_regions: Optional[list] = [], + forecasting_apply_windowing: Optional[bool] = True, + predefined_split_key: Optional[str] = '', + stratified_split_key: Optional[str] = '', + timestamp_split_key: Optional[str] = '', + training_fraction: Optional[float] = -1, + validation_fraction: Optional[float] = -1, + test_fraction: Optional[float] = -1, + tf_transform_execution_engine: Optional[str] = 'dataflow', + tf_auto_transform_features: Optional[dict] = {}, + tf_custom_transformation_definitions: Optional[list] = [], + tf_transformations_path: Optional[str] = '', + legacy_transformations_path: Optional[str] = '', + target_column: Optional[str] = '', + weight_column: Optional[str] = '', + prediction_type: Optional[str] = '', + model_type: Optional[str] = None, + multimodal_image_columns: Optional[list] = [], + multimodal_text_columns: Optional[list] = [], + run_distill: Optional[bool] = False, + run_feature_selection: Optional[bool] = False, + feature_selection_algorithm: Optional[str] = 'AMI', + materialized_examples_format: Optional[str] = 'tfrecords_gzip', + max_selected_features: Optional[int] = 1000, + data_source_csv_filenames: Optional[str] = '', + data_source_bigquery_table_path: Optional[str] = '', + bigquery_staging_full_dataset_id: Optional[str] = '', + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + 
encryption_spec_key_name: Optional[str] = '', + autodetect_csv_schema: Optional[bool] = False, + group_columns: Optional[list] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +): + # fmt: off + """Transforms raw data to engineered features. + + FTE performs dataset level transformations, data splitting, data statistic + generation, and TensorFlow-based row level transformations on the input + dataset based on the provided transformation configuration. + + Args: + root_dir: The Cloud Storage location to store the output. + project: Project to run feature transform engine. + location: Location for the created GCP services. + dataset_level_custom_transformation_definitions: List of dataset-level custom transformation definitions. Custom, + bring-your-own dataset-level transform functions, where users can define + and import their own transform function and use it with FTE's built-in + transformations. Using custom transformations is an experimental feature + and it is currently not supported during batch prediction. + Example: .. code-block:: python [ { "transformation": "ConcatCols", + "module_path": "/path/to/custom_transform_fn_dlt.py", + "function_name": "concat_cols" } ] Using custom transform function + together with FTE's built-in transformations: .. code-block:: + python [ { "transformation": "Join", "right_table_uri": + "bq://test-project.dataset_test.table", "join_keys": + [["join_key_col", "join_key_col"]] },{ "transformation": + "ConcatCols", "cols": ["feature_1", "feature_2"], "output_col": + "feature_1_2" } ] + dataset_level_transformations: List of dataset-level + transformations. + Example: .. code-block:: python [ { "transformation": "Join", + "right_table_uri": "bq://test-project.dataset_test.table", + "join_keys": [["join_key_col", "join_key_col"]] }, ... ] Additional + information about FTE's currently supported built-in + transformations: + Join: Joins features from right_table_uri. For each join key, the + left table keys will be included and the right table keys will + be dropped. + Example: .. code-block:: python { "transformation": "Join", + "right_table_uri": "bq://test-project.dataset_test.table", + "join_keys": [["join_key_col", "join_key_col"]] } + Arguments: + right_table_uri: Right table BigQuery uri to join + with input_full_table_id. + join_keys: Features to join on. For each + nested list, the first element is a left table column + and the second is its corresponding right table column. + TimeAggregate: Creates a new feature composed of values of an + existing feature from a fixed time period ago or in the future. + Ex: A feature for sales by store 1 year ago. + Example: .. code-block:: python { "transformation": + "TimeAggregate", "time_difference": 40, + "time_difference_units": "DAY", + "time_series_identifier_columns": ["store_id"], + "time_column": "time_col", "time_difference_target_column": + "target_col", "output_column": "output_col" } + Arguments: + time_difference: Number of time_difference_units to + look back or into the future on our + time_difference_target_column. + time_difference_units: Units of time_difference to + look back or into the future on our + time_difference_target_column. Must be one of * 'DAY' * + 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * + 'YEAR' + time_series_identifier_columns: Names of the + time series identifier columns. + time_column: Name of the time column. 
+ time_difference_target_column: Column we wish to get + the value of time_difference time_difference_units in + the past or future. + output_column: Name of our new time aggregate + feature. + is_future: Whether we wish to look + forward in time. Defaults to False. + PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum: + Performs a partition by reduce operation (one of max, + min, avg, or sum) with a fixed historic time period. Ex: + Getting avg sales (the reduce column) for each store + (partition_by_column) over the previous 5 days + (time_column, time_ago_units, and time_ago). + Example: .. code-block:: python { "transformation": + "PartitionByMax", "reduce_column": "sell_price", + "partition_by_columns": ["store_id", "state_id"], + "time_column": "date", "time_ago": 1, "time_ago_units": + "WEEK", "output_column": "partition_by_reduce_max_output" } + Arguments: + reduce_column: Column to apply the reduce operation + on. Reduce operations include the + following: Max, Min, Avg, Sum. + partition_by_columns: List of columns to + partition by. + time_column: Time column for the partition by + operation's window function. + time_ago: Number of time_ago_units to look back on + our target_column, starting from time_column + (inclusive). + time_ago_units: Units of time_ago to look back on + our target_column. Must be one of * 'DAY' * 'WEEK' + output_column: Name of our output feature. + forecasting_time_column: Forecasting time column. + forecasting_time_series_identifier_column: Forecasting + time series identifier column. + forecasting_time_series_attribute_columns: Forecasting + time series attribute columns. + forecasting_unavailable_at_forecast_columns: Forecasting + unavailable at forecast columns. + forecasting_available_at_forecast_columns: Forecasting + available at forecast columns. + forecasting_forecast_horizon: Forecasting horizon. + forecasting_context_window: Forecasting context window. + forecasting_predefined_window_column: Forecasting predefined window column. + forecasting_window_stride_length: Forecasting window stride length. + forecasting_window_max_count: Forecasting window max count. + forecasting_holiday_regions: The geographical region based on which the + holiday effect is applied in modeling by adding holiday categorical + array feature that include all holidays matching the date. This option + only allowed when data granularity is day. By default, holiday effect + modeling is disabled. To turn it on, specify the holiday region using + this option. + Top level: * 'GLOBAL' + Second level: continental regions: * 'NA': North America + * 'JAPAC': Japan and Asia Pacific + * 'EMEA': Europe, the Middle East and Africa + * 'LAC': Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. + Valid regions: * 'GLOBAL' * 'NA' * 'JAPAC' * 'EMEA' * 'LAC' * 'AE' + * 'AR' * 'AT' * 'AU' * 'BE' * 'BR' * 'CA' * 'CH' * 'CL' * 'CN' * 'CO' + * 'CZ' * 'DE' * 'DK' * 'DZ' * 'EC' * 'EE' * 'EG' * 'ES' * 'FI' * 'FR' + * 'GB' * 'GR' * 'HK' * 'HU' * 'ID' * 'IE' * 'IL' * 'IN' * 'IR' * 'IT' + * 'JP' * 'KR' * 'LV' * 'MA' * 'MX' * 'MY' * 'NG' * 'NL' * 'NO' * 'NZ' + * 'PE' * 'PH' * 'PK' * 'PL' * 'PT' * 'RO' * 'RS' * 'RU' * 'SA' * 'SE' + * 'SG' * 'SI' * 'SK' * 'TH' * 'TR' * 'TW' * 'UA' * 'US' * 'VE' * 'VN' + * 'ZA' + forecasting_apply_windowing: Whether to apply window strategy. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + timestamp_split_key: Timestamp split key. 
+ training_fraction: Fraction of input data for training. + validation_fraction: Fraction of input data for validation. + test_fraction: Fraction of input data for testing. + tf_transform_execution_engine: Execution engine to perform + row-level TF transformations. Can be one of: "dataflow" (by default) or + "bigquery". Using "bigquery" as the execution engine is experimental and + is for allowlisted customers only. In addition, executing on "bigquery" + only supports auto transformations (i.e., specified by + tf_auto_transform_features) and will raise an error when + tf_custom_transformation_definitions or tf_transformations_path is set. + tf_auto_transform_features: Dict mapping auto and/or type-resolutions to + TF transform features. FTE will automatically configure a set of + built-in transformations for each feature based on its data statistics. + If users do not want auto type resolution, but want the set of + transformations for a given type to be automatically generated, they + may specify pre-resolved transformations types. The following type hint + dict keys are supported: * 'auto' * 'categorical' * 'numeric' * 'text' + * 'timestamp' + Example: .. code-block:: python { "auto": ["feature1"], + "categorical": ["feature2", "feature3"], } Note that the target and + weight column may not be included as an auto transformation unless + users are running forecasting. + tf_custom_transformation_definitions: List of + TensorFlow-based custom transformation definitions. Custom, + bring-your-own transform functions, where users can define and import + their own transform function and use it with FTE's built-in + transformations. + Example: .. code-block:: python [ { "transformation": "PlusOne", + "module_path": "gs://bucket/custom_transform_fn.py", + "function_name": "plus_one_transform" }, { "transformation": + "MultiplyTwo", "module_path": "gs://bucket/custom_transform_fn.py", + "function_name": "multiply_two_transform" } ] Using custom + transform function together with FTE's built-in transformations: .. + code-block:: python [ { "transformation": "CastToFloat", + "input_columns": ["feature_1"], "output_columns": ["feature_1"] },{ + "transformation": "PlusOne", "input_columns": ["feature_1"] + "output_columns": ["feature_1_plused_one"] },{ "transformation": + "MultiplyTwo", "input_columns": ["feature_1"] "output_columns": + ["feature_1_multiplied_two"] } ] + tf_transformations_path: Path to TensorFlow-based + transformation configuration. Path to a JSON file used to specified + FTE's TF transformation configurations. In the following, we provide + some sample transform configurations to demonstrate FTE's capabilities. + All transformations on input columns are explicitly specified with FTE's + built-in transformations. Chaining of multiple transformations on a + single column is also supported. For example: .. code-block:: python [ + { "transformation": "ZScale", "input_columns": ["feature_1"] }, { + "transformation": "ZScale", "input_columns": ["feature_2"] } ] + Additional information about FTE's currently supported built-in + transformations: + Datetime: Extracts datetime featues from a column containing + timestamp strings. + Example: .. code-block:: python { "transformation": + "Datetime", "input_columns": ["feature_1"], "time_format": + "%Y-%m-%d" } + Arguments: + input_columns: A list with a single column to + perform the datetime transformation on. + output_columns: Names of output + columns, one for each datetime_features element. + time_format: Datetime format string. 
Time format is + a combination of Date + Time Delimiter (optional) + Time + (optional) directives. Valid date directives are as + follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # + 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' # + 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' # + 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # + 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' # + 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y' + # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # + 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' # + 11302018 * '%Y%m%d' # 20181130 Valid time delimiters + are as follows * 'T' * ' ' Valid time directives are as + follows * '%H:%M' # 23:59 * '%H:%M:%S' # + 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * + '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 * + '%H:%M:%S%z', # 23:59:58+0000 + datetime_features: List of datetime + features to be extract. Each entry must be one of * + 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR' + * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * + 'SECOND' Defaults to ['YEAR', 'MONTH', 'DAY', + 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR'] + Log: Performs the natural log on a numeric column. + Example: .. code-block:: python { "transformation": "Log", + "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the log transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + ZScale: Performs Z-scale normalization on a numeric column. + Example: .. code-block:: python { "transformation": + "ZScale", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the z-scale transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + Vocabulary: Converts strings to integers, where each unique string + gets a unique integer representation. + Example: .. code-block:: python { "transformation": + "Vocabulary", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the vocabulary transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. Defaults to None. + frequency_threshold: Limit the vocabulary + only to words whose number of occurrences in the input + exceeds frequency_threshold. If not specified, all words + in the vocabulary will be included. If both top_k and + frequency_threshold are specified, a word must satisfy + both conditions to be included. Defaults to None. + Categorical: Transforms categorical columns to integer columns. + Example: .. code-block:: python { "transformation": + "Categorical", "input_columns": ["feature_1"], "top_k": 10 } + Arguments: + input_columns: A list with a single column to + perform the categorical transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. + frequency_threshold: Limit the vocabulary + only to words whose number of occurrences in the input + exceeds frequency_threshold. 
If not specified, all words + in the vocabulary will be included. If both top_k and + frequency_threshold are specified, a word must satisfy + both conditions to be included. + Reduce: Given a column where each entry is a numeric array, + reduces arrays according to our reduce_mode. + Example: .. code-block:: python { "transformation": + "Reduce", "input_columns": ["feature_1"], "reduce_mode": + "MEAN", "output_columns": ["feature_1_mean"] } + Arguments: + input_columns: A list with a single column to + perform the reduce transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + reduce_mode: One of * 'MAX' * 'MIN' * + 'MEAN' * 'LAST_K' Defaults to 'MEAN'. + last_k: The number of last k elements when + 'LAST_K' reduce mode is used. Defaults to 1. + SplitString: Given a column of strings, splits strings into token + arrays. + Example: .. code-block:: python { "transformation": + "SplitString", "input_columns": ["feature_1"], "separator": + "$" } + Arguments: + input_columns: A list with a single column to + perform the split string transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + separator: Separator to split input string + into tokens. Defaults to ' '. + missing_token: Missing token to use when + no string is included. Defaults to ' _MISSING_ '. + NGram: Given a column of strings, splits strings into token arrays + where each token is an integer. + Example: .. code-block:: python { "transformation": "NGram", + "input_columns": ["feature_1"], "min_ngram_size": 1, + "max_ngram_size": 2, "separator": " " } + Arguments: + input_columns: A list with a single column to + perform the n-gram transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + min_ngram_size: Minimum n-gram size. Must + be a positive number and <= max_ngram_size. Defaults to + 1. + max_ngram_size: Maximum n-gram size. Must + be a positive number and >= min_ngram_size. Defaults to + 2. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. Defaults to None. + frequency_threshold: Limit the + dictionary's vocabulary only to words whose number of + occurrences in the input exceeds frequency_threshold. If + not specified, all words in the vocabulary will be + included. If both top_k and frequency_threshold are + specified, a word must satisfy both conditions to be + included. Defaults to None. + separator: Separator to split input string + into tokens. Defaults to ' '. + missing_token: Missing token to use when + no string is included. Defaults to ' _MISSING_ '. + Clip: Given a numeric column, clips elements such that elements < + min_value are assigned min_value, and elements > max_value are + assigned max_value. + Example: .. code-block:: python { "transformation": "Clip", + "input_columns": ["col1"], "output_columns": + ["col1_clipped"], "min_value": 1., "max_value": 10., } + Arguments: + input_columns: A list with a single column to + perform the n-gram transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + min_value: Number where all values below + min_value are set to min_value. If no min_value is + provided, min clipping will not occur. Defaults to None. 
+ max_value: Number where all values above + max_value are set to max_value. If no max_value is + provided, max clipping will not occur. Defaults to None. + MultiHotEncoding: Performs multi-hot encoding on a categorical + array column. + Example: .. code-block:: python { "transformation": + "MultiHotEncoding", "input_columns": ["col1"], } The number + of classes is determined by the largest number included in + the input if it is numeric or the total number of unique + values of the input if it is type str. If the input has + type str and an element contains separator tokens, the input + will be split at separator indices, and each element of + the split list will be considered a separate class. For + example, + Input: .. code-block:: python [ ["foo bar"], # Example + 0 ["foo", "bar"], # Example 1 ["foo"], # Example + 2 ["bar"], # Example 3 ] + Output (with default separator=" "): .. code-block:: python [ + [1, 1], # Example 0 [1, 1], # Example 1 + [1, 0], # Example 2 [0, 1], # Example 3 ] + Arguments: + input_columns: A list with a single column to + perform the multi-hot-encoding on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. Defaults to None. + frequency_threshold: Limit the + dictionary's vocabulary only to words whose number of + occurrences in the input exceeds frequency_threshold. If + not specified, all words in the vocabulary will be + included. If both top_k and frequency_threshold are + specified, a word must satisfy both conditions to be + included. Defaults to None. + separator: Separator to split input string + into tokens. Defaults to ' '. + MaxAbsScale: Performs maximum absolute scaling on a numeric + column. + Example: .. code-block:: python { "transformation": + "MaxAbsScale", "input_columns": ["col1"], "output_columns": + ["col1_max_abs_scaled"] } + Arguments: + input_columns: A list with a single column to + perform max-abs-scale on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + Custom: Transformations defined in + tf_custom_transformation_definitions are included here in the + TensorFlow-based transformation configuration. For example, + given the following tf_custom_transformation_definitions: .. + code-block:: python [ { "transformation": "PlusX", + "module_path": "gs://bucket/custom_transform_fn.py", + "function_name": "plus_one_transform" } ] We can include the + following transformation: .. code-block:: python { + "transformation": "PlusX", "input_columns": ["col1"], + "output_columns": ["col1_max_abs_scaled"], "x": 5 } Note that + input_columns must still be included in our arguments and + output_columns is optional. All other arguments are those + defined in custom_transform_fn.py, which includes `"x"` in this + case. See tf_custom_transformation_definitions above. + legacy_transformations_path: Deprecated. Prefer + tf_auto_transform_features. Path to a GCS file containing JSON + string for legacy style transformations. Note that + legacy_transformations_path and tf_auto_transform_features + cannot both be specified. + target_column: Target column of input data. + weight_column: Weight column of input data. + prediction_type: Model prediction type. One of + "classification", "regression", "time_series".
+ run_distill: Whether the distillation should be applied + to the training. + run_feature_selection: Whether the feature selection + should be applied to the dataset. + feature_selection_algorithm: The algorithm of feature + selection. One of "AMI", "CMIM", "JMIM", "MRMR", default to be "AMI". + The algorithms available are: AMI(Adjusted Mutual Information): + Reference: + https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html + Arrays are not yet supported in this algorithm. CMIM(Conditional + Mutual Information Maximization): Reference paper: Mohamed + Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using + Joint Mutual Information Maximisation,” Expert Systems with + Applications, vol. 42, issue 22, 1 December 2015, Pages + 8520-8532. JMIM(Joint Mutual Information Maximization): Reference + paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature + selection using Joint Mutual Information Maximisation,” Expert + Systems with Applications, vol. 42, issue 22, 1 December 2015, + Pages 8520-8532. MRMR(MIQ Minimum-redundancy + Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long, + and Chris Ding. "Feature selection based on mutual information + criteria of max-dependency, max-relevance, and min-redundancy." + IEEE Transactions on pattern analysis and machine intelligence + 27, no. + 8: 1226-1238. + materialized_examples_format: The format to use for the + materialized examples. Should be either 'tfrecords_gzip' (default) or + 'parquet'. + max_selected_features: Maximum number of features to + select. If specified, the transform config will be purged by only using + the selected features that ranked top in the feature ranking, which has + the ranking value for all supported features. If the number of input + features is smaller than max_selected_features specified, we will still + run the feature selection process and generate the feature ranking, no + features will be excluded. The value will be set to 1000 by default if + run_feature_selection is enabled. + data_source_csv_filenames: CSV input data source to run + feature transform on. + data_source_bigquery_table_path: BigQuery input data + source to run feature transform on. + bigquery_staging_full_dataset_id: Dataset in + "projectId.datasetId" format for storing intermediate-FTE BigQuery + tables. If the specified dataset does not exist in BigQuery, FTE will + create the dataset. If no bigquery_staging_full_dataset_id is specified, + all intermediate tables will be stored in a dataset created under the + provided project in the input data source's location during FTE + execution called + "vertex_feature_transform_engine_staging_{location.replace('-', '_')}". + All tables generated by FTE will have a 30 day TTL. + model_type: Model type, which we wish to engineer features + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or + tide. Defaults to the empty value, `None`. + multimodal_image_columns: List of multimodal image + columns. Defaults to an empty list. + multimodal_text_columns: List of multimodal text + columns. Defaults to an empty list. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. 
+ dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + Dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + autodetect_csv_schema: If True, infers the column types + when importing CSVs into BigQuery. + + Returns: + dataset_stats: The stats of the dataset. + materialized_data: The materialized dataset. + transform_output: The transform output artifact. + split_example_counts: JSON string of data split example counts for train, + validate, and test splits. + bigquery_train_split_uri: BigQuery URI for the train split to pass to the + batch prediction component during distillation. + bigquery_validation_split_uri: BigQuery URI for the validation split to + pass to the batch prediction component during distillation. + bigquery_test_split_uri: BigQuery URI for the test split to pass to the + batch prediction component during evaluation. + bigquery_downsampled_test_split_uri: BigQuery URI for the downsampled test + split to pass to the batch prediction component during batch explain. + instance_schema_path: Schema of input data to the tf_model at serving + time. + training_schema_path: Schema of input data to the tf_model at training + time. + feature_ranking: The ranking of features, all features supported in the + dataset will be included. For "AMI" algorithm, array features won't be + available in the ranking as arrays are not supported yet. + gcp_resources: GCP resources created by this component. For more details, + see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + group_columns: A list of time series attribute column names that define + the time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + command=[], + args=[ + 'feature_transform_engine', + dsl.ConcatPlaceholder(items=['--project=', project]), + dsl.ConcatPlaceholder(items=['--location=', location]), + dsl.ConcatPlaceholder( + items=[ + '--dataset_level_custom_transformation_definitions=', + dataset_level_custom_transformation_definitions, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataset_level_transformations=', + dataset_level_transformations, + ] + ), + dsl.ConcatPlaceholder( + items=['--forecasting_time_column=', forecasting_time_column] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_time_series_identifier_column=', + forecasting_time_series_identifier_column, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_time_series_attribute_columns=', + forecasting_time_series_attribute_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_unavailable_at_forecast_columns=', + forecasting_unavailable_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_available_at_forecast_columns=', + forecasting_available_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_forecast_horizon=', + forecasting_forecast_horizon, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_context_window=', + forecasting_context_window, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_predefined_window_column=', + forecasting_predefined_window_column, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_window_stride_length=', + forecasting_window_stride_length, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_window_max_count=', + forecasting_window_max_count, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_holiday_regions=', + forecasting_holiday_regions, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_apply_windowing=', + forecasting_apply_windowing, + ] + ), + dsl.ConcatPlaceholder( + items=['--predefined_split_key=', predefined_split_key] + ), + dsl.ConcatPlaceholder( + items=['--stratified_split_key=', stratified_split_key] + ), + dsl.ConcatPlaceholder( + items=['--timestamp_split_key=', timestamp_split_key] + ), + dsl.ConcatPlaceholder( + items=['--training_fraction=', training_fraction] + ), + dsl.ConcatPlaceholder( + items=['--validation_fraction=', validation_fraction] + ), + dsl.ConcatPlaceholder(items=['--test_fraction=', test_fraction]), + dsl.ConcatPlaceholder( + items=[ + '--tf_transform_execution_engine=', + tf_transform_execution_engine, + ] + ), + dsl.IfPresentPlaceholder( + input_name='tf_auto_transform_features', + then=dsl.ConcatPlaceholder( + items=[ + '--tf_auto_transform_features=', + tf_auto_transform_features, + ] + ), + ), + dsl.ConcatPlaceholder( + items=[ + '--tf_custom_transformation_definitions=', + tf_custom_transformation_definitions, + ] + ), + dsl.ConcatPlaceholder( + items=['--tf_transformations_path=', tf_transformations_path] + ), + dsl.ConcatPlaceholder( + items=[ + '--legacy_transformations_path=', + legacy_transformations_path, + ] + ), + dsl.ConcatPlaceholder( + items=['--data_source_csv_filenames=', data_source_csv_filenames] + ), + dsl.ConcatPlaceholder( + items=[ + '--data_source_bigquery_table_path=', + data_source_bigquery_table_path, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_staging_full_dataset_id=', + bigquery_staging_full_dataset_id, + ] + ), + 
dsl.ConcatPlaceholder(items=['--target_column=', target_column]), + dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), + dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), + dsl.IfPresentPlaceholder( + input_name='model_type', + then=dsl.ConcatPlaceholder(items=['--model_type=', model_type]), + ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_image_columns=', + multimodal_image_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_text_columns=', + multimodal_text_columns, + ] + ), + dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), + dsl.ConcatPlaceholder( + items=['--run_feature_selection=', run_feature_selection] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_examples_format=', + materialized_examples_format, + ] + ), + dsl.ConcatPlaceholder( + items=['--max_selected_features=', max_selected_features] + ), + dsl.ConcatPlaceholder( + items=[ + '--feature_selection_staging_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection_staging_dir', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--feature_selection_algorithm=', + feature_selection_algorithm, + ] + ), + dsl.ConcatPlaceholder( + items=['--feature_ranking_path=', feature_ranking.uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.txt', + ] + ), + dsl.ConcatPlaceholder( + items=['--stats_result_path=', dataset_stats.uri] + ), + dsl.ConcatPlaceholder( + items=['--transform_output_artifact_path=', transform_output.uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--transform_output_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_examples_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--export_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/export', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized_data', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_artifact_path=', + materialized_data.uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_train_split_uri_path=', + bigquery_train_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_validation_split_uri_path=', + bigquery_validation_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=['--bigquery_test_split_uri_path=', bigquery_test_split_uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_downsampled_test_split_uri_path=', + bigquery_downsampled_test_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=['--split_example_counts_path=', split_example_counts] + ), + dsl.ConcatPlaceholder( + items=['--instance_schema_path=', instance_schema.path] + ), + dsl.ConcatPlaceholder( + items=['--training_schema_path=', training_schema.path] + ), + f'--job_name=feature-transform-engine-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + dsl.ConcatPlaceholder(items=['--dataflow_project=', project]), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_staging_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging', + ] + ), + 
dsl.ConcatPlaceholder( + items=[ + '--dataflow_tmp_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp', + ] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_max_num_workers=', dataflow_max_num_workers] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_machine_type=', dataflow_machine_type] + ), + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + dsl.ConcatPlaceholder( + items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + ] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_use_public_ips=', dataflow_use_public_ips] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_service_account=', dataflow_service_account] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_kms_key=', encryption_spec_key_name] + ), + dsl.ConcatPlaceholder( + items=['--autodetect_csv_schema=', autodetect_csv_schema] + ), + dsl.ConcatPlaceholder(items=['--gcp_resources_path=', gcp_resources]), + dsl.IfPresentPlaceholder( + input_name='group_columns', + then=dsl.ConcatPlaceholder( + items=['--group_columns=', group_columns] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_total_weight', + then=dsl.ConcatPlaceholder( + items=['--group_total_weight=', group_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=['--temporal_total_weight=', temporal_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=[ + '--group_temporal_total_weight=', + group_temporal_total_weight, + ] + ), + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..a9b09479a8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -0,0 +1,236 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
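The feature_transform_engine component above forwards its configuration to the FTE container as --flag=value arguments built with ConcatPlaceholder, so most inputs are plain strings, lists, and dicts. The user-facing transformation config comes in two flavors described in its docstring: type-resolution hints via tf_auto_transform_features, or an explicit row-level config referenced by tf_transformations_path. The values below are a rough illustration with placeholder column names, not taken from this patch.

# Auto type resolution: let FTE pick transformations per feature type.
tf_auto_transform_features = {
    'auto': ['age'],
    'categorical': ['country', 'device'],
    'numeric': ['income'],
}

# Explicit row-level transformations, as they would appear in the JSON file
# pointed to by tf_transformations_path (built-ins from the docstring above).
tf_transformations = [
    {'transformation': 'ZScale', 'input_columns': ['income']},
    {'transformation': 'Vocabulary', 'input_columns': ['country']},
    {
        'transformation': 'Datetime',
        'input_columns': ['signup_time'],
        'time_format': '%Y-%m-%d',
    },
]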
+ +"""AutoML Tabnet Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input + + +@dsl.container_component +def tabnet_hyperparameter_tuning_job( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + instance_schema_uri: dsl.OutputPath(str), + prediction_schema_uri: dsl.OutputPath(str), + trials: dsl.OutputPath(str), + prediction_docker_uri_output: dsl.OutputPath(str), + execution_metrics: dsl.OutputPath(dict), + weight_column: Optional[str] = '', + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + eval_frequency_secs: Optional[int] = 600, + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes TabNet hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + study_spec_metric_id: Metric to optimize, possible + values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. 
+ study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: Amount of time in seconds to run the trainer for. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + instance_schema_uri: The path to the instance schema. + prediction_schema_uri: The path to the prediction schema. + trials: The path to the hyperparameter tuning trials + prediction_docker_uri_output: The URI of the prediction container. + execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJobWithMetrics', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--execution_metrics', + execution_metrics, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabnet-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ( + ', "trial_job_spec": {"worker_pool_specs":' + ' [{"replica_count":"' + ), + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--prediction_docker_uri_artifact_path=', + prediction_docker_uri_output, + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--instance_schema_path=', + instance_schema_uri, + '", "--prediction_schema_path=', + prediction_schema_uri, + '", 
"--trials_path=', + trials, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--measurement_selection_type=', + study_spec_measurement_selection_type, + '", "--metric_goal=', + study_spec_metric_goal, + '", "--eval_steps=', + eval_steps, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '", "--generate_feature_importance=true"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..e687acd6bf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4661 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-tabnet-hyperparameter-tuning-job +# Description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. +# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# 
transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: 
batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - model-evaluation + inputs: + artifacts: + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: TabNet Hyperparameter Tuning + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + 
parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + - model-upload + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + 
producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + 
componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - tabnet-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: tabnet-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_uri + producerTask: tabnet-hyperparameter-tuning-job + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_output + producerTask: tabnet-hyperparameter-tuning-job + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_uri + producerTask: tabnet-hyperparameter-tuning-job + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials + producerTask: tabnet-hyperparameter-tuning-job + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-tabnet-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-tabnet-study-spec-parameters + dependentTasks: + - training-configurator-and-validator + inputs: + artifacts: + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-tabnet-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + tabnet-hyperparameter-tuning-job: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-tabnet-hyperparameter-tuning-job + dependentTasks: + - feature-transform-engine + - get-tabnet-study-spec-parameters + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + cache_data: + componentInputParameter: pipelinechannel--cache_data + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-tabnet-study-spec-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabnet-hyperparameter-tuning-job + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + 
outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + 
parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-tabnet-study-spec-parameters: + executorLabel: exec-get-tabnet-study-spec-parameters + inputDefinitions: + artifacts: + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Metadata generated by example gen. + parameters: + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to produce. + + ''classification'' or ''regression''.' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. 
The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' 
+ parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-tabnet-hyperparameter-tuning-job: + executorLabel: exec-tabnet-hyperparameter-tuning-job + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to a JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized validation split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized train split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Amount of time in seconds to run the trainer for. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to transform output. + parameters: + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to + + ''auto'', caching is determined based on the dataset size.' + isOptional: true + parameterType: STRING + enable_profiler: + defaultValue: false + description: 'Enables profiling and saves a trace + + during evaluation.' 
+ isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and + + checkpointing will take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not + + specified or negative, it means run evaluation on the whole validation + + dataset. If set to 0, it means run evaluation for a fixed number of + + samples.' + isOptional: true + parameterType: NUMBER_INTEGER + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that + + need to be seen before failing the HyperparameterTuningJob. If set to + 0, + + Vertex AI decides how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + parallel_trial_count: + description: 'The desired number of trials to run + + in parallel.' + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for + + the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or + + ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement + + to use if/when the service automatically selects the final measurement + + from previously reported intermediate measurements. One of + + "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, + + possible values: "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible + + values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', + ''auc'', ''precision'', ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' 
+ isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + execution_metrics: + description: Core metrics in dictionary of hyperparameter tuning job execution. + parameterType: STRUCT + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + instance_schema_uri: + description: The path to the instance schema. + parameterType: STRING + prediction_docker_uri_output: + description: The URI of the prediction container. + parameterType: STRING + prediction_schema_uri: + description: The path to the prediction schema. + parameterType: STRING + trials: + description: The path to the hyperparameter tuning trials + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. 
The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' 
+ isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-get-best-hyperparameter-tuning-job-trial: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_best_hyperparameter_tuning_job_trial + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-tabnet-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_tabnet_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_tabnet_study_spec_parameters(\n metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + \ max_trial_count: int,\n prediction_type: str,\n study_spec_parameters_override:\ + \ list, # Required for KFP validation; pylint:disable=g-bare-generic\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ + \ study_spec_parameters for a TabNet hyperparameter tuning job.\n\n Args:\n\ + \ metadata: Metadata generated by example gen.\n max_trial_count:\ + \ The desired total number of trials.\n prediction_type: The type of\ + \ prediction the model is to produce.\n 'classification' or 'regression'.\n\ + \ study_spec_parameters_override: List of dictionaries representing parameters\n\ + \ to optimize. 
The dictionary key is the parameter_id, which is passed\ + \ to\n training job as a command line argument, and the dictionary\ + \ value is the\n parameter specification of the metric.\n\n Returns:\n\ + \ List of final Vizier study_spec_parameters of type ParameterSpec.\n\ + \ \"\"\"\n # Define different search space constants\n tabnet_params_small_data_small_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [5000, 10000, 15000, 20000, 25000, 30000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 5000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.125,\ + \ 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 4},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {'values':\ + \ ['weighted_cross_entropy']},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_small_data_medium_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ 
{'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ + \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {\n \ + \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ + \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ + \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_small_data_large_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n \ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.00007,\ + \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n {\n\ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 
10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 300},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 7},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.0000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ + \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {\n \ + \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ + \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ + \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false', 'true']},\n \ + \ },\n ]\n tabnet_params_medium_data_small_search_space = [\n \ + \ {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 4096, 8192, 16384]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 300},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 
'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_medium_data_medium_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 400},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 
0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_medium_data_large_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ + \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 500},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 8},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 
'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false', 'true']},\n },\n ]\n tabnet_params_large_data_small_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [8192, 16384, 32768]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0002,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 400},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': 
{'min_value':\ + \ 1.0, 'max_value': 10.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_large_data_medium_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [4096, 8192, 16384, 32768]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec': {'min_value':\ + \ 0.0001, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 200, 'max_value': 500},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 7},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_large_data_large_search_space\ + \ = [\n 
{\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 70000, 90000, 110000, 130000, 150000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [4096, 8192, 16384, 32768, 65536]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ + \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 700},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 8},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false', 'true']},\n },\n ]\n search_spaces = {\n 'tabnet_params_small_data_small_search_space':\ + \ (\n tabnet_params_small_data_small_search_space\n ),\n \ + \ 'tabnet_params_small_data_medium_search_space': (\n tabnet_params_small_data_medium_search_space\n\ + \ ),\n 'tabnet_params_small_data_large_search_space': (\n \ + \ tabnet_params_small_data_large_search_space\n ),\n 'tabnet_params_medium_data_small_search_space':\ + \ (\n 
tabnet_params_medium_data_small_search_space\n ),\n\ + \ 'tabnet_params_medium_data_medium_search_space': (\n tabnet_params_medium_data_medium_search_space\n\ + \ ),\n 'tabnet_params_medium_data_large_search_space': (\n \ + \ tabnet_params_medium_data_large_search_space\n ),\n 'tabnet_params_large_data_small_search_space':\ + \ (\n tabnet_params_large_data_small_search_space\n ),\n \ + \ 'tabnet_params_large_data_medium_search_space': (\n tabnet_params_large_data_medium_search_space\n\ + \ ),\n 'tabnet_params_large_data_large_search_space': (\n \ + \ tabnet_params_large_data_large_search_space\n ),\n }\n\n #\ + \ pylint: disable=g-import-not-at-top,import-outside-toplevel\n import\ + \ json\n import warnings\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \ with open(metadata, 'r') as f:\n metadata_path = f.read()\n metadata\ + \ = json.loads(metadata_path)\n # Calculate dataset size bucket. One of\ + \ 'small' (< 1M rows),\n # 'medium' (1M - 100M rows), or 'large' (> 100M\ + \ rows)\n num_rows = (\n metadata['num_examples']['train']\n \ + \ + metadata['num_examples']['valid']\n + metadata['num_examples']['test']\n\ + \ )\n dataset_size_bucket = 'medium'\n if num_rows < 10000000:\n dataset_size_bucket\ + \ = 'small'\n elif num_rows > 100000000:\n dataset_size_bucket = 'large'\n\ + \n # Calculate search space bucket using max_trial_count.\n # One of 'small'\ + \ (< 10), medium (1 - 100), and large (> 100).\n search_space = 'medium'\n\ + \ if max_trial_count < 10:\n search_space = 'small'\n elif max_trial_count\ + \ > 100:\n search_space = 'large'\n\n # Get params for classification.\n\ + \ params = search_spaces[\n f'tabnet_params_{dataset_size_bucket}_data_{search_space}_search_space'\n\ + \ ]\n\n # Format for regression. To get regression study_spec_parameters,\ + \ we need\n # to set `loss_function_type` to \u2018mae\u2019 (\u2018mae\u2019\ + \ and \u2018mse\u2019 for 'large'\n # search space), remove the `alpha_focal_loss`,\ + \ `gamma_focal_loss`\n # and `class_weight` parameters and increase the\ + \ max for\n # `sparsity_loss_weight` to 100.\n if prediction_type == 'regression':\n\ + \ formatted_params = []\n for param in params:\n if param['parameter_id']\ + \ in [\n 'alpha_focal_loss',\n 'gamma_focal_loss',\n \ + \ 'class_weight',\n ]:\n continue\n elif param['parameter_id']\ + \ == 'sparsity_loss_weight':\n param['double_value_spec']['max_value']\ + \ = 100\n elif param['parameter_id'] == 'loss_function_type':\n \ + \ if search_space == 'large':\n param['categorical_value_spec']['values']\ + \ = ['mae', 'mse']\n else:\n param['categorical_value_spec']['values']\ + \ = ['mae']\n formatted_params.append(param)\n else:\n formatted_params\ + \ = params\n\n # Create parameter_id -> parameter_config dictionary for\ + \ params to override\n # and override parameters.\n override_params =\ + \ {}\n for param in study_spec_parameters_override:\n override_params[param['parameter_id']]\ + \ = param\n\n study_spec_parameters = []\n for param in formatted_params:\n\ + \ study_spec_parameters.append(\n override_params.get(param['parameter_id'],\ + \ param)\n )\n\n extra_overrides = set(override_params) - set(\n \ + \ p['parameter_id'] for p in params\n )\n if extra_overrides:\n extra_override_str\ + \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ + \ {extra_override_str} were not found in the params and '\n 'will\ + \ be ignored.'\n )\n\n return study_spec_parameters\n\n" + image: python:3.7-slim + exec-model-batch-predict: + container: + args: + - --type + 
- BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - 
'{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - 
'{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-tabnet-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJobWithMetrics + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --execution_metrics + - '{{$.outputs.parameters[''execution_metrics''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabnet-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", 
"{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", + "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", + "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", + "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", + \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": 
["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. 
+ name: automl-tabular-tabnet-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + 
pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 
The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' 
+ parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible values: [ ''loss'', + + ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', + + ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py new file mode 100644 index 0000000000..e0ceeb08f9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -0,0 +1,300 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabnet Trainer component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def tabnet_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument + weight_column: Optional[str] = '', + max_steps: Optional[int] = -1, + max_train_secs: Optional[int] = -1, + large_category_dim: Optional[int] = 1, + large_category_thresh: Optional[int] = 300, + yeo_johnson_transform: Optional[bool] = True, + feature_dim: Optional[int] = 64, + feature_dim_ratio: Optional[float] = 0.5, + num_decision_steps: Optional[int] = 6, + relaxation_factor: Optional[float] = 1.5, + decay_every: Optional[float] = 100, + decay_rate: Optional[float] = 0.95, + gradient_thresh: Optional[float] = 2000, + sparsity_loss_weight: Optional[float] = 1e-05, + batch_momentum: Optional[float] = 0.95, + batch_size_ratio: Optional[float] = 0.25, + num_transformer_layers: Optional[int] = 4, + num_transformer_layers_ratio: Optional[float] = 0.25, + class_weight: Optional[float] = 1.0, + loss_function_type: Optional[str] = 'default', + alpha_focal_loss: Optional[float] = 0.25, + gamma_focal_loss: Optional[float] = 2.0, + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + batch_size: Optional[int] = 100, + measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + optimization_metric: Optional[str] = '', + eval_frequency_secs: Optional[int] = 600, + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains a TabNet model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the + trainer for. + learning_rate: The learning rate used by the linear optimizer. + large_category_dim: Embedding dimension for categorical + feature with large number of categories. 
+      large_category_thresh: Threshold for number of categories
+        to apply large_category_dim embedding dimension to.
+      yeo_johnson_transform: Enables trainable Yeo-Johnson
+        power transform.
+      feature_dim: Dimensionality of the hidden representation
+        in feature transformation block.
+      feature_dim_ratio: The ratio of output dimension
+        (dimensionality of the outputs of each decision step) to feature
+        dimension.
+      num_decision_steps: Number of sequential decision steps.
+      relaxation_factor: Relaxation factor that promotes the
+        reuse of each feature at different decision steps. When it is 1, a
+        feature is enforced to be used only at one decision step and as it
+        increases, more flexibility is provided to use a feature at multiple
+        decision steps.
+      decay_every: Number of iterations for periodically
+        applying learning rate decaying.
+      decay_rate: Learning rate decay rate.
+      gradient_thresh: Threshold for the norm of gradients for clipping.
+      sparsity_loss_weight: Weight of the loss for sparsity
+        regularization (increasing it will yield more sparse feature selection).
+      batch_momentum: Momentum in ghost batch normalization.
+      batch_size_ratio: The ratio of virtual batch size (size
+        of the ghost batch normalization) to batch size.
+      num_transformer_layers: The number of transformer layers
+        for each decision step.
+      num_transformer_layers_ratio: The ratio of shared
+        transformer layers to transformer layers.
+      class_weight: The class weight is used to compute a
+        weighted cross entropy, which is helpful for classifying imbalanced
+        datasets. Only used for classification.
+      loss_function_type: Loss function type. Loss function in
+        classification [cross_entropy, weighted_cross_entropy, focal_loss],
+        default is cross_entropy. Loss function in regression: [rmse, mae, mse],
+        default is mse.
+      alpha_focal_loss: Alpha value (balancing factor) in
+        focal_loss function. Only used for classification.
+      gamma_focal_loss: Gamma value (modulating factor) for
+        focal loss. Only used for classification.
+      enable_profiler: Enables profiling and saves a trace
+        during evaluation.
+      cache_data: Whether to cache data or not. If set to
+        'auto', caching is determined based on the dataset size.
+      seed: Seed to be used for this run.
+      eval_steps: Number of steps to run evaluation for. If not
+        specified or negative, it means run evaluation on the whole validation
+        dataset. If set to 0, it means run evaluation for a fixed number of
+        samples.
+      batch_size: Batch size for training.
+      measurement_selection_type: Which measurement to use
+        if/when the service automatically selects the final measurement from
+        previously reported intermediate measurements. One of "BEST_MEASUREMENT"
+        or "LAST_MEASUREMENT".
+      optimization_metric: Optimization metric used for
+        `measurement_selection_type`. Default is "rmse" for regression and "auc"
+        for classification.
+      eval_frequency_secs: Frequency at which evaluation and
+        checkpointing will take place.
+      training_machine_spec: The training machine
+        spec. See https://cloud.google.com/compute/docs/machine-types for
+        options.
+      training_disk_spec: The training disk spec.
+      instance_baseline: The path to a JSON file for baseline values.
+      metadata: The path to the metadata artifact produced by the
+        training configurator.
+      materialized_train_split: The path to the materialized train split.
+      materialized_eval_split: The path to the materialized validation split.
+ transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + unmanaged_container_model: The UnmanagedContainerModel artifact. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabnet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--max_steps=', + max_steps, + '", "--max_train_secs=', + max_train_secs, + '", "--learning_rate=', + learning_rate, + '", "--large_category_dim=', + large_category_dim, + '", "--large_category_thresh=', + large_category_thresh, + '", "--yeo_johnson_transform=', + yeo_johnson_transform, + '", "--feature_dim=', + feature_dim, + '", "--feature_dim_ratio=', + feature_dim_ratio, + '", "--num_decision_steps=', + num_decision_steps, + '", "--relaxation_factor=', + relaxation_factor, + '", "--decay_every=', + decay_every, + '", "--decay_rate=', + decay_rate, + '", "--gradient_thresh=', + gradient_thresh, + '", "--sparsity_loss_weight=', + sparsity_loss_weight, + '", "--batch_momentum=', + batch_momentum, + '", "--batch_size_ratio=', + batch_size_ratio, + '", "--num_transformer_layers=', + num_transformer_layers, + '", "--num_transformer_layers_ratio=', + num_transformer_layers_ratio, + '", "--class_weight=', + class_weight, + '", "--loss_function_type=', + loss_function_type, + '", "--alpha_focal_loss=', + alpha_focal_loss, + '", "--gamma_focal_loss=', + gamma_focal_loss, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--batch_size=', + batch_size, + '", "--measurement_selection_type=', + measurement_selection_type, + '", "--optimization_metric=', + optimization_metric, + '", "--eval_frequency_secs=', + eval_frequency_secs, + ( + '", "--generate_feature_importance=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml new file mode 100644 index 0000000000..32f5b41c9e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -0,0 +1,4302 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-tabnet-trainer +# Description: The TabNet training pipeline. +# Inputs: +# alpha_focal_loss: float [Default: 0.25] +# batch_momentum: float [Default: 0.95] +# batch_size: int [Default: 100.0] +# batch_size_ratio: float [Default: 0.25] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# class_weight: float [Default: 1.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# decay_every: float [Default: 100.0] +# decay_rate: float [Default: 0.95] +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_dim: int [Default: 64.0] +# feature_dim_ratio: float [Default: 0.5] +# feature_selection_algorithm: str [Default: 'AMI'] +# gamma_focal_loss: float [Default: 2.0] +# gradient_thresh: float [Default: 2000.0] +# large_category_dim: int [Default: 1.0] +# large_category_thresh: int [Default: 300.0] +# learning_rate: float +# location: str +# loss_function_type: str [Default: 'default'] +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_selected_features: int [Default: -1.0] +# max_steps: int [Default: -1.0] +# max_train_secs: int [Default: -1.0] +# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# num_decision_steps: int [Default: 6.0] +# num_transformer_layers: int [Default: 4.0] +# num_transformer_layers_ratio: float [Default: 0.25] +# optimization_metric: str [Default: ''] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# relaxation_factor: float [Default: 1.5] +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# sparsity_loss_weight: float [Default: 1e-05] +# stratified_split_key: str [Default: ''] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# 
transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# yeo_johnson_transform: bool [Default: True] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--tabnet-trainer-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - model-evaluation + inputs: + artifacts: + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: TabNet Trainer + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + pipelinechannel--tabnet-trainer-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + 
parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - tabnet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - model-upload + - tabnet-trainer + inputs: + artifacts: + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + pipelinechannel--tabnet-trainer-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + 
componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: 
feature-transform-engine + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - tabnet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + tabnet-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabnet-trainer + dependentTasks: + - feature-transform-engine + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + alpha_focal_loss: + componentInputParameter: pipelinechannel--alpha_focal_loss + batch_momentum: + componentInputParameter: pipelinechannel--batch_momentum + batch_size: + componentInputParameter: pipelinechannel--batch_size + batch_size_ratio: + componentInputParameter: pipelinechannel--batch_size_ratio + cache_data: + componentInputParameter: pipelinechannel--cache_data + class_weight: + componentInputParameter: pipelinechannel--class_weight + decay_every: + componentInputParameter: pipelinechannel--decay_every + decay_rate: + componentInputParameter: pipelinechannel--decay_rate + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + feature_dim: + componentInputParameter: pipelinechannel--feature_dim + feature_dim_ratio: + 
componentInputParameter: pipelinechannel--feature_dim_ratio + gamma_focal_loss: + componentInputParameter: pipelinechannel--gamma_focal_loss + gradient_thresh: + componentInputParameter: pipelinechannel--gradient_thresh + large_category_dim: + componentInputParameter: pipelinechannel--large_category_dim + large_category_thresh: + componentInputParameter: pipelinechannel--large_category_thresh + learning_rate: + componentInputParameter: pipelinechannel--learning_rate + location: + componentInputParameter: pipelinechannel--location + loss_function_type: + componentInputParameter: pipelinechannel--loss_function_type + max_steps: + componentInputParameter: pipelinechannel--max_steps + max_train_secs: + componentInputParameter: pipelinechannel--max_train_secs + measurement_selection_type: + componentInputParameter: pipelinechannel--measurement_selection_type + num_decision_steps: + componentInputParameter: pipelinechannel--num_decision_steps + num_transformer_layers: + componentInputParameter: pipelinechannel--num_transformer_layers + num_transformer_layers_ratio: + componentInputParameter: pipelinechannel--num_transformer_layers_ratio + optimization_metric: + componentInputParameter: pipelinechannel--optimization_metric + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + relaxation_factor: + componentInputParameter: pipelinechannel--relaxation_factor + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + sparsity_loss_weight: + componentInputParameter: pipelinechannel--sparsity_loss_weight + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + yeo_johnson_transform: + componentInputParameter: pipelinechannel--yeo_johnson_transform + taskInfo: + name: tabnet-trainer + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--alpha_focal_loss: + parameterType: NUMBER_DOUBLE + pipelinechannel--batch_momentum: + parameterType: NUMBER_DOUBLE + pipelinechannel--batch_size: + parameterType: NUMBER_INTEGER + 
pipelinechannel--batch_size_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--class_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--decay_every: + parameterType: NUMBER_DOUBLE + pipelinechannel--decay_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_dim: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_dim_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--gamma_focal_loss: + parameterType: NUMBER_DOUBLE + pipelinechannel--gradient_thresh: + parameterType: NUMBER_DOUBLE + pipelinechannel--large_category_dim: + parameterType: NUMBER_INTEGER + pipelinechannel--large_category_thresh: + parameterType: NUMBER_INTEGER + pipelinechannel--learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--location: + parameterType: STRING + pipelinechannel--loss_function_type: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--max_train_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--measurement_selection_type: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_decision_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--num_transformer_layers: + parameterType: NUMBER_INTEGER + pipelinechannel--num_transformer_layers_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_metric: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--relaxation_factor: + parameterType: NUMBER_DOUBLE + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + 
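Editor's note: the long `pipelinechannel--*` block here is the compiler surfacing every pipeline-level input consumed inside this DAG, with each Python annotation mapped to an IR `parameterType` (str -> STRING, float -> NUMBER_DOUBLE, int -> NUMBER_INTEGER, bool -> BOOLEAN, list -> LIST, dict -> STRUCT). A minimal, hypothetical pipeline illustrating that mapping (not part of this patch):

    from kfp import compiler, dsl


    @dsl.component
    def echo(msg: str) -> str:
        return msg


    @dsl.pipeline(name='parameter-type-sketch')
    def parameter_type_sketch(
        project: str,                             # -> parameterType: STRING
        learning_rate: float = 0.01,              # -> parameterType: NUMBER_DOUBLE
        max_steps: int = 1000,                    # -> parameterType: NUMBER_INTEGER
        run_evaluation: bool = False,             # -> parameterType: BOOLEAN
        worker_pool_specs_override: list = [],    # -> parameterType: LIST
        tf_auto_transform_features: dict = {},    # -> parameterType: STRUCT
    ):
        echo(msg=project)


    if __name__ == '__main__':
        # Inspect inputDefinitions.parameters in the emitted YAML to see the
        # same parameterType values used in the spec above.
        compiler.Compiler().compile(parameter_type_sketch,
                                    'parameter_type_sketch.yaml')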
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--sparsity_loss_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + pipelinechannel--yeo_johnson_transform: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' 
+ question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
+ isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' 
+  outputDefinitions:
+    artifacts:
+      materialized_eval_split:
+        artifactType:
+          schemaTitle: system.Artifact
+          schemaVersion: 0.0.1
+        description: Path pattern to materialized eval split.
+      materialized_test_split:
+        artifactType:
+          schemaTitle: system.Artifact
+          schemaVersion: 0.0.1
+        description: Path pattern to materialized test split.
+      materialized_train_split:
+        artifactType:
+          schemaTitle: system.Artifact
+          schemaVersion: 0.0.1
+        description: Path pattern to materialized train split.
+  comp-tabnet-trainer:
+    executorLabel: exec-tabnet-trainer
+    inputDefinitions:
+      artifacts:
+        instance_baseline:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The path to a JSON file for baseline values.
+        materialized_eval_split:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The path to the materialized validation split.
+        materialized_train_split:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The path to the materialized train split.
+        metadata:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The tabular example gen metadata.
+        training_schema_uri:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The path to the training schema.
+        transform_output:
+          artifactType:
+            schemaTitle: system.Artifact
+            schemaVersion: 0.0.1
+          description: The path to transform output.
+      parameters:
+        alpha_focal_loss:
+          defaultValue: 0.25
+          description: 'Alpha value (balancing factor) in
+
+            focal_loss function. Only used for classification.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        batch_momentum:
+          defaultValue: 0.95
+          description: Momentum in ghost batch normalization.
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        batch_size:
+          defaultValue: 100.0
+          description: Batch size for training.
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        batch_size_ratio:
+          defaultValue: 0.25
+          description: 'The ratio of virtual batch size (size
+
+            of the ghost batch normalization) to batch size.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        cache_data:
+          defaultValue: auto
+          description: 'Whether to cache data or not. If set to
+
+            ''auto'', caching is determined based on the dataset size.'
+          isOptional: true
+          parameterType: STRING
+        class_weight:
+          defaultValue: 1.0
+          description: 'The class weight is used to compute a
+
+            weighted cross entropy, which is helpful in classifying an imbalanced dataset.
+
+            Only used for classification.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        decay_every:
+          defaultValue: 100.0
+          description: 'Number of iterations for periodically
+
+            applying learning rate decaying.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        decay_rate:
+          defaultValue: 0.95
+          description: Learning rate decaying.
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        enable_profiler:
+          defaultValue: false
+          description: 'Enables profiling and saves a trace
+
+            during evaluation.'
+          isOptional: true
+          parameterType: BOOLEAN
+        encryption_spec_key_name:
+          defaultValue: ''
+          description: The KMS key name.
+          isOptional: true
+          parameterType: STRING
+        eval_frequency_secs:
+          defaultValue: 600.0
+          description: 'Frequency at which evaluation and
+
+            checkpointing will take place.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        eval_steps:
+          defaultValue: 0.0
+          description: 'Number of steps to run evaluation for. If not
+
+            specified or negative, it means run evaluation on the whole validation
+
+            dataset. If set to 0, it means run evaluation for a fixed number of
+
+            samples.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        feature_dim:
+          defaultValue: 64.0
+          description: 'Dimensionality of the hidden representation
+
+            in feature transformation block.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        feature_dim_ratio:
+          defaultValue: 0.5
+          description: 'The ratio of output dimension
+
+            (dimensionality of the outputs of each decision step) to feature
+
+            dimension.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        gamma_focal_loss:
+          defaultValue: 2.0
+          description: 'Gamma value (modulating factor) for
+
+            focal loss. Only used for classification.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        gradient_thresh:
+          defaultValue: 2000.0
+          description: Threshold for the norm of gradients for clipping.
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        large_category_dim:
+          defaultValue: 1.0
+          description: 'Embedding dimension for categorical
+
+            feature with large number of categories.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        large_category_thresh:
+          defaultValue: 300.0
+          description: 'Threshold for number of categories
+
+            to apply large_category_dim embedding dimension to.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        learning_rate:
+          description: The learning rate used by the linear optimizer.
+          parameterType: NUMBER_DOUBLE
+        location:
+          description: The GCP region that runs the pipeline components.
+          parameterType: STRING
+        loss_function_type:
+          defaultValue: default
+          description: 'Loss function type. Loss function in
+
+            classification [cross_entropy, weighted_cross_entropy, focal_loss],
+
+            default is cross_entropy. Loss function in regression: [rmse, mae, mse],
+
+            default is mse.'
+          isOptional: true
+          parameterType: STRING
+        max_steps:
+          defaultValue: -1.0
+          description: Number of steps to run the trainer for.
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        max_train_secs:
+          defaultValue: -1.0
+          description: 'Amount of time in seconds to run the
+
+            trainer for.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        measurement_selection_type:
+          defaultValue: BEST_MEASUREMENT
+          description: 'Which measurement to use
+
+            if/when the service automatically selects the final measurement from
+
+            previously reported intermediate measurements. One of "BEST_MEASUREMENT"
+
+            or "LAST_MEASUREMENT".'
+          isOptional: true
+          parameterType: STRING
+        num_decision_steps:
+          defaultValue: 6.0
+          description: Number of sequential decision steps.
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        num_transformer_layers:
+          defaultValue: 4.0
+          description: 'The number of transformer layers
+
+            for each decision step.'
+          isOptional: true
+          parameterType: NUMBER_INTEGER
+        num_transformer_layers_ratio:
+          defaultValue: 0.25
+          description: 'The ratio of shared
+
+            transformer layer to transformer layers.'
+          isOptional: true
+          parameterType: NUMBER_DOUBLE
+        optimization_metric:
+          defaultValue: ''
+          description: 'Optimization metric used for
+
+            `measurement_selection_type`. Default is "rmse" for regression and "auc"
+
+            for classification.'
+          isOptional: true
+          parameterType: STRING
+        prediction_type:
+          description: 'The type of prediction the model is to
+
+            produce. "classification" or "regression".'
+ parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + relaxation_factor: + defaultValue: 1.5 + description: 'Relaxation factor that promotes the + + reuse of each feature at different decision steps. When it is 1, a + + feature is enforced to be used only at one decision step and as it + + increases, more flexibility is provided to use a feature at multiple + + decision steps.' + isOptional: true + parameterType: NUMBER_DOUBLE + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + sparsity_loss_weight: + defaultValue: 1.0e-05 + description: 'Weight of the loss for sparsity + + regularization (increasing it will yield more sparse feature selection).' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + yeo_johnson_transform: + defaultValue: true + description: 'Enables trainable Yeo-Johnson + + power transform.' + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - 
instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ 
worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = 
dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-tabnet-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabnet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", + "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", + "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", + "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--large_category_dim=", + "{{$.inputs.parameters[''large_category_dim'']}}", "\", \"--large_category_thresh=", + "{{$.inputs.parameters[''large_category_thresh'']}}", "\", \"--yeo_johnson_transform=", + "{{$.inputs.parameters[''yeo_johnson_transform'']}}", "\", \"--feature_dim=", + "{{$.inputs.parameters[''feature_dim'']}}", "\", \"--feature_dim_ratio=", + "{{$.inputs.parameters[''feature_dim_ratio'']}}", "\", \"--num_decision_steps=", + "{{$.inputs.parameters[''num_decision_steps'']}}", "\", \"--relaxation_factor=", + "{{$.inputs.parameters[''relaxation_factor'']}}", "\", \"--decay_every=", + "{{$.inputs.parameters[''decay_every'']}}", "\", \"--decay_rate=", "{{$.inputs.parameters[''decay_rate'']}}", + "\", \"--gradient_thresh=", "{{$.inputs.parameters[''gradient_thresh'']}}", + "\", \"--sparsity_loss_weight=", "{{$.inputs.parameters[''sparsity_loss_weight'']}}", + "\", \"--batch_momentum=", "{{$.inputs.parameters[''batch_momentum'']}}", + "\", \"--batch_size_ratio=", "{{$.inputs.parameters[''batch_size_ratio'']}}", + "\", \"--num_transformer_layers=", "{{$.inputs.parameters[''num_transformer_layers'']}}", + "\", \"--num_transformer_layers_ratio=", "{{$.inputs.parameters[''num_transformer_layers_ratio'']}}", + "\", \"--class_weight=", "{{$.inputs.parameters[''class_weight'']}}", "\", + \"--loss_function_type=", "{{$.inputs.parameters[''loss_function_type'']}}", + "\", \"--alpha_focal_loss=", 
"{{$.inputs.parameters[''alpha_focal_loss'']}}", + "\", \"--gamma_focal_loss=", "{{$.inputs.parameters[''gamma_focal_loss'']}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", + "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", + "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", 
"{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The TabNet training pipeline. + name: automl-tabular-tabnet-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--alpha_focal_loss: + componentInputParameter: alpha_focal_loss + pipelinechannel--batch_momentum: + componentInputParameter: batch_momentum + pipelinechannel--batch_size: + componentInputParameter: batch_size + pipelinechannel--batch_size_ratio: + componentInputParameter: batch_size_ratio + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--class_weight: + componentInputParameter: class_weight + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--decay_every: + componentInputParameter: decay_every + pipelinechannel--decay_rate: + componentInputParameter: decay_rate + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + 
componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_dim: + componentInputParameter: feature_dim + pipelinechannel--feature_dim_ratio: + componentInputParameter: feature_dim_ratio + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--gamma_focal_loss: + componentInputParameter: gamma_focal_loss + pipelinechannel--gradient_thresh: + componentInputParameter: gradient_thresh + pipelinechannel--large_category_dim: + componentInputParameter: large_category_dim + pipelinechannel--large_category_thresh: + componentInputParameter: large_category_thresh + pipelinechannel--learning_rate: + componentInputParameter: learning_rate + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--loss_function_type: + componentInputParameter: loss_function_type + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_steps: + componentInputParameter: max_steps + pipelinechannel--max_train_secs: + componentInputParameter: max_train_secs + pipelinechannel--measurement_selection_type: + componentInputParameter: measurement_selection_type + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--num_decision_steps: + componentInputParameter: num_decision_steps + pipelinechannel--num_transformer_layers: + componentInputParameter: num_transformer_layers + pipelinechannel--num_transformer_layers_ratio: + componentInputParameter: num_transformer_layers_ratio + pipelinechannel--optimization_metric: + componentInputParameter: optimization_metric + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--relaxation_factor: + componentInputParameter: relaxation_factor + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: 
+ taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--sparsity_loss_weight: + componentInputParameter: sparsity_loss_weight + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + pipelinechannel--yeo_johnson_transform: + componentInputParameter: yeo_johnson_transform + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + alpha_focal_loss: + defaultValue: 0.25 + description: 'Alpha value (balancing factor) in focal_loss function. + + Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + batch_momentum: + defaultValue: 0.95 + description: Momentum in ghost batch normalization. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 100.0 + description: Batch size for training. + isOptional: true + parameterType: NUMBER_INTEGER + batch_size_ratio: + defaultValue: 0.25 + description: 'The ratio of virtual batch size (size of the ghost batch + + normalization) to batch size.' + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. 
If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + class_weight: + defaultValue: 1.0 + description: 'The class weight is used to computes a weighted cross entropy + + which is helpful in classify imbalanced dataset. Only used for + + classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + decay_every: + defaultValue: 100.0 + description: 'Number of iterations for periodically applying learning rate + + decaying.' + isOptional: true + parameterType: NUMBER_DOUBLE + decay_rate: + defaultValue: 0.95 + description: Learning rate decaying. + isOptional: true + parameterType: NUMBER_DOUBLE + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_dim: + defaultValue: 64.0 + description: 'Dimensionality of the hidden representation in feature + + transformation block.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_dim_ratio: + defaultValue: 0.5 + description: 'The ratio of output dimension (dimensionality of the + + outputs of each decision step) to feature dimension.' + isOptional: true + parameterType: NUMBER_DOUBLE + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + gamma_focal_loss: + defaultValue: 2.0 + description: 'Gamma value (modulating factor) for focal loss for focal + + loss. Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + gradient_thresh: + defaultValue: 2000.0 + description: Threshold for the norm of gradients for clipping. + isOptional: true + parameterType: NUMBER_DOUBLE + large_category_dim: + defaultValue: 1.0 + description: 'Embedding dimension for categorical feature with large + + number of categories.' + isOptional: true + parameterType: NUMBER_INTEGER + large_category_thresh: + defaultValue: 300.0 + description: 'Threshold for number of categories to apply + + large_category_dim embedding dimension to.' + isOptional: true + parameterType: NUMBER_INTEGER + learning_rate: + description: The learning rate used by the linear optimizer. + parameterType: NUMBER_DOUBLE + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + loss_function_type: + defaultValue: default + description: 'Loss function type. Loss function in classification + + [cross_entropy, weighted_cross_entropy, focal_loss], default is + + cross_entropy. Loss function in regression: [rmse, mae, mse], default is + + mse.' + isOptional: true + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + defaultValue: -1.0 + description: Number of steps to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + max_train_secs: + defaultValue: -1.0 + description: Amount of time in seconds to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement to use if/when the service + + automatically selects the final measurement from previously reported + + intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' 
+ isOptional: true + parameterType: STRING + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + num_decision_steps: + defaultValue: 6.0 + description: Number of sequential decision steps. + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers: + defaultValue: 4.0 + description: 'The number of transformer layers for each decision + + step. used only at one decision step and as it increases, more flexibility + + is provided to use a feature at multiple decision steps.' + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers_ratio: + defaultValue: 0.25 + description: 'The ratio of shared transformer layer to + + transformer layers.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_metric: + defaultValue: '' + description: 'Optimization metric used for + + `measurement_selection_type`. Default is "rmse" for regression and "auc" + + for classification.' + isOptional: true + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + relaxation_factor: + defaultValue: 1.5 + description: 'Relaxation factor that promotes the reuse of each feature + + at different decision steps. When it is 1, a feature is enforced to be + + used only at one decision step and as it increases, more flexibility is + + provided to use a feature at multiple decision steps.' + isOptional: true + parameterType: NUMBER_DOUBLE + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + sparsity_loss_weight: + defaultValue: 1.0e-05 + description: 'Weight of the loss for sparsity regularization + + (increasing it will yield more sparse feature selection).' + isOptional: true + parameterType: NUMBER_DOUBLE + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. 
Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + yeo_johnson_transform: + defaultValue: true + description: Enables trainable Yeo-Johnson power transform. + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py new file mode 100644 index 0000000000..096c5e378c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py @@ -0,0 +1,3360 @@ +"""Util functions for AutoML Tabular pipeline.""" + +import json +import os +import pathlib +from typing import Any, Dict, List, Optional, Tuple, Union +import uuid +import warnings + +_DEFAULT_NUM_PARALLEL_TRAILS = 35 +_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 +_NUM_FOLDS = 5 +_DISTILL_TOTAL_TRIALS = 100 +_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 +_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 +_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 +_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 +_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' +_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 +_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 +_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 + +# Needed because we reference the AutoML Tabular V1 pipeline. 
+_GCPC_STAGING_PATH = pathlib.Path( + __file__ +).parent.parent.parent.parent.resolve() +_GCPC_GA_TABULAR_PATH = str(_GCPC_STAGING_PATH / 'v1' / 'automl' / 'tabular') + + +def _update_parameters( + parameter_values: Dict[str, Any], new_params: Dict[str, Any] +): + parameter_values.update( + {param: value for param, value in new_params.items() if value is not None} + ) + + +def _generate_model_display_name() -> str: + """Automatically generates a model_display_name. + + Returns: + model_display_name. + """ + return f'tabular-workflow-model-{uuid.uuid4()}' + + +# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag +# to signify FTE usage instead of the presence of num_selected_features. +def _get_default_pipeline_params( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[float] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + max_selected_features: Optional[int] = None, + apply_feature_selection_tuning: bool = False, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: 
Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Dict[str, Any]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component.
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + max_selected_features: number of features to select for training, + apply_feature_selection_tuning: tuning feature selection rate if true. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. 
+ num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. If specified, + enable_probabilistic_inference and run_distillation cannot be enabled. + model_display_name: The display name of the uploaded Vertex model. + model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not cv_trainer_worker_pool_specs_override: + cv_trainer_worker_pool_specs_override = [] + if not quantiles: + quantiles = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'optimization_objective': optimization_objective, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'weight_column': weight_column, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': ( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'dataflow_service_account': dataflow_service_account, + 'encryption_spec_key_name': encryption_spec_key_name, + 'max_selected_features': max_selected_features, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'quantiles': quantiles, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + } + parameter_values.update( + {param: value for param, value in parameters.items() if value is not None} + ) + + if run_evaluation: + eval_parameters = { + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_batch_explain_machine_type': ( + evaluation_batch_explain_machine_type + ), + 'evaluation_batch_explain_starting_replica_count': ( + evaluation_batch_explain_starting_replica_count + ), + 'evaluation_batch_explain_max_replica_count': ( + evaluation_batch_explain_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 
'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + } + parameter_values.update( + { + param: value + for param, value in eval_parameters.items() + if value is not None + } + ) + + # V1 pipeline without FTE + if num_selected_features is None: + if not additional_experiments: + additional_experiments = {} + + parameters = { + 'transformations': transformations, + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'additional_experiments': additional_experiments, + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + if apply_feature_selection_tuning: + parameter_values.update({ + 'apply_feature_selection_tuning': apply_feature_selection_tuning, + }) + + if run_distillation: + distillation_parameters = { + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + } + parameter_values.update( + { + param: value + for param, value in distillation_parameters.items() + if value is not None + } + ) + + # V2 pipeline (with FTE) + else: + if run_distillation: + raise ValueError( + 'Distillation is currently not supported' + ' when num_selected_features is specified.' 
+ ) + + parameters = { + 'num_selected_features': num_selected_features, + 'dataset_level_custom_transformation_definitions': [], + 'dataset_level_transformations': [], + 'tf_auto_transform_features': {}, + 'tf_custom_transformation_definitions': [], + 'legacy_transformations_path': transformations, + 'feature_transform_engine_dataflow_machine_type': ( + transform_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + transform_dataflow_disk_size_gb + ), + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + return parameter_values + + +def get_automl_tabular_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: 
Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component.
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. + model_display_name: The display name of the uploaded Vertex model. 
+ model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=quantiles, + 
enable_probabilistic_inference=enable_probabilistic_inference, + num_selected_features=num_selected_features, + model_display_name=model_display_name, + model_description=model_description, + ) + + # V1 pipeline without FTE + if num_selected_features is None: + pipeline_definition_path = os.path.join( + _GCPC_GA_TABULAR_PATH, 'automl_tabular_pipeline.yaml' + ) + + # V2 pipeline with FTE + else: + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'automl_tabular_v2_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_automl_tabular_feature_selection_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + max_selected_features: int = 1000, + apply_feature_selection_tuning: bool = False, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + 
"""Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. 
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty
+      the default subnetwork will be used. Example:
+      https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow workers use public IP
+      addresses.
+    encryption_spec_key_name: The KMS key name.
+    additional_experiments: Use this field to configure private preview
+      features.
+    dataflow_service_account: Custom service account to run dataflow jobs.
+    run_evaluation: Whether to run evaluation in the training pipeline.
+    evaluation_batch_predict_machine_type: The prediction server machine type
+      for batch predict components during evaluation.
+    evaluation_batch_predict_starting_replica_count: The initial number of
+      prediction server for batch predict components during evaluation.
+    evaluation_batch_predict_max_replica_count: The max number of prediction
+      server for batch predict components during evaluation.
+    evaluation_batch_explain_machine_type: The prediction server machine type
+      for batch explain components during evaluation.
+    evaluation_batch_explain_starting_replica_count: The initial number of
+      prediction server for batch explain components during evaluation.
+    evaluation_batch_explain_max_replica_count: The max number of prediction
+      server for batch explain components during evaluation.
+    evaluation_dataflow_machine_type: The dataflow machine type for evaluation
+      components.
+    evaluation_dataflow_starting_num_workers: The initial number of Dataflow
+      workers for evaluation components.
+    evaluation_dataflow_max_num_workers: The max number of Dataflow workers for
+      evaluation components.
+    evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for
+      evaluation components.
+    max_selected_features: Number of features to select for training.
+    apply_feature_selection_tuning: Whether to tune the feature selection rate.
+    run_distillation: Whether to run distillation in the training pipeline.
+    distill_batch_predict_machine_type: The prediction server machine type for
+      batch predict component in the model distillation.
+    distill_batch_predict_starting_replica_count: The initial number of
+      prediction server for batch predict component in the model distillation.
+    distill_batch_predict_max_replica_count: The max number of prediction server
+      for batch predict component in the model distillation.
+    model_display_name: The display name of the uploaded Vertex model.
+    model_description: The description for the uploaded model.
+
+  Returns:
+    Tuple of pipeline_definition_path and parameter_values.
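+
+  Example:
+    An illustrative sketch only (not part of the official docs); the project,
+    location, and GCS paths below are placeholders, and submitting the returned
+    template assumes the google-cloud-aiplatform SDK is installed:
+
+      from google.cloud import aiplatform
+
+      template_path, parameter_values = (
+          get_automl_tabular_feature_selection_pipeline_and_parameters(
+              project='my-project',
+              location='us-central1',
+              root_dir='gs://my-bucket/pipeline_root',
+              target_column='label',
+              prediction_type='classification',
+              optimization_objective='maximize-au-roc',
+              transformations='gs://my-bucket/transform_config.json',
+              train_budget_milli_node_hours=1000,
+          )
+      )
+      job = aiplatform.PipelineJob(
+          display_name='automl-tabular-feature-selection',
+          template_path=template_path,
+          pipeline_root='gs://my-bucket/pipeline_root',
+          parameter_values=parameter_values,
+      )
+      job.submit()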
+ """ + model_display_name = ( + model_display_name + if model_display_name + else _generate_model_display_name() + ) + + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + max_selected_features=max_selected_features, + apply_feature_selection_tuning=apply_feature_selection_tuning, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + model_display_name=model_display_name, + 
model_description=model_description,
+  )
+
+  pipeline_definition_path = os.path.join(
+      pathlib.Path(__file__).parent.resolve(),
+      'automl_tabular_feature_selection_pipeline.yaml',
+  )
+  return pipeline_definition_path, parameter_values
+
+
+def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str:
+  """Convert json input dict to encoded parameter string.
+
+  This function is required because the YAML component definition does not
+  provide a keyword for applying quote escapes, so the JSON argument's quotes
+  must be manually escaped using this function.
+
+  Args:
+    input_dict: The input json dictionary.
+
+  Returns:
+    The encoded string used for parameter.
+  """
+  if not input_dict:
+    return ''
+  out = json.dumps(json.dumps(input_dict))
+  return out[1:-1]  # remove the outside quotes, e.g., "foo" -> foo
+
+
+def get_skip_architecture_search_pipeline_and_parameters(
+    project: str,
+    location: str,
+    root_dir: str,
+    target_column: str,
+    prediction_type: str,
+    optimization_objective: str,
+    transformations: str,
+    train_budget_milli_node_hours: float,
+    stage_1_tuning_result_artifact_uri: str,
+    stage_2_num_parallel_trials: Optional[int] = None,
+    stage_2_num_selected_trials: Optional[int] = None,
+    data_source_csv_filenames: Optional[str] = None,
+    data_source_bigquery_table_path: Optional[str] = None,
+    predefined_split_key: Optional[str] = None,
+    timestamp_split_key: Optional[str] = None,
+    stratified_split_key: Optional[str] = None,
+    training_fraction: Optional[float] = None,
+    validation_fraction: Optional[float] = None,
+    test_fraction: Optional[float] = None,
+    weight_column: Optional[str] = None,
+    optimization_objective_recall_value: Optional[float] = None,
+    optimization_objective_precision_value: Optional[float] = None,
+    cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None,
+    export_additional_model_without_custom_ops: bool = False,
+    stats_and_example_gen_dataflow_machine_type: Optional[str] = None,
+    stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None,
+    stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None,
+    transform_dataflow_machine_type: Optional[str] = None,
+    transform_dataflow_max_num_workers: Optional[int] = None,
+    transform_dataflow_disk_size_gb: Optional[int] = None,
+    dataflow_subnetwork: Optional[str] = None,
+    dataflow_use_public_ips: bool = True,
+    encryption_spec_key_name: Optional[str] = None,
+    additional_experiments: Optional[Dict[str, Any]] = None,
+    dataflow_service_account: Optional[str] = None,
+    run_evaluation: bool = True,
+    evaluation_batch_predict_machine_type: Optional[str] = None,
+    evaluation_batch_predict_starting_replica_count: Optional[int] = None,
+    evaluation_batch_predict_max_replica_count: Optional[int] = None,
+    evaluation_batch_explain_machine_type: Optional[str] = None,
+    evaluation_batch_explain_starting_replica_count: Optional[int] = None,
+    evaluation_batch_explain_max_replica_count: Optional[int] = None,
+    evaluation_dataflow_machine_type: Optional[str] = None,
+    evaluation_dataflow_starting_num_workers: Optional[int] = None,
+    evaluation_dataflow_max_num_workers: Optional[int] = None,
+    evaluation_dataflow_disk_size_gb: Optional[int] = None,
+) -> Tuple[str, Dict[str, Any]]:
+  """Get the AutoML Tabular training pipeline that skips architecture search.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region that runs the pipeline components.
+    root_dir: The root GCS directory for the pipeline components.
+    target_column: The target column name.
+    prediction_type: The type of prediction the model is to produce.
+      "classification" or "regression".
+    optimization_objective: For binary classification, "maximize-au-roc",
+      "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or
+      "maximize-recall-at-precision". For multi class classification,
+      "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or
+      "minimize-rmsle".
+    transformations: The transformations to apply.
+    train_budget_milli_node_hours: The train budget of creating this model,
+      expressed in milli node hours, i.e. a value of 1,000 in this field means
+      1 node hour.
+    stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS
+      URI.
+    stage_2_num_parallel_trials: Number of parallel trials for stage 2.
+    stage_2_num_selected_trials: Number of selected trials for stage 2.
+    data_source_csv_filenames: The CSV data source.
+    data_source_bigquery_table_path: The BigQuery data source.
+    predefined_split_key: The predefined_split column name.
+    timestamp_split_key: The timestamp_split column name.
+    stratified_split_key: The stratified_split column name.
+    training_fraction: The training fraction.
+    validation_fraction: The validation fraction.
+    test_fraction: The test fraction.
+    weight_column: The weight column name.
+    optimization_objective_recall_value: Required when optimization_objective is
+      "maximize-precision-at-recall". Must be between 0 and 1, inclusive.
+    optimization_objective_precision_value: Required when optimization_objective
+      is "maximize-recall-at-precision". Must be between 0 and 1, inclusive.
+    cv_trainer_worker_pool_specs_override: The dictionary for overriding the cv
+      trainer worker pool spec. The dictionary should be of format
+      https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
+    export_additional_model_without_custom_ops: Whether to export additional
+      model without custom TensorFlow operators.
+    stats_and_example_gen_dataflow_machine_type: The dataflow machine type for
+      stats_and_example_gen component.
+    stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow
+      workers for stats_and_example_gen component.
+    stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in
+      GB for stats_and_example_gen component.
+    transform_dataflow_machine_type: The dataflow machine type for transform
+      component.
+    transform_dataflow_max_num_workers: The max number of Dataflow workers for
+      transform component.
+    transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for
+      transform component.
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty
+      the default subnetwork will be used. Example:
+      https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow workers use public IP
+      addresses.
+    encryption_spec_key_name: The KMS key name.
+    additional_experiments: Use this field to configure private preview
+      features.
+    dataflow_service_account: Custom service account to run dataflow jobs.
+    run_evaluation: Whether to run evaluation in the training pipeline.
+    evaluation_batch_predict_machine_type: The prediction server machine type
+      for batch predict components during evaluation.
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + + return get_automl_tabular_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=None, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=[], + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override={}, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + 
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=None, + distill_batch_predict_machine_type=None, + distill_batch_predict_starting_replica_count=None, + distill_batch_predict_max_replica_count=None, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=[], + enable_probabilistic_inference=False, + ) + + +def get_wide_and_deep_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + dnn_learning_rate: float, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + optimizer_type: str = 'adam', + max_steps: int = -1, + max_train_secs: int = -1, + l1_regularization_strength: float = 0, + l2_regularization_strength: float = 0, + l2_shrinkage_regularization_strength: float = 0, + beta_1: float = 0.9, + beta_2: float = 0.999, + hidden_units: str = '30,30,30', + use_wide: bool = True, + embed_categories: bool = True, + dnn_dropout: float = 0, + dnn_optimizer_type: str = 'adam', + dnn_l1_regularization_strength: float = 0, + dnn_l2_regularization_strength: float = 0, + dnn_l2_shrinkage_regularization_strength: float = 0, + dnn_beta_1: float = 0.9, + dnn_beta_2: float = 0.999, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + batch_size: int = 100, + measurement_selection_type: Optional[str] = None, + optimization_metric: Optional[str] = None, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT,
+    evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE,
+    evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS,
+    evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS,
+    evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB,
+    dataflow_service_account: str = '',
+    dataflow_subnetwork: str = '',
+    dataflow_use_public_ips: bool = True,
+    encryption_spec_key_name: str = '',
+) -> Tuple[str, Dict[str, Any]]:
+  """Get the Wide & Deep training pipeline.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region that runs the pipeline components.
+    root_dir: The root GCS directory for the pipeline components.
+    target_column: The target column name.
+    prediction_type: The type of prediction the model is to produce.
+      'classification' or 'regression'.
+    learning_rate: The learning rate used by the linear optimizer.
+    dnn_learning_rate: The learning rate for training the deep part of the
+      model.
+    transform_config: Path to v1 TF transformation configuration.
+    dataset_level_custom_transformation_definitions: Dataset-level custom
+      transformation definitions in string format.
+    dataset_level_transformations: Dataset-level transformation configuration in
+      string format.
+    run_feature_selection: Whether to enable feature selection.
+    feature_selection_algorithm: Feature selection algorithm.
+    materialized_examples_format: The format for the materialized examples.
+    max_selected_features: Maximum number of features to select.
+    predefined_split_key: Predefined split key.
+    stratified_split_key: Stratified split key.
+    training_fraction: Training fraction.
+    validation_fraction: Validation fraction.
+    test_fraction: Test fraction.
+    tf_transform_execution_engine: The execution engine used to execute TF-based
+      transformations.
+    tf_auto_transform_features: List of auto transform features in the
+      comma-separated string format.
+    tf_custom_transformation_definitions: TF custom transformation definitions
+      in string format.
+    tf_transformations_path: Path to TF transformation configuration.
+    optimizer_type: The type of optimizer to use. Choices are "adam", "ftrl" and
+      "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively.
+    max_steps: Number of steps to run the trainer for.
+    max_train_secs: Amount of time in seconds to run the trainer for.
+    l1_regularization_strength: L1 regularization strength for
+      optimizer_type="ftrl".
+    l2_regularization_strength: L2 regularization strength for
+      optimizer_type="ftrl".
+    l2_shrinkage_regularization_strength: L2 shrinkage regularization strength
+      for optimizer_type="ftrl".
+    beta_1: Beta 1 value for optimizer_type="adam".
+    beta_2: Beta 2 value for optimizer_type="adam".
+    hidden_units: Hidden layer sizes to use for DNN feature columns, provided in
+      comma-separated layers.
+    use_wide: If set to true, the categorical columns will be used in the wide
+      part of the DNN model.
+    embed_categories: If set to true, the categorical columns will be embedded
+      and used in the deep part of the model. Embedding size is the square root
+      of the column cardinality.
+    dnn_dropout: The probability we will drop out a given coordinate.
+    dnn_optimizer_type: The type of optimizer to use for the deep part of the
+      model. Choices are "adam", "ftrl" and "sgd" for the Adam, FTRL, and
+      Gradient Descent Optimizers, respectively.
+ dnn_l1_regularization_strength: L1 regularization strength for + dnn_optimizer_type="ftrl". + dnn_l2_regularization_strength: L2 regularization strength for + dnn_optimizer_type="ftrl". + dnn_l2_shrinkage_regularization_strength: L2 shrinkage regularization + strength for dnn_optimizer_type="ftrl". + dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". + dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use if/when the service + automatically selects the final measurement from previously reported + intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. 
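+
+  Example:
+    A minimal sketch for illustration only; 'my-project', the bucket paths, and
+    the column names are placeholders rather than values defined in this
+    module:
+
+      template_path, parameter_values = (
+          get_wide_and_deep_trainer_pipeline_and_parameters(
+              project='my-project',
+              location='us-central1',
+              root_dir='gs://my-bucket/pipeline_root',
+              target_column='label',
+              prediction_type='classification',
+              learning_rate=0.01,
+              dnn_learning_rate=0.01,
+              tf_auto_transform_features=['feature_1', 'feature_2'],
+              data_source_csv_filenames='gs://my-bucket/data/train.csv',
+          )
+      )
+      # template_path points at the wide_and_deep_trainer_pipeline.yaml shipped
+      # next to this module; parameter_values can be passed to a Vertex AI
+      # PipelineJob.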
+ + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = {} + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'learning_rate': learning_rate, + 'dnn_learning_rate': dnn_learning_rate, + 'optimizer_type': optimizer_type, + 'max_steps': max_steps, + 'max_train_secs': max_train_secs, + 'l1_regularization_strength': l1_regularization_strength, + 'l2_regularization_strength': l2_regularization_strength, + 'l2_shrinkage_regularization_strength': ( + l2_shrinkage_regularization_strength + ), + 'beta_1': beta_1, + 'beta_2': beta_2, + 'hidden_units': hidden_units, + 'use_wide': use_wide, + 'embed_categories': embed_categories, + 'dnn_dropout': dnn_dropout, + 'dnn_optimizer_type': dnn_optimizer_type, + 'dnn_l1_regularization_strength': dnn_l1_regularization_strength, + 'dnn_l2_regularization_strength': dnn_l2_regularization_strength, + 'dnn_l2_shrinkage_regularization_strength': ( + dnn_l2_shrinkage_regularization_strength + ), + 'dnn_beta_1': dnn_beta_1, + 'dnn_beta_2': dnn_beta_2, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'batch_size': batch_size, + 'measurement_selection_type': measurement_selection_type, + 'optimization_metric': optimization_metric, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if 
dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'wide_and_deep_trainer_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + algorithm: str, + enable_profiler: bool = False, + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + 
evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the built-in algorithm HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + algorithm: Algorithm to train. One of "tabnet" and "wide_and_deep". + enable_profiler: Enables profiling and saves a trace during evaluation. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. 
One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'This method is deprecated. Please use' + ' get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters or' + ' get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters' + ' instead.' 
+ ) + + if algorithm == 'tabnet': + return get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + study_spec_metric_id=study_spec_metric_id, + study_spec_metric_goal=study_spec_metric_goal, + study_spec_parameters_override=study_spec_parameters_override, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + transform_config=transform_config, + dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, + dataset_level_transformations=dataset_level_transformations, + predefined_split_key=predefined_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + tf_transform_execution_engine=tf_transform_execution_engine, + tf_auto_transform_features=tf_auto_transform_features, + tf_custom_transformation_definitions=tf_custom_transformation_definitions, + tf_transformations_path=tf_transformations_path, + enable_profiler=enable_profiler, + seed=seed, + eval_steps=eval_steps, + eval_frequency_secs=eval_frequency_secs, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, + weight_column=weight_column, + max_failed_trial_count=max_failed_trial_count, + study_spec_algorithm=study_spec_algorithm, + study_spec_measurement_selection_type=study_spec_measurement_selection_type, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + worker_pool_specs_override=worker_pool_specs_override, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + elif algorithm == 'wide_and_deep': + return get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + study_spec_metric_id=study_spec_metric_id, + study_spec_metric_goal=study_spec_metric_goal, + study_spec_parameters_override=study_spec_parameters_override, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + transform_config=transform_config, + dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, + dataset_level_transformations=dataset_level_transformations, + predefined_split_key=predefined_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + 
test_fraction=test_fraction, + tf_transform_execution_engine=tf_transform_execution_engine, + tf_auto_transform_features=tf_auto_transform_features, + tf_custom_transformation_definitions=tf_custom_transformation_definitions, + tf_transformations_path=tf_transformations_path, + enable_profiler=enable_profiler, + seed=seed, + eval_steps=eval_steps, + eval_frequency_secs=eval_frequency_secs, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, + weight_column=weight_column, + max_failed_trial_count=max_failed_trial_count, + study_spec_algorithm=study_spec_algorithm, + study_spec_measurement_selection_type=study_spec_measurement_selection_type, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + worker_pool_specs_override=worker_pool_specs_override, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + else: + raise ValueError( + 'Invalid algorithm provided. Supported values are "tabnet" and' + ' "wide_and_deep".' 
+ ) + + +def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the TabNet HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. 
The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. 
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'study_spec_parameters_override': study_spec_parameters_override, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': 
dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'tabnet_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + 
weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the Wide & Deep algorithm HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. 
If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' 
+ ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'study_spec_parameters_override': study_spec_parameters_override, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + 
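+  # Note: `_update_parameters` is defined earlier in this module and is not
+  # shown in this hunk. The sketch below only illustrates the assumed
+  # contract (copy entries whose value is not None into `parameter_values`);
+  # it is not the shipped implementation.
+  #
+  #   def _update_parameters(parameter_values, new_params):
+  #     parameter_values.update(
+  #         {k: v for k, v in new_params.items() if v is not None})
+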
data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'wide_and_deep_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_tabnet_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + max_steps: int = -1, + max_train_secs: int = -1, + large_category_dim: int = 1, + large_category_thresh: int = 300, + yeo_johnson_transform: bool = True, + feature_dim: int = 64, + feature_dim_ratio: float = 0.5, + num_decision_steps: int = 6, + relaxation_factor: float = 1.5, + decay_every: float = 100, + decay_rate: float = 0.95, + gradient_thresh: float = 2000, + sparsity_loss_weight: float = 0.00001, + batch_momentum: float = 0.95, + batch_size_ratio: float = 0.25, + num_transformer_layers: int = 4, + num_transformer_layers_ratio: float = 0.25, + class_weight: float = 1.0, + loss_function_type: str = 'default', + alpha_focal_loss: float = 0.25, + gamma_focal_loss: float = 2.0, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + batch_size: int = 100, + measurement_selection_type: Optional[str] = None, + optimization_metric: Optional[str] = None, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + 
evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the TabNet training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + learning_rate: The learning rate used by the linear optimizer. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the trainer for. + large_category_dim: Embedding dimension for categorical feature with large + number of categories. + large_category_thresh: Threshold for number of categories to apply + large_category_dim embedding dimension to. + yeo_johnson_transform: Enables trainable Yeo-Johnson power transform. + feature_dim: Dimensionality of the hidden representation in feature + transformation block. + feature_dim_ratio: The ratio of output dimension (dimensionality of the + outputs of each decision step) to feature dimension. + num_decision_steps: Number of sequential decision steps. + relaxation_factor: Relaxation factor that promotes the reuse of each feature + at different decision steps. When it is 1, a feature is enforced to be + used only at one decision step and as it increases, more flexibility is + provided to use a feature at multiple decision steps. + decay_every: Number of iterations for periodically applying learning rate + decaying. + decay_rate: The rate at which the learning rate decays. + gradient_thresh: Threshold for the norm of gradients for clipping. + sparsity_loss_weight: Weight of the loss for sparsity regularization + (increasing it will yield more sparse feature selection). + batch_momentum: Momentum in ghost batch normalization. + batch_size_ratio: The ratio of virtual batch size (size of the ghost batch + normalization) to batch size. + num_transformer_layers: The number of transformer layers for each decision + step.
+ num_transformer_layers_ratio: The ratio of shared transformer layer to + transformer layers. + class_weight: The class weight is used to compute a weighted cross entropy, + which is helpful for classifying imbalanced datasets. Only used for + classification. + loss_function_type: Loss function type. Loss function in classification + [cross_entropy, weighted_cross_entropy, focal_loss], default is + cross_entropy. Loss function in regression: [rmse, mae, mse], default is + mse. + alpha_focal_loss: Alpha value (balancing factor) in focal_loss function. + Only used for classification. + gamma_focal_loss: Gamma value (modulating factor) for focal loss. Only used + for classification. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use if/when the service + automatically selects the final measurement from previously reported + intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used.
Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = {} + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'learning_rate': learning_rate, + 'max_steps': max_steps, + 'max_train_secs': max_train_secs, + 'large_category_dim': large_category_dim, + 'large_category_thresh': large_category_thresh, + 'yeo_johnson_transform': yeo_johnson_transform, + 'feature_dim': feature_dim, + 'feature_dim_ratio': feature_dim_ratio, + 'num_decision_steps': num_decision_steps, + 'relaxation_factor': relaxation_factor, + 'decay_every': decay_every, + 'decay_rate': decay_rate, + 'gradient_thresh': gradient_thresh, + 'sparsity_loss_weight': sparsity_loss_weight, + 'batch_momentum': batch_momentum, + 'batch_size_ratio': batch_size_ratio, + 'num_transformer_layers': num_transformer_layers, + 'num_transformer_layers_ratio': num_transformer_layers_ratio, + 'class_weight': class_weight, + 'loss_function_type': loss_function_type, + 'alpha_focal_loss': alpha_focal_loss, + 'gamma_focal_loss': gamma_focal_loss, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'batch_size': batch_size, + 'measurement_selection_type': measurement_selection_type, + 'optimization_metric': optimization_metric, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 
'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'tabnet_trainer_pipeline.yaml' + ) + + return pipeline_definition_path, parameter_values + + +def get_tabnet_study_spec_parameters_override( + dataset_size_bucket: str, prediction_type: str, training_budget_bucket: str +) -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for a TabNet hyperparameter tuning job. + + Args: + dataset_size_bucket: Size of the dataset. One of "small" (< 1M rows), + "medium" (1M - 100M rows), or "large" (> 100M rows). + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + training_budget_bucket: Bucket of the estimated training budget. One of + "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This + parameter is only used as a hint for the hyperparameter search space, + unrelated to the real cost. + + Returns: + List of study_spec_parameters_override. + """ + + if dataset_size_bucket not in ['small', 'medium', 'large']: + raise ValueError( + 'Invalid dataset_size_bucket provided. Supported values ' + ' are "small", "medium" or "large".' + ) + if training_budget_bucket not in ['small', 'medium', 'large']: + raise ValueError( + 'Invalid training_budget_bucket provided. Supported values ' + 'are "small", "medium" or "large".' 
+ ) + + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + f'configs/tabnet_params_{dataset_size_bucket}_data_{training_budget_bucket}_search_space.json', + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + if prediction_type == 'regression': + return _format_tabnet_regression_study_spec_parameters_override( + params, training_budget_bucket + ) + return params + + +def _format_tabnet_regression_study_spec_parameters_override( + params: List[Dict[str, Any]], training_budget_bucket: str +) -> List[Dict[str, Any]]: + """Get regression study_spec_parameters_override for a TabNet hyperparameter tuning job. + + Args: + params: List of dictionaries representing parameters to optimize. The + dictionary key is the parameter_id, which is passed to training job as a + command line argument, and the dictionary value is the parameter + specification of the metric. + training_budget_bucket: Bucket of the estimated training budget. One of + "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This + parameter is only used as a hint for the hyperparameter search space, + unrelated to the real cost. + + Returns: + List of study_spec_parameters_override for regression. + """ + + # To get regression study_spec_parameters, we need to set + # `loss_function_type` to ‘mae’ (‘mae’ and ‘mse’ for "large" search space), + # remove the `alpha_focal_loss`, `gamma_focal_loss` + # and `class_weight` parameters and increase the max for + # `sparsity_loss_weight` to 100. + formatted_params = [] + for param in params: + if param['parameter_id'] in [ + 'alpha_focal_loss', + 'gamma_focal_loss', + 'class_weight', + ]: + continue + elif param['parameter_id'] == 'sparsity_loss_weight': + param['double_value_spec']['max_value'] = 100 + elif param['parameter_id'] == 'loss_function_type': + if training_budget_bucket == 'large': + param['categorical_value_spec']['values'] = ['mae', 'mse'] + else: + param['categorical_value_spec']['values'] = ['mae'] + + formatted_params.append(param) + + return formatted_params + + +def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for a Wide & Deep hyperparameter tuning job. + + Returns: + List of study_spec_parameters_override. + """ + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'configs/wide_and_deep_params.json', + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + return params + + +def get_xgboost_study_spec_parameters_override() -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for an XGBoost hyperparameter tuning job. + + Returns: + List of study_spec_parameters_override. 
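+
+  Example:
+    Each entry in the returned list is a single parameter spec dictionary.
+    The entry below is illustrative only; the real search space is read from
+    configs/xgboost_params.json:
+
+      {
+          'parameter_id': 'eta',
+          'double_value_spec': {'min_value': 0.0001, 'max_value': 1.0},
+      }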
+ """ + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'configs/xgboost_params.json' + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + return params + + +def get_xgboost_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + objective: str, + eval_metric: Optional[str] = None, + num_boost_round: Optional[int] = None, + early_stopping_rounds: Optional[int] = None, + base_score: Optional[float] = None, + disable_default_eval_metric: Optional[int] = None, + seed: Optional[int] = None, + seed_per_iteration: Optional[bool] = None, + booster: Optional[str] = None, + eta: Optional[float] = None, + gamma: Optional[float] = None, + max_depth: Optional[int] = None, + min_child_weight: Optional[float] = None, + max_delta_step: Optional[float] = None, + subsample: Optional[float] = None, + colsample_bytree: Optional[float] = None, + colsample_bylevel: Optional[float] = None, + colsample_bynode: Optional[float] = None, + reg_lambda: Optional[float] = None, + reg_alpha: Optional[float] = None, + tree_method: Optional[str] = None, + scale_pos_weight: Optional[float] = None, + updater: Optional[str] = None, + refresh_leaf: Optional[int] = None, + process_type: Optional[str] = None, + grow_policy: Optional[str] = None, + sampling_method: Optional[str] = None, + monotone_constraints: Optional[str] = None, + interaction_constraints: Optional[str] = None, + sample_type: Optional[str] = None, + normalize_type: Optional[str] = None, + rate_drop: Optional[float] = None, + one_drop: Optional[int] = None, + skip_drop: Optional[float] = None, + num_parallel_tree: Optional[int] = None, + feature_selector: Optional[str] = None, + top_k: Optional[int] = None, + max_cat_to_onehot: Optional[int] = None, + max_leaves: Optional[int] = None, + max_bin: Optional[int] = None, + tweedie_variance_power: Optional[float] = None, + huber_slope: Optional[float] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: Optional[bool] = None, + feature_selection_algorithm: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: Optional[str] = None, + training_machine_type: Optional[str] = None, + training_total_replica_count: Optional[int] = None, + training_accelerator_type: Optional[str] = None, + training_accelerator_count: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + run_evaluation: Optional[bool] = None, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + 
evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: Optional[bool] = None, + encryption_spec_key_name: Optional[str] = None, +): + """Get the XGBoost training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + objective: Specifies the learning task and the learning objective. Must be + one of [reg:squarederror, reg:squaredlogerror, + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + binary:logistic, multi:softprob]. + eval_metric: Evaluation metrics for validation data represented as a + comma-separated string. + num_boost_round: Number of boosting iterations. + early_stopping_rounds: Activates early stopping. Validation error needs to + decrease at least every early_stopping_rounds round(s) to continue + training. + base_score: The initial prediction score of all instances, global bias. + disable_default_eval_metric: Flag to disable default metric. Set to >0 to + disable. Default to 0. + seed: Random seed. + seed_per_iteration: Seed PRNG deterministically via iteration number. + booster: Which booster to use, can be gbtree, gblinear or dart. gbtree and + dart use tree based model while gblinear uses linear function. + eta: Learning rate. + gamma: Minimum loss reduction required to make a further partition on a leaf + node of the tree. + max_depth: Maximum depth of a tree. + min_child_weight: Minimum sum of instance weight(hessian) needed in a child. + max_delta_step: Maximum delta step we allow each tree's weight estimation to + be. + subsample: Subsample ratio of the training instance. + colsample_bytree: Subsample ratio of columns when constructing each tree. + colsample_bylevel: Subsample ratio of columns for each split, in each level. + colsample_bynode: Subsample ratio of columns for each node (split). + reg_lambda: L2 regularization term on weights. + reg_alpha: L1 regularization term on weights. + tree_method: The tree construction algorithm used in XGBoost. Choices: + ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"]. + scale_pos_weight: Control the balance of positive and negative weights. + updater: A comma separated string defining the sequence of tree updaters to + run. + refresh_leaf: Refresh updater plugin. Update tree leaf and nodes' stats if + True. When it is False, only node stats are updated. + process_type: A type of boosting process to run. Choices:["default", + "update"] + grow_policy: Controls the way new nodes are added to the tree. Only supported + if tree_method is hist. Choices:["depthwise", "lossguide"] + sampling_method: The method to use to sample the training instances. + monotone_constraints: Constraint of variable monotonicity. + interaction_constraints: Constraints for interaction representing permitted + interactions. + sample_type: [dart booster only] Type of sampling algorithm. + Choices:["uniform", "weighted"] + normalize_type: [dart booster only] Type of normalization algorithm. + Choices:["tree", "forest"] + rate_drop: [dart booster only] Dropout rate.
+ one_drop: [dart booster only] When this flag is enabled, at least one tree + is always dropped during the dropout (allows Binomial-plus-one or + epsilon-dropout from the original DART paper). + skip_drop: [dart booster only] Probability of skipping the dropout procedure + during a boosting iteration. + num_parallel_tree: Number of parallel trees constructed during each + iteration. This option is used to support boosted random forest. + feature_selector: [linear booster only] Feature selection and ordering + method. + top_k: The number of top features to select in greedy and thrifty feature + selector. The value of 0 means using all the features. + max_cat_to_onehot: A threshold for deciding whether XGBoost should use + one-hot encoding based split for categorical data. + max_leaves: Maximum number of nodes to be added. + max_bin: Maximum number of discrete bins to bucket continuous features. + tweedie_variance_power: Parameter that controls the variance of the Tweedie + distribution. + huber_slope: A parameter used for Pseudo-Huber loss to define the delta + term. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + training_machine_type: Machine type. + training_total_replica_count: Number of workers. + training_accelerator_type: Accelerator type. + training_accelerator_count: Accelerator count. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. 
+ evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = {} + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'objective': objective, + 'eval_metric': eval_metric, + 'num_boost_round': num_boost_round, + 'early_stopping_rounds': early_stopping_rounds, + 'base_score': base_score, + 'disable_default_eval_metric': disable_default_eval_metric, + 'seed': seed, + 'seed_per_iteration': seed_per_iteration, + 'booster': booster, + 'eta': eta, + 'gamma': gamma, + 'max_depth': max_depth, + 'min_child_weight': min_child_weight, + 'max_delta_step': max_delta_step, + 'subsample': subsample, + 'colsample_bytree': colsample_bytree, + 'colsample_bylevel': colsample_bylevel, + 'colsample_bynode': colsample_bynode, + 'reg_lambda': reg_lambda, + 'reg_alpha': reg_alpha, + 'tree_method': tree_method, + 'scale_pos_weight': scale_pos_weight, + 'updater': updater, + 'refresh_leaf': refresh_leaf, + 'process_type': process_type, + 'grow_policy': grow_policy, + 'sampling_method': sampling_method, + 'monotone_constraints': monotone_constraints, + 'interaction_constraints': interaction_constraints, + 'sample_type': sample_type, + 'normalize_type': normalize_type, + 'rate_drop': rate_drop, + 'one_drop': one_drop, + 'skip_drop': skip_drop, + 'num_parallel_tree': num_parallel_tree, + 'feature_selector': feature_selector, + 'top_k': top_k, + 'max_cat_to_onehot': max_cat_to_onehot, + 'max_leaves': max_leaves, + 'max_bin': max_bin, + 'tweedie_variance_power': tweedie_variance_power, + 'huber_slope': huber_slope, + 'weight_column': weight_column, + 'training_machine_type': training_machine_type, + 'training_total_replica_count': training_total_replica_count, + 'training_accelerator_type': training_accelerator_type, + 'training_accelerator_count': training_accelerator_count, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 
'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'xgboost_trainer_pipeline.yaml' + ) + + return pipeline_definition_path, parameter_values + + +def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + objective: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + max_trial_count: int, + parallel_trial_count: int, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + eval_metric: Optional[str] = None, + disable_default_eval_metric: Optional[int] = None, + seed: Optional[int] = None, + seed_per_iteration: Optional[bool] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: Optional[bool] = None, + feature_selection_algorithm: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: Optional[str] = None, + max_failed_trial_count: Optional[int] = None, + training_machine_type: Optional[str] = None, + training_total_replica_count: Optional[int] = None, + training_accelerator_type: Optional[str] = None, + training_accelerator_count: Optional[int] = None, + study_spec_algorithm: Optional[str] = None, + study_spec_measurement_selection_type: Optional[str] = 
None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + run_evaluation: Optional[bool] = None, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: Optional[bool] = None, + encryption_spec_key_name: Optional[str] = None, +): + """Get the XGBoost HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + objective: Specifies the learning task and the learning objective. Must be + one of [reg:squarederror, reg:squaredlogerror, + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + binary:logistic, multi:softprob]. + study_spec_metric_id: Metric to optimize. For options, please look under + 'eval_metric' at + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + eval_metric: Evaluation metrics for validation data represented as a + comma-separated string. + disable_default_eval_metric: Flag to disable default metric. Set to >0 to + disable. Default to 0. + seed: Random seed. + seed_per_iteration: Seed PRNG deterministically via iteration number. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name.
+ max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + training_machine_type: Machine type. + training_total_replica_count: Number of workers. + training_accelerator_type: Accelerator type. + training_accelerator_count: Accelerator count. + study_spec_algorithm: The search algorithm specified for the study. One of + 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. 
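+
+  Example:
+    A rough sketch of how the returned tuple is typically handed to the
+    Vertex AI SDK. The project, bucket, and column names are placeholders,
+    an installed `google-cloud-aiplatform` package is assumed, and this
+    snippet is not part of the pipeline definition itself:
+
+      from google.cloud import aiplatform
+
+      template_path, parameter_values = (
+          get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters(
+              project='example-project',
+              location='us-central1',
+              root_dir='gs://example-bucket/pipeline_root',
+              target_column='label',
+              objective='binary:logistic',
+              study_spec_metric_id='auc',
+              study_spec_metric_goal='MAXIMIZE',
+              max_trial_count=10,
+              parallel_trial_count=2,
+              data_source_csv_filenames='gs://example-bucket/train.csv',
+              tf_auto_transform_features=['feature_a', 'feature_b'],
+          )
+      )
+      job = aiplatform.PipelineJob(
+          display_name='xgboost-hp-tuning',
+          template_path=template_path,
+          parameter_values=parameter_values,
+          pipeline_root='gs://example-bucket/pipeline_root',
+      )
+      job.run()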
+ """ + parameter_values = {} + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'objective': objective, + 'eval_metric': eval_metric, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'study_spec_parameters_override': ( + study_spec_parameters_override + if study_spec_parameters_override + else [] + ), + 'disable_default_eval_metric': disable_default_eval_metric, + 'seed': seed, + 'seed_per_iteration': seed_per_iteration, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'training_machine_type': training_machine_type, + 'training_total_replica_count': training_total_replica_count, + 'training_accelerator_type': training_accelerator_type, + 'training_accelerator_count': training_accelerator_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 
'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'xgboost_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..6f76075d48 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -0,0 +1,236 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Wide and Deep Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input + + +@dsl.container_component +def wide_and_deep_hyperparameter_tuning_job( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + instance_schema_uri: dsl.OutputPath(str), + prediction_schema_uri: dsl.OutputPath(str), + trials: dsl.OutputPath(str), + prediction_docker_uri_output: dsl.OutputPath(str), + execution_metrics: dsl.OutputPath(dict), + weight_column: Optional[str] = '', + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + eval_frequency_secs: Optional[int] = 600, + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes Wide & Deep hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. 
"classification" or "regression". + weight_column: The weight column name. + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + study_spec_metric_id: Metric to optimize, , possible + values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: Amount of time in seconds to run the trainer for. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + instance_schema_uri: The path to the instance schema. + prediction_schema_uri: The path to the prediction schema. + trials: The path to the hyperparameter tuning trials + prediction_docker_uri_output: The URI of the prediction container. + execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJobWithMetrics', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--execution_metrics', + execution_metrics, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "wide-and-deep-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ( + ', "trial_job_spec": {"worker_pool_specs":' + ' [{"replica_count":"' + ), + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--prediction_docker_uri_artifact_path=', + prediction_docker_uri_output, + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--instance_schema_path=', + instance_schema_uri, + '", "--prediction_schema_path=', + prediction_schema_uri, + '", "--trials_path=', + trials, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--measurement_selection_type=', + study_spec_measurement_selection_type, + '", "--metric_goal=', + study_spec_metric_goal, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..f6c3308c7f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4018 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-wide-and-deep-hyperparameter-tuning-job +# Description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
+# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + 
constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + 
pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: 
pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - wide-and-deep-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: wide-and-deep-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_uri + producerTask: wide-and-deep-hyperparameter-tuning-job + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_output + producerTask: wide-and-deep-hyperparameter-tuning-job + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_uri + producerTask: wide-and-deep-hyperparameter-tuning-job + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials + producerTask: wide-and-deep-hyperparameter-tuning-job + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-wide-and-deep-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-wide-and-deep-study-spec-parameters + inputs: + parameters: + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-wide-and-deep-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + 
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + wide-and-deep-hyperparameter-tuning-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-wide-and-deep-hyperparameter-tuning-job + dependentTasks: + - feature-transform-engine + - get-wide-and-deep-study-spec-parameters + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + cache_data: + componentInputParameter: pipelinechannel--cache_data + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: 
pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-wide-and-deep-study-spec-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: wide-and-deep-hyperparameter-tuning-job + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + 
pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' 
+ isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. 
For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' 
+ isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. 
+ isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. 
For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-wide-and-deep-study-spec-parameters: + executorLabel: exec-get-wide-and-deep-study-spec-parameters + inputDefinitions: + parameters: + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' 
+ isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. 
Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. 
- + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. 
+ parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. 
+ parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path pattern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.'
+ isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model needs to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series identifier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-wide-and-deep-hyperparameter-tuning-job: + executorLabel: exec-wide-and-deep-hyperparameter-tuning-job + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to a JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized validation split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized train split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Amount of time in seconds to run the trainer for. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to transform output.
+ parameters: + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to + + ''auto'', caching is determined based on the dataset size.' + isOptional: true + parameterType: STRING + enable_profiler: + defaultValue: false + description: 'Enables profiling and saves a trace + + during evaluation.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and + + checkpointing will take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not + + specified or negative, it means run evaluation on the whole validation + + dataset. If set to 0, it means run evaluation for a fixed number of + + samples.' + isOptional: true + parameterType: NUMBER_INTEGER + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that + + need to be seen before failing the HyperparameterTuningJob. If set to + 0, + + Vertex AI decides how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + parallel_trial_count: + description: 'The desired number of trials to run + + in parallel.' + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for + + the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or + + ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement + + to use if/when the service automatically selects the final measurement + + from previously reported intermediate measurements. One of + + "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, + + possible values: "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible + + values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', + ''auc'', ''precision'', ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + target_column: + description: The target column name.
+ parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + execution_metrics: + description: Core metrics in dictionary of hyperparameter tuning job execution. + parameterType: STRUCT + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + instance_schema_uri: + description: The path to the instance schema. + parameterType: STRING + prediction_docker_uri_output: + description: The URI of the prediction container. + parameterType: STRING + prediction_schema_uri: + description: The path to the prediction schema. + parameterType: STRING + trials: + description: The path to the hyperparameter tuning trials + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-get-best-hyperparameter-tuning-job-trial: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_best_hyperparameter_tuning_job_trial + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-wide-and-deep-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_wide_and_deep_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_wide_and_deep_study_spec_parameters(\n study_spec_parameters_override:\ + \ list # Required for KFP validation; pylint:disable=g-bare-generic\n)\ + \ -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ + \ study_spec_parameters for a Wide & Deep hyperparameter tuning job.\n\n\ + \ Args:\n study_spec_parameters_override: List of dictionaries representing\ + \ parameters\n to optimize. 
The dictionary key is the parameter_id,\ + \ which is passed to\n training job as a command line argument, and\ + \ the dictionary value is the\n parameter specification of the metric.\n\ + \n Returns:\n List of final Vizier study_spec_parameters of type ParameterSpec.\n\ + \ \"\"\"\n default_params = [\n {\n 'parameter_id': 'max_steps',\n\ + \ 'discrete_value_spec': {\n 'values': [5000, 10000,\ + \ 20000, 30000, 40000, 50000]\n },\n },\n {\n \ + \ 'parameter_id': 'max_train_secs',\n 'discrete_value_spec':\ + \ {'values': [-1]},\n },\n {\n 'parameter_id': 'learning_rate',\n\ + \ 'double_value_spec': {'min_value': 0.0001, 'max_value': 0.0005},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'optimizer_type',\n 'categorical_value_spec':\ + \ {'values': ['adam', 'ftrl', 'sgd']},\n },\n {\n 'parameter_id':\ + \ 'l1_regularization_strength',\n 'discrete_value_spec': {'values':\ + \ [0, 0.01, 0.02]},\n },\n {\n 'parameter_id': 'l2_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'l2_shrinkage_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'beta_1',\n 'discrete_value_spec':\ + \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ + \ 'beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9, 0.999]},\n\ + \ },\n {\n 'parameter_id': 'hidden_units',\n \ + \ 'categorical_value_spec': {'values': ['30,30,30']},\n },\n \ + \ {\n 'parameter_id': 'use_wide',\n 'categorical_value_spec':\ + \ {'values': ['true', 'false']},\n },\n {\n 'parameter_id':\ + \ 'embed_categories',\n 'categorical_value_spec': {'values': ['true',\ + \ 'false']},\n },\n {\n 'parameter_id': 'dnn_dropout',\n\ + \ 'discrete_value_spec': {'values': [0, 0.1, 0.2]},\n },\n\ + \ {\n 'parameter_id': 'dnn_learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.0001, 'max_value': 0.0005},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'dnn_optimizer_type',\n \ + \ 'categorical_value_spec': {'values': ['adam', 'ftrl', 'sgd']},\n\ + \ },\n {\n 'parameter_id': 'dnn_l1_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_l2_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_l2_shrinkage_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_beta_1',\n 'discrete_value_spec':\ + \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ + \ 'dnn_beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9,\ + \ 0.999]},\n },\n {\n 'parameter_id': 'batch_size',\n\ + \ 'discrete_value_spec': {'values': [1024, 2048, 4096, 8192, 16384]},\n\ + \ },\n ]\n # pylint:disable=g-import-not-at-top,redefined-outer-name\n\ + \ import warnings\n # pylint:enable=g-import-not-at-top,redefined-outer-name\n\ + \n override_params = {}\n for param in study_spec_parameters_override:\n\ + \ override_params[param['parameter_id']] = param\n\n study_spec_parameters\ + \ = []\n for param in default_params:\n study_spec_parameters.append(\n\ + \ override_params.get(param['parameter_id'], param)\n )\n\n extra_overrides\ + \ = set(override_params) - set(\n p['parameter_id'] for p in default_params\n\ + \ )\n if extra_overrides:\n extra_override_str = ', '.join(extra_overrides)\n\ + \ warnings.warn(\n f'The overrides {extra_override_str} were not\ + \ found in the params and '\n 'will be 
ignored.'\n )\n\n return\ + \ study_spec_parameters\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - 
'{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-wide-and-deep-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJobWithMetrics + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --execution_metrics + - '{{$.outputs.parameters[''execution_metrics''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"wide-and-deep-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], 
\"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", + "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", + "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", + "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", + \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\", \"--seed=", "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", + "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
+ name: automl-tabular-wide-and-deep-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + 
pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 
The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' 
+ parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible values: [ ''loss'', + + ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', + + ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py new file mode 100644 index 0000000000..19eaddb481 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -0,0 +1,281 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Wide and Deep Trainer component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def wide_and_deep_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + dnn_learning_rate: float, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument + weight_column: Optional[str] = '', + max_steps: Optional[int] = -1, + max_train_secs: Optional[int] = -1, + optimizer_type: Optional[str] = 'adam', + l1_regularization_strength: Optional[float] = 0, + l2_regularization_strength: Optional[float] = 0, + l2_shrinkage_regularization_strength: Optional[float] = 0, + beta_1: Optional[float] = 0.9, + beta_2: Optional[float] = 0.999, + hidden_units: Optional[str] = '30,30,30', + use_wide: Optional[bool] = True, + embed_categories: Optional[bool] = True, + dnn_dropout: Optional[float] = 0, + dnn_optimizer_type: Optional[str] = 'ftrl', + dnn_l1_regularization_strength: Optional[float] = 0, + dnn_l2_regularization_strength: Optional[float] = 0, + dnn_l2_shrinkage_regularization_strength: Optional[float] = 0, + dnn_beta_1: Optional[float] = 0.9, + dnn_beta_2: Optional[float] = 0.999, + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + batch_size: Optional[int] = 100, + measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + optimization_metric: Optional[str] = '', + eval_frequency_secs: Optional[int] = 600, + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains a Wide & Deep model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the + trainer for. + learning_rate: The learning rate used by the linear optimizer. + optimizer_type: The type of optimizer to use. Choices are + "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent + Optimizers, respectively. 
+ l1_regularization_strength: L1 regularization strength + for optimizer_type="ftrl". + l2_regularization_strength: L2 regularization strength + for optimizer_type="ftrl". + l2_shrinkage_regularization_strength: L2 shrinkage + regularization strength for optimizer_type="ftrl". + beta_1: Beta 1 value for optimizer_type="adam". + beta_2: Beta 2 value for optimizer_type="adam". + hidden_units: Hidden layer sizes to use for DNN feature + columns, provided in comma-separated layers. + use_wide: If set to true, the categorical columns will be + used in the wide part of the DNN model. + embed_categories: If set to true, the categorical columns + will be embedded and used in the deep part of the model. Embedding + size is the square root of the column cardinality. + dnn_dropout: The probability we will drop out a given + coordinate. + dnn_learning_rate: The learning rate for training the + deep part of the model. + dnn_optimizer_type: The type of optimizer to use for the + deep part of the model. Choices are "adam", "ftrl" and "sgd" for the + Adam, FTRL, and Gradient Descent Optimizers, respectively. + dnn_l1_regularization_strength: L1 regularization + strength for dnn_optimizer_type="ftrl". + dnn_l2_regularization_strength: L2 regularization + strength for dnn_optimizer_type="ftrl". + dnn_l2_shrinkage_regularization_strength: L2 shrinkage + regularization strength for dnn_optimizer_type="ftrl". + dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". + dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use + if/when the service automatically selects the final measurement from + previously reported intermediate measurements. One of "BEST_MEASUREMENT" + or "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: The tabular example gen metadata. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to the transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + unmanaged_container_model: The UnmanagedContainerModel artifact.
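+
+  Example:
+    A minimal, illustrative sketch of wiring this component into a KFP
+    pipeline. In the packaged Wide & Deep pipeline the artifact inputs come
+    from the feature-transform-engine, split-materialized-data, and
+    training-configurator-and-validator steps; here they are brought in with
+    ``dsl.importer`` from placeholder ``gs://bucket/...`` URIs, and the
+    ``example_pipeline`` / ``_import`` names are illustrative only.
+
+      from kfp import dsl
+
+      @dsl.pipeline(name='wide-and-deep-trainer-example')
+      def example_pipeline(project: str, location: str, root_dir: str):
+        def _import(uri: str):
+          # Placeholder artifact standing in for an upstream task output.
+          return dsl.importer(
+              artifact_uri=uri, artifact_class=dsl.Artifact
+          ).output
+
+        wide_and_deep_trainer(
+            project=project,
+            location=location,
+            root_dir=root_dir,
+            target_column='target',
+            prediction_type='classification',
+            learning_rate=0.01,
+            dnn_learning_rate=0.01,
+            instance_baseline=_import('gs://bucket/instance_baseline.json'),
+            metadata=_import('gs://bucket/metadata.json'),
+            materialized_train_split=_import('gs://bucket/materialized_train'),
+            materialized_eval_split=_import('gs://bucket/materialized_eval'),
+            transform_output=_import('gs://bucket/transform_output'),
+            training_schema_uri=_import('gs://bucket/training_schema.json'),
+        )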
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "wide-and-deep-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--max_steps=', + max_steps, + '", "--max_train_secs=', + max_train_secs, + '", "--learning_rate=', + learning_rate, + '", "--optimizer_type=', + optimizer_type, + '", "--l1_regularization_strength=', + l1_regularization_strength, + '", "--l2_regularization_strength=', + l2_regularization_strength, + '", "--l2_shrinkage_regularization_strength=', + l2_shrinkage_regularization_strength, + '", "--beta_1=', + beta_1, + '", "--beta_2=', + beta_2, + '", "--hidden_units=', + hidden_units, + '", "--use_wide=', + use_wide, + '", "--embed_categories=', + embed_categories, + '", "--dnn_dropout=', + dnn_dropout, + '", "--dnn_learning_rate=', + dnn_learning_rate, + '", "--dnn_optimizer_type=', + dnn_optimizer_type, + '", "--dnn_l1_regularization_strength=', + dnn_l1_regularization_strength, + '", "--dnn_l2_regularization_strength=', + dnn_l2_regularization_strength, + '", "--dnn_l2_shrinkage_regularization_strength=', + dnn_l2_shrinkage_regularization_strength, + '", "--dnn_beta_1=', + dnn_beta_1, + '", "--dnn_beta_2=', + dnn_beta_2, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--batch_size=', + batch_size, + '", "--measurement_selection_type=', + measurement_selection_type, + '", "--optimization_metric=', + optimization_metric, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml new file mode 100644 index 0000000000..748711a0dd --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -0,0 +1,4048 @@ +# 
PIPELINE DEFINITION +# Name: automl-tabular-wide-and-deep-trainer +# Description: The Wide & Deep training pipeline. +# Inputs: +# batch_size: int [Default: 100.0] +# beta_1: float [Default: 0.9] +# beta_2: float [Default: 0.999] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# dnn_beta_1: float [Default: 0.9] +# dnn_beta_2: float [Default: 0.999] +# dnn_dropout: float [Default: 0.0] +# dnn_l1_regularization_strength: float [Default: 0.0] +# dnn_l2_regularization_strength: float [Default: 0.0] +# dnn_l2_shrinkage_regularization_strength: float [Default: 0.0] +# dnn_learning_rate: float +# dnn_optimizer_type: str [Default: 'adam'] +# embed_categories: bool [Default: True] +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# hidden_units: str [Default: '30,30,30'] +# l1_regularization_strength: float [Default: 0.0] +# l2_regularization_strength: float [Default: 0.0] +# l2_shrinkage_regularization_strength: float [Default: 0.0] +# learning_rate: float +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_selected_features: int [Default: -1.0] +# max_steps: int [Default: -1.0] +# max_train_secs: int [Default: -1.0] +# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_metric: str [Default: ''] +# optimizer_type: str [Default: 'adam'] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# use_wide: bool [Default: True] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + 
defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--wide-and-deep-trainer-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + 
dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - wide-and-deep-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - wide-and-deep-trainer + 
inputs: + artifacts: + pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + 
componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - wide-and-deep-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + 
producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + wide-and-deep-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-wide-and-deep-trainer + dependentTasks: + - feature-transform-engine + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + batch_size: + componentInputParameter: pipelinechannel--batch_size + beta_1: + componentInputParameter: pipelinechannel--beta_1 + beta_2: + componentInputParameter: pipelinechannel--beta_2 + cache_data: + componentInputParameter: pipelinechannel--cache_data + dnn_beta_1: + componentInputParameter: pipelinechannel--dnn_beta_1 + dnn_beta_2: + componentInputParameter: pipelinechannel--dnn_beta_2 + dnn_dropout: + componentInputParameter: pipelinechannel--dnn_dropout + dnn_l1_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l1_regularization_strength + dnn_l2_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l2_regularization_strength + dnn_l2_shrinkage_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l2_shrinkage_regularization_strength + dnn_learning_rate: + componentInputParameter: pipelinechannel--dnn_learning_rate + dnn_optimizer_type: + componentInputParameter: pipelinechannel--dnn_optimizer_type + embed_categories: + componentInputParameter: pipelinechannel--embed_categories + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + 
eval_steps: + componentInputParameter: pipelinechannel--eval_steps + hidden_units: + componentInputParameter: pipelinechannel--hidden_units + l1_regularization_strength: + componentInputParameter: pipelinechannel--l1_regularization_strength + l2_regularization_strength: + componentInputParameter: pipelinechannel--l2_regularization_strength + l2_shrinkage_regularization_strength: + componentInputParameter: pipelinechannel--l2_shrinkage_regularization_strength + learning_rate: + componentInputParameter: pipelinechannel--learning_rate + location: + componentInputParameter: pipelinechannel--location + max_steps: + componentInputParameter: pipelinechannel--max_steps + max_train_secs: + componentInputParameter: pipelinechannel--max_train_secs + measurement_selection_type: + componentInputParameter: pipelinechannel--measurement_selection_type + optimization_metric: + componentInputParameter: pipelinechannel--optimization_metric + optimizer_type: + componentInputParameter: pipelinechannel--optimizer_type + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + use_wide: + componentInputParameter: pipelinechannel--use_wide + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: wide-and-deep-trainer + inputDefinitions: + parameters: + pipelinechannel--batch_size: + parameterType: NUMBER_INTEGER + pipelinechannel--beta_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--beta_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--dnn_beta_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_beta_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_dropout: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l1_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l2_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l2_shrinkage_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_optimizer_type: + parameterType: STRING + pipelinechannel--embed_categories: + parameterType: BOOLEAN + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + 
pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--hidden_units: + parameterType: STRING + pipelinechannel--l1_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--l2_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--l2_shrinkage_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--max_train_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--measurement_selection_type: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_metric: + parameterType: STRING + pipelinechannel--optimizer_type: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--use_wide: + parameterType: BOOLEAN + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + 
autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. 
Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
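+ # --- Editor's note (illustrative only, not part of the compiled spec) ---
+ # A minimal sketch of invoking this batch-predict component from the KFP DSL,
+ # assuming google-cloud-pipeline-components v1 exposes it as ModelBatchPredictOp;
+ # the project, dataset, and task names below are placeholders:
+ #
+ #   from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
+ #
+ #   batch_predict = ModelBatchPredictOp(
+ #       project='my-project',
+ #       location='us-central1',
+ #       job_display_name='wide-and-deep-batch-predict',
+ #       unmanaged_container_model=trainer_task.outputs['unmanaged_container_model'],
+ #       instances_format='bigquery',
+ #       bigquery_source_input_uri='bq://my-project.my_dataset.my_table',
+ #       predictions_format='bigquery',
+ #       bigquery_destination_output_uri='bq://my-project.my_output_dataset',
+ #   )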
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. 
Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. 
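+ # --- Editor's note (illustrative only, not part of the compiled spec) ---
+ # A minimal sketch of the model-upload step described above, assuming the v1
+ # ModelUploadOp wrapper; the project and display name are placeholders, and the
+ # unmanaged_container_model input is wired from an upstream trainer task as the
+ # component description suggests:
+ #
+ #   from google_cloud_pipeline_components.v1.model import ModelUploadOp
+ #
+ #   upload = ModelUploadOp(
+ #       project='my-project',
+ #       location='us-central1',
+ #       display_name='wide-and-deep-model',
+ #       unmanaged_container_model=trainer_task.outputs['unmanaged_container_model'],
+ #   )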
+ parameterType: STRING
+ project:
+ description: The GCP project that runs the pipeline components.
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ data_source_bigquery_table_path:
+ parameterType: STRING
+ data_source_csv_filenames:
+ parameterType: STRING
+ model_display_name:
+ parameterType: STRING
+ comp-split-materialized-data:
+ executorLabel: exec-split-materialized-data
+ inputDefinitions:
+ artifacts:
+ materialized_data:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: 'Materialized dataset output by the Feature
+
+ Transform Engine.'
+ outputDefinitions:
+ artifacts:
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized eval split.
+ materialized_test_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized test split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized train split.
+ comp-training-configurator-and-validator:
+ executorLabel: exec-training-configurator-and-validator
+ inputDefinitions:
+ artifacts:
+ dataset_stats:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Dataset stats generated by
+
+ feature transform engine.'
+ instance_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Schema of input data to the tf_model at
+
+ serving time.'
+ training_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ available_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ context_window:
+ defaultValue: -1.0
+ description: The length of the context window.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ enable_probabilistic_inference:
+ defaultValue: false
+ description: 'If probabilistic inference is
+
+ enabled, the model will fit a distribution that captures the uncertainty
+
+ of a prediction. At inference time, the predictive distribution is used
+
+ to make a point prediction that minimizes the optimization objective.
+
+ For example, the mean of a predictive distribution is the point
+
+ prediction that minimizes RMSE loss. If quantiles are specified, then
+
+ the quantiles of the distribution are also returned.'
+ isOptional: true
+ parameterType: BOOLEAN
+ forecast_horizon:
+ defaultValue: -1.0
+ description: The length of the forecast horizon.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ forecasting_model_type:
+ defaultValue: ''
+ description: The model types, e.g. l2l, seq2seq, tft.
+ isOptional: true
+ parameterType: STRING
+ forecasting_transformations:
+ defaultValue: {}
+ description: 'Dict mapping auto and/or type-resolutions to
+
+ feature columns. The supported types are auto, categorical, numeric,
+
+ text, and timestamp.'
+ isOptional: true
+ parameterType: STRUCT
+ group_columns:
+ description: 'A list of time series attribute column
+
+ names that define the time series hierarchy.'
+ isOptional: true
+ parameterType: LIST
+ group_temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for
+
+ predictions aggregated over both the horizon and time series in the same
+
+ hierarchy group.'
+ isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' 
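+ # --- Editor's note (illustrative only, not part of the compiled spec) ---
+ # Per the optimization_objective description above, the default objective
+ # depends on prediction_type; a small Python mapping makes the pairing explicit
+ # (values taken verbatim from the description, which also notes that the
+ # precision/recall objectives require the corresponding *_value parameter):
+ #
+ #   DEFAULT_OBJECTIVE = {
+ #       'classification': 'maximize-au-roc',
+ #       'regression': 'minimize-rmse',
+ #   }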
+ isOptional: true
+ parameterType: STRING
+ time_series_attribute_columns:
+ defaultValue: []
+ description: 'The column names of the time series
+
+ attributes.'
+ isOptional: true
+ parameterType: LIST
+ time_series_identifier_column:
+ defaultValue: ''
+ description: 'Time series identifier column. Used by
+
+ forecasting only.'
+ isOptional: true
+ parameterType: STRING
+ unavailable_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ not available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ weight_column:
+ defaultValue: ''
+ description: Weight column of input data.
+ isOptional: true
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The tabular example gen metadata.
+ comp-wide-and-deep-trainer:
+ executorLabel: exec-wide-and-deep-trainer
+ inputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to a JSON file for baseline values.
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the materialized validation split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the materialized train split.
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Amount of time in seconds to run the trainer for.
+ training_schema_uri:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the training schema.
+ transform_output:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to transform output.
+ parameters:
+ batch_size:
+ defaultValue: 100.0
+ description: Batch size for training.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ beta_1:
+ defaultValue: 0.9
+ description: Beta 1 value for optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ beta_2:
+ defaultValue: 0.999
+ description: Beta 2 value for optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ cache_data:
+ defaultValue: auto
+ description: 'Whether to cache data or not. If set to
+
+ ''auto'', caching is determined based on the dataset size.'
+ isOptional: true
+ parameterType: STRING
+ dnn_beta_1:
+ defaultValue: 0.9
+ description: Beta 1 value for dnn_optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_beta_2:
+ defaultValue: 0.999
+ description: Beta 2 value for dnn_optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_dropout:
+ defaultValue: 0.0
+ description: 'The probability we will drop out a given
+
+ coordinate.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l1_regularization_strength:
+ defaultValue: 0.0
+ description: 'L1 regularization
+
+ strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l2_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 regularization
+
+ strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l2_shrinkage_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 shrinkage
+
+ regularization strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_learning_rate:
+ description: 'The learning rate for training the
+
+ deep part of the model.'
+ parameterType: NUMBER_DOUBLE
+ dnn_optimizer_type:
+ defaultValue: ftrl
+ description: 'The type of optimizer to use for the
+
+ deep part of the model. Choices are "adam", "ftrl" and "sgd" for the
+
+ Adam, FTRL, and Gradient Descent Optimizers, respectively.'
+ isOptional: true
+ parameterType: STRING
+ embed_categories:
+ defaultValue: true
+ description: 'If set to true, the categorical columns
+
+ will be embedded and used in the deep part of the model. Embedding
+
+ size is the square root of the column cardinality.'
+ isOptional: true
+ parameterType: BOOLEAN
+ enable_profiler:
+ defaultValue: false
+ description: 'Enables profiling and saves a trace
+
+ during evaluation.'
+ isOptional: true
+ parameterType: BOOLEAN
+ encryption_spec_key_name:
+ defaultValue: ''
+ description: The KMS key name.
+ isOptional: true
+ parameterType: STRING
+ eval_frequency_secs:
+ defaultValue: 600.0
+ description: 'Frequency at which evaluation and
+
+ checkpointing will take place.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ eval_steps:
+ defaultValue: 0.0
+ description: 'Number of steps to run evaluation for. If not
+
+ specified or negative, it means run evaluation on the whole validation
+
+ dataset. If set to 0, it means run evaluation for a fixed number of
+
+ samples.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ hidden_units:
+ defaultValue: 30,30,30
+ description: 'Hidden layer sizes to use for DNN feature
+
+ columns, provided in comma-separated layers.'
+ isOptional: true
+ parameterType: STRING
+ l1_regularization_strength:
+ defaultValue: 0.0
+ description: 'L1 regularization strength
+
+ for optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ l2_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 regularization strength
+
+ for optimizer_type="ftrl"'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ l2_shrinkage_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 shrinkage
+
+ regularization strength for optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ learning_rate:
+ description: The learning rate used by the linear optimizer.
+ parameterType: NUMBER_DOUBLE
+ location:
+ description: The GCP region that runs the pipeline components.
+ parameterType: STRING
+ max_steps:
+ defaultValue: -1.0
+ description: Number of steps to run the trainer for.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_train_secs:
+ defaultValue: -1.0
+ description: 'Amount of time in seconds to run the
+
+ trainer for.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ measurement_selection_type:
+ defaultValue: BEST_MEASUREMENT
+ description: 'Which measurement to use
+
+ if/when the service automatically selects the final measurement from
+
+ previously reported intermediate measurements. One of "BEST_MEASUREMENT"
+
+ or "LAST_MEASUREMENT".'
+ isOptional: true
+ parameterType: STRING
+ optimization_metric:
+ defaultValue: ''
+ description: 'Optimization metric used for
+
+ `measurement_selection_type`. Default is "rmse" for regression and "auc"
+
+ for classification.'
+ isOptional: true
+ parameterType: STRING
+ optimizer_type:
+ defaultValue: adam
+ description: 'The type of optimizer to use. Choices are
+
+ "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent
+
+ Optimizers, respectively.'
+ isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + use_wide: + defaultValue: true + description: 'If set to true, the categorical columns will be + + used in the wide part of the DNN model.' + isOptional: true + parameterType: BOOLEAN + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - 
instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-wide-and-deep-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"wide-and-deep-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", + "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", + "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", + "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--optimizer_type=", + "{{$.inputs.parameters[''optimizer_type'']}}", "\", \"--l1_regularization_strength=", + "{{$.inputs.parameters[''l1_regularization_strength'']}}", "\", \"--l2_regularization_strength=", + "{{$.inputs.parameters[''l2_regularization_strength'']}}", "\", \"--l2_shrinkage_regularization_strength=", + "{{$.inputs.parameters[''l2_shrinkage_regularization_strength'']}}", "\", + \"--beta_1=", "{{$.inputs.parameters[''beta_1'']}}", "\", \"--beta_2=", + "{{$.inputs.parameters[''beta_2'']}}", "\", \"--hidden_units=", "{{$.inputs.parameters[''hidden_units'']}}", + "\", \"--use_wide=", "{{$.inputs.parameters[''use_wide'']}}", "\", \"--embed_categories=", + "{{$.inputs.parameters[''embed_categories'']}}", "\", \"--dnn_dropout=", + "{{$.inputs.parameters[''dnn_dropout'']}}", "\", \"--dnn_learning_rate=", + "{{$.inputs.parameters[''dnn_learning_rate'']}}", "\", \"--dnn_optimizer_type=", + "{{$.inputs.parameters[''dnn_optimizer_type'']}}", "\", \"--dnn_l1_regularization_strength=", + "{{$.inputs.parameters[''dnn_l1_regularization_strength'']}}", "\", \"--dnn_l2_regularization_strength=", + "{{$.inputs.parameters[''dnn_l2_regularization_strength'']}}", "\", \"--dnn_l2_shrinkage_regularization_strength=", + "{{$.inputs.parameters[''dnn_l2_shrinkage_regularization_strength'']}}", + "\", \"--dnn_beta_1=", "{{$.inputs.parameters[''dnn_beta_1'']}}", "\", \"--dnn_beta_2=", + "{{$.inputs.parameters[''dnn_beta_2'']}}", "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", + "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", + "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The Wide & Deep training pipeline. + name: automl-tabular-wide-and-deep-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--batch_size: + componentInputParameter: batch_size + pipelinechannel--beta_1: + componentInputParameter: beta_1 + pipelinechannel--beta_2: + componentInputParameter: beta_2 + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--dnn_beta_1: + componentInputParameter: dnn_beta_1 + pipelinechannel--dnn_beta_2: + componentInputParameter: dnn_beta_2 + pipelinechannel--dnn_dropout: + componentInputParameter: dnn_dropout + pipelinechannel--dnn_l1_regularization_strength: + componentInputParameter: dnn_l1_regularization_strength + pipelinechannel--dnn_l2_regularization_strength: + componentInputParameter: dnn_l2_regularization_strength + pipelinechannel--dnn_l2_shrinkage_regularization_strength: + componentInputParameter: dnn_l2_shrinkage_regularization_strength + pipelinechannel--dnn_learning_rate: + componentInputParameter: dnn_learning_rate + pipelinechannel--dnn_optimizer_type: + componentInputParameter: dnn_optimizer_type + pipelinechannel--embed_categories: + componentInputParameter: embed_categories + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: 
evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--hidden_units: + componentInputParameter: hidden_units + pipelinechannel--l1_regularization_strength: + componentInputParameter: l1_regularization_strength + pipelinechannel--l2_regularization_strength: + componentInputParameter: l2_regularization_strength + pipelinechannel--l2_shrinkage_regularization_strength: + componentInputParameter: l2_shrinkage_regularization_strength + pipelinechannel--learning_rate: + componentInputParameter: learning_rate + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_steps: + componentInputParameter: max_steps + pipelinechannel--max_train_secs: + componentInputParameter: max_train_secs + pipelinechannel--measurement_selection_type: + componentInputParameter: measurement_selection_type + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--optimization_metric: + componentInputParameter: optimization_metric + pipelinechannel--optimizer_type: + componentInputParameter: optimizer_type + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: 
+ componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--use_wide: + componentInputParameter: use_wide + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + batch_size: + defaultValue: 100.0 + description: Batch size for training. + isOptional: true + parameterType: NUMBER_INTEGER + beta_1: + defaultValue: 0.9 + description: Beta 1 value for optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + beta_2: + defaultValue: 0.999 + description: Beta 2 value for optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + dnn_beta_1: + defaultValue: 0.9 + description: Beta 1 value for dnn_optimizer_type='adam'. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + dnn_beta_2: + defaultValue: 0.999 + description: Beta 2 value for dnn_optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_dropout: + defaultValue: 0.0 + description: The probability we will drop out a given coordinate. + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l1_regularization_strength: + defaultValue: 0.0 + description: 'L1 regularization strength for + + dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l2_regularization_strength: + defaultValue: 0.0 + description: 'L2 regularization strength for + + dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l2_shrinkage_regularization_strength: + defaultValue: 0.0 + description: 'L2 shrinkage regularization + + strength for dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_learning_rate: + description: 'The learning rate for training the deep part of the + + model.' + parameterType: NUMBER_DOUBLE + dnn_optimizer_type: + defaultValue: adam + description: 'The type of optimizer to use for the deep part of the + + model. Choices are ''adam'', ''ftrl'' and ''sgd''. for the Adam, FTRL, and + + Gradient Descent Optimizers, respectively.' + isOptional: true + parameterType: STRING + embed_categories: + defaultValue: true + description: 'If set to true, the categorical columns will be used + + embedded and used in the deep part of the model. Embedding size is the + + square root of the column cardinality.' + isOptional: true + parameterType: BOOLEAN + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + hidden_units: + defaultValue: 30,30,30 + description: 'Hidden layer sizes to use for DNN feature columns, provided + in + + comma-separated layers.' + isOptional: true + parameterType: STRING + l1_regularization_strength: + defaultValue: 0.0 + description: 'L1 regularization strength for + + optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + l2_regularization_strength: + defaultValue: 0.0 + description: 'L2 regularization strength for + + optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + l2_shrinkage_regularization_strength: + defaultValue: 0.0 + description: 'L2 shrinkage regularization strength + + for optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + learning_rate: + description: The learning rate used by the linear optimizer. + parameterType: NUMBER_DOUBLE + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + defaultValue: -1.0 + description: Number of steps to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + max_train_secs: + defaultValue: -1.0 + description: Amount of time in seconds to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement to use if/when the service + + automatically selects the final measurement from previously reported + + intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + optimization_metric: + defaultValue: '' + description: 'Optimization metric used for + + `measurement_selection_type`. Default is "rmse" for regression and "auc" + + for classification.' + isOptional: true + parameterType: STRING + optimizer_type: + defaultValue: adam + description: 'The type of optimizer to use. Choices are "adam", "ftrl" and + + "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively.' + isOptional: true + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' 
+ parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + use_wide: + defaultValue: true + description: 'If set to true, the categorical columns will be used in the + wide + + part of the DNN model.' + isOptional: true + parameterType: BOOLEAN + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' 
+ isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..a96e46d984 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py @@ -0,0 +1,124 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML XGBoost Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def xgboost_hyperparameter_tuning_job( + project: str, + location: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + worker_pool_specs: list, + gcp_resources: dsl.OutputPath(str), + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes XGBoost hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + study_spec_metric_id: Metric to optimize. For options, + please look under 'eval_metric' at + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + worker_pool_specs: The worker pool specs. 
+ encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "xgboost-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ', "trial_job_spec": {"worker_pool_specs": ', + worker_pool_specs, + '}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..8c3017aa09 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4332 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-xgboost-hyperparameter-tuning-job +# Description: The XGBoost HyperparameterTuningJob pipeline. 
+# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_default_eval_metric: int [Default: 0.0] +# encryption_spec_key_name: str [Default: ''] +# eval_metric: str [Default: ''] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# objective: str +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 0.0] +# seed_per_iteration: bool [Default: False] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# training_accelerator_count: int [Default: 0.0] +# training_accelerator_type: str [Default: ''] +# training_fraction: float [Default: -1.0] +# training_machine_type: str [Default: 'c2-standard-16'] +# training_total_replica_count: int [Default: 1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + 
pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--get-prediction-type-for-xgboost-Output: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + - get-prediction-type-for-xgboost + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--get-prediction-type-for-xgboost-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - get-prediction-type-for-xgboost + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + 
componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + generate-xgboost-hyperparameter-tuning-worker-pool-specs: + cachingOptions: + enableCache: true + componentRef: + name: comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs + dependentTasks: + - feature-transform-engine + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + accelerator_count: + componentInputParameter: pipelinechannel--training_accelerator_count + accelerator_type: + componentInputParameter: pipelinechannel--training_accelerator_type + disable_default_eval_metric: + componentInputParameter: pipelinechannel--disable_default_eval_metric + eval_metric: + componentInputParameter: pipelinechannel--eval_metric + machine_type: + componentInputParameter: pipelinechannel--training_machine_type + objective: + componentInputParameter: pipelinechannel--objective + seed: + componentInputParameter: pipelinechannel--seed + seed_per_iteration: + componentInputParameter: pipelinechannel--seed_per_iteration + target_column: + componentInputParameter: pipelinechannel--target_column + total_replica_count: + componentInputParameter: pipelinechannel--training_total_replica_count + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: generate-xgboost-hyperparameter-tuning-worker-pool-specs + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - generate-xgboost-hyperparameter-tuning-worker-pool-specs + - xgboost-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: xgboost-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_artifact_path + 
producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + read_value_from_file: + runtimeValue: + constant: 1.0 + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-prediction-type-for-xgboost: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-type-for-xgboost + inputs: + parameters: + objective: + componentInputParameter: pipelinechannel--objective + taskInfo: + name: get-prediction-type-for-xgboost + get-xgboost-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-xgboost-study-spec-parameters + inputs: + parameters: + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-xgboost-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + - get-prediction-type-for-xgboost + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + xgboost-hyperparameter-tuning-job: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-xgboost-hyperparameter-tuning-job + dependentTasks: + - generate-xgboost-hyperparameter-tuning-worker-pool-specs + - get-xgboost-study-spec-parameters + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + project: + componentInputParameter: pipelinechannel--project + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-xgboost-study-spec-parameters + worker_pool_specs: + taskOutputParameter: + outputParameterKey: worker_pool_specs + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + taskInfo: + name: xgboost-hyperparameter-tuning-job + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_default_eval_metric: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_metric: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--objective: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + 
parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--seed_per_iteration: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_accelerator_count: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_type: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_machine_type: + parameterType: STRING + pipelinechannel--training_total_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' 
+ isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\
+ , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\
+ , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\
+ \ FTE's currently supported built-in\ntransformations:\n Datetime:\
+ \ Extracts datetime features from a column containing\n timestamp\
+ \ strings.\n Example: .. code-block:: python { \"transformation\"\
+ :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\
+ :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\
+ \ A list with a single column to\n perform the datetime\
+ \ transformation on.\n output_columns: Names of output\n\
+ \ columns, one for each datetime_features element.\n \
+ \ time_format: Datetime format string. Time format is\n \
+ \ a combination of Date + Time Delimiter (optional) + Time\n\
+ \ (optional) directives. Valid date directives are as\n\
+ \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\
+ \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\
+ \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\
+ \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\
+ \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\
+ \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\
+ \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\
+ \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \
+ \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\
+ \ are as follows * 'T' * ' ' Valid time directives are\ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\
+ \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \
+ \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\
+ \ List of datetime\n features to be extracted. Each entry\
+ \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\
+ \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\
+ \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\
+ \ Log: Performs the natural log on a numeric column.\n Example:\
+ \ .. code-block:: python { \"transformation\": \"Log\",\n \
+ \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
+ \ input_columns: A list with a single column to\n \
+ \ perform the log transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n ZScale:\
+ \ Performs Z-scale normalization on a numeric column.\n Example:\
+ \ .. code-block:: python { \"transformation\":\n \"ZScale\"\
+ , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
+ \ input_columns: A list with a single column to\n \
+ \ perform the z-scale transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n Vocabulary:\
+ \ Converts strings to integers, where each unique string\n gets\
+ \ a unique integer representation.\n Example: .. code-block::\
+ \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\
+ : [\"feature_1\"] }\n Arguments:\n input_columns:\
+ \ A list with a single column to\n perform the vocabulary\
+ \ transformation on.\n output_columns: A list with a single\n\
+ \ output column name, corresponding to the output of our\n\
+ \ transformation.\n top_k: Number of the most\
+ \ frequent words\n in the vocabulary to use for generating\
+ \ dictionary\n lookup indices. If not specified, all words\
+ \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \
+ \ 2.\n top_k: Number of the most frequent words\n \
+ \ in the vocabulary to use for generating dictionary\n \
+ \ lookup indices. If not specified, all words in the\n \
+ \ vocabulary will be used. Defaults to None.\n \
+ \ frequency_threshold: Limit the\n dictionary's vocabulary\
+ \ only to words whose number of\n occurrences in the input\
+ \ exceeds frequency_threshold. If\n not specified, all\
+ \ words in the vocabulary will be\n included. If both top_k\
+ \ and frequency_threshold are\n specified, a word must\
+ \ satisfy both conditions to be\n included. Defaults to\
+ \ None.\n separator: Separator to split input string\n \
+ \ into tokens. Defaults to ' '.\n missing_token:\
+ \ Missing token to use when\n no string is included. Defaults\
+ \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\
+ \ such that elements <\n min_value are assigned min_value, and\
+ \ elements > max_value are\n assigned max_value.\n Example:\
+ \ .. code-block:: python { \"transformation\": \"Clip\",\n \
+ \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\
+ col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\
+ \ input_columns: A list with a single column to\n \
+ \ perform the clip transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n \
+ \ min_value: Number where all values below\n min_value\
+ \ are set to min_value. If no min_value is\n provided,\
+ \ min clipping will not occur. Defaults to None.\n max_value:\
+ \ Number where all values above\n max_value are set to\
+ \ max_value If no max_value is\n provided, max clipping\
+ \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\
+ \ multi-hot encoding on a categorical\n array column.\n \
+ \ Example: .. code-block:: python { \"transformation\":\n \
+ \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\
+ \ of classes is determined by the largest number included in\n\
+ \ the input if it is numeric or the total number of unique\n\
+ \ values of the input if it is type str. If the input has\n\
+ \ type str and an element contains separator tokens, the input\n\
+ \ will be split at separator indices, and each element\ of\n the split list will be considered a separate class.\ \ For\n example,\n Input: .. code-block:: python\
+ \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\
+ \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\
+ ], # Example 3 ]\n Output (with default separator=\"\
+ \ \"): .. code-block:: python [\n [1, 1], # Example\
+ \ 0 [1, 1], # Example 1\n [1, 0], # Example\
+ \ 2 [0, 1], # Example 3 ]\n Arguments:\n \
+ \ input_columns: A list with a single column to\n perform\
+ \ the multi-hot-encoding on.\n output_columns: A list with\
+ \ a single\n output column name, corresponding to the output\
+ \ of our\n transformation.\n top_k: Number\
+ \ of the most frequent words\n in the vocabulary to use\
+ \ for generating dictionary\n lookup indices. If not specified,\
+ \ all words in the\n vocabulary will be used. Defaults\
+ \ to None.\n frequency_threshold: Limit the\n \
+ \ dictionary's vocabulary only to words whose number of\n \
+ \ occurrences in the input exceeds frequency_threshold. If\n \
+ \ not specified, all words in the vocabulary will be\n \
+ \ included. If both top_k and frequency_threshold are\n \
+ \ specified, a word must satisfy both conditions to be\n\
+ \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
+ parameterType: STRING
+ bigquery_test_split_uri:
+ description: 'BigQuery URI for the test split to pass to the
+
+ batch prediction component during evaluation.'
+ parameterType: STRING
+ bigquery_train_split_uri:
+ description: 'BigQuery URI for the train split to pass to the
+
+ batch prediction component during distillation.'
+ parameterType: STRING
+ bigquery_validation_split_uri:
+ description: 'BigQuery URI for the validation split to
+
+ pass to the batch prediction component during distillation.'
+ parameterType: STRING
+ gcp_resources:
+ description: 'GCP resources created by this component. For more details,
+
+ see
+
+ https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
+ parameterType: STRING
+ split_example_counts:
+ description: 'JSON string of data split example counts for train,
+
+ validate, and test splits.'
+ parameterType: STRING
+ comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs:
+ executorLabel: exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs
+ inputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path to JSON file for baseline values.
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Required. The path to the materialized validation
+
+ split.'
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Required. The path to the materialized train
+
+ split.'
+ training_schema_uri:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Required. The path to the training schema.
+ transform_output:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Required. The path to transform output.
+ parameters:
+ accelerator_count:
+ defaultValue: 0.0
+ description: Accelerator count.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ accelerator_type:
+ defaultValue: ''
+ description: Accelerator type.
+ isOptional: true
+ parameterType: STRING
+ disable_default_eval_metric:
+ defaultValue: 0.0
+ description: 'Flag to disable default metric. Set to >0 to
+
+ disable. Default to 0.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ eval_metric:
+ defaultValue: ''
+ description: 'Evaluation metrics for validation data represented as a
+
+ comma-separated string.'
+ isOptional: true
+ parameterType: STRING
+ machine_type:
+ defaultValue: c2-standard-16
+ description: Machine type.
+ isOptional: true
+ parameterType: STRING
+ objective:
+ description: Required. Specifies the learning task and the learning objective.
+ parameterType: STRING
+ seed:
+ defaultValue: 0.0
+ description: Random seed.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ seed_per_iteration:
+ defaultValue: false
+ description: Seed PRNG deterministically via iterator number.
+ isOptional: true
+ parameterType: BOOLEAN
+ target_column:
+ description: Required. Target column name.
+ parameterType: STRING
+ total_replica_count:
+ description: Number of workers.
+ parameterType: NUMBER_INTEGER
+ weight_column:
+ defaultValue: ''
+ description: Weight column name.
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + job_dir: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + instance_schema_path: + parameterType: STRING + instance_schema_uri: + parameterType: STRING + prediction_docker_uri_artifact_path: + parameterType: STRING + prediction_docker_uri_output: + parameterType: STRING + prediction_schema_path: + parameterType: STRING + prediction_schema_uri: + parameterType: STRING + trials: + parameterType: STRING + trials_path: + parameterType: STRING + worker_pool_specs: + parameterType: LIST + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-prediction-type-for-xgboost: + executorLabel: exec-get-prediction-type-for-xgboost + inputDefinitions: + parameters: + objective: + description: The XGBoost training objective + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-xgboost-study-spec-parameters: + executorLabel: exec-get-xgboost-study-spec-parameters + inputDefinitions: + parameters: + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. 
Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. 
All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. 
- `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. 
In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. 
If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. 
+ isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-xgboost-hyperparameter-tuning-job: + executorLabel: exec-xgboost-hyperparameter-tuning-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that + + need to be seen before failing the HyperparameterTuningJob. If set to + 0, + + Vertex AI decides how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + parallel_trial_count: + description: 'The desired number of trials to run + + in parallel.' + parameterType: NUMBER_INTEGER + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for + + the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or + + ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement + + to use if/when the service automatically selects the final measurement + + from previously reported intermediate measurements. One of + + "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, + + possible values: "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize. For options, + + please look under ''eval_metric'' at + + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.' 
+ parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + worker_pool_specs: + description: The worker pool specs. + parameterType: LIST + outputDefinitions: + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _generate_xgboost_hyperparameter_tuning_worker_pool_specs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _generate_xgboost_hyperparameter_tuning_worker_pool_specs(\n\ + \ total_replica_count: int,\n target_column: str,\n objective:\ + \ str,\n materialized_train_split: dsl.InputPath('MaterializedSplit'),\n\ + \ materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n transform_output:\ + \ dsl.InputPath('TransformOutput'),\n training_schema_uri: dsl.InputPath('DatasetSchema'),\n\ + \ instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ + \ job_dir: dsl.OutputPath('JobDir'),\n instance_schema_uri: dsl.OutputPath(str),\n\ + \ prediction_schema_uri: dsl.OutputPath(str),\n trials: dsl.OutputPath(str),\n\ + \ prediction_docker_uri_output: dsl.OutputPath(str),\n machine_type:\ + \ str = 'c2-standard-16',\n accelerator_type: str = '',\n accelerator_count:\ + \ int = 0,\n weight_column: str = '',\n eval_metric: str = '',\n \ + \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('worker_pool_specs',\ + \ list), # pylint:disable=g-bare-generic\n ('instance_schema_path',\ + \ str),\n ('prediction_schema_path', str),\n ('trials_path',\ + \ str),\n ('prediction_docker_uri_artifact_path', str),\n ],\n\ + ):\n \"\"\"Generates worker pool specs for XGBoost hyperparameter tuning.\n\ + \n For single machine XGBoost training, returns one worker pool spec for\ + \ master.\n For distributed XGBoost training, returns two worker pool specs,\ + \ the first one\n for master and the second one for the remaining workers.\n\ + \n Args:\n total_replica_count: Number of workers.\n target_column:\ + \ Required. Target column name.\n objective: Required. Specifies the\ + \ learning task and the learning objective.\n materialized_train_split:\ + \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ + \ Required. The path to the materialized validation\n split.\n transform_output:\ + \ Required. The path to transform output.\n training_schema_uri: Required.\ + \ The path to the training schema.\n instance_baseline: Path to JSON\ + \ file for baseline values.\n job_dir: Job dir path.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ trials: The trials uri.\n prediction_docker_uri_output: The prediction\ + \ docker container uri.\n machine_type: Machine type.\n accelerator_type:\ + \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ + \ Weight column name.\n eval_metric: Evaluation metrics for validation\ + \ data represented as a\n comma-separated string.\n disable_default_eval_metric:\ + \ Flag to disable default metric. Set to >0 to\n disable. 
Default to\
+ \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG deterministically\
+ \ via iterator number.\n\n Raises:\n ValueError: If accelerator_count\
+ \ <= 0 and accelerator_type is specified.\n\n Returns:\n Output parameters.\n\
+ \ \"\"\"\n import copy\n import collections\n import re\n\n def get_gcs_path(path):\n\
+ \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\
+ \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\
+ \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\
+ \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\
+ \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\
+ \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\
+ \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\
+ \ f'--trials_path={get_gcs_path(trials)}',\n f'--prediction_docker_uri_artifact_path={get_gcs_path(prediction_docker_uri_output)}',\n\
+ \ f'--target_column={target_column}',\n f'--objective={objective}',\n\
+ \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\
+ \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\
+ \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\
+ \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\
+ \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \
+ \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\
+ \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\
+ \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325',\n\
+ \ ],\n },\n }\n\n # Add optional arguments if set\n if\
+ \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\
+ \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\
+ \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\
+ \ <= 0:\n raise ValueError(\n 'Accelerator count must be greater\
+ \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\
+ \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\
+ \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\
+ \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\
+ \ for distributed training.\n if total_replica_count > 1:\n additional_replica\
+ \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\
+ \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\
+ \n return collections.namedtuple(\n 'Outputs',\n [\n \
+ \ 'worker_pool_specs',\n 'instance_schema_path',\n 'prediction_schema_path',\n\
+ \ 'trials_path',\n 'prediction_docker_uri_artifact_path',\n\
+ \ ],\n )(\n worker_pool_specs_lst,\n get_gcs_path(instance_schema_uri),\n\
+ \ get_gcs_path(prediction_schema_uri),\n get_gcs_path(trials),\n\
+ \ get_gcs_path(prediction_docker_uri_output),\n )\n\n"
+ image: python:3.7
+ exec-get-best-hyperparameter-tuning-job-trial:
+ container:
+ args:
+ - --executor_input
+ - '{{$}}'
+ - --function_to_execute
+ - _get_best_hyperparameter_tuning_job_trial
+ command:
+ - sh
+ - -c
+ - "\nif !
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-prediction-type-for-xgboost: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_type_for_xgboost + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ + \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ + \ objective: The XGBoost training objective\n\n Returns:\n A string.\ + \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ + \ or objective.startswith('multi'):\n return 'classification'\n elif\ + \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ + \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ + \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ + \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ + \ ' multi:softprob].'\n )\n\n" + image: python:3.7 + exec-get-xgboost-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_xgboost_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_xgboost_study_spec_parameters(\n study_spec_parameters_override:\ + \ list, # Required for KFP validation; pylint:disable=g-bare-generic,unused-argument\n\ + ) -> list: # Required for KFP validation; pylint:disable=g-bare-generic\n\ + \ \"\"\"Get study_spec_parameters for an XGBoost hyperparameter tuning\ + \ job.\n\n Args:\n study_spec_parameters_override: List of dictionaries\ + \ representing parameters\n to optimize. 
The dictionary key is the\ + \ parameter_id, which is passed to\n training job as a command line\ + \ argument, and the dictionary value is the\n parameter specification\ + \ of the metric.\n\n Returns:\n List of final Vizier study_spec_parameters\ + \ of type ParameterSpec.\n \"\"\"\n # pylint:disable=g-import-not-at-top,redefined-outer-name,reimported\n\ + \ import functools\n import math\n from typing import Any, Dict, List,\ + \ Optional\n # pylint:enable=g-import-not-at-top,redefined-outer-name,reimported\n\ + \n # Need to define constants within the component function\n # pylint:disable=invalid-name\n\ + \ _GBTREE_BOOSTER = 'gbtree'\n _GBLINEAR_BOOSTER = 'gblinear'\n _DART_BOOSTER\ + \ = 'dart'\n _XGBOOST_BOOSTER_PARAMETERS_MAP = {\n 'eta': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'gamma': [_GBTREE_BOOSTER, _DART_BOOSTER],\n \ + \ 'max_depth': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'min_child_weight':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_delta_step': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'subsample': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'colsample_bytree': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bylevel':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bynode': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'lambda': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n\ + \ 'alpha': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n \ + \ 'tree_method': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'scale_pos_weight':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'updater': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER, _GBLINEAR_BOOSTER],\n 'refresh_leaf': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'process_type': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'grow_policy': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'sampling_method':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'monotone_constraints': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'interaction_constraints': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'sample_type': [_DART_BOOSTER],\n 'normalize_type': [_DART_BOOSTER],\n\ + \ 'rate_drop': [_DART_BOOSTER],\n 'one_drop': [_DART_BOOSTER],\n\ + \ 'skip_drop': [_DART_BOOSTER],\n 'num_parallel_tree': [_GBLINEAR_BOOSTER],\n\ + \ 'feature_selector': [_GBLINEAR_BOOSTER],\n 'top_k': [_GBLINEAR_BOOSTER],\n\ + \ 'max_leaves': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_bin':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n }\n _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS\ + \ = frozenset(\n ['updater', 'monotone_constraints', 'interaction_constraints']\n\ + \ )\n\n def _validate_float_spec(\n parameter_spec: Dict[str, Any],\ + \ lower_bound: float, upper_bound: float\n ) -> None:\n msg = (\n \ + \ f'Parameter spec for {parameter_spec[\"parameter_id\"]} must contain\ + \ '\n 'double_value_spec or discrete_value_spec with float values\ + \ within '\n f'the range of {lower_bound} and {upper_bound} (inclusive)'\n\ + \ )\n if 'double_value_spec' in parameter_spec:\n float_spec\ + \ = parameter_spec['double_value_spec']\n if float_spec['min_value']\ + \ < lower_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {float_spec[\"min_value\"]} for min_value.'\n )\n if float_spec['max_value']\ + \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {float_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ + \ in parameter_spec:\n float_spec = parameter_spec['discrete_value_spec']\n\ + \ float_values = float_spec['values']\n for val in float_values:\n\ + \ if val < lower_bound or val > upper_bound:\n raise ValueError(f'{msg},\ + \ but got {val} in {float_values}.')\n else:\n raise ValueError(\n\ + \ 
f'Unexpected value spec for {parameter_spec[\"parameter_id\"\ + ]}. {msg}.'\n )\n\n def _validate_int_spec(\n parameter_spec:\ + \ Dict[str, Any],\n lower_bound: Optional[int],\n upper_bound:\ + \ Optional[int],\n ) -> None:\n msg = (\n f'Parameter spec for\ + \ {parameter_spec[\"parameter_id\"]} must contain '\n 'integer_value_spec\ + \ or discrete_value_spec with integer values within '\n f'the range\ + \ of {lower_bound} and {upper_bound} (inclusive)'\n )\n if 'integer_value_spec'\ + \ in parameter_spec:\n int_spec = parameter_spec['integer_value_spec']\n\ + \ if lower_bound is not None and int_spec['min_value'] < lower_bound:\n\ + \ raise ValueError(\n f'{msg}, but got {int_spec[\"min_value\"\ + ]} for min_value.'\n )\n if upper_bound is not None and int_spec['max_value']\ + \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {int_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ + \ in parameter_spec:\n int_values = parameter_spec['discrete_value_spec']['values']\n\ + \ for val in int_values:\n if not isinstance(val, int):\n \ + \ raise ValueError(\n f'{msg}, but got non-integer {val}\ + \ with '\n f'type {type(val)} in {int_values}.'\n \ + \ )\n if (lower_bound is not None and val < lower_bound) or (\n \ + \ upper_bound is not None and val > upper_bound\n ):\n\ + \ raise ValueError(f'{msg}, but got {val} in {int_values}.')\n\ + \ else:\n raise ValueError(\n f'Unexpected value spec for\ + \ {parameter_spec[\"parameter_id\"]}. {msg}.'\n )\n\n def _validate_categorical_spec(\n\ + \ parameter_spec: Dict[str, Any], valid_categories: Optional[List[str]]\n\ + \ ) -> None:\n msg = (\n f'Parameter spec for {parameter_spec[\"\ + parameter_id\"]} must contain '\n 'categorical_value_spec with unique\ + \ categories from '\n f'{valid_categories}'\n )\n if 'categorical_value_spec'\ + \ in parameter_spec:\n if valid_categories is None:\n # Any\ + \ category is valid.\n return\n categorical_values = parameter_spec['categorical_value_spec']['values']\n\ + \ valid_categorical_values = set(categorical_values).intersection(\n\ + \ set(valid_categories)\n )\n if len(valid_categorical_values)\ + \ != len(categorical_values):\n raise ValueError(f'{msg}, but got\ + \ {categorical_values}.')\n else:\n raise ValueError(\n \ + \ f'Unexpected value spec for {parameter_spec[\"parameter_id\"]}. 
{msg}.'\n\ + \ )\n\n _XGBOOST_PARAM_VALIDATIONS = {\n 'num_boost_round': functools.partial(\n\ + \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ + \ 'early_stopping_rounds': functools.partial(\n _validate_int_spec,\ + \ lower_bound=1, upper_bound=None\n ),\n 'base_score': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ + \ 'booster': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['gbtree', 'gblinear', 'dart'],\n ),\n\ + \ 'eta': functools.partial(\n _validate_float_spec, lower_bound=0,\ + \ upper_bound=1\n ),\n 'gamma': functools.partial(\n \ + \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n ),\n\ + \ 'max_depth': functools.partial(\n _validate_int_spec, lower_bound=0,\ + \ upper_bound=None\n ),\n 'min_child_weight': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n \ + \ ),\n 'max_delta_step': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=math.inf\n ),\n 'subsample': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ + \ ),\n 'colsample_bytree': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0.0001, upper_bound=1\n ),\n 'colsample_bylevel':\ + \ functools.partial(\n _validate_float_spec, lower_bound=0.0001,\ + \ upper_bound=1\n ),\n 'colsample_bynode': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ + \ ),\n 'lambda': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'alpha': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ + \ 'tree_method': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['auto', 'exact', 'approx', 'hist', 'gpu_hist'],\n\ + \ ),\n 'scale_pos_weight': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=math.inf\n ),\n 'updater': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=None\n ),\n\ + \ 'refresh_leaf': functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'process_type': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=['default', 'updated']\n\ + \ ),\n 'grow_policy': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['depthwise', 'lossguide'],\n ),\n \ + \ 'sampling_method': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['uniform', 'gradient_based'],\n ),\n \ + \ 'monotone_constraints': functools.partial(\n _validate_categorical_spec,\ + \ valid_categories=None\n ),\n 'interaction_constraints': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=None\n ),\n\ + \ 'sample_type': functools.partial(\n _validate_categorical_spec,\ + \ valid_categories=['uniform', 'weighted']\n ),\n 'normalize_type':\ + \ functools.partial(\n _validate_categorical_spec, valid_categories=['tree',\ + \ 'forest']\n ),\n 'rate_drop': functools.partial(\n \ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n 'one_drop':\ + \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=1\n\ + \ ),\n 'skip_drop': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'num_parallel_tree': functools.partial(\n\ + \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ + \ 'feature_selector': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['cyclic', 'shuffle', 'random', 'greedy', 'thrifty'],\n\ + \ ),\n 'top_k': 
functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=None\n ),\n 'max_cat_to_onehot':\ + \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=None\n\ + \ ),\n 'max_leaves': functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=None\n ),\n 'max_bin': functools.partial(\n\ + \ _validate_int_spec, lower_bound=0, upper_bound=None\n ),\n\ + \ }\n\n def _add_booster_param(\n override_booster_params: Dict[str,\ + \ Any],\n param: Dict[str, Any],\n override_boosters: List[str],\n\ + \ ) -> None:\n # Validate parameter spec.\n param_id = param['parameter_spec']['parameter_id']\n\ + \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param_id]\n validation_func(param['parameter_spec'])\n\ + \ # Add parameter spec for valid boosters.\n parent_boosters = param['parent_categorical_values']['values']\n\ + \ all_boosters = set(_XGBOOST_BOOSTER_PARAMETERS_MAP[param_id]).intersection(\n\ + \ set(override_boosters)\n )\n valid_parent_boosters = set(parent_boosters).intersection(all_boosters)\n\ + \ if valid_parent_boosters:\n override_booster_params[param_id]\ + \ = {}\n for booster in valid_parent_boosters:\n override_booster_params[param_id][booster]\ + \ = param['parameter_spec']\n\n def _get_booster_param_specs(\n override_booster_params:\ + \ Dict[str, Any],\n param_id: str,\n default_param_spec: Optional[Dict[str,\ + \ Any]],\n ) -> List[Dict[str, Any]]:\n if param_id not in override_booster_params:\n\ + \ if default_param_spec is None:\n return []\n return [default_param_spec]\n\ + \ override_param_specs = override_booster_params[param_id]\n if default_param_spec\ + \ is not None:\n for booster in default_param_spec['parent_categorical_values']['values']:\n\ + \ if booster not in override_param_specs:\n override_param_specs[booster]\ + \ = default_param_spec['parameter_spec']\n param_specs = []\n for\ + \ booster, override_spec in override_param_specs.items():\n included\ + \ = False\n for spec in param_specs:\n if spec['parameter_spec']\ + \ == override_spec:\n spec['parent_categorical_values']['values'].append(booster)\n\ + \ included = True\n break\n if not included:\n \ + \ param_specs.append({\n 'parameter_spec': override_spec,\n\ + \ 'parent_categorical_values': {'values': [booster]},\n \ + \ })\n return param_specs\n\n default_params = [\n {\n \ + \ 'parameter_id': 'num_boost_round',\n 'discrete_value_spec':\ + \ {'values': [1, 5, 10, 15, 20]},\n },\n {\n 'parameter_id':\ + \ 'early_stopping_rounds',\n 'discrete_value_spec': {'values':\ + \ [3, 5, 10]},\n },\n {'parameter_id': 'base_score', 'discrete_value_spec':\ + \ {'values': [0.5]}},\n {\n 'parameter_id': 'booster',\n \ + \ 'categorical_value_spec': {'values': ['gbtree', 'gblinear', 'dart']},\n\ + \ 'conditional_parameter_specs': [\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'eta',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'gamma',\n\ + \ 'discrete_value_spec': {\n \ + \ 'values': [0, 10, 50, 100, 500, 1000]\n },\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'max_depth',\n\ + \ 'integer_value_spec': {'min_value': 6, 'max_value':\ + \ 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': 
{\n 'parameter_id': 'min_child_weight',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0,\n 'max_value': 10.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'max_delta_step',\n\ + \ 'discrete_value_spec': {\n \ + \ 'values': [0.0, 1.0, 3.0, 5.0, 7.0, 9.0]\n },\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'subsample',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bytree',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bylevel',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bynode',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'lambda',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_REVERSE_LOG_SCALE',\n\ + \ },\n 'parent_categorical_values': {\n\ + \ 'values': ['gbtree', 'dart', 'gblinear']\n \ + \ },\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'alpha',\n \ + \ 'double_value_spec': {\n 'min_value': 0.0001,\n\ + \ 'max_value': 1.0,\n },\n\ + \ 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n 'parent_categorical_values': {\n \ + \ 'values': ['gbtree', 'dart', 'gblinear']\n },\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'tree_method',\n \ + \ 'categorical_value_spec': {'values': ['auto']},\n \ + \ },\n 'parent_categorical_values': {'values': ['gbtree',\ + \ 'dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'scale_pos_weight',\n \ + \ 'discrete_value_spec': {'values': [1.0]},\n \ + \ },\n 'parent_categorical_values': {'values': ['gbtree',\ + \ 'dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'refresh_leaf',\n \ + \ 'discrete_value_spec': {'values': [1]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'process_type',\n \ + \ 'categorical_value_spec': {'values': ['default']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'grow_policy',\n\ + \ 'categorical_value_spec': {'values': ['depthwise']},\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'sampling_method',\n\ + \ 'categorical_value_spec': {'values': ['uniform']},\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'sample_type',\n\ + \ 'categorical_value_spec': {'values': ['uniform']},\n\ + \ },\n 
'parent_categorical_values': {'values':\ + \ ['dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'normalize_type',\n \ + \ 'categorical_value_spec': {'values': ['tree']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'rate_drop',\n \ + \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'one_drop',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'skip_drop',\n \ + \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'num_parallel_tree',\n \ + \ 'discrete_value_spec': {'values': [1]},\n \ + \ },\n 'parent_categorical_values': {'values': ['gblinear']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'feature_selector',\n \ + \ 'categorical_value_spec': {'values': ['cyclic']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gblinear']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'top_k',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n \ + \ 'parent_categorical_values': {'values': ['gblinear']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'max_leaves',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'max_bin',\n \ + \ 'discrete_value_spec': {'values': [256]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n ],\n },\n ]\n\n # Construct dictionaries\ + \ so that parameter specs are accessible by id.\n override_params = {}\n\ + \ override_booster_params = {}\n for param in study_spec_parameters_override:\n\ + \ # Validate a study spec before adding to the override_params dictionary.\n\ + \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param['parameter_id']]\n\ + \ validation_func(param)\n override_params[param['parameter_id']]\ + \ = param\n\n # Add any param that does not have a default parameter\ + \ spec.\n if (\n param['parameter_id'] == 'max_cat_to_onehot'\n\ + \ and param['parameter_id'] not in default_params\n ):\n \ + \ default_params.append(param)\n if (\n param['parameter_id']\ + \ == 'booster'\n and 'conditional_parameter_specs' in param\n \ + \ ):\n for booster_param in param['conditional_parameter_specs']:\n\ + \ _add_booster_param(\n override_booster_params,\n \ + \ booster_param,\n override_boosters=param['categorical_value_spec']['values'],\n\ + \ )\n\n # Validate override params according to XGBoost param dependencies.\n\ + \ tree_method = override_booster_params.get('tree_method', None)\n if\ + \ tree_method is not None:\n for booster, tree_method_spec in tree_method.items():\n\ + \ if tree_method_spec['categorical_value_spec']['values'] != ['exact']:\n\ + \ continue\n # TODO(b/277777886): exact requires non-zero max_depth\ + \ value.\n # The below code is no longer necessary after raising min_value\ + \ to 6 in\n # the default spec. In the long run, we need to decide\ + \ the best\n # approach for max_depth. 
Keeping the code for now in\ + \ case the approach\n # involves overriding max_depth for 'exact' tree_method.\n\ + \ max_depth_spec = {\n 'parameter_id': 'max_depth',\n \ + \ 'integer_value_spec': {'min_value': 6, 'max_value': 10},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n }\n override_booster_params['max_depth']\ + \ = override_booster_params.get(\n 'max_depth', {booster: max_depth_spec}\n\ + \ )\n override_booster_params['max_depth'][booster] = override_booster_params[\n\ + \ 'max_depth'\n ].get(booster, max_depth_spec)\n try:\n\ + \ _validate_int_spec(\n override_booster_params['max_depth'][booster],\n\ + \ lower_bound=1,\n upper_bound=None,\n )\n\ + \ except ValueError as e:\n raise ValueError(\n 'max_depth\ + \ cannot be 0 (or < 0) when tree method is fixed to be '\n '\"\ + exact\".'\n ) from e\n\n # Construct the modified study specs study_spec_parameters.\n\ + \ study_spec_parameters = []\n for default_param in default_params:\n\ + \ override_param = override_params.get(\n default_param['parameter_id'],\ + \ default_param\n )\n study_spec_parameters.append(override_param)\n\ + \ # Override conditional parameters for booster.\n if default_param['parameter_id']\ + \ == 'booster':\n booster_param_specs = []\n override_booster_vals\ + \ = override_param['categorical_value_spec']['values']\n\n for booster_param\ + \ in default_param['conditional_parameter_specs']:\n override_parent_boosters\ + \ = set(\n booster_param['parent_categorical_values']['values']\n\ + \ ).intersection(override_booster_vals)\n if not override_parent_boosters:\n\ + \ # No need to include a booster param if no relevant booster will\n\ + \ # be used.\n continue\n # Update default booster\ + \ param boosters to exclude irrelevant boosters.\n booster_param['parent_categorical_values']['values']\ + \ = list(\n override_parent_boosters\n )\n booster_param_specs.extend(\n\ + \ _get_booster_param_specs(\n override_booster_params,\n\ + \ param_id=booster_param['parameter_spec']['parameter_id'],\n\ + \ default_param_spec=booster_param,\n )\n \ + \ )\n\n for booster_param_name in _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS:\n\ + \ booster_param_specs.extend(\n _get_booster_param_specs(\n\ + \ override_booster_params,\n param_id=booster_param_name,\n\ + \ default_param_spec=None,\n )\n )\n\n\ + \ # booster_param_specs combines the overriding booster parameter\n\ + \ # specs from user input and the default booster parameter specs.\n\ + \ override_param['conditional_parameter_specs'] = booster_param_specs\n\ + \n return study_spec_parameters\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - 
'{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@"
+
+ '
+ - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
+ \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\
+ \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\
+ \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\
+ \ \"\"\"Splits materialized_data into materialized_data test, train, and\
+ \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\
+ \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\
+ \ materialized_train_split: Path pattern to materialized_train_split.\n\
+ \ materialized_eval_split: Path pattern to materialized_eval_split.\n\
+ \ materialized_test_split: Path pattern to materialized_test_split.\n\
+ \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
+ \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
+ \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\
+ \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\
+ \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\
+ \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['tf_record_data_source'][\n\
+ \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['avro_data_source'][\n \
+ \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\
+ \ file_patterns = materialized_data_json['parquet_data_source'][\n \
+ \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\
+ \ data source: {materialized_data_json}')\n\n # we map indices to file\
+ \ patterns based on the ordering of insertion order\n # in our transform_data\
+ \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\
+ \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\
+ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\
+ \ 'w') as f:\n f.write(file_patterns[2])\n\n"
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325
+ exec-training-configurator-and-validator:
+ container:
+ args:
+ - training_configurator_and_validator
+ - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}'
+ - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}'
+ - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}'
+ - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}'
+ - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}'
+ - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}'
+ - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}'
+ - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}'
+ - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}'
+ - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-xgboost-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"xgboost-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", 
"{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", + "}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The XGBoost HyperparameterTuningJob pipeline. + name: automl-tabular-xgboost-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--disable_default_eval_metric: + componentInputParameter: disable_default_eval_metric + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_metric: + componentInputParameter: eval_metric + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + 
componentInputParameter: location + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--objective: + componentInputParameter: objective + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--seed_per_iteration: + componentInputParameter: seed_per_iteration + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_accelerator_count: + componentInputParameter: training_accelerator_count + pipelinechannel--training_accelerator_type: + componentInputParameter: training_accelerator_type + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--training_machine_type: + componentInputParameter: training_machine_type + pipelinechannel--training_total_replica_count: + componentInputParameter: training_total_replica_count + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + 
pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: The BigQuery data source. + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: The CSV data source. + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + objective: + description: 'Specifies the learning task and the learning objective. Must + be + + one of [reg:squarederror, reg:squaredlogerror, + + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + + binary:logistic, multi:softprob].' + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' 
+ isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize. For options, please look under + + ''eval_metrics'' at + + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: 'List of auto transform features in the + + comma-separated string format.' + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + training_machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + training_total_replica_count: + defaultValue: 1.0 + description: Number of workers. + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py new file mode 100644 index 0000000000..e03036c353 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py @@ -0,0 +1,77 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML XGBoost Trainer component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def xgboost_trainer( + project: str, + location: str, + worker_pool_specs: list, + gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains an XGBoost model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + worker_pool_specs: The worker pool specs. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "xgboost-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": ', + worker_pool_specs, + '}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml new file mode 100644 index 0000000000..0fc86f8c67 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -0,0 +1,4396 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-xgboost-trainer +# Description: The XGBoost training pipeline. 
+# Inputs: +# base_score: float [Default: 0.5] +# bigquery_staging_full_dataset_id: str [Default: ''] +# booster: str [Default: 'gbtree'] +# colsample_bylevel: float [Default: 1.0] +# colsample_bynode: float [Default: 1.0] +# colsample_bytree: float [Default: 1.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_default_eval_metric: int [Default: 0.0] +# early_stopping_rounds: int [Default: -1.0] +# encryption_spec_key_name: str [Default: ''] +# eta: float [Default: 0.3] +# eval_metric: str [Default: ''] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# feature_selector: str [Default: 'cyclic'] +# gamma: float [Default: 0.0] +# grow_policy: str [Default: 'depthwise'] +# huber_slope: float [Default: 1.0] +# interaction_constraints: str [Default: ''] +# location: str +# max_bin: int [Default: 256.0] +# max_cat_to_onehot: int [Default: -1.0] +# max_delta_step: float [Default: 0.0] +# max_depth: int [Default: 6.0] +# max_leaves: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# min_child_weight: float [Default: 1.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# monotone_constraints: str [Default: ''] +# normalize_type: str [Default: 'tree'] +# num_boost_round: int [Default: 10.0] +# num_parallel_tree: int [Default: 1.0] +# objective: str +# one_drop: int [Default: 0.0] +# predefined_split_key: str [Default: ''] +# process_type: str [Default: 'default'] +# project: str +# rate_drop: float [Default: 0.0] +# refresh_leaf: int [Default: 1.0] +# reg_alpha: float [Default: 0.0] +# reg_lambda: float [Default: 1.0] +# root_dir: str +# run_evaluation: bool [Default: True] +# run_feature_selection: bool [Default: False] +# sample_type: str [Default: 'uniform'] +# sampling_method: str [Default: 'uniform'] +# scale_pos_weight: float [Default: 1.0] +# seed: int [Default: 0.0] +# seed_per_iteration: bool [Default: False] +# skip_drop: float [Default: 0.0] +# stratified_split_key: str [Default: ''] +# subsample: float [Default: 1.0] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# top_k: int [Default: 0.0] +# training_accelerator_count: int [Default: 0.0] +# training_accelerator_type: str [Default: ''] +# training_fraction: float [Default: -1.0] +# training_machine_type: str [Default: 'c2-standard-16'] +# training_total_replica_count: int [Default: 1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# tree_method: str [Default: 'auto'] +# tweedie_variance_power: float [Default: 1.5] +# updater: str [Default: ''] +# 
validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: 
pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--get-prediction-type-for-xgboost-Output: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + dependentTasks: + - xgboost-trainer + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - generate-xgboost-trainer-worker-pool-specs + - get-prediction-type-for-xgboost + inputs: + artifacts: + pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: 
generate-xgboost-trainer-worker-pool-specs + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--get-prediction-type-for-xgboost-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - get-prediction-type-for-xgboost + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: 
pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: boosted_trees + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + generate-xgboost-trainer-worker-pool-specs: + cachingOptions: + enableCache: true + componentRef: + name: comp-generate-xgboost-trainer-worker-pool-specs + dependentTasks: + - feature-transform-engine + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + accelerator_count: + componentInputParameter: pipelinechannel--training_accelerator_count + accelerator_type: + componentInputParameter: pipelinechannel--training_accelerator_type + base_score: + componentInputParameter: pipelinechannel--base_score + booster: + componentInputParameter: pipelinechannel--booster + colsample_bylevel: + componentInputParameter: pipelinechannel--colsample_bylevel + colsample_bynode: + componentInputParameter: pipelinechannel--colsample_bynode + colsample_bytree: + componentInputParameter: pipelinechannel--colsample_bytree + disable_default_eval_metric: + componentInputParameter: 
pipelinechannel--disable_default_eval_metric + early_stopping_rounds: + componentInputParameter: pipelinechannel--early_stopping_rounds + eta: + componentInputParameter: pipelinechannel--eta + eval_metric: + componentInputParameter: pipelinechannel--eval_metric + feature_selector: + componentInputParameter: pipelinechannel--feature_selector + gamma: + componentInputParameter: pipelinechannel--gamma + grow_policy: + componentInputParameter: pipelinechannel--grow_policy + huber_slope: + componentInputParameter: pipelinechannel--huber_slope + interaction_constraints: + componentInputParameter: pipelinechannel--interaction_constraints + machine_type: + componentInputParameter: pipelinechannel--training_machine_type + max_bin: + componentInputParameter: pipelinechannel--max_bin + max_cat_to_onehot: + componentInputParameter: pipelinechannel--max_cat_to_onehot + max_delta_step: + componentInputParameter: pipelinechannel--max_delta_step + max_depth: + componentInputParameter: pipelinechannel--max_depth + max_leaves: + componentInputParameter: pipelinechannel--max_leaves + min_child_weight: + componentInputParameter: pipelinechannel--min_child_weight + monotone_constraints: + componentInputParameter: pipelinechannel--monotone_constraints + normalize_type: + componentInputParameter: pipelinechannel--normalize_type + num_boost_round: + componentInputParameter: pipelinechannel--num_boost_round + num_parallel_tree: + componentInputParameter: pipelinechannel--num_parallel_tree + objective: + componentInputParameter: pipelinechannel--objective + one_drop: + componentInputParameter: pipelinechannel--one_drop + process_type: + componentInputParameter: pipelinechannel--process_type + rate_drop: + componentInputParameter: pipelinechannel--rate_drop + refresh_leaf: + componentInputParameter: pipelinechannel--refresh_leaf + reg_alpha: + componentInputParameter: pipelinechannel--reg_alpha + reg_lambda: + componentInputParameter: pipelinechannel--reg_lambda + sample_type: + componentInputParameter: pipelinechannel--sample_type + sampling_method: + componentInputParameter: pipelinechannel--sampling_method + scale_pos_weight: + componentInputParameter: pipelinechannel--scale_pos_weight + seed: + componentInputParameter: pipelinechannel--seed + seed_per_iteration: + componentInputParameter: pipelinechannel--seed_per_iteration + skip_drop: + componentInputParameter: pipelinechannel--skip_drop + subsample: + componentInputParameter: pipelinechannel--subsample + target_column: + componentInputParameter: pipelinechannel--target_column + top_k: + componentInputParameter: pipelinechannel--top_k + total_replica_count: + componentInputParameter: pipelinechannel--training_total_replica_count + tree_method: + componentInputParameter: pipelinechannel--tree_method + tweedie_variance_power: + componentInputParameter: pipelinechannel--tweedie_variance_power + updater: + componentInputParameter: pipelinechannel--updater + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: generate-xgboost-trainer-worker-pool-specs + get-prediction-type-for-xgboost: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-type-for-xgboost + inputs: + parameters: + objective: + componentInputParameter: pipelinechannel--objective + taskInfo: + name: get-prediction-type-for-xgboost + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - generate-xgboost-trainer-worker-pool-specs + - xgboost-trainer + inputs: + artifacts: + 
unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: generate-xgboost-trainer-worker-pool-specs + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + - get-prediction-type-for-xgboost + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + xgboost-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-xgboost-trainer + dependentTasks: + - generate-xgboost-trainer-worker-pool-specs + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + worker_pool_specs: + taskOutputParameter: + outputParameterKey: worker_pool_specs + producerTask: generate-xgboost-trainer-worker-pool-specs + taskInfo: + name: xgboost-trainer + inputDefinitions: + parameters: + pipelinechannel--base_score: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--booster: + parameterType: STRING + pipelinechannel--colsample_bylevel: + parameterType: NUMBER_DOUBLE + pipelinechannel--colsample_bynode: + parameterType: NUMBER_DOUBLE + pipelinechannel--colsample_bytree: + parameterType: NUMBER_DOUBLE + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_default_eval_metric: + 
parameterType: NUMBER_INTEGER + pipelinechannel--early_stopping_rounds: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eta: + parameterType: NUMBER_DOUBLE + pipelinechannel--eval_metric: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--feature_selector: + parameterType: STRING + pipelinechannel--gamma: + parameterType: NUMBER_DOUBLE + pipelinechannel--grow_policy: + parameterType: STRING + pipelinechannel--huber_slope: + parameterType: NUMBER_DOUBLE + pipelinechannel--interaction_constraints: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_bin: + parameterType: NUMBER_INTEGER + pipelinechannel--max_cat_to_onehot: + parameterType: NUMBER_INTEGER + pipelinechannel--max_delta_step: + parameterType: NUMBER_DOUBLE + pipelinechannel--max_depth: + parameterType: NUMBER_INTEGER + pipelinechannel--max_leaves: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--min_child_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--monotone_constraints: + parameterType: STRING + pipelinechannel--normalize_type: + parameterType: STRING + pipelinechannel--num_boost_round: + parameterType: NUMBER_INTEGER + pipelinechannel--num_parallel_tree: + parameterType: NUMBER_INTEGER + pipelinechannel--objective: + parameterType: STRING + pipelinechannel--one_drop: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--process_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--rate_drop: + parameterType: NUMBER_DOUBLE + pipelinechannel--refresh_leaf: + parameterType: NUMBER_INTEGER + pipelinechannel--reg_alpha: + parameterType: NUMBER_DOUBLE + pipelinechannel--reg_lambda: + parameterType: NUMBER_DOUBLE + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--sample_type: + parameterType: STRING + pipelinechannel--sampling_method: + parameterType: STRING + pipelinechannel--scale_pos_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--seed_per_iteration: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--skip_drop: + parameterType: NUMBER_DOUBLE + pipelinechannel--stratified_split_key: + parameterType: STRING + 
pipelinechannel--subsample: + parameterType: NUMBER_DOUBLE + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--top_k: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_count: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_type: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_machine_type: + parameterType: STRING + pipelinechannel--training_total_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--tree_method: + parameterType: STRING + pipelinechannel--tweedie_variance_power: + parameterType: NUMBER_DOUBLE + pipelinechannel--updater: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
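The transformation entries documented above are plain JSON objects, so a `tf_transformations_path` file can be assembled with the standard library alone. A minimal sketch, assuming hypothetical taxi-trip style column names and a placeholder GCS destination:

.. code-block:: python

    # Illustrative only: builds a tf_transformations_path-style config from the
    # built-in transformations described above (ZScale, Datetime, Vocabulary).
    import json

    transformations = [
        {"transformation": "ZScale", "input_columns": ["trip_miles"]},
        {
            "transformation": "Datetime",
            "input_columns": ["trip_start_timestamp"],
            "time_format": "%Y-%m-%d",
            "datetime_features": ["YEAR", "MONTH", "DAY_OF_WEEK"],
        },
        {"transformation": "Vocabulary", "input_columns": ["payment_type"], "top_k": 100},
    ]

    with open("transform_config.json", "w") as f:
        json.dump(transformations, f, indent=2)

    # The file would then be uploaded and its URI (a placeholder such as
    # "gs://<bucket>/transform_config.json") passed as tf_transformations_path.

Each entry uses only keys described above (`transformation`, `input_columns`, and the per-transformation arguments); chaining is expressed by listing multiple entries for the same column.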
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-generate-xgboost-trainer-worker-pool-specs: + executorLabel: exec-generate-xgboost-trainer-worker-pool-specs + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path to JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized validation + + split.' + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized train + + split.' + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to transform output. + parameters: + accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + base_score: + defaultValue: 0.5 + description: The initial prediction score of all instances, global bias. + isOptional: true + parameterType: NUMBER_DOUBLE + booster: + defaultValue: gbtree + description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree + and + + dart use tree based model while gblinear uses linear function.' + isOptional: true + parameterType: STRING + colsample_bylevel: + defaultValue: 1.0 + description: Subsample ratio of columns for each split, in each level. + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bynode: + defaultValue: 1.0 + description: Subsample ratio of columns for each node (split). + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bytree: + defaultValue: 1.0 + description: Subsample ratio of columns when constructing each tree. + isOptional: true + parameterType: NUMBER_DOUBLE + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + early_stopping_rounds: + defaultValue: -1.0 + description: 'Activates early stopping. Validation error needs to + + decrease at least every early_stopping_rounds round(s) to continue + + training.' + isOptional: true + parameterType: NUMBER_INTEGER + eta: + defaultValue: 0.3 + description: Learning rate. + isOptional: true + parameterType: NUMBER_DOUBLE + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' 
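Most of the trainer inputs listed above carry XGBoost's native parameter names through to the training job. A minimal local sketch, assuming the open-source `xgboost` and `numpy` packages and synthetic data, of how those names map onto `xgboost.train`; this only clarifies parameter semantics and is not how the component launches training:

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    # Synthetic binary-classification data, split into train/validation.
    rng = np.random.default_rng(seed=0)
    X = rng.normal(size=(200, 5))
    y = rng.integers(0, 2, size=200)
    dtrain = xgb.DMatrix(X[:150], label=y[:150])
    dval = xgb.DMatrix(X[150:], label=y[150:])

    params = {
        "objective": "binary:logistic",  # the required `objective` input
        "eta": 0.3,
        "max_depth": 6,
        "min_child_weight": 1.0,
        "subsample": 1.0,
        "colsample_bytree": 1.0,
        "reg_lambda": 1.0,
        "reg_alpha": 0.0,
        "tree_method": "auto",
        "eval_metric": "logloss",        # `eval_metric` is a comma-separated string
    }

    booster = xgb.train(
        params,
        dtrain,
        num_boost_round=10,              # `num_boost_round`
        evals=[(dval, "validation")],
        early_stopping_rounds=5,         # `early_stopping_rounds` (-1 disables it)
    )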
+ isOptional: true + parameterType: STRING + feature_selector: + defaultValue: cyclic + description: '[linear booster only] Feature selection and ordering + + method.' + isOptional: true + parameterType: STRING + gamma: + defaultValue: 0.0 + description: 'Minimum loss reduction required to make a further partition + on a leaf + + node of the tree.' + isOptional: true + parameterType: NUMBER_DOUBLE + grow_policy: + defaultValue: depthwise + description: 'Controls a way new nodes are added to the tree. Only supported + + if tree_method is hist. Choices:["depthwise", "lossguide"]' + isOptional: true + parameterType: STRING + huber_slope: + defaultValue: 1.0 + description: 'A parameter used for Pseudo-Huber loss to define the delta + + term.' + isOptional: true + parameterType: NUMBER_DOUBLE + interaction_constraints: + defaultValue: '' + description: 'Constraints for interaction representing permitted + + interactions.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + max_bin: + defaultValue: 256.0 + description: Maximum number of discrete bins to bucket continuous features. + isOptional: true + parameterType: NUMBER_INTEGER + max_cat_to_onehot: + defaultValue: -1.0 + description: 'A threshold for deciding whether XGBoost should use + + one-hot encoding based split for categorical data.' + isOptional: true + parameterType: NUMBER_INTEGER + max_delta_step: + defaultValue: 0.0 + description: 'Maximum delta step we allow each tree''s weight estimation + to + + be.' + isOptional: true + parameterType: NUMBER_DOUBLE + max_depth: + defaultValue: 6.0 + description: Maximum depth of a tree. + isOptional: true + parameterType: NUMBER_INTEGER + max_leaves: + defaultValue: 0.0 + description: Maximum number of nodes to be added. + isOptional: true + parameterType: NUMBER_INTEGER + min_child_weight: + defaultValue: 1.0 + description: Minimum sum of instance weight(hessian) needed in a child. + isOptional: true + parameterType: NUMBER_DOUBLE + monotone_constraints: + defaultValue: '' + description: Constraint of variable monotonicity. + isOptional: true + parameterType: STRING + normalize_type: + defaultValue: tree + description: '[dart booster only] Type of normalization algorithm, + + Choices:["tree", "forest"]' + isOptional: true + parameterType: STRING + num_boost_round: + defaultValue: 10.0 + description: Number of boosting iterations. + isOptional: true + parameterType: NUMBER_INTEGER + num_parallel_tree: + defaultValue: 1.0 + description: 'Number of parallel trees constructed during each + + iteration. This option is used to support boosted random forest.' + isOptional: true + parameterType: NUMBER_INTEGER + objective: + description: Required. Specifies the learning task and the learning objective. + parameterType: STRING + one_drop: + defaultValue: 0.0 + description: '[dart booster only] When this flag is enabled, at least one + tree + + is always dropped during the dropout (allows Binomial-plus-one or + + epsilon-dropout from the original DART paper).' + isOptional: true + parameterType: NUMBER_INTEGER + process_type: + defaultValue: default + description: 'A type of boosting process to run. Choices:["default", + + "update"]' + isOptional: true + parameterType: STRING + rate_drop: + defaultValue: 0.0 + description: '[dart booster only] Dropout rate.''' + isOptional: true + parameterType: NUMBER_DOUBLE + refresh_leaf: + defaultValue: 1.0 + description: 'Refresh updater plugin. 
Update tree leaf and nodes''s stats + if + + True. When it is False, only node stats are updated.' + isOptional: true + parameterType: NUMBER_INTEGER + reg_alpha: + defaultValue: 0.0 + description: L1 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + reg_lambda: + defaultValue: 1.0 + description: L2 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + sample_type: + defaultValue: uniform + description: '[dart booster only] Type of sampling algorithm. + + Choices:["uniform", "weighted"]' + isOptional: true + parameterType: STRING + sampling_method: + defaultValue: uniform + description: The method to use to sample the training instances. + isOptional: true + parameterType: STRING + scale_pos_weight: + defaultValue: 1.0 + description: Control the balance of positive and negative weights. + isOptional: true + parameterType: NUMBER_DOUBLE + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + skip_drop: + defaultValue: 0.0 + description: '[dart booster only] Probability of skipping the dropout procedure + + during a boosting iteration.' + isOptional: true + parameterType: NUMBER_DOUBLE + subsample: + defaultValue: 1.0 + description: Subsample ratio of the training instance. + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: Required. Target column name. + parameterType: STRING + top_k: + defaultValue: 0.0 + description: 'The number of top features to select in greedy and thrifty + feature + + selector. The value of 0 means using all the features.' + isOptional: true + parameterType: NUMBER_INTEGER + total_replica_count: + description: Number of workers. + parameterType: NUMBER_INTEGER + tree_method: + defaultValue: auto + description: 'The tree construction algorithm used in XGBoost. Choices: + + ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' + isOptional: true + parameterType: STRING + tweedie_variance_power: + defaultValue: 1.5 + description: 'Parameter that controls the variance of the Tweedie + + distribution.' + isOptional: true + parameterType: NUMBER_DOUBLE + updater: + defaultValue: '' + description: 'A comma separated string defining the sequence of tree updaters + to + + run.' + isOptional: true + parameterType: STRING + weight_column: + defaultValue: '' + description: Weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + job_dir: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + worker_pool_specs: + parameterType: LIST + comp-get-prediction-type-for-xgboost: + executorLabel: exec-get-prediction-type-for-xgboost + inputDefinitions: + parameters: + objective: + description: The XGBoost training objective + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. 
Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. 
+ + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
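A sketch of how the batch-prediction inputs above are typically wired from the KFP SDK, assuming the `ModelBatchPredictOp` wrapper shipped with `google-cloud-pipeline-components` (the import path can differ between releases) and placeholder project/bucket values; in a real pipeline a `model` or `unmanaged_container_model` artifact from an upstream task would be supplied as well:

.. code-block:: python

    from kfp import dsl
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp

    @dsl.pipeline(name="batch-predict-example")
    def batch_predict_example(project: str, location: str = "us-central1"):
        # Placeholders: instances under gs://<bucket>/instances, output under
        # gs://<bucket>/predictions; a model artifact would normally be passed too.
        ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name="example-batch-predict",
            gcs_source_uris=["gs://<bucket>/instances/*.jsonl"],
            instances_format="jsonl",
            predictions_format="jsonl",
            gcs_destination_output_uri_prefix="gs://<bucket>/predictions",
            machine_type="n1-standard-4",
            starting_replica_count=1,
            max_replica_count=10,
        )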
+ isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. 
The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. 
Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' 
+ outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. 
If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. 
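A minimal sketch (not the component's actual implementation) of the objective rules quoted above, i.e. the per-prediction-type defaults and the extra value required by the "-at-" objectives:

.. code-block:: python

    # Binary-classification default shown; per the description above, multi-class
    # classification defaults to "minimize-log-loss" instead.
    VALID_OBJECTIVES = {
        "classification": {
            "maximize-au-roc", "minimize-log-loss", "maximize-au-prc",
            "maximize-precision-at-recall", "maximize-recall-at-precision",
        },
        "regression": {"minimize-rmse", "minimize-mae", "minimize-rmsle"},
    }
    DEFAULTS = {"classification": "maximize-au-roc", "regression": "minimize-rmse"}

    def resolve_objective(prediction_type: str,
                          optimization_objective: str = "",
                          precision_value: float = -1.0,
                          recall_value: float = -1.0) -> str:
        objective = optimization_objective or DEFAULTS[prediction_type]
        if objective not in VALID_OBJECTIVES[prediction_type]:
            raise ValueError(f"{objective!r} is not valid for {prediction_type}")
        if objective == "maximize-recall-at-precision" and not 0 <= precision_value <= 1:
            raise ValueError("optimization_objective_precision_value must be in [0, 1]")
        if objective == "maximize-precision-at-recall" and not 0 <= recall_value <= 1:
            raise ValueError("optimization_objective_recall_value must be in [0, 1]")
        return objective

    print(resolve_objective("classification"))              # maximize-au-roc
    print(resolve_objective("regression", "minimize-mae"))  # minimize-mae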
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-xgboost-trainer: + executorLabel: exec-xgboost-trainer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + worker_pool_specs: + description: The worker pool specs. + parameterType: LIST + outputDefinitions: + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-generate-xgboost-trainer-worker-pool-specs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _generate_xgboost_trainer_worker_pool_specs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _generate_xgboost_trainer_worker_pool_specs(\n total_replica_count:\ + \ int,\n target_column: str,\n objective: str,\n materialized_train_split:\ + \ dsl.InputPath('MaterializedSplit'),\n materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n\ + \ transform_output: dsl.InputPath('TransformOutput'),\n training_schema_uri:\ + \ dsl.InputPath('DatasetSchema'),\n instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ + \ job_dir: dsl.OutputPath('JobDir'),\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n machine_type: str = 'c2-standard-16',\n\ + \ accelerator_type: str = '',\n accelerator_count: int = 0,\n weight_column:\ + \ str = '',\n eval_metric: str = '',\n num_boost_round: int = 10,\n\ + \ early_stopping_rounds: int = -1,\n base_score: float = 0.5,\n \ + \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ + \ bool = False,\n booster: str = 'gbtree',\n eta: float = 0.3,\n \ + \ gamma: float = 0.0,\n max_depth: int = 6,\n min_child_weight:\ + \ float = 1.0,\n max_delta_step: float = 0.0,\n subsample: float =\ + \ 1.0,\n colsample_bytree: float = 1.0,\n colsample_bylevel: float\ + \ = 1.0,\n colsample_bynode: float = 1.0,\n reg_lambda: float = 1.0,\n\ + \ reg_alpha: float = 0.0,\n tree_method: str = 'auto',\n scale_pos_weight:\ + \ float = 1.0,\n updater: str = '',\n refresh_leaf: int = 1,\n \ + \ process_type: str = 'default',\n grow_policy: str = 'depthwise',\n\ + \ sampling_method: str = 'uniform',\n monotone_constraints: str =\ + \ '',\n interaction_constraints: str = '',\n sample_type: str = 'uniform',\n\ + \ normalize_type: str = 'tree',\n rate_drop: float = 0.0,\n one_drop:\ + \ int = 0,\n skip_drop: float = 0.0,\n num_parallel_tree: int = 1,\n\ + \ feature_selector: str = 'cyclic',\n top_k: int = 0,\n max_cat_to_onehot:\ + \ int = -1,\n max_leaves: int = 0,\n max_bin: int = 256,\n tweedie_variance_power:\ + \ float = 1.5,\n huber_slope: float = 1.0,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('worker_pool_specs', list), # pylint:disable=g-bare-generic\n\ + \ ],\n):\n \"\"\"Generates worker pool specs for XGBoost training.\n\ + \n For single machine XGBoost training, returns one worker pool spec for\ + \ master.\n For distributed XGBoost training, returns two worker pool specs,\ + \ the first one\n for master and the second one for the remaining workers.\n\ + \n Args:\n total_replica_count: Number of workers.\n target_column:\ + \ Required. Target column name.\n objective: Required. Specifies the\ + \ learning task and the learning objective.\n materialized_train_split:\ + \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ + \ Required. The path to the materialized validation\n split.\n transform_output:\ + \ Required. 
The path to transform output.\n training_schema_uri: Required.\ + \ The path to the training schema.\n instance_baseline: Path to JSON\ + \ file for baseline values.\n job_dir: Job dir path.\n unmanaged_container_model:\ + \ The unmanaged model.\n machine_type: Machine type.\n accelerator_type:\ + \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ + \ Weight column name.\n eval_metric: Evaluation metrics for validation\ + \ data represented as a\n comma-separated string.\n num_boost_round:\ + \ Number of boosting iterations.\n early_stopping_rounds: Activates early\ + \ stopping. Validation error needs to\n decrease at least every early_stopping_rounds\ + \ round(s) to continue\n training.\n base_score: The initial prediction\ + \ score of all instances, global bias.\n disable_default_eval_metric:\ + \ Flag to disable default metric. Set to >0 to\n disable. Default to\ + \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG determnisticly\ + \ via iterator number.\n booster: Which booster to use, can be gbtree,\ + \ gblinear or dart. gbtree and\n dart use tree based model while gblinear\ + \ uses linear function.\n eta: Learning rate.\n gamma: Minimum loss\ + \ reduction required to make a further partition on a leaf\n node of\ + \ the tree.\n max_depth: Maximum depth of a tree.\n min_child_weight:\ + \ Minimum sum of instance weight(hessian) needed in a child.\n max_delta_step:\ + \ Maximum delta step we allow each tree's weight estimation to\n be.\n\ + \ subsample: Subsample ratio of the training instance.\n colsample_bytree:\ + \ Subsample ratio of columns when constructing each tree.\n colsample_bylevel:\ + \ Subsample ratio of columns for each split, in each level.\n colsample_bynode:\ + \ Subsample ratio of columns for each node (split).\n reg_lambda: L2\ + \ regularization term on weights.\n reg_alpha: L1 regularization term\ + \ on weights.\n tree_method: The tree construction algorithm used in\ + \ XGBoost. Choices:\n [\"auto\", \"exact\", \"approx\", \"hist\", \"\ + gpu_exact\", \"gpu_hist\"].\n scale_pos_weight: Control the balance of\ + \ positive and negative weights.\n updater: A comma separated string\ + \ defining the sequence of tree updaters to\n run.\n refresh_leaf:\ + \ Refresh updater plugin. Update tree leaf and nodes's stats if\n True.\ + \ When it is False, only node stats are updated.\n process_type: A type\ + \ of boosting process to run. Choices:[\"default\",\n \"update\"]\n\ + \ grow_policy: Controls a way new nodes are added to the tree. Only supported\n\ + \ if tree_method is hist. 
Choices:[\"depthwise\", \"lossguide\"]\n\ + \ sampling_method: The method to use to sample the training instances.\n\ + \ monotone_constraints: Constraint of variable monotonicity.\n interaction_constraints:\ + \ Constraints for interaction representing permitted\n interactions.\n\ + \ sample_type: [dart booster only] Type of sampling algorithm.\n \ + \ Choices:[\"uniform\", \"weighted\"]\n normalize_type: [dart booster\ + \ only] Type of normalization algorithm,\n Choices:[\"tree\", \"forest\"\ + ]\n rate_drop: [dart booster only] Dropout rate.'\n one_drop: [dart\ + \ booster only] When this flag is enabled, at least one tree\n is always\ + \ dropped during the dropout (allows Binomial-plus-one or\n epsilon-dropout\ + \ from the original DART paper).\n skip_drop: [dart booster only] Probability\ + \ of skipping the dropout procedure\n during a boosting iteration.\n\ + \ num_parallel_tree: Number of parallel trees constructed during each\n\ + \ iteration. This option is used to support boosted random forest.\n\ + \ feature_selector: [linear booster only] Feature selection and ordering\n\ + \ method.\n top_k: The number of top features to select in greedy\ + \ and thrifty feature\n selector. The value of 0 means using all the\ + \ features.\n max_cat_to_onehot: A threshold for deciding whether XGBoost\ + \ should use\n one-hot encoding based split for categorical data.\n\ + \ max_leaves: Maximum number of nodes to be added.\n max_bin: Maximum\ + \ number of discrete bins to bucket continuous features.\n tweedie_variance_power:\ + \ Parameter that controls the variance of the Tweedie\n distribution.\n\ + \ huber_slope: A parameter used for Pseudo-Huber loss to define the delta\n\ + \ term.\n\n Raises:\n ValueError: If accelerator_count <= 0 and\ + \ accelerator_type is specified.\n\n Returns:\n Outputs containing the\ + \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ + \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ + \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ + \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325'\n\ + \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ + \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ + \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ + \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ + \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ + \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ + \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\ + \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\ + \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\ + \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ + \ f'--eval_metric={eval_metric}',\n f'--num_boost_round={num_boost_round}',\n\ + \ f'--base_score={base_score}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ + \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ + \ f'--booster={booster}',\n f'--eta={eta}',\n\ + \ f'--gamma={gamma}',\n f'--max_depth={max_depth}',\n\ + \ f'--min_child_weight={min_child_weight}',\n \ + \ f'--max_delta_step={max_delta_step}',\n f'--subsample={subsample}',\n\ + \ f'--colsample_bytree={colsample_bytree}',\n \ + \ f'--colsample_bylevel={colsample_bylevel}',\n f'--colsample_bynode={colsample_bynode}',\n\ + \ 
f'--lambda={reg_lambda}',\n f'--alpha={reg_alpha}',\n\ + \ f'--tree_method={tree_method}',\n f'--scale_pos_weight={scale_pos_weight}',\n\ + \ f'--refresh_leaf={refresh_leaf}',\n f'--process_type={process_type}',\n\ + \ f'--grow_policy={grow_policy}',\n f'--sampling_method={sampling_method}',\n\ + \ f'--sample_type={sample_type}',\n f'--normalize_type={normalize_type}',\n\ + \ f'--rate_drop={rate_drop}',\n f'--one_drop={one_drop}',\n\ + \ f'--skip_drop={skip_drop}',\n f'--num_parallel_tree={num_parallel_tree}',\n\ + \ f'--feature_selector={feature_selector}',\n \ + \ f'--top_k={top_k}',\n f'--max_leaves={max_leaves}',\n \ + \ f'--max_bin={max_bin}',\n f'--tweedie_variance_power={tweedie_variance_power}',\n\ + \ f'--huber_slope={huber_slope}',\n f'--prediction_docker_uri={prediction_docker_uri}',\n\ + \ '--executor_input={{$.json_escape[1]}}',\n ],\n\ + \ },\n }\n\n # Add optional arguments if set\n if weight_column:\n\ + \ master_worker_pool_spec['container_spec']['args'].append(\n \ + \ f'--weight_column={weight_column}'\n )\n if early_stopping_rounds\ + \ >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--early_stopping_rounds={early_stopping_rounds}'\n )\n if\ + \ updater:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--updater={updater}'\n )\n if monotone_constraints:\n \ + \ master_worker_pool_spec['container_spec']['args'].append(\n f'--monotone_constraints={monotone_constraints}'\n\ + \ )\n if interaction_constraints:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--interaction_constraints={interaction_constraints}'\n )\n\ + \ if max_cat_to_onehot >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--max_cat_to_onehot={max_cat_to_onehot}'\n )\n\n # Add accelerator_type\ + \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\ + \ <= 0:\n raise ValueError(\n 'Accelerator count must be greator\ + \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\ + \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\ + \ for distributed training.\n if total_replica_count > 1:\n additional_replica\ + \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\ + \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\ + \n # Build unmanaged_container_model\n model_dir = os.path.join(formatted_job_dir,\ + \ 'model')\n unmanaged_container_model.metadata['containerSpec'] = {\n\ + \ 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': os.path.join(model_dir, 'instance.yaml'),\n\ + \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\ + \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\ + \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n" + image: python:3.7 + exec-get-prediction-type-for-xgboost: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_type_for_xgboost + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ + \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ + \ objective: The XGBoost training objective\n\n Returns:\n A string.\ + \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ + \ or objective.startswith('multi'):\n return 'classification'\n elif\ + \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ + \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ + \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ + \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ + \ ' multi:softprob].'\n )\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ 
','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - 
training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-xgboost-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"xgboost-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", + "}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The XGBoost training pipeline. + name: automl-tabular-xgboost-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--base_score: + componentInputParameter: base_score + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--booster: + componentInputParameter: booster + pipelinechannel--colsample_bylevel: + componentInputParameter: colsample_bylevel + pipelinechannel--colsample_bynode: + componentInputParameter: colsample_bynode + pipelinechannel--colsample_bytree: + componentInputParameter: colsample_bytree + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--disable_default_eval_metric: + componentInputParameter: disable_default_eval_metric + pipelinechannel--early_stopping_rounds: + componentInputParameter: early_stopping_rounds + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eta: + componentInputParameter: eta + pipelinechannel--eval_metric: + componentInputParameter: eval_metric + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + 
componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--feature_selector: + componentInputParameter: feature_selector + pipelinechannel--gamma: + componentInputParameter: gamma + pipelinechannel--grow_policy: + componentInputParameter: grow_policy + pipelinechannel--huber_slope: + componentInputParameter: huber_slope + pipelinechannel--interaction_constraints: + componentInputParameter: interaction_constraints + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_bin: + componentInputParameter: max_bin + pipelinechannel--max_cat_to_onehot: + componentInputParameter: max_cat_to_onehot + pipelinechannel--max_delta_step: + componentInputParameter: max_delta_step + pipelinechannel--max_depth: + componentInputParameter: max_depth + pipelinechannel--max_leaves: + componentInputParameter: max_leaves + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--min_child_weight: + componentInputParameter: min_child_weight + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--monotone_constraints: + componentInputParameter: monotone_constraints + pipelinechannel--normalize_type: + componentInputParameter: normalize_type + pipelinechannel--num_boost_round: + componentInputParameter: num_boost_round + pipelinechannel--num_parallel_tree: + componentInputParameter: num_parallel_tree + pipelinechannel--objective: + componentInputParameter: objective + pipelinechannel--one_drop: + componentInputParameter: one_drop + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--process_type: + componentInputParameter: process_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--rate_drop: + componentInputParameter: rate_drop + pipelinechannel--refresh_leaf: + componentInputParameter: refresh_leaf + pipelinechannel--reg_alpha: + componentInputParameter: reg_alpha + pipelinechannel--reg_lambda: + componentInputParameter: reg_lambda + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--sample_type: + componentInputParameter: sample_type + pipelinechannel--sampling_method: + componentInputParameter: sampling_method + pipelinechannel--scale_pos_weight: + componentInputParameter: scale_pos_weight + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--seed_per_iteration: + componentInputParameter: seed_per_iteration + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + 
outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--skip_drop: + componentInputParameter: skip_drop + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--subsample: + componentInputParameter: subsample + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--top_k: + componentInputParameter: top_k + pipelinechannel--training_accelerator_count: + componentInputParameter: training_accelerator_count + pipelinechannel--training_accelerator_type: + componentInputParameter: training_accelerator_type + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--training_machine_type: + componentInputParameter: training_machine_type + pipelinechannel--training_total_replica_count: + componentInputParameter: training_total_replica_count + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--tree_method: + componentInputParameter: tree_method + pipelinechannel--tweedie_variance_power: + componentInputParameter: tweedie_variance_power + pipelinechannel--updater: + componentInputParameter: updater + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + base_score: + defaultValue: 0.5 + description: The initial prediction score of all instances, global bias. + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + booster: + defaultValue: gbtree + description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree + and + + dart use tree based model while gblinear uses linear function.' 
+ isOptional: true + parameterType: STRING + colsample_bylevel: + defaultValue: 1.0 + description: Subsample ratio of columns for each split, in each level. + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bynode: + defaultValue: 1.0 + description: Subsample ratio of columns for each node (split). + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bytree: + defaultValue: 1.0 + description: Subsample ratio of columns when constructing each tree. + isOptional: true + parameterType: NUMBER_DOUBLE + data_source_bigquery_table_path: + defaultValue: '' + description: The BigQuery data source. + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: The CSV data source. + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + early_stopping_rounds: + defaultValue: -1.0 + description: 'Activates early stopping. Validation error needs to + + decrease at least every early_stopping_rounds round(s) to continue + + training.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eta: + defaultValue: 0.3 + description: Learning rate. + isOptional: true + parameterType: NUMBER_DOUBLE + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + feature_selector: + defaultValue: cyclic + description: '[linear booster only] Feature selection and ordering + + method.' + isOptional: true + parameterType: STRING + gamma: + defaultValue: 0.0 + description: 'Minimum loss reduction required to make a further partition + on a leaf + + node of the tree.' + isOptional: true + parameterType: NUMBER_DOUBLE + grow_policy: + defaultValue: depthwise + description: 'Controls a way new nodes are added to the tree. Only supported + + if tree_method is hist. Choices:["depthwise", "lossguide"]' + isOptional: true + parameterType: STRING + huber_slope: + defaultValue: 1.0 + description: 'A parameter used for Pseudo-Huber loss to define the delta + + term.' + isOptional: true + parameterType: NUMBER_DOUBLE + interaction_constraints: + defaultValue: '' + description: 'Constraints for interaction representing permitted + + interactions.' + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_bin: + defaultValue: 256.0 + description: Maximum number of discrete bins to bucket continuous features. + isOptional: true + parameterType: NUMBER_INTEGER + max_cat_to_onehot: + defaultValue: -1.0 + description: 'A threshold for deciding whether XGBoost should use + + one-hot encoding based split for categorical data.' + isOptional: true + parameterType: NUMBER_INTEGER + max_delta_step: + defaultValue: 0.0 + description: 'Maximum delta step we allow each tree''s weight estimation to + + be.' + isOptional: true + parameterType: NUMBER_DOUBLE + max_depth: + defaultValue: 6.0 + description: Maximum depth of a tree. + isOptional: true + parameterType: NUMBER_INTEGER + max_leaves: + defaultValue: 0.0 + description: Maximum number of nodes to be added. + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + min_child_weight: + defaultValue: 1.0 + description: Minimum sum of instance weight(hessian) needed in a child. + isOptional: true + parameterType: NUMBER_DOUBLE + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + monotone_constraints: + defaultValue: '' + description: Constraint of variable monotonicity. 
+ isOptional: true + parameterType: STRING + normalize_type: + defaultValue: tree + description: '[dart booster only] Type of normalization algorithm, + + Choices:["tree", "forest"]' + isOptional: true + parameterType: STRING + num_boost_round: + defaultValue: 10.0 + description: Number of boosting iterations. + isOptional: true + parameterType: NUMBER_INTEGER + num_parallel_tree: + defaultValue: 1.0 + description: 'Number of parallel trees constructed during each + + iteration. This option is used to support boosted random forest.' + isOptional: true + parameterType: NUMBER_INTEGER + objective: + description: 'Specifies the learning task and the learning objective. Must + be + + one of [reg:squarederror, reg:squaredlogerror, + + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + + binary:logistic, multi:softprob].' + parameterType: STRING + one_drop: + defaultValue: 0.0 + description: '[dart booster only] When this flag is enabled, at least one + tree + + is always dropped during the dropout (allows Binomial-plus-one or + + epsilon-dropout from the original DART paper).' + isOptional: true + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + process_type: + defaultValue: default + description: 'A type of boosting process to run. Choices:["default", + + "update"]' + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + rate_drop: + defaultValue: 0.0 + description: '[dart booster only] Dropout rate.''' + isOptional: true + parameterType: NUMBER_DOUBLE + refresh_leaf: + defaultValue: 1.0 + description: 'Refresh updater plugin. Update tree leaf and nodes''s stats + if + + True. When it is False, only node stats are updated.' + isOptional: true + parameterType: NUMBER_INTEGER + reg_alpha: + defaultValue: 0.0 + description: L1 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + reg_lambda: + defaultValue: 1.0 + description: L2 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: true + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + sample_type: + defaultValue: uniform + description: '[dart booster only] Type of sampling algorithm. + + Choices:["uniform", "weighted"]' + isOptional: true + parameterType: STRING + sampling_method: + defaultValue: uniform + description: The method to use to sample the training instances. + isOptional: true + parameterType: STRING + scale_pos_weight: + defaultValue: 1.0 + description: Control the balance of positive and negative weights. + isOptional: true + parameterType: NUMBER_DOUBLE + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + skip_drop: + defaultValue: 0.0 + description: '[dart booster only] Probability of skipping the dropout procedure + + during a boosting iteration.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + subsample: + defaultValue: 1.0 + description: Subsample ratio of the training instance. + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: 'List of auto transform features in the + + comma-separated string format.' + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + top_k: + defaultValue: 0.0 + description: 'The number of top features to select in greedy and thrifty feature + + selector. The value of 0 means using all the features.' + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + training_machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + training_total_replica_count: + defaultValue: 1.0 + description: Number of workers. + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + tree_method: + defaultValue: auto + description: 'The tree construction algorithm used in XGBoost. Choices: + + ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' + isOptional: true + parameterType: STRING + tweedie_variance_power: + defaultValue: 1.5 + description: 'Parameter that controls the variance of the Tweedie + + distribution.' + isOptional: true + parameterType: NUMBER_DOUBLE + updater: + defaultValue: '' + description: 'A comma separated string defining the sequence of tree updaters + to + + run.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py new file mode 100644 index 0000000000..6dbcd85caf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GA AutoML forecasting components.""" + +from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp + +__all__ = [ + 'ProphetTrainerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml new file mode 100644 index 0000000000..14c7dd13b2 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml @@ -0,0 +1,1159 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-bqml-arima-prediction +# Description: Forecasts using a BQML ARIMA_PLUS model. 
+# Inputs: +# bigquery_destination_uri: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# encryption_spec_key_name: str [Default: ''] +# generate_explanation: bool [Default: False] +# location: str +# model_name: str +# project: str +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-create-dataset-2: + executorLabel: exec-bigquery-create-dataset-2 + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' 
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-create-dataset-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-dataset-2 + dependentTasks: + - get-table-location + - maybe-replace-with-default + - validate-inputs + inputs: + parameters: + dataset: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + exists_ok: + runtimeValue: + constant: 1.0 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-prediction-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - get-first-valid + - get-model-metadata + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--get-first-valid-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-first-valid + pipelinechannel--get-model-metadata-forecast_horizon: + taskOutputParameter: + outputParameterKey: forecast_horizon + producerTask: get-model-metadata + 
pipelinechannel--get-model-metadata-target_column: + taskOutputParameter: + outputParameterKey: target_column + producerTask: get-model-metadata + pipelinechannel--get-model-metadata-time_column: + taskOutputParameter: + outputParameterKey: time_column + producerTask: get-model-metadata + pipelinechannel--get-model-metadata-time_series_identifier_column: + taskOutputParameter: + outputParameterKey: time_series_identifier_column + producerTask: get-model-metadata + pipelinechannel--model_name: + componentInputParameter: pipelinechannel--model_name + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n target.*,\n STRUCT(prediction.time_series_adjusted_data\ + \ AS value)\n AS predicted_{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}},\n\ + \ prediction.* EXCEPT (\n {{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}},\n\ + \ time_series_timestamp,\n time_series_adjusted_data\n\ + \ ),\n FROM\n ML.EXPLAIN_FORECAST(\n \ + \ MODEL `{{$.inputs.parameters['pipelinechannel--model_name']}}`,\n\ + \ STRUCT({{$.inputs.parameters['pipelinechannel--get-model-metadata-forecast_horizon']}}\ + \ AS horizon)) AS prediction\n RIGHT JOIN `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\ + \ AS target\n ON\n CAST(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ + \ AS STRING)\n = CAST(prediction.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ + \ AS STRING)\n AND TIMESTAMP(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_column']}})\ + \ = prediction.time_series_timestamp\n WHERE target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}}\ + \ IS NULL\n " + taskInfo: + name: predictions-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset-2 + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' + table_id: + runtimeValue: + constant: predictions_{{$.pipeline_job_uuid}} + taskInfo: + name: build-job-configuration-query + get-first-valid: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-first-valid + dependentTasks: + - load-table-from-uri + inputs: + parameters: + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + pipelinechannel--load-table-from-uri-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: load-table-from-uri + values: + runtimeValue: + constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", + "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' + taskInfo: + name: get-first-valid + get-model-metadata: + cachingOptions: + enableCache: true + componentRef: + name: 
comp-get-model-metadata + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + model: + componentInputParameter: pipelinechannel--model_name + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-model-metadata + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + load-table-from-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-load-table-from-uri + dependentTasks: + - bigquery-create-dataset + - get-table-location + inputs: + parameters: + destination: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + source_format: + runtimeValue: + constant: CSV + source_uris: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: load-table-from-uri + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + runtimeValue: + constant: prediction_{{$.pipeline_job_uuid}} + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + source_model_uri: + componentInputParameter: pipelinechannel--model_name + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + comp-get-first-valid: + executorLabel: exec-get-first-valid + inputDefinitions: + parameters: + values: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-model-metadata: + executorLabel: exec-get-model-metadata + inputDefinitions: + parameters: + location: + parameterType: STRING + model: + 
parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + forecast_horizon: + parameterType: NUMBER_INTEGER + target_column: + parameterType: STRING + time_column: + parameterType: STRING + time_series_identifier_column: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-load-table-from-uri: + executorLabel: exec-load-table-from-uri + inputDefinitions: + parameters: + destination: + description: Table into which data is to be loaded. + parameterType: STRING + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + source_format: + defaultValue: CSV + description: 'The file format for the files being imported. Only CSV is + + supported.' + isOptional: true + parameterType: STRING + source_uris: + description: 'URIs of data files to be loaded; in format + + gs:///.' + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-dataset-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-get-first-valid: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_first_valid + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ + \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n for value in json.loads(values):\n if value:\n return value\n\ + \ raise ValueError('No valid values.')\n\n" + image: python:3.7-slim + exec-get-model-metadata: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_model_metadata + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_model_metadata(\n project: str,\n location: str,\n\ + \ model: str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('time_column',\ + \ str),\n ('time_series_identifier_column', str),\n ('target_column',\ + \ str),\n ('forecast_horizon', int),\n ],\n):\n \"\"\"Retrieves\ + \ training options for a BQML model.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n options\ + \ = client.get_model(model).training_runs[0].training_options\n return\ + \ collections.namedtuple(\n 'Outputs', [\n 'time_column',\n\ + \ 'time_series_identifier_column',\n 'target_column',\n\ + \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ + \ options.time_series_id_column,\n options.time_series_data_column,\n\ + \ options.horizon,\n )\n\n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-load-table-from-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_table_from_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ + \ source_uris: str,\n destination: str,\n source_format: str =\ + \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ + \ project: The GCP project.\n location: The GCP region.\n source_uris:\ + \ URIs of data files to be loaded; in format\n gs:///.\n\ + \ destination: Table into which data is to be loaded.\n source_format:\ + \ The file format for the files being imported. 
Only CSV is\n supported.\n\ + \n Returns:\n The destination table containing imported data.\n \"\"\ + \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ + \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ + \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ + \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ + \ destination=destination,\n project=project,\n location=location,\n\ + \ job_config=job_config).result()\n return destination\n\n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Forecasts using a BQML ARIMA_PLUS model. 
+ name: automl-tabular-bqml-arima-prediction +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_name: + componentInputParameter: model_name + pipelinechannel--project: + componentInputParameter: project + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, a resource will be created under a new dataset in the project.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + generate_explanation: + defaultValue: false + description: 'Generate explanation along with the batch prediction + + results. This will cause the batch prediction output to include + + explanations.' + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region for Vertex AI. + parameterType: STRING + model_name: + description: ARIMA_PLUS BQML model URI. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml new file mode 100644 index 0000000000..1d23bd2993 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -0,0 +1,5085 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-bqml-arima-train +# Description: Trains a BQML ARIMA_PLUS model. 
+# Inputs: +# bigquery_destination_uri: str [Default: ''] +# data_granularity_unit: str +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# encryption_spec_key_name: str [Default: ''] +# forecast_horizon: int +# location: str +# max_order: int [Default: 5.0] +# override_destination: bool [Default: False] +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: True] +# target_column: str +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# window_column: str [Default: ''] +# window_max_count: int [Default: -1.0] +# window_stride_length: int [Default: -1.0] +# Outputs: +# create-metrics-artifact-evaluation_metrics: system.Metrics +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-create-dataset-2: + executorLabel: exec-bigquery-create-dataset-2 + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-create-model-job: + executorLabel: exec-bigquery-create-model-job + inputDefinitions: + parameters: + job_configuration_query: + defaultValue: {} + description: 'A json formatted string describing the rest of the job configuration. + + For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: "The labels associated with this job. You can\nuse these to\ + \ organize and group your jobs. Label keys and values can\nbe no longer\ + \ than 63 characters, can only containlowercase letters,\nnumeric characters,\ + \ underscores and dashes. International characters\nare allowed. Label\ + \ values are optional. Label keys must start with a\nletter and each label\ + \ in the list must have a different key.\n Example: { \"name\": \"wrench\"\ + , \"mass\": \"1.3kg\", \"count\": \"3\" }." + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location of the job to create the BigQuery model. If not set, + default to + + `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run BigQuery model creation job. + parameterType: STRING + query: + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'Query parameters for standard SQL queries. 
+ + If query_parameters are both specified in here and in + + job_configuration_query, the value in here will override the other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.BQMLModel + schemaVersion: 0.0.1 + description: Describes the model which is created. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-list-rows: + executorLabel: exec-bigquery-list-rows + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A google.BQTable artifact. + parameters: + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-bigquery-list-rows-2: + executorLabel: exec-bigquery-list-rows-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A google.BQTable artifact. + parameters: + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. 
For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-2: + executorLabel: exec-bigquery-query-job-2 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' 
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-3: + executorLabel: exec-bigquery-query-job-3 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. 
+ + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-4: + executorLabel: exec-bigquery-query-job-4 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-bigquery-query-job-5: + executorLabel: exec-bigquery-query-job-5 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-2: + executorLabel: exec-build-job-configuration-query-2 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-3: + executorLabel: exec-build-job-configuration-query-3 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-4: + executorLabel: exec-build-job-configuration-query-4 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-5: + executorLabel: exec-build-job-configuration-query-5 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-6: + executorLabel: exec-build-job-configuration-query-6 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT 
+ comp-build-serialized-query-parameters: + executorLabel: exec-build-serialized-query-parameters + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-build-serialized-query-parameters-2: + executorLabel: exec-build-serialized-query-parameters-2 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-build-serialized-query-parameters-3: + executorLabel: exec-build-serialized-query-parameters-3 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-cond: + executorLabel: exec-cond + inputDefinitions: + parameters: + false_str: + parameterType: STRING + predicate: + parameterType: BOOLEAN + true_str: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: create-metrics-artifact + tasks: + bigquery-list-rows: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-list-rows + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + parameters: + location: + componentInputParameter: pipelinechannel--get-table-location-Output + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: bigquery-list-rows + bigquery-list-rows-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-list-rows-2 + dependentTasks: + - bigquery-query-job-4 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-4 + parameters: + location: + componentInputParameter: pipelinechannel--get-table-location-Output + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: bigquery-list-rows-2 + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - build-serialized-query-parameters + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: 
pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n time_series_windows AS (\n \ + \ SELECT\n FIRST_VALUE({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ OVER (horizon) AS start_time,\n COUNT(*) OVER (horizon)\ + \ AS count,\n FIRST_VALUE(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ OVER (horizon) AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n WINDOW horizon AS (\n \ + \ PARTITION BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}}\n\ + \ ROWS BETWEEN 0 PRECEDING AND @forecast_horizon FOLLOWING)\n\ + \ )\n SELECT\n start_time,\n TIMESTAMP(DATETIME_ADD(\n\ + \ DATETIME(start_time),\n INTERVAL @forecast_horizon\ + \ {{$.inputs.parameters['pipelinechannel--data_granularity_unit']}}\n\ + \ )) AS end_time,\n SUM(count) AS count,\n \ + \ ROW_NUMBER() OVER () AS window_number,\n FROM time_series_windows\n\ + \ WHERE window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\n\ + \ GROUP BY start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters + taskInfo: + name: create-eval-windows-table + bigquery-query-job-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.metrics`\ + \ (\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n MAE FLOAT64,\n MSE\ + \ FLOAT64,\n MAPE FLOAT64,\n prediction_count\ + \ INT64\n )\n " + taskInfo: + name: create-tmp-metrics-table + bigquery-query-job-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-3 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + 
pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.evaluated_examples`\ + \ (\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ STRING,\n {{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n {{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ FLOAT64,\n predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ STRUCT\n )\n " + taskInfo: + name: create-evaluated-examples-table + bigquery-query-job-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-4 + dependentTasks: + - build-job-configuration-query-5 + - for-loop-3 + - table-to-uri + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-5 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n SUM(MAE * prediction_count) /\ + \ SUM(prediction_count) AS MAE,\n SQRT(SUM(MSE * prediction_count)\ + \ / SUM(prediction_count)) AS RMSE,\n SUM(MAPE * prediction_count)\ + \ / SUM(prediction_count) AS MAPE,\n FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}}`\n\ + \ " + taskInfo: + name: create-backtest-table + bigquery-query-job-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-5 + dependentTasks: + - build-job-configuration-query-6 + - for-loop-3 + - table-to-uri-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-6 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--table-to-uri-2-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: SELECT * FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}` + taskInfo: + name: export-evaluated-examples-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: 
pipelinechannel--bigquery-create-dataset-project_id + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: windows + taskInfo: + name: build-job-configuration-query + build-job-configuration-query-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-5 + dependentTasks: + - cond + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--cond-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: cond + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: final_metrics + write_disposition: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' + taskInfo: + name: build-job-configuration-query-5 + build-job-configuration-query-6: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-6 + dependentTasks: + - cond + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--cond-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: cond + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' + table_id: + runtimeValue: + constant: evaluated_examples + write_disposition: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' + taskInfo: + name: build-job-configuration-query-6 + build-serialized-query-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters + inputs: + parameters: + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecast_horizon_off_by_one: + runtimeValue: + constant: 1.0 + splits: + runtimeValue: + constant: + - TEST + taskInfo: + name: build-serialized-query-parameters + cond: + cachingOptions: + enableCache: true + componentRef: + name: comp-cond + inputs: + parameters: + false_str: + runtimeValue: + constant: WRITE_EMPTY + predicate: + componentInputParameter: pipelinechannel--override_destination + true_str: + runtimeValue: + constant: WRITE_TRUNCATE + taskInfo: + name: cond + create-metrics-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-metrics-artifact + dependentTasks: + - bigquery-list-rows-2 + inputs: + parameters: + metrics_rows: + taskOutputParameter: + outputParameterKey: Output + producerTask: bigquery-list-rows-2 + taskInfo: + name: create-metrics-artifact + for-loop-3: + componentRef: + name: comp-for-loop-3 + dependentTasks: + - bigquery-list-rows + - table-to-uri + - table-to-uri-2 + inputs: + parameters: + 
pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--bigquery-list-rows-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bigquery-list-rows + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--get-table-location-Output: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--max_order: + componentInputParameter: pipelinechannel--max_order + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--table-to-uri-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri + pipelinechannel--table-to-uri-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri + pipelinechannel--table-to-uri-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + iteratorPolicy: + parallelismLimit: 50 + parameterIterator: + itemInput: pipelinechannel--bigquery-list-rows-Output-loop-item + items: + inputParameter: pipelinechannel--bigquery-list-rows-Output + taskInfo: + name: for-loop-3 + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-2 + taskInfo: + name: table-to-uri + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - bigquery-query-job-3 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-3 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-2-project_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-dataset_id: + parameterType: 
STRING + pipelinechannel--bigquery-create-dataset-project_id: + parameterType: STRING + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--get-fte-suffix-Output: + parameterType: STRING + pipelinechannel--get-table-location-Output: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--override_destination: + parameterType: BOOLEAN + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-create-metrics-artifact: + executorLabel: exec-create-metrics-artifact + inputDefinitions: + parameters: + metrics_rows: + parameterType: LIST + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: create-metrics-artifact-evaluation_metrics + producerSubtask: condition-2 + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-create-dataset-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-dataset-2 + dependentTasks: + - get-table-location + - maybe-replace-with-default + - validate-inputs + inputs: + parameters: + dataset: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + exists_ok: + runtimeValue: + constant: 1.0 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-export-dataset + bigquery-create-model-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-model-job + dependentTasks: + - bigquery-create-dataset-2 + - build-serialized-query-parameters-3 + - get-fte-suffix + - get-table-location + inputs: + parameters: + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + 
pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE MODEL `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.model_{{$.pipeline_job_uuid}}`\n\ + \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ + \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ + \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ + \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ + \ horizon = @forecast_horizon,\n auto_arima\ + \ = True,\n auto_arima_max_order = @max_order,\n \ + \ data_frequency = @data_granularity_unit,\n holiday_region\ + \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ + \ adjust_step_changes = True,\n decompose_time_series\ + \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ < @start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-3 + taskInfo: + name: create-serving-model + build-serialized-query-parameters-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters-3 + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + max_order: + componentInputParameter: pipelinechannel--max_order + splits: + runtimeValue: + constant: + - TRAIN + - VALIDATE + - TEST + taskInfo: + name: build-serialized-query-parameters-3 + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bigquery-create-dataset + - bigquery-create-dataset-2 + - get-fte-suffix + - get-table-location + inputs: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--forecast_horizon: + componentInputParameter: 
pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--get-table-location-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--max_order: + componentInputParameter: pipelinechannel--max_order + pipelinechannel--override_destination: + componentInputParameter: pipelinechannel--override_destination + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - bigquery-create-dataset-2 + inputs: + parameters: + autodetect_csv_schema: + runtimeValue: + constant: 1.0 + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + forecasting_apply_windowing: + runtimeValue: + constant: 0.0 + forecasting_context_window: + runtimeValue: + constant: 0.0 + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + runtimeValue: + constant: {} + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + 
componentInputParameter: pipelinechannel--validation_fraction + taskInfo: + name: feature-transform-engine + get-fte-suffix: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-fte-suffix + dependentTasks: + - bigquery-create-dataset-2 + - feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + fte_table: + runtimeValue: + constant: fte_time_series_output + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-fte-suffix + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + runtimeValue: + constant: export_{{$.pipeline_job_uuid}} + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + window_column: + componentInputParameter: pipelinechannel--window_column + window_max_count: + componentInputParameter: pipelinechannel--window_max_count + window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + 
pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--override_destination: + parameterType: BOOLEAN + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--window_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\
+ \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\
+ , \"feature3\"], } Note that the target and\n weight column may not\
+ \ be included as an auto transformation unless\n users are running\
+ \ forecasting."
+ isOptional: true
+ parameterType: STRUCT
+ tf_custom_transformation_definitions:
+ defaultValue: []
+ description: "List of\nTensorFlow-based custom transformation definitions.\
+ \ Custom,\nbring-your-own transform functions, where users can define\
+ \ and import\ntheir own transform function and use it with FTE's built-in\n\
+ transformations.\n Example: .. code-block:: python [ { \"transformation\"\
+ : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\
+ ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\
+ :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\
+ ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\
+ \ transform function together with FTE's built-in transformations:\
+ \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\
+ ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\
+ ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\
+ ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\
+ :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\
+ :\n [\"feature_1_multiplied_two\"] } ]"
+ isOptional: true
+ parameterType: LIST
+ tf_transform_execution_engine:
+ defaultValue: dataflow
+ description: 'Execution engine to perform
+
+ row-level TF transformations. Can be one of: "dataflow" (by default) or
+
+ "bigquery". Using "bigquery" as the execution engine is experimental and
+
+ is for allowlisted customers only. In addition, executing on "bigquery"
+
+ only supports auto transformations (i.e., specified by
+
+ tf_auto_transform_features) and will raise an error when
+
+ tf_custom_transformation_definitions or tf_transformations_path is set.'
+ isOptional: true
+ parameterType: STRING
+ tf_transformations_path:
+ defaultValue: ''
+ description: "Path to TensorFlow-based\ntransformation configuration. Path\
+ \ to a JSON file used to specify\nFTE's TF transformation configurations.\
+ \ In the following, we provide\nsome sample transform configurations\
+ \ to demonstrate FTE's capabilities.\nAll transformations on input columns\
+ \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\
+ \ of multiple transformations on a\nsingle column is also supported. For\
+ \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\
+ , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\
+ , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\
+ \ FTE's currently supported built-in\ntransformations:\n Datetime:\
+ \ Extracts datetime features from a column containing\n timestamp\
+ \ strings.\n Example: .. code-block:: python { \"transformation\"\
+ :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\
+ :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\
+ \ A list with a single column to\n perform the datetime\
+ \ transformation on.\n output_columns: Names of output\n\
+ \ columns, one for each datetime_features element.\n \
+ \ time_format: Datetime format string. Time format is\n \
+ \ a combination of Date + Time Delimiter (optional) + Time\n\
+ \ (optional) directives.
Valid date directives are as\n\
+ \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\
+ \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\
+ \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\
+ \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\
+ \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\
+ \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\
+ \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\
+ \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \
+ \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\
+ \ are as follows * 'T' * ' ' Valid time directives are\
+ \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\
+ \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\
+ \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \
+ \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\
+ \ List of datetime\n features to be extracted. Each entry\
+ \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\
+ \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\
+ \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\
+ \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\
+ \ Log: Performs the natural log on a numeric column.\n Example:\
+ \ .. code-block:: python { \"transformation\": \"Log\",\n \
+ \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
+ \ input_columns: A list with a single column to\n \
+ \ perform the log transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n ZScale:\
+ \ Performs Z-scale normalization on a numeric column.\n Example:\
+ \ .. code-block:: python { \"transformation\":\n \"ZScale\"\
+ , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \
+ \ input_columns: A list with a single column to\n \
+ \ perform the z-scale transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n Vocabulary:\
+ \ Converts strings to integers, where each unique string\n gets\
+ \ a unique integer representation.\n Example: .. code-block::\
+ \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\
+ : [\"feature_1\"] }\n Arguments:\n input_columns:\
+ \ A list with a single column to\n perform the vocabulary\
+ \ transformation on.\n output_columns: A list with a single\n\
+ \ output column name, corresponding to the output of our\n\
+ \ transformation.\n top_k: Number of the most\
+ \ frequent words\n in the vocabulary to use for generating\
+ \ dictionary\n lookup indices. If not specified, all words\
+ \ in the\n vocabulary will be used. Defaults to None.\n\
+ \ frequency_threshold: Limit the vocabulary\n \
+ \ only to words whose number of occurrences in the input\n \
+ \ exceeds frequency_threshold. If not specified, all words\n \
+ \ in the vocabulary will be included. If both top_k and\n\
+ \ frequency_threshold are specified, a word must satisfy\n\
+ \ both conditions to be included. Defaults to None.\n \
+ \ Categorical: Transforms categorical columns to integer columns.\n\
+ \ Example: ..
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\
+ \ and frequency_threshold are\n specified, a word must\
+ \ satisfy both conditions to be\n included. Defaults to\
+ \ None.\n separator: Separator to split input string\n \
+ \ into tokens. Defaults to ' '.\n missing_token:\
+ \ Missing token to use when\n no string is included. Defaults\
+ \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\
+ \ such that elements <\n min_value are assigned min_value, and\
+ \ elements > max_value are\n assigned max_value.\n Example:\
+ \ .. code-block:: python { \"transformation\": \"Clip\",\n \
+ \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\
+ col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\
+ \ input_columns: A list with a single column to\n \
+ \ perform the clip transformation on.\n output_columns:\
+ \ A list with a single\n output column name, corresponding\
+ \ to the output of our\n transformation.\n \
+ \ min_value: Number where all values below\n min_value\
+ \ are set to min_value. If no min_value is\n provided,\
+ \ min clipping will not occur. Defaults to None.\n max_value:\
+ \ Number where all values above\n max_value are set to\
+ \ max_value. If no max_value is\n provided, max clipping\
+ \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\
+ \ multi-hot encoding on a categorical\n array column.\n \
+ \ Example: .. code-block:: python { \"transformation\":\n \
+ \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\
+ \ of classes is determined by the largest number included in\n\
+ \ the input if it is numeric or the total number of unique\n\
+ \ values of the input if it is type str. If the input has\n\
+ \ type str and an element contains separator tokens, the input\n\
+ \ will be split at separator indices, and each element\
+ \ of\n the split list will be considered a separate class.\
+ \ For\n example,\n Input: .. code-block:: python\
+ \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\
+ \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\
+ ], # Example 3 ]\n Output (with default separator=\"\
+ \ \"): .. code-block:: python [\n [1, 1], # Example\
+ \ 0 [1, 1], # Example 1\n [1, 0], # Example\
+ \ 2 [0, 1], # Example 3 ]\n Arguments:\n \
+ \ input_columns: A list with a single column to\n perform\
+ \ the multi-hot-encoding on.\n output_columns: A list with\
+ \ a single\n output column name, corresponding to the output\
+ \ of our\n transformation.\n top_k: Number\
+ \ of the most frequent words\n in the vocabulary to use\
+ \ for generating dictionary\n lookup indices. If not specified,\
+ \ all words in the\n vocabulary will be used. Defaults\
+ \ to None.\n frequency_threshold: Limit the\n \
+ \ dictionary's vocabulary only to words whose number of\n \
+ \ occurrences in the input exceeds frequency_threshold. If\n \
+ \ not specified, all words in the vocabulary will be\n \
+ \ included. If both top_k and frequency_threshold are\n \
+ \ specified, a word must satisfy both conditions to be\n\
+ \ included. Defaults to None.\n separator:\
+ \ Separator to split input string\n into tokens. Defaults\
+ \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\
+ \ column.\n Example: ..
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-for-loop-3: + dag: + tasks: + build-job-configuration-query-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-2 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + taskInfo: + name: build-job-configuration-query-2 + build-job-configuration-query-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-3 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-dataset_id'']}}' + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + pipelinechannel--table-to-uri-dataset_id: + componentInputParameter: pipelinechannel--table-to-uri-dataset_id + pipelinechannel--table-to-uri-project_id: + componentInputParameter: pipelinechannel--table-to-uri-project_id + pipelinechannel--table-to-uri-table_id: + componentInputParameter: pipelinechannel--table-to-uri-table_id + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_APPEND + taskInfo: + name: build-job-configuration-query-3 + build-job-configuration-query-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-4 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + pipelinechannel--table-to-uri-2-dataset_id: + componentInputParameter: pipelinechannel--table-to-uri-2-dataset_id + pipelinechannel--table-to-uri-2-project_id: + componentInputParameter: pipelinechannel--table-to-uri-2-project_id + pipelinechannel--table-to-uri-2-table_id: + componentInputParameter: pipelinechannel--table-to-uri-2-table_id + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + project_id: + 
runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_APPEND + taskInfo: + name: build-job-configuration-query-4 + build-serialized-query-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters-2 + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + max_order: + componentInputParameter: pipelinechannel--max_order + splits: + runtimeValue: + constant: + - TRAIN + - VALIDATE + - TEST + window: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + taskInfo: + name: build-serialized-query-parameters-2 + get-value: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-value + inputs: + parameters: + d: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + key: + runtimeValue: + constant: window_number + taskInfo: + name: get_window_number + get-window-query-priority: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-window-query-priority + inputs: + parameters: + max_interactive: + runtimeValue: + constant: 50.0 + window: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + taskInfo: + name: get-window-query-priority + query-with-retry: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry + dependentTasks: + - build-job-configuration-query-2 + - build-serialized-query-parameters-2 + - get-value + inputs: + parameters: + destination_uri: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.model_{{$.inputs.parameters[''pipelinechannel--get-value-Output'']}}' + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-2 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--get-value-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-value + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE MODEL 
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.model_{{$.inputs.parameters['pipelinechannel--get-value-Output']}}`\n\ + \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ + \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ + \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ + \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ + \ horizon = @forecast_horizon,\n auto_arima\ + \ = True,\n auto_arima_max_order = @max_order,\n \ + \ data_frequency = @data_granularity_unit,\n holiday_region\ + \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ + \ adjust_step_changes = True,\n decompose_time_series\ + \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ < @start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: create-eval-model + query-with-retry-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry-2 + dependentTasks: + - build-job-configuration-query-3 + - build-serialized-query-parameters-2 + - query-with-retry + inputs: + parameters: + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-3 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--query-with-retry-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: query-with-retry + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ AVG(mean_absolute_error) AS MAE,\n AVG(mean_squared_error)\ + \ AS MSE,\n AVG(mean_absolute_percentage_error) AS MAPE,\n\ + \ @prediction_count AS prediction_count,\n FROM ML.EVALUATE(\n\ + \ MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ + \ TABLE 
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`,\n\ + \ STRUCT(True AS perform_aggregation, {{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ + \ as horizon))\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: append-evaluation-metrics + query-with-retry-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry-3 + dependentTasks: + - build-job-configuration-query-4 + - build-serialized-query-parameters-2 + - query-with-retry + inputs: + parameters: + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-4 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--query-with-retry-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: query-with-retry + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n CAST(actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING)\n AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ CAST(actual.{{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ AS FLOAT64) AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ STRUCT(pred.forecast_value AS value) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM\n ML.FORECAST(\n MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ + \ STRUCT({{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ + \ AS horizon)) pred\n JOIN `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\ + \ actual\n ON\n pred.forecast_timestamp = TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\n\ + \ AND pred.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ = actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + query_parameters: + taskOutputParameter: + 
outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: append-evaluated-examples + inputDefinitions: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-2-project_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-project_id: + parameterType: STRING + pipelinechannel--bigquery-list-rows-Output: + parameterType: LIST + pipelinechannel--bigquery-list-rows-Output-loop-item: + parameterType: STRUCT + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--get-fte-suffix-Output: + parameterType: STRING + pipelinechannel--get-table-location-Output: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--table-to-uri-2-dataset_id: + parameterType: STRING + pipelinechannel--table-to-uri-2-project_id: + parameterType: STRING + pipelinechannel--table-to-uri-2-table_id: + parameterType: STRING + pipelinechannel--table-to-uri-dataset_id: + parameterType: STRING + pipelinechannel--table-to-uri-project_id: + parameterType: STRING + pipelinechannel--table-to-uri-table_id: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + comp-get-fte-suffix: + executorLabel: exec-get-fte-suffix + inputDefinitions: + parameters: + bigquery_staging_full_dataset_id: + parameterType: STRING + fte_table: + parameterType: STRING + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-value: + executorLabel: exec-get-value + inputDefinitions: + parameters: + d: + parameterType: STRUCT + key: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-window-query-priority: + executorLabel: exec-get-window-query-priority + inputDefinitions: + parameters: + max_interactive: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + window: + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry: + executorLabel: exec-query-with-retry + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. 
+ isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry-2: + executorLabel: exec-query-with-retry-2 + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. + isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry-3: + executorLabel: exec-query-with-retry-3 + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. + isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-dataset-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-model-job: + container: + args: + - --type + - BigqueryCreateModelJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.create_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-list-rows: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_list_rows + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ + \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ + \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ + \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ + \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ + \ Rows are keyed by column, and\n all values are stored as strings.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n metadata\ + \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ + \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ + \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ + \ return result\n\n" + image: python:3.7-slim + exec-bigquery-list-rows-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_list_rows + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ + \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ + \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ + \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ + \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ + \ Rows are keyed by column, and\n all values are stored as strings.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n metadata\ + \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ + \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ + \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ + \ return result\n\n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-2: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - 
--executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-3: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-4: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-5: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-4: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-5: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-6: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. 
If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. '\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-cond: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - cond + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ + \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ + \ return true_str if predicate else false_str\n\n" + image: python:3.7-slim + exec-create-metrics-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - create_metrics_artifact + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_metrics_artifact(\n metrics_rows: List[Dict[str, str]],\n\ + \ evaluation_metrics: dsl.Output[dsl.Metrics],\n) -> None:\n \"\"\"\ + Converts the rows of a metrics table into an Artifact.\"\"\"\n # Use the\ + \ Vertex Eval component's Metrics metadata naming from\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/metadata/schema/google/artifact_schema.py?cl=467006447&l=344\n\ + \ metric_name_map = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE':\ + \ 'rootMeanSquaredError',\n 'MAPE': 'meanAbsolutePercentageError',\n\ + \ }\n metrics = {metric_name_map[k]: v for k, v in dict(metrics_rows[0]).items()}\n\ + \ evaluation_metrics.metadata = metrics\n\n" + image: python:3.7-slim + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", 
"{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": 
["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-get-fte-suffix: + container: + args: + - 
--executor_input + - '{{$}}' + - --function_to_execute + - get_fte_suffix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ + \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ + \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n for\ + \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ + \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ + \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ + \n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-get-value: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_value + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ + \n" + image: python:3.7-slim + exec-get-window-query-priority: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_window_query_priority + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_window_query_priority(\n window: Dict[str, str],\n \ + \ max_interactive: int = 100,\n) -> str:\n \"\"\"Returns a query priority\ + \ depending on the window number.\"\"\"\n if int(window['window_number'])\ + \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ + \n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-query-with-retry: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-query-with-retry-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-query-with-retry-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Trains a BQML ARIMA_PLUS model. 
+ name: automl-tabular-bqml-arima-train +root: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: create-metrics-artifact-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_granularity_unit: + componentInputParameter: data_granularity_unit + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_order: + componentInputParameter: max_order + pipelinechannel--override_destination: + componentInputParameter: override_destination + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--window_column: + componentInputParameter: window_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, resources will be created under a new dataset in the project. + + Unlike in Vertex Forecasting, all resources will be given hardcoded names + + under this dataset, and the model artifact will also be exported here.' + isOptional: true + parameterType: STRING + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' 
+ parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_order: + defaultValue: 5.0 + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + override_destination: + defaultValue: false + description: 'Whether to overwrite the metrics and evaluated + + examples tables if they already exist. If this is False and the tables + + exist, this pipeline will fail.' + isOptional: true + parameterType: BOOLEAN + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_evaluation: + defaultValue: true + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + defaultValue: '' + description: 'Name of the column that should be used to filter input rows. + + The column should contain either booleans or string booleans; if the value + + of the row is True, generate a sliding window from that row.' + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: -1.0 + description: 'Number of rows that should be used to generate input + + examples. If the total row count is larger than this number, the input + + data will be randomly sampled to hit the count.' + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + defaultValue: -1.0 + description: 'Step length used to generate input examples. Every + + window_stride_length rows will be used to generate a sliding window.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml new file mode 100644 index 0000000000..6cdb273900 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -0,0 +1,2150 @@ +# PIPELINE DEFINITION +# Name: prophet-predict +# Description: Creates a batch prediction using a Prophet model. +# Inputs: +# bigquery_destination_uri: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# encryption_spec_key_name: str [Default: ''] +# location: str +# machine_type: str [Default: 'n1-standard-2'] +# max_num_workers: int [Default: 10.0] +# model_name: str +# project: str +# target_column: str +# time_column: str +# time_series_identifier_column: str +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. 
For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-2: + executorLabel: exec-bigquery-query-job-2 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' 
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-2: + executorLabel: exec-build-job-configuration-query-2 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - get-first-valid + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--get-first-valid-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-first-valid + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: 
pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n base_data AS (\n SELECT\ + \ * FROM `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\n\ + \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ \n \n \n FROM base_data\n GROUP\ + \ BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + taskInfo: + name: remove-feature-columns + bigquery-query-job-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-2 + dependentTasks: + - build-job-configuration-query-2 + - get-table-location-2 + - table-to-uri-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-2 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location-2 + pipelinechannel--table-to-uri-2-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n predictions AS (\n SELECT\n\ + \ {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ JSON_QUERY_ARRAY(prediction, '$.{{$.inputs.parameters['pipelinechannel--time_column']}}')\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ JSON_EXTRACT(\n prediction,\n \ + \ '$.predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}'\n\ + \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ JSON_QUERY_ARRAY(\n prediction,\n \ + \ '$.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}'\n\ + \ ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}`\n\ + \ )\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ + \"',\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\n\ + \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ + \"',\n {{$.inputs.parameters['pipelinechannel--time_column']}}[SAFE_OFFSET(index)]\n\ + \ ) AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ STRUCT(\n CAST(predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}[SAFE_OFFSET(index)]\ + \ AS FLOAT64)\n AS value\n ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\n\ + \ FROM predictions\n CROSS JOIN\n UNNEST(GENERATE_ARRAY(0,\ + \ ARRAY_LENGTH({{$.inputs.parameters['pipelinechannel--time_column']}})\ 
+ \ - 1)) AS index\n " + taskInfo: + name: create-predictions-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: data + write_disposition: + runtimeValue: + constant: WRITE_EMPTY + taskInfo: + name: build-job-configuration-query + build-job-configuration-query-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-2 + dependentTasks: + - table-to-uri-2 + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' + pipelinechannel--table-to-uri-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri-2 + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_TRUNCATE + taskInfo: + name: build-job-configuration-query-2 + get-first-valid: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-first-valid + dependentTasks: + - load-table-from-uri + inputs: + parameters: + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + pipelinechannel--load-table-from-uri-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: load-table-from-uri + values: + runtimeValue: + constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", + "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' + taskInfo: + name: get-first-valid + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + get-table-location-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location-2 + dependentTasks: + - table-to-uri-2 + inputs: + parameters: + project: + componentInputParameter: pipelinechannel--project + table: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + taskInfo: + name: get-table-location-2 + load-table-from-uri: + cachingOptions: + enableCache: true + componentRef: + name: 
comp-load-table-from-uri + dependentTasks: + - bigquery-create-dataset + - get-table-location + inputs: + parameters: + destination: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + source_format: + runtimeValue: + constant: CSV + source_uris: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: load-table-from-uri + make-vertex-model-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-make-vertex-model-artifact + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + model_resource_name: + componentInputParameter: pipelinechannel--model_name + taskInfo: + name: make-vertex-model-artifact + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + componentInputParameter: pipelinechannel--project + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + dependentTasks: + - make-vertex-model-artifact + - maybe-replace-with-default + - table-to-uri + inputs: + artifacts: + model: + taskOutputArtifact: + outputArtifactKey: vertex_model + producerTask: make-vertex-model-artifact + parameters: + bigquery_destination_output_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--maybe-replace-with-default-Output']}} + bigquery_source_input_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--machine_type + max_replica_count: + componentInputParameter: pipelinechannel--max_num_workers + pipelinechannel--maybe-replace-with-default-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-batch-predict + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + taskInfo: + name: table-to-uri + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: 
+ taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + taskInfo: + name: table-to-uri-2 + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--machine_type: + parameterType: STRING + pipelinechannel--max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--model_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + comp-get-first-valid: + executorLabel: exec-get-first-valid + inputDefinitions: + parameters: + values: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location-2: + executorLabel: exec-get-table-location-2 + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-load-table-from-uri: + executorLabel: exec-load-table-from-uri + inputDefinitions: + parameters: + destination: + description: Table into which data is to be loaded. + parameterType: STRING + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + source_format: + defaultValue: CSV + description: 'The file format for the files being imported. Only CSV is + + supported.' + isOptional: true + parameterType: STRING + source_uris: + description: 'URIs of data files to be loaded; in format + + gs:///.' 
+ parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-make-vertex-model-artifact: + executorLabel: exec-make-vertex-model-artifact + inputDefinitions: + parameters: + location: + parameterType: STRING + model_resource_name: + parameterType: STRING + outputDefinitions: + artifacts: + vertex_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-2: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-get-first-valid: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_first_valid + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ + \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n for value in json.loads(values):\n if value:\n return value\n\ + \ raise ValueError('No valid values.')\n\n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-get-table-location-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-load-table-from-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_table_from_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ + \ source_uris: str,\n destination: str,\n source_format: str =\ + \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ + \ project: The GCP project.\n location: The GCP region.\n source_uris:\ + \ URIs of data files to be loaded; in format\n gs:///.\n\ + \ destination: Table into which data is to be loaded.\n source_format:\ + \ The file format for the files being imported. Only CSV is\n supported.\n\ + \n Returns:\n The destination table containing imported data.\n \"\"\ + \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ + \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ + \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ + \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ + \ destination=destination,\n project=project,\n location=location,\n\ + \ job_config=job_config).result()\n return destination\n\n" + image: python:3.7-slim + exec-make-vertex-model-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - make_vertex_model_artifact + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef make_vertex_model_artifact(\n location: str,\n model_resource_name:\ + \ str,\n vertex_model: dsl.Output[dsl.Artifact],\n) -> None:\n \"\"\"\ + Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ + \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ + \ f'/v1/{model_resource_name}')\n\n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - 
'{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n 
f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. 
Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Creates a batch prediction using a Prophet model. + name: prophet-predict +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--machine_type: + componentInputParameter: machine_type + pipelinechannel--max_num_workers: + componentInputParameter: max_num_workers + pipelinechannel--model_name: + componentInputParameter: model_name + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, resources will be created under a new dataset in the project. + + Unlike in Vertex Forecasting, all resources will be given hardcoded names + + under this dataset, and the model artifact will also be exported here.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' 
+ isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + location: + description: The GCP region for Vertex AI. + parameterType: STRING + machine_type: + defaultValue: n1-standard-2 + description: The machine type used for batch prediction. + isOptional: true + parameterType: STRING + max_num_workers: + defaultValue: 10.0 + description: The max number of workers used for batch prediction. + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + description: 'The name of the Model resource, in a form of + + projects/{project}/locations/{location}/models/{model}.' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py new file mode 100644 index 0000000000..7c3bb6111b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -0,0 +1,211 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Prophet trainer component spec.""" + +from typing import Optional +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Output + + +# pylint: disable=g-doc-args,unused-argument +@dsl.container_component +def prophet_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + time_column: str, + time_series_identifier_column: str, + forecast_horizon: int, + window_column: str, + data_granularity_unit: str, + predefined_split_column: str, + source_bigquery_uri: str, + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], + evaluated_examples_directory: Output[Artifact], + optimization_objective: Optional[str] = 'rmse', + max_num_trials: Optional[int] = 6, + encryption_spec_key_name: Optional[str] = '', + dataflow_max_num_workers: Optional[int] = 10, + dataflow_machine_type: Optional[str] = 'n1-standard-1', + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_service_account: Optional[str] = '', + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, +): + # fmt: off + """Trains and tunes one Prophet model per time series using Dataflow. 
+ + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + root_dir: The Cloud Storage location to store the output. + time_column: Name of the column that identifies time order in the + time series. + time_series_identifier_column: Name of the column that identifies + the time series. + target_column: Name of the column that the model is to predict + values for. + forecast_horizon: The number of time periods into the future for + which forecasts will be created. Future periods start after the latest + timestamp for each time series. + optimization_objective: Optimization objective for tuning. Supported + metrics come from Prophet's performance_metrics function. These are mse, + rmse, mae, mape, mdape, smape, and coverage. + data_granularity_unit: String representing the units of time for the + time column. + predefined_split_column: The predefined_split column name. + source_bigquery_uri: The BigQuery table path, in the format + bq://bq_project.bq_dataset.bq_table. + window_column: Name of the column that should be used to filter + input rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding window + from that row. + max_num_trials: Maximum number of tuning trials to perform + per time series. There are up to 100 possible combinations to explore + for each time series. Recommended values to try are 3, 6, and 24. + encryption_spec_key_name: Customer-managed encryption key. + dataflow_machine_type: The Dataflow machine type used for + training. + dataflow_max_num_workers: The max number of Dataflow + workers used for training. + dataflow_disk_size_gb: Dataflow worker's disk size in GB + during training. + dataflow_service_account: Custom service account to run + Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name; when empty, the default subnetwork will be used. + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + unmanaged_container_model: The UnmanagedContainerModel artifact.
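+
+  Example: a minimal usage sketch showing how this container component could
+    be wired into a KFP pipeline. All argument values and the pipeline name
+    below are illustrative assumptions, not defaults of this component; the
+    output artifacts (gcp_resources, unmanaged_container_model,
+    evaluated_examples_directory) are supplied automatically by KFP and are
+    not passed by the caller.
+
+      @dsl.pipeline(name='prophet-trainer-example')
+      def example_pipeline():
+        # Placeholder project, bucket, table, and column names for illustration.
+        prophet_trainer(
+            project='example-project',
+            location='us-central1',
+            root_dir='gs://example-bucket/root',
+            target_column='sales',
+            time_column='date',
+            time_series_identifier_column='store_id',
+            forecast_horizon=30,
+            window_column='window__example',
+            data_granularity_unit='day',
+            predefined_split_column='split__example',
+            source_bigquery_uri='bq://example-project.example_dataset.example_table',
+        )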
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + '{"display_name": ' + + f'"prophet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}", ', + '"encryption_spec": {"kms_key_name":"', + encryption_spec_key_name, + '"}, ', + '"job_spec": {"worker_pool_specs": [{"replica_count":"1", ', + '"machine_spec": {"machine_type": "n1-standard-4"}, ', + ( + '"container_spec":' + ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", ' + ), + '"args": ["prophet_trainer", "', + f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "', + ( + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", "' + ), + ( + '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325", "' + ), + '--artifacts_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/model/", "', + '--evaluated_examples_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/eval/", "', + '--region=', + location, + '", "', + '--source_bigquery_uri=', + source_bigquery_uri, + '", "', + '--target_column=', + target_column, + '", "', + '--time_column=', + time_column, + '", "', + '--time_series_identifier_column=', + time_series_identifier_column, + '", "', + '--forecast_horizon=', + forecast_horizon, + '", "', + '--window_column=', + window_column, + '", "', + '--optimization_objective=', + optimization_objective, + '", "', + '--data_granularity_unit=', + data_granularity_unit, + '", "', + '--predefined_split_column=', + predefined_split_column, + '", "', + '--max_num_trials=', + max_num_trials, + '", "', + '--dataflow_project=', + project, + '", "', + '--dataflow_max_num_workers=', + dataflow_max_num_workers, + '", "', + '--dataflow_machine_type=', + dataflow_machine_type, + '", "', + '--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "', + '--dataflow_service_account=', + dataflow_service_account, + '", "', + '--dataflow_subnetwork=', + dataflow_subnetwork, + '", "', + '--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "', + '--gcp_resources_path=', + gcp_resources, + '", "', + '--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml new file mode 100644 index 0000000000..2fadb6830e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -0,0 +1,2958 @@ +# PIPELINE DEFINITION +# Name: prophet-train +# Description: Trains one Prophet model per time series. 
+# Inputs: +# data_granularity_unit: str +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluation_dataflow_disk_size_gb: int [Default: 40.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-1'] +# evaluation_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int +# location: str +# max_num_trials: int [Default: 6.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: True] +# target_column: str +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# trainer_dataflow_disk_size_gb: int [Default: 40.0] +# trainer_dataflow_machine_type: str [Default: 'n1-standard-1'] +# trainer_dataflow_max_num_workers: int [Default: 10.0] +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# window_column: str [Default: ''] +# window_max_count: int [Default: -1.0] +# window_stride_length: int [Default: -1.0] +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. 
If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-condition-2: + dag: + tasks: + model-evaluation-regression: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-regression + inputs: + artifacts: + predictions_gcs_source: + componentInputArtifact: pipelinechannel--prophet-trainer-evaluated_examples_directory + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + runtimeValue: + constant: prediction.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}} + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: 
pipelinechannel--project + target_field_name: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: model-evaluation-regression + inputDefinitions: + artifacts: + pipelinechannel--prophet-trainer-evaluated_examples_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - bigquery-create-dataset + - build-job-configuration-query + - get-fte-suffix + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n base_data AS (\n SELECT\ + \ * FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ 
ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ ARRAY_AGG({{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ ARRAY_AGG(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ ARRAY_AGG(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ FROM base_data\n GROUP BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + taskInfo: + name: aggregate-by-time-series-id + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: data + write_disposition: + runtimeValue: + constant: WRITE_EMPTY + taskInfo: + name: build-job-configuration-query + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - prophet-trainer + inputs: + artifacts: + pipelinechannel--prophet-trainer-evaluated_examples_directory: + taskOutputArtifact: + outputArtifactKey: evaluated_examples_directory + producerTask: prophet-trainer + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + feature-transform-engine: 
+ cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + autodetect_csv_schema: + runtimeValue: + constant: 1.0 + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + forecasting_apply_windowing: + runtimeValue: + constant: 0.0 + forecasting_context_window: + runtimeValue: + constant: 0.0 + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + runtimeValue: + constant: {} + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + taskInfo: + name: feature-transform-engine + get-fte-suffix: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-fte-suffix + dependentTasks: + - bigquery-create-dataset + - feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + fte_table: + runtimeValue: + constant: fte_time_series_output + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-fte-suffix + get-table-location: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - prophet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: prophet-trainer + parameters: + description: + runtimeValue: + constant: Prophet model. + display_name: + runtimeValue: + constant: prophet_{{$.pipeline_job_uuid}} + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + prophet-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-prophet-trainer + dependentTasks: + - get-fte-suffix + - table-to-uri + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--trainer_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--trainer_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--trainer_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + location: + componentInputParameter: pipelinechannel--location + max_num_trials: + componentInputParameter: pipelinechannel--max_num_trials + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + predefined_split_column: + runtimeValue: + constant: split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + source_bigquery_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} + target_column: + componentInputParameter: pipelinechannel--target_column + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + window_column: + runtimeValue: + constant: window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} + taskInfo: + name: prophet-trainer + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + taskInfo: + name: table-to-uri + validate-inputs: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + window_column: + componentInputParameter: pipelinechannel--window_column + window_max_count: + componentInputParameter: pipelinechannel--window_max_count + window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_num_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--trainer_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--trainer_dataflow_machine_type: + parameterType: STRING + pipelinechannel--trainer_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--window_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_stride_length: 
+ parameterType: NUMBER_INTEGER + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. 
Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
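The transformation inputs documented above (tf_auto_transform_features, tf_custom_transformation_definitions, tf_transformations_path) are ordinary JSON values supplied by the caller. A minimal sketch of how such a configuration might be assembled before invoking the component, using only the placeholder feature names and transformation shapes quoted in the descriptions above (the file path is hypothetical):

import json

# Auto/type-resolved transformations, mirroring the example shape in the
# tf_auto_transform_features description; feature names are placeholders.
tf_auto_transform_features = {
    'auto': ['feature1'],
    'categorical': ['feature2', 'feature3'],
}

# Explicit row-level transformations in the list format accepted via
# tf_transformations_path; written to a local JSON file here for illustration,
# it would normally be uploaded to GCS and the gs:// URI passed instead.
tf_transformations = [
    {'transformation': 'ZScale', 'input_columns': ['feature_1']},
    {'transformation': 'Vocabulary', 'input_columns': ['feature_2'], 'top_k': 10},
]

with open('transform_config.json', 'w') as f:
    json.dump(tf_transformations, f, indent=2)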
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-fte-suffix: + executorLabel: exec-get-fte-suffix + inputDefinitions: + parameters: + bigquery_staging_full_dataset_id: + parameterType: STRING + fte_table: + parameterType: STRING + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-evaluation-regression: + executorLabel: exec-model-evaluation-regression + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The managed Vertex Model used for + + predictions job, if using Vertex batch prediction. Must share the same + + location as the provided input argument `location`.' + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*". For explanation results, the files + + should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + ground_truth_bigquery_source: + defaultValue: '' + description: 'Required for custom tabular. + + The BigQuery table uri representing where the ground truth is located. + + Used to provide ground truth for each prediction instance when they are + + not part of the batch prediction jobs prediction instance.' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + description: 'Required for custom tabular and non + + tabular data. The file format for the ground truth files. `jsonl`, + + `csv`, and `bigquery` are the allowed formats. If not set, defaulted to + + `jsonl`.' + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + description: 'Required for custom + + tabular and non tabular data. The GCS uris representing where the ground + + truth is located. Used to provide ground truth for each prediction + + instance when they are not part of the batch prediction jobs prediction + + instance.' + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + description: 'Location for running the evaluation. If not set, + + defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + description: 'The column name of the field + + containing batch prediction scores. Formatted to be able to find nested + + columns, delimited by `.`. If not set, defaulted to `prediction.scores` + + for classification.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run evaluation container. + parameterType: STRING + target_field_name: + description: 'The full name path of the features target field + + in the predictions file. Formatted to be able to find nested columns, + + delimited by `.`. Alternatively referred to as the ground truth (or + + ground_truth_column) field.' 
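Several of the evaluation inputs above (prediction_score_column, target_field_name) refer to nested prediction columns delimited by `.`. The helper below is not the component's implementation, only an illustrative sketch of that dotted-path convention applied to a hypothetical JSONL prediction row:

from typing import Any

def resolve_dotted_field(record: dict, path: str) -> Any:
    """Walks a nested dict using a '.'-delimited path, e.g. 'prediction.value'."""
    value: Any = record
    for key in path.split('.'):
        value = value[key]
    return value

# Hypothetical prediction row, only to show the convention.
row = {'instance': {'sales': 12.0}, 'prediction': {'value': 11.4}}
print(resolve_dotted_field(row, 'prediction.value'))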
+ parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'google.ClassificationMetrics representing the classification + + evaluation metrics in GCS.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' 
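The unmanaged_container_model description above already sketches how this artifact is usually produced with an importer. A hedged KFP-SDK sketch along those lines follows; the artifact URI and prediction image are copied from the example quoted in that description, while the module paths, ModelUploadOp symbol, and display name are assumptions for illustration rather than verified imports:

from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
# Assumed location of the upload op in the GCPC SDK.
from google_cloud_pipeline_components.v1.model import ModelUploadOp

@dsl.pipeline(name='model-upload-example')
def upload_pipeline(project: str, location: str = 'us-central1'):
    # Import a model directory as an UnmanagedContainerModel artifact, as in
    # the example embedded in the component description above.
    importer = dsl.importer(
        artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            'containerSpec': {
                'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
            }
        },
    )
    # Upload the imported model; 'prophet-model' is a placeholder display name.
    ModelUploadOp(
        project=project,
        location=location,
        display_name='prophet-model',
        unmanaged_container_model=importer.outputs['artifact'],
    )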
+ isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-prophet-trainer: + executorLabel: exec-prophet-trainer + inputDefinitions: + parameters: + data_granularity_unit: + description: 'String representing the units of time for the + + time column.' + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB + + during training.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-1 + description: 'The dataflow machine type used for + + training.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow + + workers used for training.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used.' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for + + which forecasts will be created. Future periods start after the latest + + timestamp for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_num_trials: + defaultValue: 6.0 + description: 'Maximum number of tuning trials to perform + + per time series. There are up to 100 possible combinations to explore + + for each time series. Recommended values to try are 3, 6, and 24.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + defaultValue: rmse + description: 'Optimization objective for tuning. Supported + + metrics come from Prophet''s performance_metrics function. These are mse, + + rmse, mae, mape, mdape, smape, and coverage.' + isOptional: true + parameterType: STRING + predefined_split_column: + description: 'The predefined_split column name. A string + + that represents a list of comma separated CSV filenames.' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + source_bigquery_uri: + description: 'The BigQuery table path of format + + bq (str)://bq_project.bq_dataset.bq_table' + parameterType: STRING + target_column: + description: 'Name of the column that the model is to predict + + values for.' + parameterType: STRING + time_column: + description: 'Name of the column that identifies time order in the + + time series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies + + the time series.' + parameterType: STRING + window_column: + description: 'Name of the column that should be used to filter + + input rows. The column should contain either booleans or string + + booleans; if the value of the row is True, generate a sliding window + + from that row.' + parameterType: STRING + outputDefinitions: + artifacts: + evaluated_examples_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", 
"{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": 
["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + 
"{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-get-fte-suffix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_fte_suffix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ + \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ + \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n for\ + \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ + \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ + \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ + \n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-model-evaluation-regression: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - regression + - --target_field_name + - '{"Concat": ["instance.", "{{$.inputs.parameters[''target_field_name'']}}"]}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-regression-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + 
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-prophet-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"prophet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", + ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": + {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325\", + ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", + \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325\", + \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325\", + \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", + \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", + "\", \"", "--source_bigquery_uri=", "{{$.inputs.parameters[''source_bigquery_uri'']}}", + "\", \"", "--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"", "--time_column=", 
"{{$.inputs.parameters[''time_column'']}}", + "\", \"", "--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}", + "\", \"", "--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}", + "\", \"", "--window_column=", "{{$.inputs.parameters[''window_column'']}}", + "\", \"", "--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}", + "\", \"", "--data_granularity_unit=", "{{$.inputs.parameters[''data_granularity_unit'']}}", + "\", \"", "--predefined_split_column=", "{{$.inputs.parameters[''predefined_split_column'']}}", + "\", \"", "--max_num_trials=", "{{$.inputs.parameters[''max_num_trials'']}}", + "\", \"", "--dataflow_project=", "{{$.inputs.parameters[''project'']}}", + "\", \"", "--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"", "--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"", "--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"", "--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"", "--dataflow_subnetwork=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"", "--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"", "--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"", "--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Trains one Prophet model per time series. 
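+# Note on the placeholder syntax used in the executor args above (an
+# illustrative aside, not part of the compiled spec): each JSON-encoded
+# placeholder is resolved when the container command line is built at run
+# time. Roughly:
+#   {"Concat": ["--project=", "{{$.inputs.parameters['project']}}"]}
+#     -> a single argument such as --project=my-project
+#   {"IfPresent": {"InputName": "model_type", "Then": {"Concat": [...]}}}
+#     -> the "Then" value is emitted only when the optional input is
+#        supplied; otherwise the argument is dropped entirely.
+# The concrete value "my-project" above is a hypothetical example.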
+ name: prophet-train +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--data_granularity_unit: + componentInputParameter: data_granularity_unit + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_num_trials: + componentInputParameter: max_num_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--trainer_dataflow_disk_size_gb: + componentInputParameter: trainer_dataflow_disk_size_gb + pipelinechannel--trainer_dataflow_machine_type: + componentInputParameter: trainer_dataflow_machine_type + pipelinechannel--trainer_dataflow_max_num_workers: + componentInputParameter: trainer_dataflow_max_num_workers + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--window_column: + componentInputParameter: window_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'String representing the units of time for the time + + column.' 
+ parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used.' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB during + + evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-1 + description: 'The dataflow machine type used for + + evaluation.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow workers used + + for evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_num_trials: + defaultValue: 6.0 + description: 'Maximum number of tuning trials to perform per time series. + + There are up to 100 possible combinations to explore for each time series. + + Recommended values to try are 3, 6, and 24.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: Optimization objective for the model. + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_evaluation: + defaultValue: true + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + trainer_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB during + + training.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + trainer_dataflow_machine_type: + defaultValue: n1-standard-1 + description: The dataflow machine type used for training. + isOptional: true + parameterType: STRING + trainer_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow workers used + + for training.' + isOptional: true + parameterType: NUMBER_INTEGER + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + defaultValue: '' + description: 'Name of the column that should be used to filter input rows. + + The column should contain either booleans or string booleans; if the value + + of the row is True, generate a sliding window from that row.' + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: -1.0 + description: 'Number of rows that should be used to generate input + + examples. If the total row count is larger than this number, the input + + data will be randomly sampled to hit the count.' + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + defaultValue: -1.0 + description: 'Step length used to generate input examples. Every + + window_stride_length rows will be used to generate a sliding window.' + isOptional: true + parameterType: NUMBER_INTEGER +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py new file mode 100644 index 0000000000..b69d5430a5 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py @@ -0,0 +1,341 @@ +"""Util functions for Vertex Forecasting pipelines.""" + +import os +import pathlib +from typing import Any, Dict, Tuple + +_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() + + +def get_bqml_arima_train_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + time_column: str, + time_series_identifier_column: str, + target_column: str, + forecast_horizon: int, + data_granularity_unit: str, + predefined_split_key: str = '', + timestamp_split_key: str = '', + training_fraction: float = -1.0, + validation_fraction: float = -1.0, + test_fraction: float = -1.0, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + window_column: str = '', + window_stride_length: int = -1, + window_max_count: int = -1, + bigquery_destination_uri: str = '', + override_destination: bool = False, + max_order: int = 5, + run_evaluation: bool = True, +) -> Tuple[str, Dict[str, Any]]: + """Get the BQML ARIMA_PLUS training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + root_dir: The Cloud Storage location to store the output. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column: Name of the column that the model is to predict values for. + forecast_horizon: The number of time periods into the future for which + forecasts will be created. Future periods start after the latest timestamp + for each time series. + data_granularity_unit: The data granularity unit. 
Accepted values are: + minute, hour, day, week, month, year. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + window_column: Name of the column that should be used to filter input rows. + The column should contain either booleans or string booleans; if the value + of the row is True, generate a sliding window from that row. + window_stride_length: Step length used to generate input examples. Every + window_stride_length rows will be used to generate a sliding window. + window_max_count: Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the input + data will be randomly sampled to hit the count. + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, resources will be created under a new dataset in the project. + Unlike in Vertex Forecasting, all resources will be given hardcoded names + under this dataset, and the model artifact will also be exported here. + override_destination: Whether to overwrite the metrics and evaluated + examples tables if they already exist. If this is False and the tables + exist, this pipeline will fail. + max_order: Integer between 1 and 5 representing the size of the parameter + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + but also the longest training runtime. + run_evaluation: Whether to run evaluation steps during training. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column': target_column, + 'forecast_horizon': forecast_horizon, + 'data_granularity_unit': data_granularity_unit, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'window_column': window_column, + 'window_stride_length': window_stride_length, + 'window_max_count': window_max_count, + 'bigquery_destination_uri': bigquery_destination_uri, + 'override_destination': override_destination, + 'max_order': max_order, + 'run_evaluation': run_evaluation, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'bqml_arima_train_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values + + +def get_bqml_arima_predict_pipeline_and_parameters( + project: str, + location: str, + model_name: str, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + bigquery_destination_uri: str = '', + generate_explanation: bool = False, +) -> Tuple[str, Dict[str, Any]]: + """Get the BQML ARIMA_PLUS prediction pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + model_name: ARIMA_PLUS BQML model URI. 
+ data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, a resource will be created under a new dataset in the project. + generate_explanation: Generate explanation along with the batch prediction + results. This will cause the batch prediction output to include + explanations. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'model_name': model_name, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_destination_uri': bigquery_destination_uri, + 'generate_explanation': generate_explanation, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'bqml_arima_predict_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values + + +def get_prophet_train_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + time_column: str, + time_series_identifier_column: str, + target_column: str, + forecast_horizon: int, + optimization_objective: str, + data_granularity_unit: str, + predefined_split_key: str = '', + timestamp_split_key: str = '', + training_fraction: float = -1.0, + validation_fraction: float = -1.0, + test_fraction: float = -1.0, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + window_column: str = '', + window_stride_length: int = -1, + window_max_count: int = -1, + max_num_trials: int = 6, + trainer_dataflow_machine_type: str = 'n1-standard-1', + trainer_dataflow_max_num_workers: int = 10, + trainer_dataflow_disk_size_gb: int = 40, + evaluation_dataflow_machine_type: str = 'n1-standard-1', + evaluation_dataflow_max_num_workers: int = 10, + evaluation_dataflow_disk_size_gb: int = 40, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + run_evaluation: bool = True, +) -> Tuple[str, Dict[str, Any]]: + """Returns Prophet train pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + root_dir: The Cloud Storage location to store the output. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column: Name of the column that the model is to predict values for. + forecast_horizon: The number of time periods into the future for which + forecasts will be created. Future periods start after the latest timestamp + for each time series. + optimization_objective: Optimization objective for the model. + data_granularity_unit: String representing the units of time for the time + column. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + window_column: Name of the column that should be used to filter input rows. 
+ The column should contain either booleans or string booleans; if the value + of the row is True, generate a sliding window from that row. + window_stride_length: Step length used to generate input examples. Every + window_stride_length rows will be used to generate a sliding window. + window_max_count: Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the input + data will be randomly sampled to hit the count. + max_num_trials: Maximum number of tuning trials to perform per time series. + trainer_dataflow_machine_type: The dataflow machine type used for training. + trainer_dataflow_max_num_workers: The max number of Dataflow workers used + for training. + trainer_dataflow_disk_size_gb: Dataflow worker's disk size in GB during + training. + evaluation_dataflow_machine_type: The dataflow machine type used for + evaluation. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers used + for evaluation. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB during + evaluation. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + run_evaluation: Whether to run evaluation steps during training. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column': target_column, + 'forecast_horizon': forecast_horizon, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'window_column': window_column, + 'window_stride_length': window_stride_length, + 'window_max_count': window_max_count, + 'max_num_trials': max_num_trials, + 'optimization_objective': optimization_objective, + 'data_granularity_unit': data_granularity_unit, + 'trainer_dataflow_machine_type': trainer_dataflow_machine_type, + 'trainer_dataflow_max_num_workers': trainer_dataflow_max_num_workers, + 'trainer_dataflow_disk_size_gb': trainer_dataflow_disk_size_gb, + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'run_evaluation': run_evaluation, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'prophet_trainer_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values + + +def get_prophet_prediction_pipeline_and_parameters( + project: str, + location: str, + model_name: str, + time_column: str, + time_series_identifier_column: str, + target_column: str, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + bigquery_destination_uri: str = '', + machine_type: str = 'n1-standard-2', + max_num_workers: int = 10, +) -> 
Tuple[str, Dict[str, Any]]: + """Returns Prophet prediction pipeline and formatted parameters. + + Unlike the prediction server for Vertex Forecasting, the Prophet prediction + server returns predictions batched by time series id. This pipeline shows how + these predictions can be disaggregated to get results similar to what Vertex + Forecasting provides. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + model_name: The name of the Model resource, in a form of + projects/{project}/locations/{location}/models/{model}. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column: Name of the column that the model is to predict values for. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, resources will be created under a new dataset in the project. + machine_type: The machine type used for batch prediction. + max_num_workers: The max number of workers used for batch prediction. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'model_name': model_name, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column': target_column, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_destination_uri': bigquery_destination_uri, + 'machine_type': machine_type, + 'max_num_workers': max_num_workers, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'prophet_predict_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py new file mode 100644 index 0000000000..2522350d36 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
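+# --- Illustrative usage sketch (not part of this patch) ---------------------
+# The forecasting helpers added above return a (template_path, parameter_values)
+# tuple that is meant to be handed to a pipeline run. A minimal sketch of how
+# that might look with the Vertex AI SDK follows; the project, bucket, and
+# column names are hypothetical, and the PipelineJob call follows the standard
+# google-cloud-aiplatform API rather than anything prescribed by this patch.
+from google.cloud import aiplatform
+from google_cloud_pipeline_components.v1.automl.forecasting import utils
+
+template_path, parameter_values = utils.get_prophet_train_pipeline_and_parameters(
+    project='my-project',                       # hypothetical GCP project
+    location='us-central1',
+    root_dir='gs://my-bucket/prophet',          # hypothetical GCS root
+    time_column='ds',
+    time_series_identifier_column='series_id',
+    target_column='y',
+    forecast_horizon=30,
+    optimization_objective='rmse',
+    data_granularity_unit='day',
+    data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
+)
+
+aiplatform.init(project='my-project', location='us-central1')
+aiplatform.PipelineJob(
+    display_name='prophet-train',
+    template_path=template_path,
+    pipeline_root='gs://my-bucket/prophet',
+    parameter_values=parameter_values,
+).run()
+# --- end of illustrative sketch ---------------------------------------------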
+ +"""GA AutoML tabular components.""" + +from google_cloud_pipeline_components.v1.automl.tabular.cv_trainer import automl_tabular_cv_trainer as CvTrainerOp +from google_cloud_pipeline_components.v1.automl.tabular.ensemble import automl_tabular_ensemble as EnsembleOp +from google_cloud_pipeline_components.v1.automl.tabular.finalizer import automl_tabular_finalizer as FinalizerOp +from google_cloud_pipeline_components.v1.automl.tabular.infra_validator import automl_tabular_infra_validator as InfraValidatorOp +from google_cloud_pipeline_components.v1.automl.tabular.split_materialized_data import split_materialized_data as SplitMaterializedDataOp +from google_cloud_pipeline_components.v1.automl.tabular.stage_1_tuner import automl_tabular_stage_1_tuner as Stage1TunerOp +from google_cloud_pipeline_components.v1.automl.tabular.stats_and_example_gen import tabular_stats_and_example_gen as StatsAndExampleGenOp +from google_cloud_pipeline_components.v1.automl.tabular.training_configurator_and_validator import training_configurator_and_validator as TrainingConfiguratorAndValidatorOp +from google_cloud_pipeline_components.v1.automl.tabular.transform import automl_tabular_transform as TransformOp + +__all__ = [ + 'CvTrainerOp', + 'InfraValidatorOp', + 'Stage1TunerOp', + 'EnsembleOp', + 'StatsAndExampleGenOp', + 'TransformOp', + 'FinalizerOp', + 'SplitMaterializedDataOp', + 'TrainingConfiguratorAndValidatorOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml new file mode 100644 index 0000000000..3c4fbb6d46 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -0,0 +1,11149 @@ +# PIPELINE DEFINITION +# Name: automl-tabular +# Description: The AutoML Tabular pipeline v1. 
+# Inputs: +# additional_experiments: dict +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# disable_early_stopping: bool [Default: False] +# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# distill_batch_predict_max_replica_count: int [Default: 25.0] +# distill_batch_predict_starting_replica_count: int [Default: 25.0] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] +# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] +# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# transformations: str +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-3-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-3-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: 
system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-3: + executorLabel: exec-automl-tabular-ensemble-3 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. 
+ parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-3: + executorLabel: exec-automl-tabular-infra-validator-3 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-stage-1-tuner-2: + executorLabel: exec-automl-tabular-stage-1-tuner-2 + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform: + executorLabel: exec-automl-tabular-transform + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform-2: + executorLabel: exec-automl-tabular-transform-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. 
If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-2: + executorLabel: exec-bool-identity-2 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-3: + executorLabel: exec-bool-identity-3 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. 
+ parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + 
componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + 
pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: 
evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: 
pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-7 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-7 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + 
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + parameters: + deadline_hours: + 
taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity-2 + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_distillation + taskInfo: + name: bool-identity-3 + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + 
componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + condition-7: + componentRef: + name: comp-condition-7 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + - calculate-training-parameters-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + pipelinechannel--tabular-stats-and-example-gen-eval_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + pipelinechannel--tabular-stats-and-example-gen-metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + pipelinechannel--tabular-stats-and-example-gen-test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + pipelinechannel--tabular-stats-and-example-gen-train_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + parameters: + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: distill_stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: is-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'true' + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + 
artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers 
+ pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: 
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: 
pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-7: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-8 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-8 + tasks: + automl-tabular-ensemble-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-3 + dependentTasks: + - automl-tabular-stage-1-tuner-2 + - automl-tabular-transform-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-3 + automl-tabular-infra-validator-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-3 + dependentTasks: + - automl-tabular-ensemble-3 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + taskInfo: + name: automl-tabular-infra-validator-3 + automl-tabular-stage-1-tuner-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner-2 + dependentTasks: + - automl-tabular-transform-2 + inputs: + artifacts: + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform-2 + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform-2 + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + parameters: + deadline_hours: + componentInputParameter: 
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + runtimeValue: + constant: 1.0 + single_run_max_secs: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner-2 + automl-tabular-transform-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform-2 + dependentTasks: + - write-bp-result-path + - write-bp-result-path-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + eval_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path-2 + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + train_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform-2 + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - automl-tabular-ensemble-3 + - model-upload-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + pipelinechannel--model-upload-3-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-3 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + pipelinechannel--bool-identity-2-Output: + 
componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-batch-predict-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-3 + dependentTasks: + - read-input-uri + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + 
componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-train-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-3 + model-batch-predict-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-4 + dependentTasks: + - read-input-uri-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri-2 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-eval-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-4 + model-upload-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-3 + dependentTasks: + - automl-tabular-ensemble-3 + - automl-tabular-infra-validator-3 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + parameters: + display_name: + runtimeValue: + constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-3 + read-input-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + taskInfo: + name: read-input-uri + read-input-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri-2 + inputs: + artifacts: + split_uri: + 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + taskInfo: + name: read-input-uri-2 + write-bp-result-path: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path + dependentTasks: + - model-batch-predict-3 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-3 + taskInfo: + name: write-bp-result-path + write-bp-result-path-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path-2 + dependentTasks: + - model-batch-predict-4 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-4 + taskInfo: + name: write-bp-result-path-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + 
parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-8: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-3 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-3 + tasks: + feature-attribution-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-3 + dependentTasks: + - model-batch-explanation-3 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-3 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-3 + model-batch-explanation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-3 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: 
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-3 + model-batch-predict-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-5 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-5 + model-evaluation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-3 + dependentTasks: + - model-batch-predict-5 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-5 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-3 + model-evaluation-import-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-3 + dependentTasks: + - feature-attribution-3 + - model-evaluation-3 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-3 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-3 + model: + componentInputArtifact: pipelinechannel--model-upload-3-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-3 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-3-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: 
STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-transform: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform + dependentTasks: + - tabular-stats-and-example-gen + inputs: + artifacts: + dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - 
automl-tabular-transform + - merge-materialized-splits + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: 
pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + 
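A note for readers of this compiled IR: under `inputs.parameters`, `componentInputParameter` entries wire a task input to one of the enclosing DAG's `pipelinechannel--...` inputs, while `runtimeValue.constant` entries are literals fixed at compile time (for example `instances_format: tf-record` and the `{{$.pipeline_job_uuid}}`-templated display names); `taskOutputArtifact`/`producerTask` pairs together with `dependentTasks` record artifact hand-offs between tasks, and the `triggerPolicy` on `condition-2` gates that branch on the `string-not-empty` output. A minimal KFP v2 sketch of how such a spec is typically authored; the component names and bodies below are hypothetical placeholders, not the prebuilt Google Cloud components this YAML was compiled from:

    from kfp import compiler, dsl
    from kfp.dsl import Artifact, Input, Metrics, Output

    @dsl.component
    def string_not_empty(value: str) -> str:
        return 'true' if value else 'false'

    @dsl.component
    def batch_predict(job_display_name: str, instances_format: str,
                      machine_type: str, predictions: Output[Artifact]):
        # Placeholder body; the real pipeline uses prebuilt container components.
        with open(predictions.path, 'w') as f:
            f.write(job_display_name)

    @dsl.component
    def evaluate(batch_prediction_job: Input[Artifact],
                 evaluation_metrics: Output[Metrics]):
        evaluation_metrics.log_metric('rowCount', 1.0)

    @dsl.pipeline(name='compiled-ir-demo')
    def compiled_ir_demo(stage_1_tuning_result_artifact_uri: str = '',
                         machine_type: str = 'n1-highmem-8'):
        check = string_not_empty(value=stage_1_tuning_result_artifact_uri)
        # Compiles to a triggerPolicy condition comparing the task output to 'true'.
        with dsl.Condition(check.output == 'true', name='uri_not_empty'):
            predict = batch_predict(
                # Literal arguments become runtimeValue.constant entries.
                job_display_name='batch-predict-evaluation-'
                f'{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}',
                instances_format='tf-record',
                # Pipeline parameters become componentInputParameter references.
                machine_type=machine_type)
            # Passing an output artifact compiles to taskOutputArtifact/producerTask
            # and implicitly adds the producer to dependentTasks.
            evaluate(batch_prediction_job=predict.outputs['predictions'])

    if __name__ == '__main__':
        compiler.Compiler().compile(compiled_ir_demo, 'compiled_ir_demo.yaml')

The enclosing `comp-exit-handler-1` DAG would, in the same style, come from wrapping these tasks in a `with dsl.ExitHandler(...):` block.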
pipelinechannel--tabular-stats-and-example-gen-test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + 
componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - automl-tabular-transform + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + taskInfo: + name: merge-materialized-splits + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: string-not-empty + tabular-stats-and-example-gen: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-stats-and-example-gen + inputs: + parameters: + additional_experiments_json: + componentInputParameter: pipelinechannel--additional_experiments + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + 
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + quantiles: + componentInputParameter: pipelinechannel--quantiles + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column_name: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + transformations: + runtimeValue: + constant: '[]' + transformations_path: + componentInputParameter: pipelinechannel--transformations + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column_name: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabular-stats-and-example-gen + inputDefinitions: + parameters: + pipelinechannel--additional_experiments: + parameterType: STRUCT + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: 
NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + parameterType: STRING + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--transformations: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 
0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. 
If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' 
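The `defaultValue`/`isOptional: true` pairs on these Dataflow parameters map one-to-one to Python default arguments on the underlying component, and parameters with no default (such as `project`) stay required; `parameterType` is inferred from the annotation. A minimal sketch of that mapping with a hypothetical component (the real `feature-attribution` entries come from a prebuilt container component, and the numbered copies `comp-feature-attribution-2`/`-3` are the compiler's per-use instances of the same component):

    from kfp import dsl
    from kfp.dsl import Metrics, Output

    @dsl.component
    def feature_attribution_demo(
            project: str,                          # required: no defaultValue in the IR
            feature_attributions: Output[Metrics],
            location: str = 'us-central1',         # isOptional: true, defaultValue: us-central1
            dataflow_machine_type: str = 'n1-standard-4',
            dataflow_max_workers_num: int = 5,     # parameterType: NUMBER_INTEGER
            dataflow_use_public_ips: bool = True,  # parameterType: BOOLEAN
            predictions_format: str = 'jsonl'):
        # Placeholder body; the real component launches a Dataflow evaluation job.
        feature_attributions.log_metric('explained_rows', 0.0)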
+ isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-3: + executorLabel: exec-feature-attribution-3 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. 
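`comp-importer` above is the compiled form of `dsl.importer`, which turns a plain URI parameter into a typed artifact (output key `artifact`) that downstream tasks can consume. A minimal sketch, using an arbitrary placeholder URI rather than the tuning-result URI this pipeline actually imports:

    from kfp import dsl
    from kfp.dsl import Artifact, Input

    @dsl.component
    def consume(artifact: Input[Artifact]) -> str:
        return artifact.uri

    @dsl.pipeline(name='importer-demo')
    def importer_demo(tuning_result_uri: str = 'gs://my-bucket/tuning_result'):  # hypothetical URI
        imported = dsl.importer(
            artifact_uri=tuning_result_uri,
            artifact_class=Artifact,
            reimport=False)
        consume(artifact=imported.outputs['artifact'])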
+ outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-3: + executorLabel: exec-model-batch-explanation-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
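The comp-model-batch-predict components above are the compiled form of a Vertex AI batch-prediction component; their inputDefinitions mirror the arguments a pipeline author would pass from Python. A minimal usage sketch follows, assuming the google-cloud-pipeline-components ModelBatchPredictOp import path and a placeholder importer for the unmanaged container model; the URIs, image, and values are illustrative and are not part of this patch.

# Hypothetical usage sketch inferred from the inputDefinitions above; import
# paths are assumed from google-cloud-pipeline-components, and all URIs and
# values are placeholders, not taken from this patch.
from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


@dsl.pipeline(name='batch-predict-sketch')
def batch_predict_pipeline(project: str, location: str = 'us-central1'):
    # Stand-in for the upstream step that would produce the model artifact.
    model_importer = dsl.importer(
        artifact_uri='gs://example-bucket/model',  # hypothetical location
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            'containerSpec': {
                # Hypothetical serving image for the unmanaged model.
                'imageUri': 'us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest'
            }
        })

    # Parameter names correspond to the inputDefinitions in the spec above.
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name='example-batch-predict',
        unmanaged_container_model=model_importer.output,
        instances_format='jsonl',
        gcs_source_uris=['gs://example-bucket/instances/*.jsonl'],
        gcs_destination_output_uri_prefix='gs://example-bucket/predictions',
        machine_type='n1-standard-4',
        starting_replica_count=1,
        max_replica_count=2)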
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-3: + executorLabel: exec-model-batch-predict-3 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. 
In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. 
If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' 
+ isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
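The outputDefinitions repeated for each comp-model-batch-predict-N expose the job's results as artifacts (gcs_output_directory, bigquery_output_table, plus the deprecated batchpredictionjob) and a serialized gcp_resources parameter. A hypothetical downstream consumer of the GCS output could look like the sketch below; the component name and wiring are illustrative only and not part of this patch.

# Hypothetical consumer of the gcs_output_directory output declared above;
# the component and its wiring are assumptions for illustration.
from kfp import dsl


@dsl.component(base_image='python:3.9')
def list_prediction_files(predictions_dir: dsl.Input[dsl.Artifact]) -> str:
    # The artifact URI points at the directory the batch prediction job
    # writes under gcs_destination_output_uri_prefix.
    return predictions_dir.uri


# Wiring inside a pipeline (names assumed):
#   batch_predict_task = ModelBatchPredictOp(...)
#   list_prediction_files(
#       predictions_dir=batch_predict_task.outputs['gcs_output_directory'])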
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-4: + executorLabel: exec-model-batch-predict-4 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-5: + executorLabel: exec-model-batch-predict-5 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-3: + executorLabel: exec-model-evaluation-3 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + 
parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' 
+ isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-3: + executorLabel: exec-model-evaluation-import-3 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. 
+ isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + 
parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-3: + executorLabel: exec-model-upload-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-read-input-uri: + executorLabel: exec-read-input-uri + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-read-input-uri-2: + executorLabel: exec-read-input-uri-2 + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. 
+ parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-tabular-stats-and-example-gen: + executorLabel: exec-tabular-stats-and-example-gen + inputDefinitions: + parameters: + additional_experiments: + defaultValue: '' + isOptional: true + parameterType: STRING + additional_experiments_json: + defaultValue: {} + isOptional: true + parameterType: STRUCT + data_source_bigquery_table_path: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Location for running dataset statistics and example + + generation.' + parameterType: STRING + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." 
+ isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_type: + description: 'The prediction type. Supported values: + + "classification", "regression".' + parameterType: STRING + project: + description: 'Project to run dataset statistics and example + + generation.' + parameterType: STRING + quantiles: + defaultValue: [] + isOptional: true + parameterType: LIST + request_type: + defaultValue: COLUMN_STATS_ONLY + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + target_column_name: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Quote escaped JSON string for transformations. Each + + transformation will apply transform function to given input column. And + + the result will be used for training. When creating transformation for + + BigQuery Struct column, the column should be flattened using "." as the + + delimiter.' + parameterType: STRING + transformations_path: + defaultValue: '' + description: 'Path to a GCS file containing JSON + + string for transformations.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column_name: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The instance baseline used to calculate explanations. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + downsampled_test_split_json: + description: The downsampled test split JSON object. 
+ parameterType: LIST + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + test_split_json: + description: The test split JSON object. + parameterType: LIST + comp-write-bp-result-path: + executorLabel: exec-write-bp-result-path + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-write-bp-result-path-2: + executorLabel: exec-write-bp-result-path-2 + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + 
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": 
\"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-3: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-3: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-stage-1-tuner-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", 
"{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + 
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", 
\"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - 
/main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-3: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", 
"{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + 
- '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", 
"{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", 
"{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", 
"\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-4: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", 
\"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-5: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", 
\"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - 
'{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-3: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - 
--kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": 
"regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-3: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - 
'{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-3: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - 
--gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-read-input-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-read-input-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-tabular-stats-and-example-gen: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": + \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": + \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": + \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": + \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": + ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": + ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": + ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", + \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", + "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", + "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", + "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", + "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", + "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", + "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", + "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", + "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", + \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", + "\", 
\"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", + "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", + \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", + \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", + "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", + "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", + "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", + "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", + \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", + \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", + "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", + "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-write-bp-result-path: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 + exec-write-bp-result-path-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 +pipelineInfo: + description: The AutoML Tabular pipeline v1. 
+ name: automl-tabular +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--additional_experiments: + componentInputParameter: additional_experiments + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: distill_batch_predict_starting_replica_count + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + componentInputParameter: stats_and_example_gen_dataflow_machine_type + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + componentInputParameter: stats_and_example_gen_dataflow_max_num_workers + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: 
test_fraction + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact, + parameters: + additional_experiments: + description: Use this field to config private preview features. + isOptional: true + parameterType: STRUCT + cv_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding stage + + cv trainer worker pool spec.' + isOptional: true + parameterType: LIST + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + disable_early_stopping: + defaultValue: false + description: If disable easly stopping. + isOptional: true + parameterType: BOOLEAN + distill_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'The prediction server machine type for + + batch predict component in the model distillation.' + isOptional: true + parameterType: STRING + distill_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The max number of prediction server + + for batch predict component in the model distillation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + distill_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'The initial number of + + prediction server for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region that runs the pipeline components. 
+ parameterType: STRING + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model, + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model, + isOptional: true + parameterType: STRING + optimization_objective: + description: 'For binary classification, "maximize-au-roc", + + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", + or + + "maximize-recall-at-precision". For multi class classification, + + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + + "minimize-rmsle".' + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when optimization_objective + + is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when optimization_objective is + + ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_num_selected_trials: + defaultValue: 5.0 + description: Number of selected trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in + + GB for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for + + stats_and_example_gen component.' 
+ isOptional: true + parameterType: STRING + stats_and_example_gen_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow + + workers for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: The stratified_split column name. + isOptional: true + parameterType: STRING + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transformations: + description: 'The path to a GCS file containing the transformations to + + apply.' + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py new file mode 100644 index 0000000000..716d6f1ba4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -0,0 +1,166 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabular Cross Validation Trainer component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_cv_trainer( + project: str, + location: str, + root_dir: str, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + num_selected_trials: int, + transform_output: Input[Artifact], + metadata: Input[Artifact], + materialized_cv_splits: Input[Artifact], + tuning_result_input: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + execution_metrics: dsl.OutputPath(dict), + worker_pool_specs_override_json: Optional[list] = [], + num_selected_features: Optional[int] = 0, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes AutoML Tabular models and selects top trials using cross-validation. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + deadline_hours: Number of hours the cross-validation trainer should run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. The number of + features to learn in the NN models. + transform_output: The transform output artifact. + metadata: The tabular example gen metadata. + materialized_cv_splits: The materialized cross-validation splits. + tuning_result_input: AutoML Tabular tuning result. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + tuning_result_output: The trained model and architectures. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + execution_metrics: Core metrics in dictionary of component execution. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-cv-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["l2l_cv_tuner", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + ( + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "--training_base_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--num_parallel_trial=' + ), + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + ( + '", "--valid_trials_completed_threshold=0.7",' + ' "--num_selected_trials=' + ), + num_selected_trials, + '", "--num_selected_features=', + num_selected_features, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--materialized_cv_splits=', + materialized_cv_splits.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--execution_metrics_path=', + execution_metrics, + ( + '", "--use_custom_job=true", "--use_json=true",' + ' "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json new file mode 100644 index 0000000000..5133d9cf2e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json @@ -0,0 +1,7974 @@ +{ + "pipelineSpec": { + "components": { + "comp-automl-tabular-cv-trainer": { + "executorLabel": "exec-automl-tabular-cv-trainer", + "inputDefinitions": { + "artifacts": { + "materialized_cv_splits": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + 
"location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-ensemble": { + "executorLabel": "exec-automl-tabular-ensemble", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "warmup_data": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_architecture": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_without_custom_ops": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-ensemble-2": { + "executorLabel": "exec-automl-tabular-ensemble-2", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "warmup_data": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + 
"type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_architecture": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_without_custom_ops": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-finalizer": { + "executorLabel": "exec-automl-tabular-finalizer", + "inputDefinitions": { + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-infra-validator": { + "executorLabel": "exec-automl-tabular-infra-validator", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-automl-tabular-infra-validator-2": { + "executorLabel": "exec-automl-tabular-infra-validator-2", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-automl-tabular-stage-1-tuner": { + "executorLabel": "exec-automl-tabular-stage-1-tuner", + "inputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "study_spec_parameters_override": { + "type": "STRING" + }, + "study_spec_parameters_override_json": { + "type": "STRING" + }, + "tune_feature_selection_rate": { + "type": "STRING" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + 
"artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-stage-1-tuner-2": { + "executorLabel": "exec-automl-tabular-stage-1-tuner-2", + "inputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "study_spec_parameters_override": { + "type": "STRING" + }, + "study_spec_parameters_override_json": { + "type": "STRING" + }, + "tune_feature_selection_rate": { + "type": "STRING" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-transform": { + "executorLabel": "exec-automl-tabular-transform", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_test_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + 
"training_schema_uri": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-transform-2": { + "executorLabel": "exec-automl-tabular-transform-2", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_test_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "training_schema_uri": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bool-identity": { + "executorLabel": "exec-bool-identity", + "inputDefinitions": { + "parameters": { + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-bool-identity-2": { + "executorLabel": "exec-bool-identity-2", + "inputDefinitions": { + "parameters": { + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-condition-is-distill-4": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-5" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-5" + } + ] + } + } + }, + "tasks": { + "automl-tabular-ensemble-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-ensemble-2" + }, + "dependentTasks": [ + "automl-tabular-stage-1-tuner-2", + "automl-tabular-transform-2" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + 
"componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" + }, + "instance_baseline": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-instance_baseline" + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform-2" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-stage-1-tuner-2" + } + }, + "warmup_data": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" + } + }, + "parameters": { + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-ensemble-2" + } + }, + "automl-tabular-infra-validator-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-infra-validator-2" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2" + ], + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-infra-validator-2" + } + }, + "automl-tabular-stage-1-tuner-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-stage-1-tuner-2" + }, + "dependentTasks": [ + "automl-tabular-transform-2" + ], + "inputs": { + "artifacts": { + "materialized_eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform-2" + } + }, + "materialized_train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform-2" + } + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform-2" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" + }, + "disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "num_selected_trials": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + 
"runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "study_spec_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "study_spec_parameters_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "study_spec_parameters_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "tune_feature_selection_rate": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-stage-1-tuner-2" + } + }, + "automl-tabular-transform-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-transform-2" + }, + "dependentTasks": [ + "write-bp-result-path", + "write-bp-result-path-2" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" + }, + "eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "result", + "producerTask": "write-bp-result-path-2" + } + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "test_split": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-test_split" + }, + "train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "result", + "producerTask": "write-bp-result-path" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_use_public_ips": { + "runtimeValue": { + "constantValue": { + "stringValue": "true" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-transform-2" + } + }, + "condition-is-evaluation-5": { + "componentRef": { + "name": "comp-condition-is-evaluation-5" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2", + "model-upload-3" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--model-upload-3-model": { + 
"taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-3" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--bool-identity-2-Output": { + "componentInputParameter": "pipelineparam--bool-identity-2-Output" + }, + "pipelineparam--bool-identity-Output": { + "componentInputParameter": "pipelineparam--bool-identity-Output" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-is-evaluation-5" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" + } + }, + "model-batch-predict-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-2" + }, + "dependentTasks": [ + "model-upload-2", + "read-input-uri" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-2" + } + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + 
"bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "read-input-uri" + } + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-train-split" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-2" + } + }, + "model-batch-predict-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-3" + }, + "dependentTasks": [ + "model-upload-2", + "read-input-uri-2" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-2" + } + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "read-input-uri-2" + } + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + 
"runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-eval-split" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-3" + } + }, + "model-upload-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload-2" + }, + "dependentTasks": [ + "set-model-can-skip-validation" + ], + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload-2" + } + }, + "model-upload-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload-3" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2", + "automl-tabular-infra-validator-2" + ], + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": 
"pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload-3" + } + }, + "read-input-uri": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-read-input-uri" + }, + "inputs": { + "artifacts": { + "split_uri": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-train_split" + } + } + }, + "taskInfo": { + "name": "read-input-uri" + } + }, + "read-input-uri-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-read-input-uri-2" + }, + "inputs": { + "artifacts": { + "split_uri": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" + } + } + }, + "taskInfo": { + "name": "read-input-uri-2" + } + }, + "set-model-can-skip-validation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-set-model-can-skip-validation" + }, + "inputs": { + "artifacts": { + "model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + } + }, + "taskInfo": { + "name": "set-model-can-skip-validation" + } + }, + "write-bp-result-path": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-write-bp-result-path" + }, + "dependentTasks": [ + "model-batch-predict-2" + ], + "inputs": { + "artifacts": { + "bp_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict-2" + } + } + } + }, + "taskInfo": { + "name": "write-bp-result-path" + } + }, + "write-bp-result-path-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-write-bp-result-path-2" + }, + "dependentTasks": [ + "model-batch-predict-3" + ], + "inputs": { + "artifacts": { + "bp_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict-3" + } + } + } + }, + "taskInfo": { + "name": "write-bp-result-path-2" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + 
"pipelineparam--tabular-stats-and-example-gen-test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--disable_early_stopping": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--reduce_search_space_mode": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--transform_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-is-evaluation-3": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-2" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + 
"producerSubtask": "model-evaluation" + } + ] + } + } + }, + "tasks": { + "model-batch-explanation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-explanation" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-explanation" + } + }, + "model-batch-predict": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict" + }, + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + 
"bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict" + } + }, + "model-evaluation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation" + }, + "dependentTasks": [ + "model-batch-predict" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, 
+ "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation" + } + }, + "model-evaluation-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-2" + }, + "dependentTasks": [ + "model-batch-explanation" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-explanation" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + 
"positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-2" + } + }, + "model-evaluation-import": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-import" + }, + "dependentTasks": [ + "model-evaluation", + "model-evaluation-2" + ], + "inputs": { + "artifacts": { + "explanation": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-2" + } + }, + "metrics": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation" + } + }, + "model": { + "componentInputArtifact": "pipelineparam--model-upload-model" + } + }, + "parameters": { + "dataset_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataset_paths": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "dataset_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + } + } + }, + "taskInfo": { + "name": "model-evaluation-import" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--model-upload-model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + 
"pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-is-evaluation-5": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-3" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-4" + } + ] + } + } + }, + "tasks": { + "model-batch-explanation-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-explanation-2" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-2-explanation_parameters" + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + 
"componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-explanation-2" + } + }, + "model-batch-predict-4": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-4" + }, + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-4" + } + }, + "model-evaluation-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-3" + }, + "dependentTasks": [ + "model-batch-predict-4" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + 
"producerTask": "model-batch-predict-4" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-3" + } + }, + "model-evaluation-4": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-4" + }, + "dependentTasks": [ + "model-batch-explanation-2" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-explanation-2" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": 
"pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-4" + } + }, + "model-evaluation-import-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-import-2" + }, + "dependentTasks": [ + "model-evaluation-3", + "model-evaluation-4" + ], + "inputs": { + "artifacts": { + "explanation": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-4" + } + }, + "metrics": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-3" + } + }, + "model": { + "componentInputArtifact": "pipelineparam--model-upload-3-model" + } + }, + "parameters": { + "dataset_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataset_paths": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "dataset_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + } + } + }, + "taskInfo": { + "name": "model-evaluation-import-2" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { + 
"artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--model-upload-3-model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-no-distill-2": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-3" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-3" + } + ] + } + } + }, + "tasks": { + "condition-is-evaluation-3": { + "componentRef": { + "name": "comp-condition-is-evaluation-3" + }, + "dependentTasks": [ + "model-upload" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + }, + "pipelineparam--model-upload-model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "pipelineparam--bool-identity-2-Output": { + "componentInputParameter": 
"pipelineparam--bool-identity-2-Output" + }, + "pipelineparam--bool-identity-Output": { + "componentInputParameter": "pipelineparam--bool-identity-Output" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-is-evaluation-3" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" + } + }, + "model-upload": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + 
"componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-exit-handler-1": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "condition-no-distill-2" + } + ] + }, + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "condition-is-distill-4" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "condition-is-distill-4" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "condition-no-distill-2" + } + ] + } + } + }, + "tasks": { + "automl-tabular-cv-trainer": { + 
"cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-cv-trainer" + }, + "dependentTasks": [ + "automl-tabular-stage-1-tuner", + "automl-tabular-transform", + "merge-materialized-splits", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "materialized_cv_splits": { + "taskOutputArtifact": { + "outputArtifactKey": "splits", + "producerTask": "merge-materialized-splits" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-stage-1-tuner" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--stage_2_deadline_hours" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_2_num_parallel_trials" + }, + "num_selected_trials": { + "componentInputParameter": "pipelineparam--stage_2_num_selected_trials" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_2_single_run_max_secs" + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--cv_trainer_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-cv-trainer" + } + }, + "automl-tabular-ensemble": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-ensemble" + }, + "dependentTasks": [ + "automl-tabular-cv-trainer", + "automl-tabular-transform", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "instance_baseline": { + "taskOutputArtifact": { + "outputArtifactKey": "instance_baseline", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-cv-trainer" + } + }, + "warmup_data": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": 
"pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-ensemble" + } + }, + "automl-tabular-infra-validator": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-infra-validator" + }, + "dependentTasks": [ + "automl-tabular-ensemble" + ], + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-infra-validator" + } + }, + "automl-tabular-stage-1-tuner": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-stage-1-tuner" + }, + "dependentTasks": [ + "automl-tabular-transform", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "materialized_eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform" + } + }, + "materialized_train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--stage_1_deadline_hours" + }, + "disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "num_selected_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_selected_trials" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "study_spec_override": { + "componentInputParameter": "pipelineparam--study_spec_override" + }, + "study_spec_parameters_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "study_spec_parameters_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "tune_feature_selection_rate": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-stage-1-tuner" + } + }, + "automl-tabular-transform": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-transform" + }, + 
"dependentTasks": [ + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "test_split": { + "taskOutputArtifact": { + "outputArtifactKey": "test_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "train_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-transform" + } + }, + "bool-identity": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bool-identity" + }, + "inputs": { + "parameters": { + "value": { + "componentInputParameter": "pipelineparam--run_evaluation" + } + } + }, + "taskInfo": { + "name": "bool-identity" + } + }, + "bool-identity-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bool-identity-2" + }, + "inputs": { + "parameters": { + "value": { + "componentInputParameter": "pipelineparam--run_distillation" + } + } + }, + "taskInfo": { + "name": "bool-identity-2" + } + }, + "condition-is-distill-4": { + "componentRef": { + "name": "comp-condition-is-distill-4" + }, + "dependentTasks": [ + "automl-tabular-ensemble", + "automl-tabular-infra-validator", + "bool-identity", + "bool-identity-2", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + 
}, + "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { + "taskOutputArtifact": { + "outputArtifactKey": "instance_baseline", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split": { + "taskOutputArtifact": { + "outputArtifactKey": "test_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "train_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--bool-identity-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity-2" + } + }, + "pipelineparam--bool-identity-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity" + } + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "pipelineparam--location": 
{ + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "downsampled_test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "pipelineparam--transform_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + } + } + }, + "taskInfo": { + "name": "condition-is-distill-4" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'true'" + } + }, + "condition-no-distill-2": { + "componentRef": { + "name": "comp-condition-no-distill-2" + }, + "dependentTasks": [ + "automl-tabular-ensemble", + "automl-tabular-infra-validator", + "bool-identity", + "bool-identity-2", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--bool-identity-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity-2" + } + }, + "pipelineparam--bool-identity-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity" + } + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + 
"componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "downsampled_test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-no-distill-2" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'false'" + } + }, + "merge-materialized-splits": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-merge-materialized-splits" + }, + "dependentTasks": [ + "automl-tabular-transform" + ], + "inputs": { + "artifacts": { + "split_0": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform" + } + }, + "split_1": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform" + } + } + } + }, + "taskInfo": { + "name": "merge-materialized-splits" + } + }, + "tabular-stats-and-example-gen": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-tabular-stats-and-example-gen" + }, + "inputs": { + "parameters": { + "additional_experiments": { + "componentInputParameter": "pipelineparam--additional_experiments" + }, + "additional_experiments_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "data_source": { + "componentInputParameter": "pipelineparam--data_source" + }, + "data_source_bigquery_table_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "data_source_csv_filenames": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size_gb": { + 
"componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "optimization_objective": { + "componentInputParameter": "pipelineparam--optimization_objective" + }, + "optimization_objective_precision_value": { + "componentInputParameter": "pipelineparam--optimization_objective_precision_value" + }, + "optimization_objective_recall_value": { + "componentInputParameter": "pipelineparam--optimization_objective_recall_value" + }, + "predefined_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "request_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "COLUMN_STATS_ONLY" + } + } + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + "componentInputParameter": "pipelineparam--run_distillation" + }, + "split_spec": { + "componentInputParameter": "pipelineparam--split_spec" + }, + "stratified_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "test_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "timestamp_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "training_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "transformations": { + "componentInputParameter": "pipelineparam--transformations" + }, + "transformations_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "validation_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "weight_column_name": { + "componentInputParameter": "pipelineparam--weight_column_name" + } + } + }, + "taskInfo": { + "name": "tabular-stats-and-example-gen" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "pipelineparam--additional_experiments": { + "type": "STRING" + }, + "pipelineparam--cv_trainer_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--data_source": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--disable_early_stopping": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "type": "INT" + }, + 
"pipelineparam--distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--optimization_objective": { + "type": "STRING" + }, + "pipelineparam--optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "pipelineparam--optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--reduce_search_space_mode": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--run_distillation": { + "type": "STRING" + }, + "pipelineparam--run_evaluation": { + "type": "STRING" + }, + "pipelineparam--split_spec": { + "type": "STRING" + }, + "pipelineparam--stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_num_selected_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--stage_2_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--stage_2_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_2_num_selected_trials": { + "type": "INT" + }, + "pipelineparam--stage_2_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--stats_and_example_gen_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--study_spec_override": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--transform_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--transformations": { + "type": "STRING" + }, + "pipelineparam--weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + 
"comp-merge-materialized-splits": { + "executorLabel": "exec-merge-materialized-splits", + "inputDefinitions": { + "artifacts": { + "split_0": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "split_1": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "splits": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-model-batch-explanation": { + "executorLabel": "exec-model-batch-explanation", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-explanation-2": { + "executorLabel": "exec-model-batch-explanation-2", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, 
+ "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict": { + "executorLabel": "exec-model-batch-predict", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-2": { + "executorLabel": "exec-model-batch-predict-2", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + 
"type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-3": { + "executorLabel": "exec-model-batch-predict-3", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-4": { + "executorLabel": "exec-model-batch-predict-4", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + 
"gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation": { + "executorLabel": "exec-model-evaluation", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-2": { + "executorLabel": "exec-model-evaluation-2", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + 
"dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-3": { + "executorLabel": "exec-model-evaluation-3", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-4": { + "executorLabel": "exec-model-evaluation-4", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + 
"dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-import": { + "executorLabel": "exec-model-evaluation-import", + "inputDefinitions": { + "artifacts": { + "explanation": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataset_path": { + "type": "STRING" + }, + "dataset_paths": { + "type": "STRING" + }, + "dataset_type": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-import-2": { + "executorLabel": "exec-model-evaluation-import-2", + "inputDefinitions": { + "artifacts": { + "explanation": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataset_path": { + "type": "STRING" + }, + "dataset_paths": { + "type": "STRING" + }, + "dataset_type": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload": { + "executorLabel": "exec-model-upload", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": 
"STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload-2": { + "executorLabel": "exec-model-upload-2", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload-3": { + "executorLabel": "exec-model-upload-3", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-read-input-uri": { + "executorLabel": "exec-read-input-uri", + "inputDefinitions": { + "artifacts": { + "split_uri": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-read-input-uri-2": { + "executorLabel": "exec-read-input-uri-2", + "inputDefinitions": { + "artifacts": { + "split_uri": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-set-model-can-skip-validation": { + "executorLabel": "exec-set-model-can-skip-validation", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-tabular-stats-and-example-gen": { + "executorLabel": "exec-tabular-stats-and-example-gen", + "inputDefinitions": { + "parameters": { + "additional_experiments": { + "type": "STRING" + }, + "additional_experiments_json": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "data_source_bigquery_table_path": { + "type": "STRING" + }, + "data_source_csv_filenames": { + 
"type": "STRING" + }, + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "optimization_objective": { + "type": "STRING" + }, + "optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "predefined_split_key": { + "type": "STRING" + }, + "prediction_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "request_type": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "split_spec": { + "type": "STRING" + }, + "stratified_split_key": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "test_fraction": { + "type": "DOUBLE" + }, + "timestamp_split_key": { + "type": "STRING" + }, + "training_fraction": { + "type": "DOUBLE" + }, + "transformations": { + "type": "STRING" + }, + "transformations_path": { + "type": "STRING" + }, + "validation_fraction": { + "type": "DOUBLE" + }, + "weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "dataset_stats": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "downsampled_test_split_json": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + }, + "test_split_json": { + "type": "STRING" + } + } + } + }, + "comp-write-bp-result-path": { + "executorLabel": "exec-write-bp-result-path", + "inputDefinitions": { + "artifacts": { + "bp_job": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "result": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-write-bp-result-path-2": { + "executorLabel": "exec-write-bp-result-path-2", + "inputDefinitions": { + "artifacts": { + "bp_job": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "result": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + } + } + }, + "deploymentSpec": { + "executors": { + "exec-automl-tabular-cv-trainer": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + 
"{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_cv_splits={{$.inputs.artifacts['materialized_cv_splits'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_custom_job=true\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-ensemble": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", 
\"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-ensemble-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + 
"exec-automl-tabular-finalizer": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-infra-validator": { + "container": { + "args": [ + "--executor_input", + "{{$}}" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", + "resources": { + "cpuLimit": 8.0, + "memoryLimit": 52.0 + } + } + }, + "exec-automl-tabular-infra-validator-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", + "resources": { + "cpuLimit": 8.0, + "memoryLimit": 52.0 + } + } + }, + "exec-automl-tabular-stage-1-tuner": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", 
\"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-stage-1-tuner-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + 
"google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-transform": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-transform-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + 
"{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-bool-identity": { + "container": { + "args": [ + "--value", + "{{$.inputs.parameters['value']}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef 
_serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bool-identity-2": { + "container": { + "args": [ + "--value", + "{{$.inputs.parameters['value']}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef _serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-merge-materialized-splits": { + "container": { + "args": [ + "--split-0", + "{{$.inputs.artifacts['split_0'].path}}", + "--split-1", + "{{$.inputs.artifacts['split_1'].path}}", + "--splits", + "{{$.outputs.artifacts['splits'].path}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef _merge_materialized_splits(\n split_0,\n split_1,\n splits,\n):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The first materialized split.\n split_1: The second materialized split.\n splits: The merged materialized split.\n \"\"\"\n with open(split_0, 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r') as f:\n 
split_1_content = f.read()\n with open(splits, 'w') as f:\n f.write(','.join([split_0_content, split_1_content]))\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Merge materialized splits', description='Merge two materialized splits.')\n_parser.add_argument(\"--split-0\", dest=\"split_0\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--split-1\", dest=\"split_1\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--splits\", dest=\"splits\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = _merge_materialized_splits(**_parsed_args)\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-model-batch-explanation": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-batch-explanation-2": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", 
\"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-batch-predict": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + 
"python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-2": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-3": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": 
{{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-4": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-evaluation": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + 
"{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-2": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", 
+ "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-3": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + 
"--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-4": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-import": { + "container": { + "args": [ + "--metrics", + "{{$.inputs.artifacts['metrics'].uri}}", + 
"--metrics_explanation", + "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", + "--explanation", + "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--display_name", + "{{$.inputs.parameters['display_name']}}", + "--dataset_path", + "{{$.inputs.parameters['dataset_path']}}", + "--dataset_paths", + "{{$.inputs.parameters['dataset_paths']}}", + "--dataset_type", + "{{$.inputs.parameters['dataset_type']}}", + "--pipeline_job_id", + "{{$.pipeline_job_uuid}}", + "--pipeline_job_resource_name", + "{{$.pipeline_job_resource_name}}", + "--model_name", + "{{$.inputs.artifacts['model'].metadata['resourceName']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-evaluation-import-2": { + "container": { + "args": [ + "--metrics", + "{{$.inputs.artifacts['metrics'].uri}}", + "--metrics_explanation", + "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", + "--explanation", + "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--display_name", + "{{$.inputs.parameters['display_name']}}", + "--dataset_path", + "{{$.inputs.parameters['dataset_path']}}", + "--dataset_paths", + "{{$.inputs.parameters['dataset_paths']}}", + "--dataset_type", + "{{$.inputs.parameters['dataset_type']}}", + "--pipeline_job_id", + "{{$.pipeline_job_uuid}}", + "--pipeline_job_resource_name", + "{{$.pipeline_job_resource_name}}", + "--model_name", + "{{$.inputs.artifacts['model'].metadata['resourceName']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-upload": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-upload-2": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": 
{{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-upload-3": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-read-input-uri": { + "container": { + "args": [ + "--split-uri", + "{{$.inputs.artifacts['split_uri'].path}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport 
os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-read-input-uri-2": { + "container": { + "args": [ + "--split-uri", + "{{$.inputs.artifacts['split_uri'].path}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-set-model-can-skip-validation": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_set_model_can_skip_validation" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _set_model_can_skip_validation(model: Input[Artifact]):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n model: The model artifact.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import os\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n # create an empty CAN_SKIP_VALIDATION file\n with tf.io.gfile.GFile(os.path.join(model.uri, 'CAN_SKIP_VALIDATION'),\n 'w') as f:\n f.write('')\n\n" + ], + "image": 
"us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + }, + "exec-tabular-stats-and-example-gen": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"stats_generator\",\"--train_spec={\\\"prediction_type\\\": \\\"{{$.inputs.parameters['prediction_type']}}\\\", \\\"target_column\\\": \\\"{{$.inputs.parameters['target_column_name']}}\\\", \\\"optimization_objective\\\": \\\"{{$.inputs.parameters['optimization_objective']}}\\\", \\\"weight_column_name\\\": \\\"{{$.inputs.parameters['weight_column_name']}}\\\", \\\"transformations\\\": {{$.inputs.parameters['transformations']}}}\", \"--transformations_override_path={{$.inputs.parameters['transformations_path']}}\", \"--split_spec={{$.inputs.parameters['split_spec']}}\", \"--data_source={{$.inputs.parameters['data_source']}}\", \"--data_source_csv_filenames={{$.inputs.parameters['data_source_csv_filenames']}}\", \"--data_source_bigquery_table_path={{$.inputs.parameters['data_source_bigquery_table_path']}}\", \"--predefined_split_key={{$.inputs.parameters['predefined_split_key']}}\", \"--timestamp_split_key={{$.inputs.parameters['timestamp_split_key']}}\", \"--stratified_split_key={{$.inputs.parameters['stratified_split_key']}}\", \"--training_fraction={{$.inputs.parameters['training_fraction']}}\", \"--validation_fraction={{$.inputs.parameters['validation_fraction']}}\", \"--test_fraction={{$.inputs.parameters['test_fraction']}}\", \"--target_column={{$.inputs.parameters['target_column_name']}}\", \"--request_type={{$.inputs.parameters['request_type']}}\", \"--optimization_objective_recall_value={{$.inputs.parameters['optimization_objective_recall_value']}}\", \"--optimization_objective_precision_value={{$.inputs.parameters['optimization_objective_precision_value']}}\", \"--example_gen_gcs_output_prefix={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", \"--dataset_stats_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", \"--stats_result_path={{$.outputs.artifacts['dataset_stats'].uri}}\", \"--dataset_schema_path={{$.outputs.artifacts['dataset_schema'].uri}}\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", 
\"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--additional_experiments={{$.inputs.parameters['additional_experiments']}}\", \"--metadata_path={{$.outputs.artifacts['metadata'].uri}}\", \"--train_split={{$.outputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.outputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.outputs.artifacts['test_split'].uri}}\", \"--test_split_for_batch_prediction_component={{$.outputs.parameters['test_split_json'].output_file}}\", \"--downsampled_test_split_for_batch_prediction_component={{$.outputs.parameters['downsampled_test_split_json'].output_file}}\", \"--instance_baseline_path={{$.outputs.artifacts['instance_baseline'].uri}}\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-write-bp-result-path": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_write_bp_result_path" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + }, + "exec-write-bp-result-path-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_write_bp_result_path" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: 
Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + } + } + }, + "pipelineInfo": { + "name": "automl-tabular-deprecated" + }, + "root": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + } + } + }, + "tasks": { + "automl-tabular-finalizer": { + "componentRef": { + "name": "comp-automl-tabular-finalizer" + }, + "dependentTasks": [ + "exit-handler-1" + ], + "inputs": { + "parameters": { + "encryption_spec_key_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "location": { + "componentInputParameter": "location" + }, + "project": { + "componentInputParameter": "project" + }, + "root_dir": { + "componentInputParameter": "root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-finalizer" + }, + "triggerPolicy": { + "strategy": "ALL_UPSTREAM_TASKS_COMPLETED" + } + }, + "exit-handler-1": { + "componentRef": { + "name": "comp-exit-handler-1" + }, + "inputs": { + "parameters": { + "pipelineparam--additional_experiments": { + "componentInputParameter": "additional_experiments" + }, + "pipelineparam--cv_trainer_worker_pool_specs_override": { + "componentInputParameter": "cv_trainer_worker_pool_specs_override" + }, + "pipelineparam--data_source": { + "componentInputParameter": "data_source" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "dataflow_use_public_ips" + }, + "pipelineparam--disable_early_stopping": { + "componentInputParameter": "disable_early_stopping" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "componentInputParameter": "distill_batch_predict_machine_type" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "componentInputParameter": "distill_batch_predict_max_replica_count" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "componentInputParameter": "distill_batch_predict_starting_replica_count" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "componentInputParameter": "distill_stage_1_deadline_hours" + }, + "pipelineparam--encryption_spec_key_name": { + 
"componentInputParameter": "encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "evaluation_dataflow_max_num_workers" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "componentInputParameter": "export_additional_model_without_custom_ops" + }, + "pipelineparam--location": { + "componentInputParameter": "location" + }, + "pipelineparam--optimization_objective": { + "componentInputParameter": "optimization_objective" + }, + "pipelineparam--optimization_objective_precision_value": { + "componentInputParameter": "optimization_objective_precision_value" + }, + "pipelineparam--optimization_objective_recall_value": { + "componentInputParameter": "optimization_objective_recall_value" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "project" + }, + "pipelineparam--reduce_search_space_mode": { + "componentInputParameter": "reduce_search_space_mode" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "root_dir" + }, + "pipelineparam--run_distillation": { + "componentInputParameter": "run_distillation" + }, + "pipelineparam--run_evaluation": { + "componentInputParameter": "run_evaluation" + }, + "pipelineparam--split_spec": { + "componentInputParameter": "split_spec" + }, + "pipelineparam--stage_1_deadline_hours": { + "componentInputParameter": "stage_1_deadline_hours" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "componentInputParameter": "stage_1_num_parallel_trials" + }, + "pipelineparam--stage_1_num_selected_trials": { + "componentInputParameter": "stage_1_num_selected_trials" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "componentInputParameter": "stage_1_single_run_max_secs" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "componentInputParameter": "stage_1_tuner_worker_pool_specs_override" + }, + "pipelineparam--stage_2_deadline_hours": { + "componentInputParameter": "stage_2_deadline_hours" + }, + "pipelineparam--stage_2_num_parallel_trials": { + "componentInputParameter": "stage_2_num_parallel_trials" + }, + "pipelineparam--stage_2_num_selected_trials": { + "componentInputParameter": "stage_2_num_selected_trials" + }, + "pipelineparam--stage_2_single_run_max_secs": { + "componentInputParameter": "stage_2_single_run_max_secs" + }, + "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { + "componentInputParameter": "stats_and_example_gen_dataflow_disk_size_gb" + }, + "pipelineparam--stats_and_example_gen_dataflow_machine_type": { + "componentInputParameter": "stats_and_example_gen_dataflow_machine_type" + }, + "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { + "componentInputParameter": "stats_and_example_gen_dataflow_max_num_workers" + }, + 
"pipelineparam--study_spec_override": { + "componentInputParameter": "study_spec_override" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "target_column_name" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "componentInputParameter": "transform_dataflow_disk_size_gb" + }, + "pipelineparam--transform_dataflow_machine_type": { + "componentInputParameter": "transform_dataflow_machine_type" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "componentInputParameter": "transform_dataflow_max_num_workers" + }, + "pipelineparam--transformations": { + "componentInputParameter": "transformations" + }, + "pipelineparam--weight_column_name": { + "componentInputParameter": "weight_column_name" + } + } + }, + "taskInfo": { + "name": "exit-handler-1" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "additional_experiments": { + "type": "STRING" + }, + "cv_trainer_worker_pool_specs_override": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "distill_batch_predict_machine_type": { + "type": "STRING" + }, + "distill_batch_predict_max_replica_count": { + "type": "INT" + }, + "distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "optimization_objective": { + "type": "STRING" + }, + "optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "prediction_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "run_evaluation": { + "type": "STRING" + }, + "split_spec": { + "type": "STRING" + }, + "stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "stage_1_num_parallel_trials": { + "type": "INT" + }, + "stage_1_num_selected_trials": { + "type": "INT" + }, + "stage_1_single_run_max_secs": { + "type": "INT" + }, + "stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "stage_2_deadline_hours": { + "type": "DOUBLE" + }, + "stage_2_num_parallel_trials": { + "type": "INT" + }, + "stage_2_num_selected_trials": { + "type": "INT" + }, + "stage_2_single_run_max_secs": { + "type": "INT" + }, + "stats_and_example_gen_dataflow_disk_size_gb": { + "type": "INT" + }, + "stats_and_example_gen_dataflow_machine_type": { + "type": "STRING" + }, + "stats_and_example_gen_dataflow_max_num_workers": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "transform_dataflow_machine_type": { + "type": "STRING" + 
}, + "transform_dataflow_max_num_workers": { + "type": "INT" + }, + "transformations": { + "type": "STRING" + }, + "weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "schemaVersion": "2.0.0", + "sdkVersion": "kfp-1.8.11" + }, + "runtimeConfig": { + "parameters": { + "additional_experiments": { + "stringValue": "" + }, + "cv_trainer_worker_pool_specs_override": { + "stringValue": "" + }, + "dataflow_service_account": { + "stringValue": "" + }, + "dataflow_subnetwork": { + "stringValue": "" + }, + "dataflow_use_public_ips": { + "stringValue": "True" + }, + "disable_early_stopping": { + "stringValue": "False" + }, + "distill_batch_predict_machine_type": { + "stringValue": "n1-standard-16" + }, + "distill_batch_predict_max_replica_count": { + "intValue": "25" + }, + "distill_batch_predict_starting_replica_count": { + "intValue": "25" + }, + "distill_stage_1_deadline_hours": { + "doubleValue": 1.0 + }, + "encryption_spec_key_name": { + "stringValue": "" + }, + "evaluation_batch_predict_machine_type": { + "stringValue": "n1-standard-16" + }, + "evaluation_batch_predict_max_replica_count": { + "intValue": "25" + }, + "evaluation_batch_predict_starting_replica_count": { + "intValue": "25" + }, + "evaluation_dataflow_disk_size_gb": { + "intValue": "50" + }, + "evaluation_dataflow_machine_type": { + "stringValue": "n1-standard-4" + }, + "evaluation_dataflow_max_num_workers": { + "intValue": "25" + }, + "export_additional_model_without_custom_ops": { + "stringValue": "False" + }, + "optimization_objective_precision_value": { + "doubleValue": -1.0 + }, + "optimization_objective_recall_value": { + "doubleValue": -1.0 + }, + "reduce_search_space_mode": { + "stringValue": "regular" + }, + "run_distillation": { + "stringValue": "False" + }, + "run_evaluation": { + "stringValue": "False" + }, + "stage_1_tuner_worker_pool_specs_override": { + "stringValue": "" + }, + "stats_and_example_gen_dataflow_disk_size_gb": { + "intValue": "40" + }, + "stats_and_example_gen_dataflow_machine_type": { + "stringValue": "n1-standard-16" + }, + "stats_and_example_gen_dataflow_max_num_workers": { + "intValue": "25" + }, + "study_spec_override": { + "stringValue": "" + }, + "transform_dataflow_disk_size_gb": { + "intValue": "40" + }, + "transform_dataflow_machine_type": { + "stringValue": "n1-standard-16" + }, + "transform_dataflow_max_num_workers": { + "intValue": "25" + }, + "weight_column_name": { + "stringValue": "" + } + } + } +} \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py new file mode 100644 index 0000000000..1afdbfa157 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -0,0 +1,167 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AutoML Tabular Ensemble component spec."""
+
+from typing import Optional
+
+from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel
+from kfp import dsl
+from kfp.dsl import Artifact
+from kfp.dsl import Dataset
+from kfp.dsl import Input
+from kfp.dsl import Output
+
+
+@dsl.container_component
+def automl_tabular_ensemble(
+    project: str,
+    location: str,
+    root_dir: str,
+    transform_output: Input[Artifact],
+    metadata: Input[Artifact],
+    dataset_schema: Input[Artifact],
+    tuning_result_input: Input[Artifact],
+    instance_baseline: Input[Artifact],
+    gcp_resources: dsl.OutputPath(str),
+    model_architecture: Output[Artifact],
+    model: Output[Artifact],
+    unmanaged_container_model: Output[UnmanagedContainerModel],
+    model_without_custom_ops: Output[Artifact],
+    explanation_metadata: dsl.OutputPath(dict),
+    explanation_metadata_artifact: Output[Artifact],
+    explanation_parameters: dsl.OutputPath(dict),
+    warmup_data: Optional[Input[Dataset]] = None,
+    encryption_spec_key_name: Optional[str] = '',
+    export_additional_model_without_custom_ops: Optional[bool] = False,
+):
+  # fmt: off
+  """Ensembles AutoML Tabular models.
+
+  Args:
+    project: Project to run Cross-validation trainer.
+    location: Location for running the Cross-validation trainer.
+    root_dir: The Cloud Storage location to store the output.
+    transform_output: The transform output artifact.
+    metadata: The tabular example gen metadata.
+    dataset_schema: The schema of the dataset.
+    tuning_result_input: AutoML Tabular tuning result.
+    instance_baseline: The instance baseline used to calculate explanations.
+    warmup_data: The warm up data. The ensemble component saves the warm up
+      data together with the model artifact, used to warm up the model when
+      the prediction server starts.
+    encryption_spec_key_name: Customer-managed encryption key.
+    export_additional_model_without_custom_ops: True if exporting an
+      additional model without custom TF operators to the
+      `model_without_custom_ops` output.
+
+  Returns:
+    gcp_resources: GCP resources created by this component. For more details, see
+      https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
+    model_architecture: The architecture of the output model.
+    model: The output model.
+    model_without_custom_ops: The output model without custom TF operators; this output will be empty unless `export_additional_model_without_custom_ops` is set.
+    model_uri: The URI of the output model.
+    instance_schema_uri: The URI of the instance schema.
+    prediction_schema_uri: The URI of the prediction schema.
+    explanation_metadata: The explanation metadata used by Vertex online and batch explanations.
+    explanation_parameters: The explanation parameters used by Vertex online and batch explanations.
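+
+  Example:
+    An illustrative usage sketch (not part of this component). The
+    `gs://example-bucket/...` URIs and the `_gcs_artifact` helper are
+    placeholders; in a real pipeline these inputs come from the upstream
+    transform, example gen, and stage 1 tuner tasks rather than importers.
+
+      from kfp import dsl
+      from google_cloud_pipeline_components.v1.automl.tabular.ensemble import (
+          automl_tabular_ensemble,
+      )
+
+      def _gcs_artifact(uri: str):
+        # Stand-in for an upstream task output, for illustration only.
+        return dsl.importer(artifact_uri=uri, artifact_class=dsl.Artifact)
+
+      @dsl.pipeline(name='ensemble-usage-sketch')
+      def ensemble_sketch(project: str, location: str, root_dir: str):
+        automl_tabular_ensemble(
+            project=project,
+            location=location,
+            root_dir=root_dir,
+            transform_output=_gcs_artifact('gs://example-bucket/transform').output,
+            metadata=_gcs_artifact('gs://example-bucket/metadata').output,
+            dataset_schema=_gcs_artifact('gs://example-bucket/schema').output,
+            tuning_result_input=_gcs_artifact('gs://example-bucket/tuning').output,
+            instance_baseline=_gcs_artifact('gs://example-bucket/baseline').output,
+        )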
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["ensemble", "--transform_output_path=', + transform_output.uri, + '", "--model_output_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model",' + ' "--custom_model_output_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/custom_model",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--export_custom_model=' + ), + export_additional_model_without_custom_ops, + '", "--metadata_path=', + metadata.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--warmup_data=', + warmup_data.uri, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--model_path=', + model.uri, + '", "--custom_model_path=', + model_without_custom_ops.uri, + '", "--explanation_metadata_path=', + explanation_metadata, + ',', + explanation_metadata_artifact.uri, + '", "--explanation_parameters_path=', + explanation_parameters, + '", "--model_architecture_path=', + model_architecture.uri, + ( + '", "--use_json=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py new file mode 100644 index 0000000000..ea36d7d297 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -0,0 +1,88 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Pipeline Finalizer component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def automl_tabular_finalizer( + project: str, + location: str, + root_dir: str, + gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Finalizes AutoML Tabular pipelines. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-finalizer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["cancel_l2l_tuner", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--cleanup_lro_job_infos=' + ), + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro"' + ']}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py new file mode 100644 index 0000000000..8fc6b00ec9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -0,0 +1,39 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Infra Validator component spec.""" + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Input + + +@dsl.container_component +def automl_tabular_infra_validator( + unmanaged_container_model: Input[UnmanagedContainerModel], # pylint: disable=unused-argument +): + # fmt: off + """Validates the trained AutoML Tabular model is a valid model. + + Args: + unmanaged_container_model: google.UnmanagedContainerModel for model + to be validated. 
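+
+  Example:
+    An illustrative sketch (not part of this component). In a pipeline the
+    validator normally consumes the `unmanaged_container_model` output of
+    the ensemble step; here a placeholder artifact is imported instead.
+
+      from kfp import dsl
+      from google_cloud_pipeline_components.types.artifact_types import (
+          UnmanagedContainerModel,
+      )
+      from google_cloud_pipeline_components.v1.automl.tabular.infra_validator import (
+          automl_tabular_infra_validator,
+      )
+
+      @dsl.pipeline(name='infra-validator-sketch')
+      def validator_sketch():
+        model = dsl.importer(
+            artifact_uri='gs://example-bucket/model',  # placeholder URI
+            artifact_class=UnmanagedContainerModel)
+        automl_tabular_infra_validator(
+            unmanaged_container_model=model.output)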
+  """
+  # fmt: on
+
+  return dsl.ContainerSpec(
+      image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325',
+      command=[],
+      args=['--executor_input', '{{$}}'],
+  )
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py
new file mode 100644
index 0000000000..29091ded20
--- /dev/null
+++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py
@@ -0,0 +1,119 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AutoML Split Materialized Data component spec."""
+
+from kfp import dsl
+from kfp.dsl import Artifact
+from kfp.dsl import Dataset
+from kfp.dsl import Input
+from kfp.dsl import Output
+
+
+@dsl.container_component
+def split_materialized_data(
+    materialized_data: Input[Dataset],
+    materialized_train_split: Output[Artifact],
+    materialized_eval_split: Output[Artifact],
+    materialized_test_split: Output[Artifact],
+):
+  # fmt: off
+  """Splits materialized dataset into train, eval, and test data splits.
+
+  The materialized dataset generated by the Feature Transform Engine consists
+  of all the splits that were combined into the input transform dataset
+  (i.e., train, eval, and test splits). This component splits the output
+  materialized dataset into the corresponding materialized data splits so
+  that the splits can be used by downstream training or evaluation
+  components.
+
+  Args:
+    materialized_data: Materialized dataset output by the Feature
+      Transform Engine.
+
+  Returns:
+    materialized_train_split: Path pattern to materialized train split.
+    materialized_eval_split: Path pattern to materialized eval split.
+    materialized_test_split: Path pattern to materialized test split.
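+
+  Example:
+    An illustrative sketch (not part of this component). The
+    `gs://example-bucket/...` URI is a placeholder; in a real pipeline the
+    materialized dataset is produced by the Feature Transform Engine task.
+
+      from kfp import dsl
+      from google_cloud_pipeline_components.v1.automl.tabular.split_materialized_data import (
+          split_materialized_data,
+      )
+
+      @dsl.pipeline(name='split-materialized-data-sketch')
+      def split_sketch():
+        materialized_data = dsl.importer(
+            artifact_uri='gs://example-bucket/materialized_data',
+            artifact_class=dsl.Dataset)
+        split_task = split_materialized_data(
+            materialized_data=materialized_data.output)
+        # split_task.outputs['materialized_train_split'],
+        # split_task.outputs['materialized_eval_split'], and
+        # split_task.outputs['materialized_test_split'] can then be passed
+        # to downstream training or evaluation components.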
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + command=[ + 'sh', + '-ec', + ( + 'program_path=$(mktemp -d)\nprintf "%s" "$0" >' + ' "$program_path/ephemeral_component.py"\npython3 -m' + ' kfp.components.executor_main ' + ' --component_module_path ' + ' "$program_path/ephemeral_component.py" ' + ' "$@"\n' + ), + ( + '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom' + ' typing import *\n\ndef _split_materialized_data(\n ' + ' materialized_data: Input[Dataset],\n ' + " materialized_train_split: OutputPath('MaterializedSplit'),\n " + " materialized_eval_split: OutputPath('MaterializedSplit'),\n " + " materialized_test_split: OutputPath('MaterializedSplit')):\n " + ' """Splits materialized_data into materialized_data test,' + ' train, and eval splits.\n\n Necessary adapter between FTE' + ' pipeline and trainer.\n\n Args:\n materialized_data:' + ' materialized_data dataset output by FTE.\n ' + ' materialized_train_split: Path patern to' + ' materialized_train_split.\n materialized_eval_split: Path' + ' patern to materialized_eval_split.\n ' + ' materialized_test_split: Path patern to' + ' materialized_test_split.\n """\n # pylint:' + ' disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n' + ' import json\n import tensorflow as tf\n # pylint:' + ' enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n' + " with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n " + ' artifact_path = f.read()\n\n # needed to import tf because' + ' this is a path in gs://\n with' + " tf.io.gfile.GFile(artifact_path, 'r') as f:\n " + ' materialized_data_json = json.load(f)\n\n if' + " 'tf_record_data_source' in materialized_data_json:\n " + ' file_patterns =' + " materialized_data_json['tf_record_data_source'][\n " + " 'file_patterns']\n elif 'avro_data_source' in" + ' materialized_data_json:\n file_patterns =' + " materialized_data_json['avro_data_source'][\n " + " 'file_patterns']\n elif 'parquet_data_source' in" + ' materialized_data_json:\n file_patterns =' + " materialized_data_json['parquet_data_source'][\n " + " 'file_patterns']\n else:\n raise ValueError(f'Unsupported" + " training data source: {materialized_data_json}')\n\n # we map" + ' indices to file patterns based on the ordering of insertion' + ' order\n # in our transform_data (see above in' + ' _generate_analyze_and_transform_data)\n with' + " tf.io.gfile.GFile(materialized_train_split, 'w') as f:\n " + ' f.write(file_patterns[0])\n\n with' + " tf.io.gfile.GFile(materialized_eval_split, 'w') as f:\n " + ' f.write(file_patterns[1])\n\n with' + " tf.io.gfile.GFile(materialized_test_split, 'w') as f:\n " + ' f.write(file_patterns[2])\n\n' + ), + ], + args=[ + '--executor_input', + '{{$}}', + '--function_to_execute', + '_split_materialized_data', + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py new file mode 100644 index 0000000000..095837620d --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -0,0 +1,189 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabular Stage 1 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_stage_1_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + execution_metrics: dsl.OutputPath(dict), + study_spec_parameters_override: Optional[list] = [], + worker_pool_specs_override_json: Optional[list] = [], + reduce_search_space_mode: Optional[str] = 'regular', + num_selected_features: Optional[int] = 0, + disable_early_stopping: Optional[bool] = False, + feature_ranking: Optional[Input[Artifact]] = None, + tune_feature_selection_rate: Optional[bool] = False, + encryption_spec_key_name: Optional[str] = '', + run_distillation: Optional[bool] = False, +): + # fmt: off + """Searches AutoML Tabular architectures and selects the top trials. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + study_spec_parameters_override: JSON study spec. E.g., + [{"parameter_id": "model_type","categorical_value_spec": {"values": + ["nn"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + reduce_search_space_mode: The reduce search space mode. Possible + values: "regular" (default), "minimal", "full". + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. The number of + features to learn in the NN models. + deadline_hours: Number of hours the cross-validation trainer + should run. + disable_early_stopping: True if disable early stopping. Default + value is false. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The tabular example gen metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + run_distillation: True if in distillation mode. The default value + is false. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained model and architectures. + execution_metrics: Core metrics in dictionary of component execution. 
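+
+  Example:
+    An illustrative usage sketch (not part of this component). The
+    `gs://example-bucket/...` URIs and the `_gcs_artifact` helper are
+    placeholders, and the numeric values are arbitrary; in a real pipeline
+    the artifact inputs come from the stats/example gen and transform tasks.
+
+      from kfp import dsl
+      from google_cloud_pipeline_components.v1.automl.tabular.stage_1_tuner import (
+          automl_tabular_stage_1_tuner,
+      )
+
+      def _gcs_artifact(uri: str):
+        # Stand-in for an upstream task output, for illustration only.
+        return dsl.importer(artifact_uri=uri, artifact_class=dsl.Artifact)
+
+      @dsl.pipeline(name='stage-1-tuner-sketch')
+      def tuner_sketch(project: str, location: str, root_dir: str):
+        automl_tabular_stage_1_tuner(
+            project=project,
+            location=location,
+            root_dir=root_dir,
+            num_selected_trials=5,
+            deadline_hours=1.0,
+            num_parallel_trials=5,
+            single_run_max_secs=3600,
+            metadata=_gcs_artifact('gs://example-bucket/metadata').output,
+            transform_output=_gcs_artifact('gs://example-bucket/transform').output,
+            materialized_train_split=_gcs_artifact('gs://example-bucket/train').output,
+            materialized_eval_split=_gcs_artifact('gs://example-bucket/eval').output,
+            study_spec_parameters_override=[{
+                'parameter_id': 'model_type',
+                'categorical_value_spec': {'values': ['nn']},
+            }],
+        )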
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "--feature_selection_result_path=', + feature_ranking.uri, + '", "--disable_early_stopping=', + disable_early_stopping, + '", "--tune_feature_selection_rate=', + tune_feature_selection_rate, + '", "--reduce_search_space_mode=', + reduce_search_space_mode, + ( + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "--training_base_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--num_parallel_trial=' + ), + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--num_selected_features=', + num_selected_features, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--is_distill=', + run_distillation, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--execution_metrics_path=', + execution_metrics, + ( + '", "--use_json=true", "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py new file mode 100644 index 0000000000..6c7e915dbe --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -0,0 +1,304 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Stats and Example Generation component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Output + + +@dsl.container_component +def tabular_stats_and_example_gen( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + transformations: str, + dataset_schema: Output[Artifact], + dataset_stats: Output[Artifact], + train_split: Output[Dataset], + eval_split: Output[Dataset], + test_split: Output[Dataset], + test_split_json: dsl.OutputPath(list), + downsampled_test_split_json: dsl.OutputPath(list), + instance_baseline: Output[Artifact], + metadata: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + weight_column_name: Optional[str] = '', + optimization_objective: Optional[str] = '', + optimization_objective_recall_value: Optional[float] = -1, + optimization_objective_precision_value: Optional[float] = -1, + transformations_path: Optional[str] = '', + request_type: Optional[str] = 'COLUMN_STATS_ONLY', + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', + run_distillation: Optional[bool] = False, + additional_experiments: Optional[str] = '', + additional_experiments_json: Optional[dict] = {}, + data_source_csv_filenames: Optional[str] = '', + data_source_bigquery_table_path: Optional[str] = '', + predefined_split_key: Optional[str] = '', + timestamp_split_key: Optional[str] = '', + stratified_split_key: Optional[str] = '', + training_fraction: Optional[float] = -1, + validation_fraction: Optional[float] = -1, + test_fraction: Optional[float] = -1, + quantiles: Optional[list] = [], + enable_probabilistic_inference: Optional[bool] = False, +): + # fmt: off + """Generates stats and training instances for tabular data. + + Args: + project: Project to run dataset statistics and example + generation. + location: Location for running dataset statistics and example + generation. + root_dir: The Cloud Storage location to store the output. + target_column_name: The target column name. + weight_column_name: The weight column name. + prediction_type: The prediction type. Supported values: + "classification", "regression". + optimization_objective: Objective function the model is optimizing + towards. The training process creates a model that maximizes/minimizes + the value of the objective function over the validation set. The + supported optimization objectives depend on the prediction type. If the + field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the + area under the receiver operating characteristic (ROC) curve. + "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - + Maximize the area under the precision-recall curve. + "maximize-precision-at-recall" - Maximize precision for a specified + recall value. "maximize-recall-at-precision" - Maximize recall for a + specified precision value. + classification (multi-class): "minimize-log-loss" (default) - Minimize + log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared + error (RMSE). 
"minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when + optimization_objective is "maximize-precision-at-recall". Must be + between 0 and 1, inclusive. + optimization_objective_precision_value: Required when + optimization_objective is "maximize-recall-at-precision". Must be + between 0 and 1, inclusive. + transformations: Quote escaped JSON string for transformations. Each + transformation will apply transform function to given input column. And + the result will be used for training. When creating transformation for + BigQuery Struct column, the column should be flattened using "." as the + delimiter. + transformations_path: Path to a GCS file containing JSON + string for transformations. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More + details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + run_distillation: True if in distillation mode. The default value + is false. + + Returns: + dataset_schema: The schema of the dataset. + dataset_stats: The stats of the dataset. + train_split: The train split. + eval_split: The eval split. + test_split: The test split. + test_split_json: The test split JSON object. + downsampled_test_split_json: The downsampled test split JSON object. + instance_baseline: The instance baseline used to calculate explanations. + metadata: The tabular example gen metadata. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["stats_generator",', + '"--train_spec={\\"prediction_type\\": \\"', + prediction_type, + '\\", \\"target_column\\": \\"', + target_column_name, + '\\", \\"optimization_objective\\": \\"', + optimization_objective, + '\\", \\"weight_column_name\\": \\"', + weight_column_name, + '\\", \\"transformations\\": ', + transformations, + ', \\"quantiles\\": ', + quantiles, + ', \\"enable_probabilistic_inference\\": ', + enable_probabilistic_inference, + '}", "--transformations_override_path=', + transformations_path, + '", "--data_source_csv_filenames=', + data_source_csv_filenames, + '", "--data_source_bigquery_table_path=', + data_source_bigquery_table_path, + '", "--predefined_split_key=', + predefined_split_key, + '", "--timestamp_split_key=', + timestamp_split_key, + '", "--stratified_split_key=', + stratified_split_key, + '", "--training_fraction=', + training_fraction, + '", "--validation_fraction=', + validation_fraction, + '", "--test_fraction=', + test_fraction, + '", "--target_column=', + target_column_name, + '", "--request_type=', + request_type, + '", "--optimization_objective_recall_value=', + optimization_objective_recall_value, + '", "--optimization_objective_precision_value=', + optimization_objective_precision_value, + '", "--example_gen_gcs_output_prefix=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/example_gen_output",' + ' "--dataset_stats_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/stats/",' + ' "--stats_result_path=' + ), + dataset_stats.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + ( + f'", "--job_name=tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", 
"--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--is_distill=', + run_distillation, + '", "--additional_experiments=', + additional_experiments, + '", "--metadata_path=', + metadata.uri, + '", "--train_split=', + train_split.uri, + '", "--eval_split=', + eval_split.uri, + '", "--test_split=', + test_split.uri, + '", "--test_split_for_batch_prediction_component=', + test_split_json, + ( + '", "--downsampled_test_split_for_batch_prediction_component=' + ), + downsampled_test_split_json, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--gcp_resources_path=' + ), + gcp_resources, + ( + '", "--parse_json=true",' + ' "--generate_additional_downsample_test_split=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py new file mode 100644 index 0000000000..d4ff9c5473 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -0,0 +1,285 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Training Configurator and Validator component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def training_configurator_and_validator( + dataset_stats: Input[Artifact], + split_example_counts: str, + training_schema: Input[Artifact], + instance_schema: Input[Artifact], + metadata: Output[Artifact], + instance_baseline: Output[Artifact], + target_column: Optional[str] = '', + weight_column: Optional[str] = '', + prediction_type: Optional[str] = '', + optimization_objective: Optional[str] = '', + optimization_objective_recall_value: Optional[float] = -1, + optimization_objective_precision_value: Optional[float] = -1, + run_evaluation: Optional[bool] = False, + run_distill: Optional[bool] = False, + enable_probabilistic_inference: Optional[bool] = False, + time_series_identifier_column: Optional[str] = '', + time_column: Optional[str] = '', + time_series_attribute_columns: Optional[list] = [], + available_at_forecast_columns: Optional[list] = [], + unavailable_at_forecast_columns: Optional[list] = [], + quantiles: Optional[list] = [], + context_window: Optional[int] = -1, + forecast_horizon: Optional[int] = -1, + forecasting_model_type: Optional[str] = '', + forecasting_transformations: Optional[dict] = {}, + stage_1_deadline_hours: Optional[float] = None, + stage_2_deadline_hours: Optional[float] = None, + group_columns: Optional[list] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +): + # fmt: off + """Configures training and validates data and user-input configurations. + + Args: + dataset_stats: Dataset stats generated by + feature transform engine. + split_example_counts: JSON string of data split example counts for + train, validate, and test splits. + training_schema_path: Schema of input data to the tf_model + at training time. + instance_schema: Schema of input data to the tf_model at + serving time. + target_column: Target column of input data. + weight_column: Weight column of input data. + prediction_type: Model prediction type. One of "classification", + "regression", "time_series". + optimization_objective: Objective function the model is optimizing + towards. The training process creates a model that maximizes/minimizes + the value of the objective function over the validation set. The + supported optimization objectives depend on the prediction type. If the + field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the + area under the receiver operating characteristic (ROC) curve. + "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - + Maximize the area under the precision-recall curve. + "maximize-precision-at-recall" - Maximize precision for a specified + recall value. "maximize-recall-at-precision" - Maximize recall for a + specified precision value. + classification (multi-class): "minimize-log-loss" (default) - Minimize + log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared + error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when + optimization_objective is "maximize-precision-at-recall". Must be + between 0 and 1, inclusive. 
+    optimization_objective_precision_value: Required when
+      optimization_objective is "maximize-recall-at-precision". Must be
+      between 0 and 1, inclusive.
+    run_evaluation: Whether to run evaluation in the training
+      pipeline.
+    run_distill: Whether distillation should be applied to the
+      training.
+    enable_probabilistic_inference: If probabilistic inference is
+      enabled, the model will fit a distribution that captures the uncertainty
+      of a prediction. At inference time, the predictive distribution is used
+      to make a point prediction that minimizes the optimization objective.
+      For example, the mean of a predictive distribution is the point
+      prediction that minimizes RMSE loss. If quantiles are specified, then
+      the quantiles of the distribution are also returned.
+    time_series_identifier_column: Time series identifier column. Used by
+      forecasting only.
+    time_column: The column that indicates the time. Used by forecasting
+      only.
+    time_series_attribute_columns: The column names of the time series
+      attributes.
+    available_at_forecast_columns: The names of the columns that are
+      available at forecast time.
+    unavailable_at_forecast_columns: The names of the columns that are
+      not available at forecast time.
+    quantiles: All quantiles that the model needs to predict.
+    context_window: The length of the context window.
+    forecast_horizon: The length of the forecast horizon.
+    forecasting_model_type: The model type, e.g. l2l, seq2seq, tft.
+    forecasting_transformations: Dict mapping auto and/or type-resolutions to
+      feature columns. The supported types are auto, categorical, numeric,
+      text, and timestamp.
+    stage_1_deadline_hours: Stage 1 training budget in hours.
+    stage_2_deadline_hours: Stage 2 training budget in hours.
+    group_columns: A list of time series attribute column
+      names that define the time series hierarchy.
+    group_total_weight: The weight of the loss for
+      predictions aggregated over time series in the same group.
+    temporal_total_weight: The weight of the loss for
+      predictions aggregated over the horizon for a single time series.
+    group_temporal_total_weight: The weight of the loss for
+      predictions aggregated over both the horizon and time series in the same
+      hierarchy group.
+
+  Returns:
+    metadata: The tabular example gen metadata.
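+
+  Example:
+    An illustrative invocation inside a KFP pipeline. `stats_task` and
+    `transform_task` stand in for upstream tasks that produce the
+    referenced artifacts; the literal values and the exact artifact
+    wiring shown are placeholders:
+
+      configurator_task = training_configurator_and_validator(
+          dataset_stats=stats_task.outputs['dataset_stats'],
+          split_example_counts='{"train": 800, "validate": 100, "test": 100}',
+          training_schema=transform_task.outputs['training_schema_uri'],
+          instance_schema=stats_task.outputs['dataset_schema'],
+          target_column='label',
+          prediction_type='classification',
+          optimization_objective='maximize-au-roc',
+      )
+      # configurator_task.outputs['metadata'] is then passed to the
+      # training components.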
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + command=[], + args=[ + 'training_configurator_and_validator', + dsl.ConcatPlaceholder( + items=['--instance_schema_path=', instance_schema.uri] + ), + dsl.ConcatPlaceholder( + items=['--training_schema_path=', training_schema.uri] + ), + dsl.ConcatPlaceholder( + items=['--dataset_stats_path=', dataset_stats.uri] + ), + dsl.ConcatPlaceholder( + items=['--split_example_counts=', split_example_counts] + ), + dsl.ConcatPlaceholder(items=['--target_column=', target_column]), + dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), + dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), + dsl.ConcatPlaceholder( + items=['--optimization_objective=', optimization_objective] + ), + dsl.ConcatPlaceholder( + items=[ + '--optimization_objective_recall_value=', + optimization_objective_recall_value, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--optimization_objective_precision_value=', + optimization_objective_precision_value, + ] + ), + dsl.ConcatPlaceholder(items=['--metadata_path=', metadata.uri]), + dsl.ConcatPlaceholder( + items=['--instance_baseline_path=', instance_baseline.uri] + ), + dsl.ConcatPlaceholder(items=['--run_evaluation=', run_evaluation]), + dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), + dsl.ConcatPlaceholder( + items=[ + '--enable_probabilistic_inference=', + enable_probabilistic_inference, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--time_series_identifier_column=', + time_series_identifier_column, + ] + ), + dsl.ConcatPlaceholder(items=['--time_column=', time_column]), + dsl.ConcatPlaceholder( + items=[ + '--time_series_attribute_columns=', + time_series_attribute_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--available_at_forecast_columns=', + available_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--unavailable_at_forecast_columns=', + unavailable_at_forecast_columns, + ] + ), + dsl.IfPresentPlaceholder( + input_name='quantiles', + then=dsl.ConcatPlaceholder( + items=[ + '--quantiles=', + quantiles, + ] + ), + ), + dsl.ConcatPlaceholder(items=['--context_window=', context_window]), + dsl.ConcatPlaceholder( + items=['--forecast_horizon=', forecast_horizon] + ), + dsl.ConcatPlaceholder( + items=['--forecasting_model_type=', forecasting_model_type] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_transformations=', + forecasting_transformations, + ] + ), + dsl.IfPresentPlaceholder( + input_name='stage_1_deadline_hours', + then=dsl.ConcatPlaceholder( + items=[ + '--stage_1_deadline_hours=', + stage_1_deadline_hours, + ] + ), + ), + dsl.IfPresentPlaceholder( + input_name='stage_2_deadline_hours', + then=dsl.ConcatPlaceholder( + items=[ + '--stage_2_deadline_hours=', + stage_2_deadline_hours, + ] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_columns', + then=dsl.ConcatPlaceholder( + items=['--group_columns=', group_columns] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_total_weight', + then=dsl.ConcatPlaceholder( + items=['--group_total_weight=', group_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=['--temporal_total_weight=', temporal_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=[ + '--group_temporal_total_weight=', + group_temporal_total_weight, + ] + ), + ), + 
],
+ )
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py
new file mode 100644
index 0000000000..c9ab7ef401
--- /dev/null
+++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py
@@ -0,0 +1,200 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""AutoML Transform component spec."""
+
+from typing import Optional
+
+from kfp import dsl
+from kfp.dsl import Artifact
+from kfp.dsl import Dataset
+from kfp.dsl import Input
+from kfp.dsl import Output
+
+
+@dsl.container_component
+def automl_tabular_transform(
+    project: str,
+    location: str,
+    root_dir: str,
+    metadata: Input[Artifact],
+    dataset_schema: Input[Artifact],
+    train_split: Input[Dataset],
+    eval_split: Input[Dataset],
+    test_split: Input[Dataset],
+    materialized_train_split: Output[Artifact],
+    materialized_eval_split: Output[Artifact],
+    materialized_test_split: Output[Artifact],
+    training_schema_uri: Output[Artifact],
+    transform_output: Output[Artifact],
+    gcp_resources: dsl.OutputPath(str),
+    dataflow_machine_type: Optional[str] = 'n1-standard-16',
+    dataflow_max_num_workers: Optional[int] = 25,
+    dataflow_disk_size_gb: Optional[int] = 40,
+    dataflow_subnetwork: Optional[str] = '',
+    dataflow_use_public_ips: Optional[bool] = True,
+    dataflow_service_account: Optional[str] = '',
+    encryption_spec_key_name: Optional[str] = '',
+):
+  # fmt: off
+  """Transforms raw features to engineered features.
+
+  Args:
+    project: Project to run the transform job.
+    location: Location for running the transform job.
+    root_dir: The Cloud Storage location to store the output.
+    metadata: The tabular example gen metadata.
+    dataset_schema: The schema of the dataset.
+    train_split: The train split.
+    eval_split: The eval split.
+    test_split: The test split.
+    dataflow_machine_type: The machine type used for dataflow
+      jobs. If not set, default to n1-standard-16.
+    dataflow_max_num_workers: The number of workers to run the
+      dataflow job. If not set, default to 25.
+    dataflow_disk_size_gb: The disk size, in gigabytes, to use
+      on each Dataflow worker instance. If not set, default to 40.
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork
+      name, when empty the default subnetwork will be used. More
+      details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow
+      workers use public IP addresses.
+    dataflow_service_account: Custom service account to run
+      dataflow jobs.
+    encryption_spec_key_name: Customer-managed encryption key.
+
+  Returns:
+    materialized_train_split: The materialized train split.
+    materialized_eval_split: The materialized eval split.
+    materialized_test_split: The materialized test split.
+    training_schema_uri: The training schema.
+ transform_output: The transform output artifact. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + ( + '", "args": ["transform", "--is_mp=true",' + ' "--transform_output_artifact_path=' + ), + transform_output.uri, + '", "--transform_output_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform",' + ' "--materialized_splits_output_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform_materialized",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + '", "--train_split=', + train_split.uri, + '", "--eval_split=', + eval_split.uri, + '", "--test_split=', + test_split.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--materialized_test_split=', + materialized_test_split.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + ( + f'", "--job_name=automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", "--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--gcp_resources_path=' + ), + gcp_resources, + '"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py new file mode 100644 index 
0000000000..2c19976e47 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py @@ -0,0 +1,1435 @@ +"""Util functions for AutoML Tabular pipeline.""" + +import json +import math +import os +import pathlib +from typing import Any, Dict, List, Optional, Tuple +import warnings + +_DEFAULT_NUM_PARALLEL_TRAILS = 35 +_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 +_NUM_FOLDS = 5 +_DISTILL_TOTAL_TRIALS = 100 +_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 +_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 +_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 +_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 +_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' +_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 +_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 +_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 + +# Needed because we reference the AutoML Tabular V2 pipeline. +_GCPC_STAGING_PATH = pathlib.Path( + __file__ +).parent.parent.parent.parent.resolve() +_GCPC_PREVIEW_TABULAR_PATH = ( + _GCPC_STAGING_PATH / 'preview' / 'automl' / 'tabular' +) + + +# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag +# to signify FTE usage instead of the presence of num_selected_features. +def _get_default_pipeline_params( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[float] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + max_selected_features: Optional[int] = None, + apply_feature_selection_tuning: bool = False, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + 
evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Dict[str, Any]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. 
The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + max_selected_features: number of features to select for training, + apply_feature_selection_tuning: tuning feature selection rate if true. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. 
Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. If specified, + enable_probabilistic_inference and run_distillation cannot be enabled. + model_display_name: The display name of the uploaded Vertex model. + model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not cv_trainer_worker_pool_specs_override: + cv_trainer_worker_pool_specs_override = [] + if not quantiles: + quantiles = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'optimization_objective': optimization_objective, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'weight_column': weight_column, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': ( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'dataflow_service_account': dataflow_service_account, + 'encryption_spec_key_name': encryption_spec_key_name, + 'max_selected_features': max_selected_features, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'quantiles': quantiles, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + } + parameter_values.update( + {param: value for param, value in parameters.items() if value is not None} + ) + + if run_evaluation: + eval_parameters = { + 'evaluation_batch_predict_machine_type': ( + 
evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_batch_explain_machine_type': ( + evaluation_batch_explain_machine_type + ), + 'evaluation_batch_explain_starting_replica_count': ( + evaluation_batch_explain_starting_replica_count + ), + 'evaluation_batch_explain_max_replica_count': ( + evaluation_batch_explain_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + } + parameter_values.update( + { + param: value + for param, value in eval_parameters.items() + if value is not None + } + ) + + # V1 pipeline without FTE + if num_selected_features is None: + if not additional_experiments: + additional_experiments = {} + + parameters = { + 'transformations': transformations, + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'additional_experiments': additional_experiments, + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + if apply_feature_selection_tuning: + parameter_values.update({ + 'apply_feature_selection_tuning': apply_feature_selection_tuning, + }) + + if run_distillation: + distillation_parameters = { + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + } + parameter_values.update( + { + param: value + for param, value in distillation_parameters.items() + if value is not None + } + ) + + # V2 pipeline (with FTE) + else: + if run_distillation: + raise ValueError( + 'Distillation is currently not supported' + ' when num_selected_features is specified.' 
+ ) + + parameters = { + 'num_selected_features': num_selected_features, + 'dataset_level_custom_transformation_definitions': [], + 'dataset_level_transformations': [], + 'tf_auto_transform_features': {}, + 'tf_custom_transformation_definitions': [], + 'legacy_transformations_path': transformations, + 'feature_transform_engine_dataflow_machine_type': ( + transform_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + transform_dataflow_disk_size_gb + ), + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + return parameter_values + + +def get_automl_tabular_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: 
Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. 
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. + model_display_name: The display name of the uploaded Vertex model. 
+ model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=quantiles, + 
enable_probabilistic_inference=enable_probabilistic_inference, + num_selected_features=num_selected_features, + model_display_name=model_display_name, + model_description=model_description, + ) + + # V1 pipeline without FTE + if num_selected_features is None: + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'automl_tabular_pipeline.yaml' + ) + + # V2 pipeline with FTE + else: + pipeline_definition_path = os.path.join( + _GCPC_PREVIEW_TABULAR_PATH, + 'automl_tabular_v2_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: + """Convert json input dict to encoded parameter string. + + This function is required due to the limitation on YAML component definition + that YAML definition does not have a keyword for apply quote escape, so the + JSON argument's quote must be manually escaped using this function. + + Args: + input_dict: The input json dictionary. + + Returns: + The encoded string used for parameter. + """ + if not input_dict: + return '' + out = json.dumps(json.dumps(input_dict)) + return out[1:-1] # remove the outside quotes, e.g., "foo" -> foo + + +def get_skip_evaluation_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that skips evaluation. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. 
+ train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + + Returns: + Tuple of pipeline_definition_path and parameter_values. 
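Aside: the quote escaping performed by the input_dictionary_to_parameter helper shown above is easiest to see with a concrete value. The dict is JSON-encoded twice and the outer quotes added by the second encoding are stripped, so the embedded quotes arrive pre-escaped for the YAML component definition. A minimal standalone sketch, using an arbitrary example dict (not a real transformations/split_spec/data_source payload):

import json
from typing import Any, Dict, Optional


def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str:
    # Same logic as the helper above: encode twice, then drop the outer quotes.
    if not input_dict:
        return ''
    out = json.dumps(json.dumps(input_dict))
    return out[1:-1]


# Arbitrary example input, chosen only to show the escaping.
print(input_dictionary_to_parameter({'key': 'value'}))
# prints: {\"key\": \"value\"}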
+ """ + return get_default_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column_name=target_column_name, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + split_spec=split_spec, + data_source=data_source, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + weight_column_name=weight_column_name, + study_spec_override=study_spec_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + run_evaluation=False, + run_distillation=False, + ) + + +def get_default_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: str = '', + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + run_distillation: bool = False, + distill_batch_predict_machine_type: str = 'n1-standard-16', + distill_batch_predict_starting_replica_count: int = 25, + distill_batch_predict_max_replica_count: int = 25, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. 
+ transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'This method is deprecated,' + ' please use get_automl_tabular_pipeline_and_parameters instead.' + ) + + if stage_1_num_parallel_trials <= 0: + stage_1_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS + + if stage_2_num_parallel_trials <= 0: + stage_2_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS + + hours = float(train_budget_milli_node_hours) / 1000.0 + multiplier = stage_1_num_parallel_trials * hours / 500.0 + stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0) + phase_2_rounds = int( + math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials + 0.5 + ) + if phase_2_rounds < 1: + phase_2_rounds = 1 + + # All of magic number "1.3" above is because the trial doesn't always finish + # in time_per_trial. 1.3 is an empirical safety margin here. + stage_1_deadline_secs = int( + hours * 3600.0 - 1.3 * stage_1_single_run_max_secs * phase_2_rounds + ) + + if stage_1_deadline_secs < hours * 3600.0 * 0.5: + stage_1_deadline_secs = int(hours * 3600.0 * 0.5) + # Phase 1 deadline is the same as phase 2 deadline in this case. Phase 2 + # can't finish in time after the deadline is cut, so adjust the time per + # trial to meet the deadline. + stage_1_single_run_max_secs = int( + stage_1_deadline_secs / (1.3 * phase_2_rounds) + ) + + reduce_search_space_mode = 'minimal' + if multiplier > 2: + reduce_search_space_mode = 'regular' + if multiplier > 4: + reduce_search_space_mode = 'full' + + # Stage 2 number of trials is stage_1_num_selected_trials * + # _NUM_FOLDS, which should be equal to phase_2_rounds * + # stage_2_num_parallel_trials. 
Use this information to calculate + # stage_1_num_selected_trials: + stage_1_num_selected_trials = int( + phase_2_rounds * stage_2_num_parallel_trials / _NUM_FOLDS + ) + stage_1_deadline_hours = stage_1_deadline_secs / 3600.0 + + stage_2_deadline_hours = hours - stage_1_deadline_hours + stage_2_single_run_max_secs = stage_1_single_run_max_secs + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column_name': target_column_name, + 'prediction_type': prediction_type, + 'optimization_objective': optimization_objective, + 'transformations': input_dictionary_to_parameter(transformations), + 'split_spec': input_dictionary_to_parameter(split_spec), + 'data_source': input_dictionary_to_parameter(data_source), + 'stage_1_deadline_hours': stage_1_deadline_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_1_num_selected_trials': stage_1_num_selected_trials, + 'stage_1_single_run_max_secs': stage_1_single_run_max_secs, + 'reduce_search_space_mode': reduce_search_space_mode, + 'stage_2_deadline_hours': stage_2_deadline_hours, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'stage_2_single_run_max_secs': stage_2_single_run_max_secs, + 'weight_column_name': weight_column_name, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_override': input_dictionary_to_parameter(study_spec_override), + 'stage_1_tuner_worker_pool_specs_override': input_dictionary_to_parameter( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': input_dictionary_to_parameter( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + if additional_experiments: + parameter_values.update( + { + 'additional_experiments': input_dictionary_to_parameter( + additional_experiments + ) + } + ) + if run_evaluation: + parameter_values.update({ + 'dataflow_service_account': dataflow_service_account, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + }) + if run_distillation: + # All of magic number "1.3" above is because the trial doesn't always 
finish + # in time_per_trial. 1.3 is an empirical safety margin here. + distill_stage_1_deadline_hours = ( + math.ceil( + float(_DISTILL_TOTAL_TRIALS) + / parameter_values['stage_1_num_parallel_trials'] + ) + * parameter_values['stage_1_single_run_max_secs'] + * 1.3 + / 3600.0 + ) + + parameter_values.update({ + 'distill_stage_1_deadline_hours': distill_stage_1_deadline_hours, + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + }) + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'deprecated/default_pipeline.json', + ) + return pipeline_definition_path, parameter_values + + +def get_skip_architecture_search_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_tuning_result_artifact_uri: str, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that skips architecture search. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. 
+ root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. 
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + + return get_automl_tabular_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=None, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=[], + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override={}, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + 
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=None, + distill_batch_predict_machine_type=None, + distill_batch_predict_starting_replica_count=None, + distill_batch_predict_max_replica_count=None, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=[], + enable_probabilistic_inference=False, + ) + + +def get_distill_skip_evaluation_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, + distill_batch_predict_machine_type: str = 'n1-standard-16', + distill_batch_predict_starting_replica_count: int = 25, + distill_batch_predict_max_replica_count: int = 25, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that distill and skips evaluation. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. 
+ stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'Depreciated. Please use get_automl_tabular_pipeline_and_parameters.' 
+ ) + + return get_default_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column_name=target_column_name, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + split_spec=split_spec, + data_source=data_source, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + weight_column_name=weight_column_name, + study_spec_override=study_spec_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + run_evaluation=False, + run_distillation=True, + ) From 3b6201779c7db58c6cbc5fae1e02cac5420280af Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 6 Jul 2023 15:36:13 -0700 Subject: [PATCH 016/253] chore(components): Migrate AutoML components to preview and v1 as needed docs(components): Revert doc changes from preview sync PiperOrigin-RevId: 546110190 --- .../preview/automl/forecasting/__init__.py | 25 - .../forecasting/forecasting_ensemble.py | 139 - .../forecasting/forecasting_stage_1_tuner.py | 159 - .../forecasting/forecasting_stage_2_tuner.py | 157 - .../learn_to_learn_forecasting_pipeline.yaml | 7790 ----------- ...ence_to_sequence_forecasting_pipeline.yaml | 7749 ----------- ...sion_transformer_forecasting_pipeline.yaml | 7735 ----------- ...es_dense_encoder_forecasting_pipeline.yaml | 7790 ----------- .../preview/automl/forecasting/utils.py | 1023 -- .../preview/automl/tabular/__init__.py | 35 - ...ml_tabular_feature_selection_pipeline.yaml | 11427 ---------------- .../tabular/automl_tabular_v2_pipeline.yaml | 8327 ----------- ..._params_large_data_large_search_space.json | 158 - ...params_large_data_medium_search_space.json | 158 - ..._params_large_data_small_search_space.json | 146 - ...params_medium_data_large_search_space.json | 158 - ...arams_medium_data_medium_search_space.json | 158 - ...params_medium_data_small_search_space.json | 146 - ..._params_small_data_large_search_space.json | 158 - ...params_small_data_medium_search_space.json | 158 - ..._params_small_data_small_search_space.json | 146 - 
.../tabular/configs/wide_and_deep_params.json | 132 - .../tabular/configs/xgboost_params.json | 309 - .../automl/tabular/feature_selection.py | 179 - .../tabular/feature_transform_engine.py | 976 -- .../tabnet_hyperparameter_tuning_job.py | 236 - ...et_hyperparameter_tuning_job_pipeline.yaml | 4661 ------- .../preview/automl/tabular/tabnet_trainer.py | 300 - .../tabular/tabnet_trainer_pipeline.yaml | 4302 ------ .../preview/automl/tabular/utils.py | 3360 ----- ...wide_and_deep_hyperparameter_tuning_job.py | 236 - ...ep_hyperparameter_tuning_job_pipeline.yaml | 4018 ------ .../automl/tabular/wide_and_deep_trainer.py | 281 - .../wide_and_deep_trainer_pipeline.yaml | 4048 ------ .../xgboost_hyperparameter_tuning_job.py | 124 - ...st_hyperparameter_tuning_job_pipeline.yaml | 4332 ------ .../preview/automl/tabular/xgboost_trainer.py | 77 - .../tabular/xgboost_trainer_pipeline.yaml | 4396 ------ .../v1/automl/forecasting/__init__.py | 21 - .../bqml_arima_predict_pipeline.yaml | 1159 -- .../bqml_arima_train_pipeline.yaml | 5085 ------- .../forecasting/prophet_predict_pipeline.yaml | 2150 --- .../v1/automl/forecasting/prophet_trainer.py | 211 - .../forecasting/prophet_trainer_pipeline.yaml | 2958 ---- .../v1/automl/forecasting/utils.py | 341 - .../v1/automl/tabular/__init__.py | 37 - .../tabular/automl_tabular_pipeline.yaml | 11149 --------------- .../v1/automl/tabular/cv_trainer.py | 166 - .../tabular/deprecated/default_pipeline.json | 7974 ----------- .../v1/automl/tabular/ensemble.py | 167 - .../v1/automl/tabular/finalizer.py | 88 - .../v1/automl/tabular/infra_validator.py | 39 - .../automl/tabular/split_materialized_data.py | 119 - .../v1/automl/tabular/stage_1_tuner.py | 189 - .../automl/tabular/stats_and_example_gen.py | 304 - .../training_configurator_and_validator.py | 285 - .../v1/automl/tabular/transform.py | 200 - .../v1/automl/tabular/utils.py | 1435 -- 58 files changed, 119786 deletions(-) delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml delete mode 
100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py delete mode 100644 
components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py deleted file mode 100644 index befa20f9ad..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
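Aside: the stage-1/stage-2 budget arithmetic in get_default_pipeline_and_parameters earlier in this series (the block with the "1.3" safety-margin comment) can be exercised in isolation. The sketch below mirrors that arithmetic; the num_folds value of 5 and the example inputs are illustrative assumptions, not values taken from this patch.

import math


def split_budget(train_budget_milli_node_hours: float,
                 stage_1_num_parallel_trials: int,
                 stage_2_num_parallel_trials: int,
                 num_folds: int = 5) -> dict:
    # Mirrors the arithmetic shown above; num_folds=5 is assumed here.
    hours = float(train_budget_milli_node_hours) / 1000.0
    multiplier = stage_1_num_parallel_trials * hours / 500.0
    stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)
    phase_2_rounds = max(
        1, int(math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials + 0.5))
    # 1.3 is the empirical safety margin described in the source comments.
    stage_1_deadline_secs = int(
        hours * 3600.0 - 1.3 * stage_1_single_run_max_secs * phase_2_rounds)
    if stage_1_deadline_secs < hours * 3600.0 * 0.5:
        stage_1_deadline_secs = int(hours * 3600.0 * 0.5)
        stage_1_single_run_max_secs = int(
            stage_1_deadline_secs / (1.3 * phase_2_rounds))
    return {
        'stage_1_deadline_hours': stage_1_deadline_secs / 3600.0,
        'stage_1_single_run_max_secs': stage_1_single_run_max_secs,
        'stage_1_num_selected_trials': int(
            phase_2_rounds * stage_2_num_parallel_trials / num_folds),
        'stage_2_deadline_hours': hours - stage_1_deadline_secs / 3600.0,
    }


# Example only: a 1000 milli-node-hour budget with 35 parallel trials per stage.
print(split_budget(1000, 35, 35))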
- -"""Experimental AutoML forecasting components.""" - -from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp -from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_1_tuner import automl_forecasting_stage_1_tuner as ForecastingStage1TunerOp -from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_2_tuner import automl_forecasting_stage_2_tuner as ForecastingStage2TunerOp - -__all__ = [ - 'ForecastingStage1TunerOp', - 'ForecastingEnsembleOp', - 'ForecastingStage2TunerOp', -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py deleted file mode 100644 index b7e0580c4e..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Forecasting Ensemble component spec.""" - -from typing import Optional - -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -# pylint: disable=g-bare-generic,g-doc-args,unused-argument -@dsl.container_component -def automl_forecasting_ensemble( - project: str, - location: str, - root_dir: str, - transform_output: Input[Artifact], - metadata: Input[Artifact], - tuning_result_input: Input[Artifact], - instance_baseline: Input[Artifact], - instance_schema_path: Input[Artifact], - prediction_image_uri: str, - gcp_resources: dsl.OutputPath(str), - model_architecture: Output[Artifact], - unmanaged_container_model: Output[UnmanagedContainerModel], - explanation_metadata: dsl.OutputPath(dict), - explanation_metadata_artifact: Output[Artifact], - explanation_parameters: dsl.OutputPath(dict), - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Ensembles AutoML Forecasting models. - - Args: - project: Project to run the job in. - location: Region to run the job in. - root_dir: The Cloud Storage path to store the output. - transform_output: The transform output artifact. - metadata: The tabular example gen metadata. - tuning_result_input: AutoML Tabular tuning - result. - instance_baseline: The instance baseline - used to calculate explanations. - instance_schema_path: The path to the instance schema, - describing the input data for the tf_model at serving time. - encryption_spec_key_name: Customer-managed encryption key. - prediction_image_uri: URI of the Docker image to be used as the - container for serving predictions. This URI must identify an image in - Artifact Registry or Container Registry. - - Returns: - gcp_resources: GCP resources created by this component. 
For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - model_architecture: The architecture of the output model. - unmanaged_container_model: Model information needed to perform batch prediction. - explanation_metadata: The explanation metadata used by Vertex online and batch explanations. - explanation_metadata_artifact: The explanation metadata used by Vertex online and batch explanations in the format of a KFP Artifact. - explanation_parameters: The explanation parameters used by Vertex online and batch explanations. - """ - # fmt: on - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', - '", "args": ["forecasting_mp_ensemble', - '", "--transform_output_path=', - transform_output.uri, - '", "--error_file_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', - '", "--metadata_path=', - metadata.uri, - '", "--tuning_result_input_path=', - tuning_result_input.uri, - '", "--instance_baseline_path=', - instance_baseline.uri, - '", "--instance_schema_path=', - instance_schema_path.uri, - '", "--prediction_docker_uri=', - prediction_image_uri, - '", "--model_relative_output_path=', - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model', - '", "--explanation_metadata_path=', - explanation_metadata, - ',', - explanation_metadata_artifact.uri, - '", "--explanation_parameters_path=', - explanation_parameters, - '", "--model_architecture_path=', - model_architecture.uri, - '", "--use_json=true', - '", "--executor_input={{$.json_escape[1]}}"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py deleted file mode 100644 index e82e55708b..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
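Aside: the deleted forecasting components in this patch (the ensemble above, the stage-1 and stage-2 tuners below) all follow the same KFP pattern: a @dsl.container_component whose body returns a dsl.ContainerSpec that invokes a custom-job launcher, with the CustomJob payload assembled from a dsl.ConcatPlaceholder. A stripped-down sketch of that pattern; the image, command, and payload fields here are placeholders, not the components' real values.

from kfp import dsl
from kfp.dsl import Artifact, Input, Output


@dsl.container_component
def example_custom_job_component(
    project: str,
    location: str,
    transform_output: Input[Artifact],
    gcp_resources: dsl.OutputPath(str),
    tuning_result_output: Output[Artifact],
    encryption_spec_key_name: str = '',
):
    # Illustrative only: a minimal stand-in for the much larger CustomJob
    # payload that the real components assemble.
    return dsl.ContainerSpec(
        image='gcr.io/example/launcher:latest',  # placeholder image
        command=['python3', '-u', '-m', 'example.launcher'],  # placeholder module
        args=[
            '--project', project,
            '--location', location,
            '--gcp_resources', gcp_resources,
            '--payload',
            dsl.ConcatPlaceholder(items=[
                '{"display_name": "example-',
                dsl.PIPELINE_JOB_ID_PLACEHOLDER,
                '", "encryption_spec": {"kms_key_name": "',
                encryption_spec_key_name,
                '"}, "transform_output_uri": "',
                transform_output.uri,
                '", "tuning_result_output_uri": "',
                tuning_result_output.uri,
                '"}',
            ]),
        ],
    )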
- -"""AutoML Forecasting Stage 1 Tuner component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument -@dsl.container_component -def automl_forecasting_stage_1_tuner( - project: str, - location: str, - root_dir: str, - num_selected_trials: int, - deadline_hours: float, - num_parallel_trials: int, - single_run_max_secs: int, - metadata: Input[Artifact], - transform_output: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - tuning_result_output: Output[Artifact], - study_spec_parameters_override: Optional[list] = [], - worker_pool_specs_override_json: Optional[list] = [], - reduce_search_space_mode: Optional[str] = 'regular', - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Searches AutoML Forecasting architectures and selects the top trials. - - Args: - project: Project to run hyperparameter tuning. - location: Location for running the hyperparameter tuning. - root_dir: The Cloud Storage location to store the output. - study_spec_parameters_override: JSON study spec. E.g., - [{"parameter_id": "activation","categorical_value_spec": {"values": - ["tanh"]}}] - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - reduce_search_space_mode: The reduce search space mode. Possible - values: "regular" (default), "minimal", "full". - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - deadline_hours: Number of hours the hyperparameter tuning should - run. - num_parallel_trials: Number of parallel training trials. - single_run_max_secs: Max number of seconds each training trial runs. - metadata: The tabular example gen metadata. - transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. - materialized_eval_split: The materialized eval split. - encryption_spec_key_name: Customer-managed encryption key. - - Returns: - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - tuning_result_output: The trained model and architectures. 
- """ - # fmt: on - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-forecasting-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', - '", "args": ["forecasting_mp_l2l_stage_1_tuner', - '", "--region=', - location, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', - '", "--reduce_search_space_mode=', - reduce_search_space_mode, - f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', - '", "--training_base_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', - '", "--num_parallel_trial=', - num_parallel_trials, - '", "--single_run_max_secs=', - single_run_max_secs, - '", "--deadline_hours=', - deadline_hours, - '", "--num_selected_trials=', - num_selected_trials, - '", "--lro_job_info=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', - '", "--error_file_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', - '", "--metadata_path=', - metadata.uri, - '", "--materialized_train_split=', - materialized_train_split.uri, - '", "--materialized_eval_split=', - materialized_eval_split.uri, - '", "--tuning_result_output_path=', - tuning_result_output.uri, - '", "--kms_key_name=', - encryption_spec_key_name, - '", "--gcp_resources_path=', - gcp_resources, - '", "--use_json=true', - '", "--log_level=ERROR', - '", "--executor_input={{$.json_escape[1]}}"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py deleted file mode 100644 index 5375f61955..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
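At the pipeline level, the stage-1/stage-2 tuners hand their tuning_result_output artifact to the ensemble step, and compiling that DAG is what produces a PipelineSpec YAML like the learn_to_learn_forecasting_pipeline.yaml removed further below. The following is a minimal sketch of that hand-off and of the compile step, with lightweight stub components standing in for the real tuner and ensemble; all names here are illustrative only.

from kfp import compiler, dsl
from kfp.dsl import Artifact, Input, Output


@dsl.component
def stage_1_tuner_stub(tuning_result_output: Output[Artifact]):
    # Stand-in for automl_forecasting_stage_1_tuner: writes a tuning result.
    with open(tuning_result_output.path, 'w') as f:
        f.write('{"best_trial": 0}')


@dsl.component
def ensemble_stub(tuning_result_input: Input[Artifact]):
    # Stand-in for automl_forecasting_ensemble: consumes the tuning result.
    with open(tuning_result_input.path) as f:
        print(f.read())


@dsl.pipeline(name='forecasting-handoff-sketch')
def forecasting_handoff_sketch():
    tuner_task = stage_1_tuner_stub()
    ensemble_stub(
        tuning_result_input=tuner_task.outputs['tuning_result_output'])


if __name__ == '__main__':
    # Compiles to a PipelineSpec YAML of the same general form as the deleted
    # learn_to_learn_forecasting_pipeline.yaml below.
    compiler.Compiler().compile(
        forecasting_handoff_sketch, 'forecasting_handoff_sketch.yaml')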
- -"""AutoML Forecasting Stage 2 Tuner component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument -@dsl.container_component -def automl_forecasting_stage_2_tuner( - project: str, - location: str, - root_dir: str, - num_selected_trials: int, - deadline_hours: float, - num_parallel_trials: int, - single_run_max_secs: int, - metadata: Input[Artifact], - transform_output: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - tuning_result_input_path: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - tuning_result_output: Output[Artifact], - worker_pool_specs_override_json: Optional[list] = [], - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Tunes AutoML Forecasting models and selects top trials. - - Args: - project: Project to run stage 2 tuner. - location: Cloud region for running the component: us-central1). - root_dir: The Cloud Storage location to store the output. - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - num_selected_trials: Number of selected trials. The number of weak - learners in the final model. - deadline_hours: Number of hours the cross-validation trainer - should run. - num_parallel_trials: Number of parallel training trials. - single_run_max_secs: Max number of seconds each training trial runs. - metadata: The forecasting example gen - metadata. - transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. - materialized_eval_split: The materialized eval split. - encryption_spec_key_name: Customer-managed encryption key. - tuning_result_input_path: Path to the json of hyperparameter - tuning results to use when evaluating models. - - Returns: - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - tuning_result_output: The trained (private) model artifact paths and their hyperparameters. 
- """ - # fmt: on - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-forecasting-stage-2-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', - '", "args": ["forecasting_mp_l2l_stage_2_tuner', - '", "--region=', - location, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', - f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', - '", "--training_base_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', - '", "--num_parallel_trial=', - num_parallel_trials, - '", "--single_run_max_secs=', - single_run_max_secs, - '", "--deadline_hours=', - deadline_hours, - '", "--num_selected_trials=', - num_selected_trials, - '", "--lro_job_info=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', - '", "--error_file_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', - '", "--metadata_path=', - metadata.uri, - '", "--materialized_train_split=', - materialized_train_split.uri, - '", "--materialized_eval_split=', - materialized_eval_split.uri, - '", "--tuning_result_input_path=', - tuning_result_input_path.uri, - '", "--kms_key_name=', - encryption_spec_key_name, - '", "--gcp_resources_path=', - gcp_resources, - '", "--tuning_result_output_path=', - tuning_result_output.uri, - ( - '", "--use_json=true", "--log_level=ERROR",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml deleted file mode 100644 index 3d28c0a17f..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ /dev/null @@ -1,7790 +0,0 @@ -# PIPELINE DEFINITION -# Name: learn-to-learn-forecasting -# Description: The AutoML Forecasting pipeline. 
-# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# enable_probabilistic_inference: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# num_selected_trials: int [Default: 10.0] -# optimization_objective: str -# predefined_split_key: str [Default: ''] -# project: str -# quantiles: list -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_column: str -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' 
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. 
- parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the hyperparameter tuning should - - run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. 
E.g., - - [{"parameter_id": "activation","categorical_value_spec": {"values": - - ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The forecasting example gen - - metadata.' - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Path to the json of hyperparameter - - tuning results to use when evaluating models.' - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component: us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model.' - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' 
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - 
enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: l2l - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - 
schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: 
gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: 
- name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - 
model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - 
pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - 
componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: l2l - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: 
true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - 
parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - 
pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: 
pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - 
group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: l2l - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: l2l - forecasting_transformations: - componentInputParameter: pipelinechannel--transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - componentInputParameter: pipelinechannel--quantiles - split_example_counts: - 
taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - 
parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. 
If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' 
- isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' 
- isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." 
- isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. 
- - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' 
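The forecasting inputs above are plain scalars and lists. As a hedged sketch of how they might be gathered before being passed to the feature-transform-engine component, the dictionary below uses only parameter names from the inputDefinitions; every value is a placeholder, and `-1` keeps the defaults shown above.

.. code-block:: python

    # Placeholder values only; parameter names come from the component's
    # inputDefinitions. Holiday-region modeling requires day granularity.
    forecasting_args = dict(
        forecasting_time_column="date",
        forecasting_time_series_identifier_column="store_id",
        forecasting_forecast_horizon=30,
        forecasting_context_window=30,
        forecasting_holiday_regions=["US"],
        forecasting_apply_windowing=True,
        forecasting_window_max_count=-1,    # -1 keeps the component default
        forecasting_window_stride_length=-1,
    )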
- isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
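The `tf_auto_transform_features` and `tf_transformations_path` descriptions above carry their examples inside escaped YAML. A minimal Python sketch of the same configurations follows; the feature names are placeholders, and in practice the JSON payload would be uploaded to the Cloud Storage object whose URI is passed as `tf_transformations_path`.

.. code-block:: python

    import json

    # Auto/type-resolved transformations, as in the documented example.
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    # Explicit built-in transformations for tf_transformations_path,
    # drawn from the ZScale / Datetime / Vocabulary examples above.
    tf_transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Datetime", "input_columns": ["feature_2"],
         "time_format": "%Y-%m-%d"},
        {"transformation": "Vocabulary", "input_columns": ["feature_3"],
         "top_k": 10},
    ]

    # Serialized payload that would be written to the GCS path used as
    # tf_transformations_path.
    payload = json.dumps(tf_transformations)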
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - 
unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - 
parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. 
If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
- isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
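The interaction between `key_field`, `excluded_fields`, and `included_fields` described a few parameters above is easy to get wrong, so here is a small illustrative dictionary that respects the documented constraints. The field names and table URI are hypothetical placeholders, not values from this pipeline.

.. code-block:: python

    # Constraints taken from the parameter descriptions above:
    # - excluded_fields and included_fields are mutually exclusive;
    # - key_field behaves like an excluded field, and only the key is echoed
    #   back in the prediction output.
    batch_predict_input_config = {
        "instances_format": "bigquery",
        "bigquery_source_input_uri": "bq://projectId.bqDatasetId.bqTableId",
        "key_field": "row_id",            # hypothetical key column
        "excluded_fields": ["raw_text"],  # dropped from the instance sent to the model
        "included_fields": [],            # must stay empty when excluded_fields is set
    }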
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: 
NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' 
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. 
Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. 
- parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' 
- isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. 
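Note on the component interfaces above: blocks such as comp-table-to-uri and comp-string-not-empty are compiler-generated from lightweight Python functions, and the deploymentSpec that follows pairs each comp-* entry with an exec-* executor that re-runs the function through kfp.components.executor_main. The sketch below is an illustration only, not the google_cloud_pipeline_components source: it shows how a component with the same interface as comp-table-to-uri could plausibly be authored and compiled with a KFP 2.x SDK. The table_to_uri function body, the metadata keys it reads, and the output file name are assumptions made for this example.

# Hypothetical sketch of a "table to URI" lightweight component; the real
# implementation lives in google_cloud_pipeline_components and may differ.
from typing import NamedTuple

from kfp import compiler, dsl
from kfp.dsl import Artifact, Input


@dsl.component(base_image='python:3.7')
def table_to_uri(
    table: Input[Artifact],
    use_bq_prefix: bool = False,
) -> NamedTuple(
    'Outputs',
    [('project_id', str), ('dataset_id', str), ('table_id', str), ('uri', str)],
):
    """Builds a BigQuery table URI from an artifact's metadata (assumed keys)."""
    import collections

    # projectId/datasetId/tableId mirror the metadata fields that other
    # executors in this spec read from google.BQTable artifacts; treat the
    # exact keys as assumptions for this sketch.
    project_id = table.metadata['projectId']
    dataset_id = table.metadata['datasetId']
    table_id = table.metadata['tableId']
    uri = f'{project_id}.{dataset_id}.{table_id}'
    if use_bq_prefix:
        uri = 'bq://' + uri
    return collections.namedtuple(
        'Outputs', ['project_id', 'dataset_id', 'table_id', 'uri'],
    )(project_id, dataset_id, table_id, uri)


if __name__ == '__main__':
    # Compiling the component emits IR YAML with comp-table-to-uri and
    # exec-table-to-uri sections shaped like the ones in this file.
    compiler.Compiler().compile(table_to_uri, package_path='table_to_uri.yaml')

Compiled output of this kind is what the test samples touched by this patch exercise; the parameter names, defaults, and output definitions in the generated YAML are derived directly from the decorated function's signature.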
-deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 
- - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id 
- - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - 
'{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - 
--dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - 
--project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n 
)(\n        data_source_csv_filenames,\n\ - \        data_source_bigquery_table_path,\n        model_display_name,\n    )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n    materialized_data: Input[Dataset],\n\ - \  materialized_train_split: OutputPath('MaterializedSplit'),\n  materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n  materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path pattern to materialized_train_split.\n\ - \ materialized_eval_split: Path pattern to materialized_eval_split.\n\ - \ materialized_test_split: Path pattern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif !
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if not empty, 'false' if empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif !
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The AutoML Forecasting pipeline. - name: learn-to-learn-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - 
pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - 
componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. 
- isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The bigquery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' 
- isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - num_selected_trials: - defaultValue: 10.0 - description: Number of selected trials. - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.'
- isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trials for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguishes the different - - time series.' - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.' - parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.' - isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicates the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window.
- isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml deleted file mode 100644 index 4f656e1b99..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ /dev/null @@ -1,7749 +0,0 @@ -# PIPELINE DEFINITION -# Name: sequence-to-sequence-forecasting -# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. -# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# num_selected_trials: int [Default: 10.0] -# optimization_objective: str -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_column: str -# timestamp_split_key: str 
[Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' 
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the hyperparameter tuning should - - run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. 
- parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "activation","categorical_value_spec": {"values": - - ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The forecasting example gen - - metadata.' - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Path to the json of hyperparameter - - tuning results to use when evaluating models.' - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component (e.g., us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model.' - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs.
- parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trials for stage 1.
- parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trials for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: -
taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: seq2seq - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - 
enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: 
pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: 
pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: 
STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - 
taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - 
pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: seq2seq - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
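The `should_run_model_evaluation` task above is gated by a triggerPolicy on `pipelinechannel--run_evaluation`. For readers following the compiled spec, here is a minimal, hedged sketch of how such a gate is typically authored with the KFP SDK; `evaluate_model` and the pipeline name are invented stand-ins rather than the actual pipeline source, and the `name` argument is assumed to be what surfaces as the taskInfo name.

```python
from kfp import dsl


@dsl.component
def evaluate_model(run_name: str) -> str:
    # Stand-in for the evaluation steps guarded by the condition.
    return 'evaluated ' + run_name


@dsl.pipeline(name='conditional-evaluation-sketch')
def conditional_evaluation(run_evaluation: bool = True):
    # A dsl.Condition block like this is what typically compiles into a
    # task-level triggerPolicy of the form
    #   inputs.parameter_values['pipelinechannel--run_evaluation'] == true
    with dsl.Condition(run_evaluation == True,  # noqa: E712
                       name='should_run_model_evaluation'):
        evaluate_model(run_name='demo')
```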
pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - 
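The `model-batch-explanation-2` task above passes literal arguments (for example `predictions_format: jsonl`) as runtimeValue constants, and its `job_display_name` embeds `{{$.pipeline_job_uuid}}` and `{{$.pipeline_task_uuid}}`. A hedged sketch of how such values are usually written on the SDK side follows; `launch_batch_job` is an invented stand-in, and the placeholder constants are assumed to resolve to the strings shown in the spec.

```python
from kfp import dsl


@dsl.component
def launch_batch_job(job_display_name: str, predictions_format: str):
    print(job_display_name, predictions_format)


@dsl.pipeline(name='placeholder-sketch')
def placeholder_pipeline():
    launch_batch_job(
        # Compiles to a runtimeValue constant that still contains the
        # {{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} placeholders.
        job_display_name=('batch-explain-'
                          f'{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-'
                          f'{dsl.PIPELINE_TASK_ID_PLACEHOLDER}'),
        predictions_format='jsonl',  # becomes runtimeValue: constant: jsonl
    )
```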
parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - 
feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - 
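`model-evaluation-import-2` above consumes `feature_attributions` and `evaluation_metrics` through taskOutputArtifact references, which also induce its dependentTasks edges. Below is a minimal, hedged sketch of that producer/consumer wiring; the two components are illustrative stand-ins, not the real evaluation components.

```python
from kfp import dsl
from kfp.dsl import Input, Metrics, Output


@dsl.component
def produce_metrics(metrics: Output[Metrics]):
    metrics.log_metric('accuracy', 0.9)


@dsl.component
def import_metrics(metrics: Input[Metrics]):
    print('importing metrics from', metrics.path)


@dsl.pipeline(name='artifact-wiring-sketch')
def artifact_wiring():
    producer = produce_metrics()
    # Passing producer.outputs['metrics'] is what yields the
    # taskOutputArtifact/producerTask entries and the implicit
    # dependentTasks edge in the compiled YAML.
    import_metrics(metrics=producer.outputs['metrics'])
```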
feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - 
outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - 
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: seq2seq - predefined_split_key: - componentInputParameter: 
pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - runtimeValue: - constant: 0.0 - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: seq2seq - forecasting_transformations: - componentInputParameter: pipelinechannel--transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - runtimeValue: - constant: [] - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: 
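The `string-not-empty` check above feeds the paired `== 'true'` / `== 'false'` trigger policies on condition-2 and condition-4, so exactly one of the two branches runs. A hedged sketch of branching on a task's output parameter follows; every component name here is invented for illustration.

```python
from kfp import dsl


@dsl.component
def string_not_empty(value: str) -> str:
    return 'true' if value else 'false'


@dsl.component
def use_supplied_result(uri: str):
    print('using supplied tuning result at', uri)


@dsl.component
def run_stage_1_tuning():
    print('running stage 1 tuning')


@dsl.pipeline(name='branch-on-output-sketch')
def branch_on_output(stage_1_tuning_result_artifact_uri: str = ''):
    check = string_not_empty(value=stage_1_tuning_result_artifact_uri)
    # Each dsl.Condition block compiles to a sub-DAG gated by a triggerPolicy
    # on the check task's output parameter.
    with dsl.Condition(check.output == 'true'):
        use_supplied_result(uri=stage_1_tuning_result_artifact_uri)
    with dsl.Condition(check.output == 'false'):
        run_stage_1_tuning()
```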
pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - 
parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. 
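The `comp-exit-handler-1` sub-DAG above has the shape the compiler typically emits for a `dsl.ExitHandler` block. The sketch below assumes that authoring pattern; `notify` and `train` are invented components, and the `PipelineTaskFinalStatus` parameter is left unset at call time on the assumption that the backend populates it.

```python
from kfp import dsl


@dsl.component
def notify(status: dsl.PipelineTaskFinalStatus):
    # Runs after the wrapped DAG finishes, regardless of success or failure.
    print('pipeline finished with state:', status.state)


@dsl.component
def train():
    print('training...')


@dsl.pipeline(name='exit-handler-sketch')
def exit_handler_sketch():
    exit_task = notify()
    with dsl.ExitHandler(exit_task):
        train()
```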
For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' 
- isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. 
If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... 
] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. 
- isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. 
- - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' 
- isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
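Because tf_transformations_path points at a JSON file rather than taking the transformation list inline, the ZScale chaining and Datetime examples from the description above translate into a small config-writing step. A minimal sketch, assuming an illustrative local file name and reusing the placeholder feature names and time_format from the description:

    # Builds the ZScale/Datetime configuration shown in the
    # tf_transformations_path description and writes it to a JSON file.
    # "transform_config.json" and the feature names are illustrative only.
    import json

    transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "ZScale", "input_columns": ["feature_2"]},
        {
            "transformation": "Datetime",
            "input_columns": ["feature_1"],
            "time_format": "%Y-%m-%d",
        },
    ]

    with open("transform_config.json", "w") as config_file:
        json.dump(transformations, config_file, indent=2)

The path of the resulting file is what the component's tf_transformations_path STRING parameter expects; note the description's caveat that it cannot be combined with tf_auto_transform_features-only execution on the "bigquery" engine.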
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - 
unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - 
parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. 
If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
- isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: 
NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' 
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. 
Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. 
- parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' 
- isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. 
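For context on how the `comp-…` definitions above pair with the `exec-…` executors in the `deploymentSpec` section that follows, here is a minimal, hypothetical sketch (not the GCPC source) of a lightweight component authored with the KFP v2 SDK. The function name mirrors the `comp-string-not-empty` definition shown above; the pipeline name, file name, and demo wrapper are assumptions for illustration only. Compiling it produces the same kind of `inputDefinitions`/`outputDefinitions` block plus a matching executor entry under `deploymentSpec`.

# Illustrative sketch only -- not the GCPC source. Assumes kfp>=2.0 is installed.
from kfp import compiler, dsl


@dsl.component(base_image='python:3.7')
def string_not_empty(value: str) -> str:
    """Returns 'true' if `value` is non-empty, 'false' otherwise."""
    return 'true' if value else 'false'


@dsl.pipeline(name='string-not-empty-demo')
def demo(value: str = ''):
    # The single task compiles to a comp-string-not-empty entry with a
    # STRING input named `value` and a STRING output named `Output`.
    string_not_empty(value=value)


if __name__ == '__main__':
    # The emitted YAML also contains an exec-string-not-empty executor that
    # embeds the function source and bootstraps it at runtime, analogous to
    # the python:3.7 executors in the deploymentSpec below.
    compiler.Compiler().compile(demo, 'demo.yaml')
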
-deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 
- - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id 
- - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - 
'{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - 
--dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - 
--project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. - name: sequence-to-sequence-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: 
evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: 
stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. 
- isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The bigquery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' 
- isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - num_selected_trials: - defaultValue: 10.0 - description: Number of selected trails. - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. 
- parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different - - time series.' - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.' - parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.' - isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicate the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window. - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml deleted file mode 100644 index 6bad578312..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ /dev/null @@ -1,7735 +0,0 @@ -# PIPELINE DEFINITION -# Name: temporal-fusion-transformer-forecasting -# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
-# Inputs: -# available_at_forecast_columns: list -# context_window: int [Default: 0.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# encryption_spec_key_name: str [Default: ''] -# evaluated_examples_bigquery_path: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 22.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# evaluation_batch_predict_max_replica_count: int [Default: 25.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] -# evaluation_dataflow_max_num_workers: int [Default: 25.0] -# evaluation_dataflow_starting_num_workers: int [Default: 22.0] -# fast_testing: bool [Default: False] -# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int [Default: 0.0] -# group_columns: list -# group_temporal_total_weight: float [Default: 0.0] -# group_total_weight: float [Default: 0.0] -# holiday_regions: list -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] -# optimization_objective: str -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_trainer_worker_pool_specs_override: list -# study_spec_parameters_override: list -# target_column: str -# temporal_total_weight: float [Default: 0.0] -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_attribute_columns: list -# time_series_identifier_column: str -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transformations: dict -# unavailable_at_forecast_columns: list -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# window_max_count: int [Default: 0.0] -# window_predefined_column: str [Default: ''] -# window_stride_length: int [Default: 0.0] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -components: - comp-automl-forecasting-ensemble: - executorLabel: exec-automl-forecasting-ensemble - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' 
- metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' 
- parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the hyperparameter tuning should - - run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "activation","categorical_value_spec": {"values": - - ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. 
E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The forecasting example gen - - metadata.' - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Path to the json of hyperparameter - - tuning results to use when evaluating models.' - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component: us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model.' - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' 
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - enableCache: true - 
componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - selected_trials: - runtimeValue: - constant: 1.0 - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: tft - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - 
artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - 
bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - 
artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - 
comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - selected_trials: - runtimeValue: - constant: 1.0 - stage_1_num_parallel_trials: - 
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: tft - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - 
pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - runtimeValue: - constant: [] - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: 
- constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - 
tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: 
feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - 
componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 
'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: tft - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: 
pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - runtimeValue: - constant: 0.0 - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: tft - forecasting_transformations: - componentInputParameter: pipelinechannel--transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - runtimeValue: - constant: [] - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - 
pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - 
parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. 
If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. 
More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' 
- isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' 
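The dataset-level transformation format documented above is easier to read as a plain Python literal. The sketch below simply restates the documented "Join" and "TimeAggregate" examples; the BigQuery URI and column names are the placeholders used in those examples, not values taken from any pipeline in this patch.

    # Restates the documented "Join" and "TimeAggregate" examples as a Python
    # literal; the BigQuery URI and column names are the doc's own placeholders.
    dataset_level_transformations = [
        {
            "transformation": "Join",
            "right_table_uri": "bq://test-project.dataset_test.table",
            "join_keys": [["join_key_col", "join_key_col"]],
        },
        {
            "transformation": "TimeAggregate",
            "time_difference": 40,
            "time_difference_units": "DAY",
            "time_series_identifier_columns": ["store_id"],
            "time_column": "time_col",
            "time_difference_target_column": "target_col",
            "output_column": "output_col",
        },
    ]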
- isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. 
- isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. 
Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. 
Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. 
Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' 
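The two configuration styles described above, type-resolution via tf_auto_transform_features and an explicit per-column config of the kind a tf_transformations_path JSON file holds, can be sketched as follows. The feature/column names mirror the documented examples, the top_k value is arbitrary, and the local output filename is hypothetical (in practice the JSON would be uploaded to a gs:// path).

    # Sketch of the two transform-configuration styles documented above.
    import json

    # Auto / pre-resolved type hints (tf_auto_transform_features).
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    # Explicit built-in transformations, as a tf_transformations_path JSON file.
    explicit_transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Vocabulary", "input_columns": ["feature_2"], "top_k": 10},
    ]

    with open("transform_config.json", "w") as f:  # hypothetical local path
        json.dump(explicit_transformations, f, indent=2)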
- parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' - parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - 
inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: 
STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. 
If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
- isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
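As a rough illustration of how the comp-model-batch-predict inputs above fit together, the sketch below loads a batch-predict component spec from a local YAML file and passes the documented parameters. The file path, project, bucket, and machine type are placeholders, and loading the spec this way is an assumption for illustration only.

    # Minimal sketch, assuming the component spec above is saved locally as
    # model_batch_predict.yaml; all concrete values are placeholders.
    from kfp import components, dsl

    batch_predict_op = components.load_component_from_file("model_batch_predict.yaml")

    @dsl.pipeline(name="batch-predict-example")
    def pipeline():
        batch_predict_op(
            project="my-project",                                   # placeholder
            location="us-central1",
            job_display_name="example-batch-predict",
            gcs_source_uris=["gs://my-bucket/instances-*.jsonl"],   # wildcards allowed
            instances_format="jsonl",
            gcs_destination_output_uri_prefix="gs://my-bucket/predictions",
            machine_type="n1-standard-4",                           # placeholder
            starting_replica_count=1,
            max_replica_count=10,
        )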
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: 
NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' 
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. 
Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. 
- parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' 
- isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. 
-deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 
- - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id 
- - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - 
'{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - 
--dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - 
--project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. - name: temporal-fusion-transformer-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: 
evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: 
stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' 
- isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The bigquery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. 
- isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' 
-        isOptional: true
-        parameterType: LIST
-      time_series_identifier_column:
-        description: 'The column that distinguish the different
-
-          time series.'
-        parameterType: STRING
-      timestamp_split_key:
-        defaultValue: ''
-        description: The timestamp_split column name.
-        isOptional: true
-        parameterType: STRING
-      train_budget_milli_node_hours:
-        description: 'The train budget of creating this model,
-
-          expressed in milli node hours i.e. 1,000 value in this field means 1 node
-
-          hour.'
-        parameterType: NUMBER_DOUBLE
-      training_fraction:
-        defaultValue: -1.0
-        description: The training fraction.
-        isOptional: true
-        parameterType: NUMBER_DOUBLE
-      transformations:
-        description: 'Dict mapping auto and/or type-resolutions to feature
-
-          columns. The supported types are: auto, categorical, numeric, text, and
-
-          timestamp.'
-        parameterType: STRUCT
-      unavailable_at_forecast_columns:
-        description: 'The columns that are unavailable at the
-
-          forecast time.'
-        isOptional: true
-        parameterType: LIST
-      validation_fraction:
-        defaultValue: -1.0
-        description: The validation fraction.
-        isOptional: true
-        parameterType: NUMBER_DOUBLE
-      weight_column:
-        defaultValue: ''
-        description: The weight column name.
-        isOptional: true
-        parameterType: STRING
-      window_max_count:
-        defaultValue: 0.0
-        description: The maximum number of windows that will be generated.
-        isOptional: true
-        parameterType: NUMBER_INTEGER
-      window_predefined_column:
-        defaultValue: ''
-        description: The column that indicate the start of each window.
-        isOptional: true
-        parameterType: STRING
-      window_stride_length:
-        defaultValue: 0.0
-        description: The stride length to generate the window.
-        isOptional: true
-        parameterType: NUMBER_INTEGER
-  outputDefinitions:
-    artifacts:
-      feature-attribution-2-feature_attributions:
-        artifactType:
-          schemaTitle: system.Metrics
-          schemaVersion: 0.0.1
-      feature-attribution-feature_attributions:
-        artifactType:
-          schemaTitle: system.Metrics
-          schemaVersion: 0.0.1
-schemaVersion: 2.1.0
-sdkVersion: kfp-2.0.0-beta.17
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml
deleted file mode 100644
index afbf67ec9e..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml
+++ /dev/null
@@ -1,7790 +0,0 @@
-# PIPELINE DEFINITION
-# Name: time-series-dense-encoder-forecasting
-# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline.
-# Inputs:
-#    available_at_forecast_columns: list
-#    context_window: int [Default: 0.0]
-#    data_source_bigquery_table_path: str [Default: '']
-#    data_source_csv_filenames: str [Default: '']
-#    dataflow_service_account: str [Default: '']
-#    dataflow_subnetwork: str [Default: '']
-#    dataflow_use_public_ips: bool [Default: True]
-#    enable_probabilistic_inference: bool [Default: False]
-#    encryption_spec_key_name: str [Default: '']
-#    evaluated_examples_bigquery_path: str [Default: '']
-#    evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8']
-#    evaluation_batch_explain_max_replica_count: int [Default: 22.0]
-#    evaluation_batch_explain_starting_replica_count: int [Default: 22.0]
-#    evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16']
-#    evaluation_batch_predict_max_replica_count: int [Default: 25.0]
-#    evaluation_batch_predict_starting_replica_count: int [Default: 25.0]
-#    evaluation_dataflow_disk_size_gb: int [Default: 50.0]
-#    evaluation_dataflow_machine_type: str [Default: 'n1-standard-16']
-#    evaluation_dataflow_max_num_workers: int [Default: 25.0]
-#    evaluation_dataflow_starting_num_workers: int [Default: 22.0]
-#    fast_testing: bool [Default: False]
-#    feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: '']
-#    feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0]
-#    feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16']
-#    feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0]
-#    forecast_horizon: int [Default: 0.0]
-#    group_columns: list
-#    group_temporal_total_weight: float [Default: 0.0]
-#    group_total_weight: float [Default: 0.0]
-#    holiday_regions: list
-#    location: str
-#    model_description: str [Default: '']
-#    model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}']
-#    num_selected_trials: int [Default: 10.0]
-#    optimization_objective: str
-#    predefined_split_key: str [Default: '']
-#    project: str
-#    quantiles: list
-#    root_dir: str
-#    run_evaluation: bool [Default: False]
-#    stage_1_num_parallel_trials: int [Default: 35.0]
-#    stage_1_tuner_worker_pool_specs_override: list
-#    stage_1_tuning_result_artifact_uri: str [Default: '']
-#    stage_2_num_parallel_trials: int [Default: 35.0]
-#    stage_2_trainer_worker_pool_specs_override: list
-#    study_spec_parameters_override: list
-#    target_column: str
-#    temporal_total_weight: float [Default: 0.0]
-#    test_fraction: float [Default: -1.0]
-#    time_column: str
-#    time_series_attribute_columns: list
-#    time_series_identifier_column: str
-#    timestamp_split_key: str [Default: '']
-#    train_budget_milli_node_hours: float
-#    training_fraction: float [Default: -1.0]
-#    transformations: dict
-#    unavailable_at_forecast_columns: list
-#    validation_fraction: float [Default: -1.0]
-#    vertex_dataset: system.Artifact
-#    weight_column: str [Default: '']
-#    window_max_count: int [Default: 0.0]
-#    window_predefined_column: str [Default: '']
-#    window_stride_length: int [Default: 0.0]
-# Outputs:
-#    feature-attribution-2-feature_attributions: system.Metrics
-#    feature-attribution-feature_attributions: system.Metrics
-components:
-  comp-automl-forecasting-ensemble:
-    executorLabel: exec-automl-forecasting-ensemble
-    inputDefinitions:
-      artifacts:
-        instance_baseline:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: 'The instance baseline
-
-            used to calculate explanations.'
- instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. - parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-ensemble-2: - executorLabel: exec-automl-forecasting-ensemble-2 - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - instance_schema_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The path to the instance schema, - - describing the input data for the tf_model at serving time.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Region to run the job in. 
- parameterType: STRING - prediction_image_uri: - description: 'URI of the Docker image to be used as the - - container for serving predictions. This URI must identify an image in - - Artifact Registry or Container Registry.' - parameterType: STRING - project: - description: Project to run the job in. - parameterType: STRING - root_dir: - description: The Cloud Storage path to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The explanation metadata used by Vertex online and batch explanations - in the format of a KFP Artifact. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: Model information needed to perform batch prediction. - parameters: - explanation_metadata: - description: The explanation metadata used by Vertex online and batch explanations. - parameterType: STRUCT - explanation_parameters: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-1-tuner: - executorLabel: exec-automl-forecasting-stage-1-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the hyperparameter tuning should - - run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the hyperparameter tuning. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run hyperparameter tuning. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. 
E.g., - - [{"parameter_id": "activation","categorical_value_spec": {"values": - - ["tanh"]}}]' - isOptional: true - parameterType: LIST - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-forecasting-stage-2-tuner: - executorLabel: exec-automl-forecasting-stage-2-tuner - inputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The forecasting example gen - - metadata.' - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input_path: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Path to the json of hyperparameter - - tuning results to use when evaluating models.' - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Cloud region for running the component: us-central1).' - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model.' - parameterType: NUMBER_INTEGER - project: - description: Project to run stage 2 tuner. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained (private) model artifact paths and their hyperparameters. - parameters: - gcp_resources: - description: 'GCP resources created by this component. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - selected_trials: - description: Number of trials that should be selected. - parameterType: NUMBER_INTEGER - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' 
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - tasks: - automl-forecasting-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble - dependentTasks: - - automl-forecasting-stage-2-tuner - - get-prediction-image-uri - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-2-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble - automl-forecasting-stage-2-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-2-tuner - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input_path: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-2-tuner - calculate-training-parameters: - cachingOptions: - 
enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-forecasting-ensemble - - model-upload - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description - get-prediction-image-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri - inputs: - parameters: - model_type: - runtimeValue: - constant: tide - taskInfo: - name: get-prediction-image-uri - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: get-hyperparameter-tuning-results - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-forecasting-ensemble - - get-or-create-model-description - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - 
schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: 
gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - finalize-eval-quantile-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters - get-predictions-column: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column - dependentTasks: - - finalize-eval-quantile-parameters - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: 
- name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation-forecasting: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting - dependentTasks: - - finalize-eval-quantile-parameters - - get-predictions-column - - model-batch-predict - - table-to-uri - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting - 
model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation-forecasting - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - 
pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - tasks: - automl-forecasting-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-ensemble-2 - dependentTasks: - - automl-forecasting-stage-1-tuner - - get-prediction-image-uri-2 - inputs: - artifacts: - instance_baseline: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline - instance_schema_path: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-forecasting-stage-1-tuner - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - prediction_image_uri: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-image-uri-2 - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-forecasting-ensemble-2 - automl-forecasting-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-forecasting-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-forecasting-stage-1-tuner - calculate-training-parameters-2: - cachingOptions: - enableCache: true - 
componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-forecasting-ensemble-2 - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: should_run_model_evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - get-or-create-model-description-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-or-create-model-description-2 - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - original_description: - componentInputParameter: pipelinechannel--model_description - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-or-create-model-description-2 - get-prediction-image-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-image-uri-2 - inputs: - parameters: - model_type: - runtimeValue: - constant: tide - taskInfo: - name: get-prediction-image-uri-2 - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - dependentTasks: - - automl-forecasting-ensemble-2 - - get-or-create-model-description-2 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-forecasting-ensemble-2 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-forecasting-ensemble-2 - parameters: - description: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-or-create-model-description-2 - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-forecasting-ensemble-2 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--training-configurator-and-validator-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--training-configurator-and-validator-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - finalize-eval-quantile-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-finalize-eval-quantile-parameters-2 - inputs: - parameters: - quantiles: - componentInputParameter: pipelinechannel--quantiles - taskInfo: - name: finalize-eval-quantile-parameters-2 - get-predictions-column-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-predictions-column-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - inputs: - parameters: - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: get-predictions-column-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model - parameters: - bigquery_destination_output_uri: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - generate_explanation: - runtimeValue: - constant: 0.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-forecasting-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-forecasting-2 - dependentTasks: - - finalize-eval-quantile-parameters-2 - - get-predictions-column-2 - - model-batch-predict-2 - - table-to-uri-2 - inputs: - artifacts: - predictions_bigquery_source: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_quantiles: - taskOutputParameter: - outputParameterKey: quantiles - producerTask: finalize-eval-quantile-parameters-2 - forecasting_type: - taskOutputParameter: - outputParameterKey: forecasting_type - producerTask: finalize-eval-quantile-parameters-2 - ground_truth_bigquery_source: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - ground_truth_format: - runtimeValue: - constant: bigquery - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-predictions-column-2 - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_field_name: - runtimeValue: - constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} - taskInfo: - name: model-evaluation-forecasting-2 - model-evaluation-import-2: - cachingOptions: - enableCache: 
true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-forecasting-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - forecasting_metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-forecasting-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: Vertex Forecasting pipeline - problem_type: - runtimeValue: - constant: forecasting - taskInfo: - name: model-evaluation-import-2 - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict-2 - parameters: - use_bq_prefix: - runtimeValue: - constant: 1.0 - taskInfo: - name: table-to-uri-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--string-not-empty-Output: - 
parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - split-materialized-data - - string-not-empty - - training-configurator-and-validator - inputs: - artifacts: - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - 
pipelinechannel--training-configurator-and-validator-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - pipelinechannel--training-configurator-and-validator-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: pipelinechannel--num_selected_trials - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_evaluation: - componentInputParameter: 
pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecasting_available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - forecasting_context_window: - componentInputParameter: pipelinechannel--context_window - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_holiday_regions: - componentInputParameter: pipelinechannel--holiday_regions - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_predefined_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - group_columns: - componentInputParameter: pipelinechannel--group_columns - 
group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - location: - componentInputParameter: pipelinechannel--location - model_type: - runtimeValue: - constant: tide - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--transformations - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-hyperparameter-tuning-results-are-supplied-by-user - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - available_at_forecast_columns: - componentInputParameter: pipelinechannel--available_at_forecast_columns - context_window: - componentInputParameter: pipelinechannel--context_window - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_model_type: - runtimeValue: - constant: tide - forecasting_transformations: - componentInputParameter: pipelinechannel--transformations - group_columns: - componentInputParameter: pipelinechannel--group_columns - group_temporal_total_weight: - componentInputParameter: pipelinechannel--group_temporal_total_weight - group_total_weight: - componentInputParameter: pipelinechannel--group_total_weight - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - prediction_type: - runtimeValue: - constant: time_series - quantiles: - componentInputParameter: pipelinechannel--quantiles - split_example_counts: - 
taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - temporal_total_weight: - componentInputParameter: pipelinechannel--temporal_total_weight - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_attribute_columns: - componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - unavailable_at_forecast_columns: - componentInputParameter: pipelinechannel--unavailable_at_forecast_columns - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--available_at_forecast_columns: - parameterType: LIST - pipelinechannel--context_window: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluated_examples_bigquery_path: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--group_columns: - parameterType: LIST - pipelinechannel--group_temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--group_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--holiday_regions: - parameterType: LIST - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - 
parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--temporal_total_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_attribute_columns: - parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transformations: - parameterType: STRUCT - pipelinechannel--unavailable_at_forecast_columns: - parameterType: LIST - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_predefined_column: - parameterType: STRING - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. 
If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' 
- isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' 
- isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." 
- isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. 
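The dataset-level inputs documented above are plain lists of transformation dicts. A minimal sketch, reusing the placeholder module path, table URI, and column names from the docstring examples embedded in this spec:

    # Custom dataset-level transform, registered so it can be referenced below.
    dataset_level_custom_transformation_definitions = [{
        "transformation": "ConcatCols",
        "module_path": "/path/to/custom_transform_fn_dlt.py",
        "function_name": "concat_cols",
    }]

    dataset_level_transformations = [
        {   # built-in Join: keeps the left-table join keys, drops the right-table ones
            "transformation": "Join",
            "right_table_uri": "bq://test-project.dataset_test.table",
            "join_keys": [["join_key_col", "join_key_col"]],
        },
        {   # built-in TimeAggregate: value of target_col 40 DAYs in the past, per store_id
            "transformation": "TimeAggregate",
            "time_difference": 40,
            "time_difference_units": "DAY",
            "time_series_identifier_columns": ["store_id"],
            "time_column": "time_col",
            "time_difference_target_column": "target_col",
            "output_column": "output_col",
        },
        {   # built-in PartitionByMax: max sell_price per (store_id, state_id) over the prior WEEK
            "transformation": "PartitionByMax",
            "reduce_column": "sell_price",
            "partition_by_columns": ["store_id", "state_id"],
            "time_column": "date",
            "time_ago": 1,
            "time_ago_units": "WEEK",
            "output_column": "partition_by_reduce_max_output",
        },
        {   # the custom transform defined above, used alongside the built-ins
            "transformation": "ConcatCols",
            "cols": ["feature_1", "feature_2"],
            "output_col": "feature_1_2",
        },
    ]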
- - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' 
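Most of the forecasting and feature-selection inputs above are scalars or short lists. An illustrative bundle of values; only the key names and the enumerated choices come from this spec, while the column names and window sizes are placeholders:

    # Illustrative values only; key names match the inputDefinitions above.
    fte_forecasting_params = dict(
        model_type="tft",                      # neural_network, boosted_trees, l2l, seq2seq, tft or tide
        forecasting_time_column="date",        # placeholder column names
        forecasting_time_series_identifier_column="store_id",
        forecasting_forecast_horizon=28,       # placeholder horizon/context sizes
        forecasting_context_window=28,
        forecasting_holiday_regions=["US"],    # any of the region codes listed above
        feature_selection_algorithm="AMI",     # default; CMIM, JMIM and MRMR are also documented
        max_selected_features=1000,            # the documented default when feature selection runs
    )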
- isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
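The row-level TF transformation inputs follow the same pattern: plain Python structures, or a JSON file in the case of tf_transformations_path. A minimal sketch built from the docstring examples reproduced above; the GCS module path, local file name, and feature/column names are placeholders:

    import json

    # Auto/type-resolved features (docstring example).
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    # Bring-your-own TF transform, usable alongside the built-ins.
    tf_custom_transformation_definitions = [{
        "transformation": "PlusOne",
        "module_path": "gs://bucket/custom_transform_fn.py",
        "function_name": "plus_one_transform",
    }]

    # Explicit per-column config for tf_transformations_path, mixing built-ins
    # (ZScale, Datetime, NGram, Clip) with the custom PlusOne defined above.
    tf_transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Datetime", "input_columns": ["feature_2"],
         "time_format": "%Y-%m-%d"},
        {"transformation": "NGram", "input_columns": ["feature_3"],
         "min_ngram_size": 1, "max_ngram_size": 2, "separator": " "},
        {"transformation": "Clip", "input_columns": ["col1"],
         "output_columns": ["col1_clipped"], "min_value": 1.0, "max_value": 10.0},
        {"transformation": "PlusOne", "input_columns": ["feature_1"],
         "output_columns": ["feature_1_plused_one"]},
    ]
    with open("transform_config.json", "w") as f:
        # Upload the file to Cloud Storage and pass its gs:// path as tf_transformations_path.
        json.dump(tf_transformations, f)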
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-finalize-eval-quantile-parameters: - executorLabel: exec-finalize-eval-quantile-parameters - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-finalize-eval-quantile-parameters-2: - executorLabel: exec-finalize-eval-quantile-parameters-2 - inputDefinitions: - parameters: - quantiles: - isOptional: true - parameterType: LIST - outputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - quantiles: - parameterType: LIST - comp-get-or-create-model-description: - executorLabel: exec-get-or-create-model-description - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-or-create-model-description-2: - executorLabel: exec-get-or-create-model-description-2 - inputDefinitions: - parameters: - location: - parameterType: STRING - original_description: - defaultValue: '' - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri: - executorLabel: exec-get-prediction-image-uri - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-prediction-image-uri-2: - executorLabel: exec-get-prediction-image-uri-2 - inputDefinitions: - parameters: - model_type: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column: - executorLabel: exec-get-predictions-column - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-predictions-column-2: - executorLabel: exec-get-predictions-column-2 - inputDefinitions: - parameters: - forecasting_type: - parameterType: STRING - target_column: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - 
unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - 
parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. 
If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
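Taken together, the BigQuery-flavoured batch-predict inputs above reduce to a handful of strings. An illustrative bundle; project, dataset and table names are placeholders, and the URI spellings follow the descriptions quoted above (check the linked REST reference for the exact accepted forms):

    # Illustrative values; key names match the model-batch-predict inputDefinitions.
    batch_predict_bq = dict(
        job_display_name="forecasting-batch-predict",   # user-defined job name
        project="my-project",                           # placeholder
        location="us-central1",                         # the documented default
        instances_format="bigquery",                    # must be a supported input storage format of the Model
        predictions_format="bigquery",
        bigquery_source_input_uri="my-project.my_dataset.input_table",   # projectId.bqDatasetId.bqTableId, as described above
        bigquery_destination_output_uri="my-project",   # output project; a prediction_<model>_<timestamp> dataset is created in it
        generate_explanation=False,                     # True also requires explanation metadata/parameters or Model.explanation_spec
    )

The model itself is supplied separately, as either the model or the unmanaged_container_model artifact input described at the top of this component.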
- isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
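The Cloud Storage path uses the same component with the gcs_* inputs instead. Another illustrative bundle; the bucket, paths, machine shape, and key column are placeholders:

    batch_predict_gcs = dict(
        job_display_name="forecasting-batch-predict-gcs",
        project="my-project",
        location="us-central1",
        instances_format="jsonl",                              # the documented default
        predictions_format="jsonl",
        gcs_source_uris=["gs://my-bucket/instances/*.jsonl"],  # wildcards are allowed, per the description
        gcs_destination_output_uri_prefix="gs://my-bucket/predictions",
        machine_type="n1-standard-4",                          # dedicated-resources machine type (placeholder)
        starting_replica_count=1,
        max_replica_count=10,                                  # the documented default cap when machine_type is set
        manual_batch_tuning_parameters_batch_size=4,           # the documented default batch size
        key_field="row_id",                                    # output rows then carry only this key instead of the full instance
    )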
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
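The gcs_destination_output_uri_prefix description above also fixes the output layout: a prediction directory stamped with the model name and job creation time, holding predictions_0001 and later shards, plus errors_* shards when some instances fail. A small sketch of walking that layout, assuming the google-cloud-storage client library and placeholder bucket and prefix names:

    from google.cloud import storage

    client = storage.Client(project="my-project")   # placeholder project
    # Placeholder prefix; the job creates a timestamped prediction-* directory under it.
    for blob in client.list_blobs("my-bucket", prefix="predictions/"):
        shard = blob.name.rsplit("/", 1)[-1]
        if shard.startswith("predictions_"):
            print("prediction shard:", blob.name)
        elif shard.startswith("errors_"):
            # Each errors shard keeps the failed instances plus an extra 'error' field.
            print("failed-instance shard:", blob.name)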
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-model-evaluation-forecasting: - executorLabel: exec-model-evaluation-forecasting - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-forecasting-2: - executorLabel: exec-model-evaluation-forecasting-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - isOptional: true - parameterType: 
NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - forecasting_quantiles: - defaultValue: - - 0.5 - isOptional: true - parameterType: LIST - forecasting_type: - defaultValue: point - isOptional: true - parameterType: STRING - ground_truth_bigquery_source: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - point_evaluation_quantile: - defaultValue: 0.5 - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - target_field_name: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' 
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. 
Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. 
- parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' 
- isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. 
-deploymentSpec: - executors: - exec-automl-forecasting-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 
- - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-forecasting-stage-2-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", - "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", - "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", - "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", - "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", - "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 
1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ - \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ - \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ - \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ - \ train_budget_milli_node_hours: The train budget of creating this model,\n\ - \ expressed in milli node hours i.e. 1,000 value in this field means\ - \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ - \ trails for stage 2.\n selected_trials: Number of trials that should\ - \ be selected.\n is_skip_architecture_search: If component is being called\ - \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ - \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ - \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 1\n training\ - \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ - \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ - \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ - \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ - \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ - \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ - \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ - \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ - \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ - \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ - \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ - \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id 
- - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-finalize-eval-quantile-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-finalize-eval-quantile-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - finalize_eval_quantile_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ - \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ - \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ - \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ - \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ - \ ),\n )(forecasting_type, quantiles)\n\n" - image: python:3.7 - exec-get-or-create-model-description: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-or-create-model-description-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_or_create_model_description - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ - \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ - \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ - \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ - \ actual template format doesn't get injected since\n # the Python isn't\ - \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" - image: python:3.7 - exec-get-prediction-image-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-prediction-image-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_image_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ - Returns the prediction image corresponding to the given model type.\"\"\"\ - \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ - \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ - \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ - \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ - \ )\n return images[model_type]\n\n" - image: python:3.7 - exec-get-predictions-column: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-get-predictions-column-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_predictions_column - command: - - sh - - -c - - "\nif ! 
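The two small helpers embedded above, _get_prediction_image_uri and get_predictions_column, de-escaped for readability; the image URIs are copied verbatim from the YAML:

    def _get_prediction_image_uri(model_type: str) -> str:
        """Returns the prediction server image for the given forecasting model type."""
        # Keys come from AutoMlTimeSeriesForecastingTrainSpec.
        images = {
            'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',
            'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',
            'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',
            'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',
        }
        if model_type not in images:
            raise ValueError(
                f'Invalid forecasting model type: {model_type}. '
                f'Valid options are: {images.keys()}.'
            )
        return images[model_type]


    def get_predictions_column(forecasting_type: str, target_column: str) -> str:
        """Generates the batch-prediction output's target column name."""
        if forecasting_type == 'quantile':
            return f'predicted_{target_column}.quantile_predictions'
        return f'predicted_{target_column}.value'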
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ - \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ - \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ - \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation-forecasting: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - 
'{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-forecasting-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - forecasting - - --forecasting_type - - '{{$.inputs.parameters[''forecasting_type'']}}' - - --forecasting_quantiles - - '{{$.inputs.parameters[''forecasting_quantiles'']}}' - - --point_evaluation_quantile - - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --target_field_name - - instance.{{$.inputs.parameters['target_field_name']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - 
--dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - 
--project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ - \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ - \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ - \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ - \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ - \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ - \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ - \ return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n 
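The _set_optional_inputs component, whose escaped source appears around this point, resolves the training data source and the model display name before the rest of the pipeline runs. A de-escaped sketch of that logic follows; _DEFAULT_MODEL_DISPLAY_NAME is given a placeholder value here because the real default is defined elsewhere in the original module:

    import collections
    from typing import NamedTuple

    from google.cloud import aiplatform
    from google.cloud import aiplatform_v1beta1 as aip
    from kfp import dsl

    # Assumed placeholder: the real default lives elsewhere in the original module.
    _DEFAULT_MODEL_DISPLAY_NAME = 'forecasting-model'


    def _set_optional_inputs(
        project: str,
        location: str,
        data_source_csv_filenames: str,
        data_source_bigquery_table_path: str,
        vertex_dataset: dsl.Input[dsl.Artifact],
        model_display_name: str,
    ) -> NamedTuple(
        'Outputs',
        [
            ('data_source_csv_filenames', str),
            ('data_source_bigquery_table_path', str),
            ('model_display_name', str),
        ],
    ):
        """Resolves the data source URI and the model display name."""
        if not model_display_name:
            model_display_name = _DEFAULT_MODEL_DISPLAY_NAME

        if vertex_dataset is not None:
            # resourceName has the form
            # projects/<num>/locations/<region>/datasets/<id>
            dataset_name = vertex_dataset.metadata['resourceName']
            aiplatform.init(project=project, location=location)
            client = aip.DatasetServiceClient(
                client_options={'api_endpoint': f'{location}-aiplatform.googleapis.com'}
            )
            dataset = client.get_dataset(name=dataset_name)
            input_config = dataset.metadata['inputConfig']
            if 'gcsSource' in input_config:
                data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])
            elif 'bigquerySource' in input_config:
                data_source_bigquery_table_path = input_config['bigquerySource']['uri']
        elif data_source_csv_filenames:
            pass
        elif data_source_bigquery_table_path:
            pass
        else:
            raise ValueError(
                'One of vertex_dataset, data_source_csv_filenames,'
                ' data_source_bigquery_table_path must be specified'
            )

        return collections.namedtuple(
            'Outputs',
            ['data_source_csv_filenames', 'data_source_bigquery_table_path',
             'model_display_name'],
        )(data_source_csv_filenames, data_source_bigquery_table_path,
          model_display_name)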
)(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
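_split_materialized_data, embedded above, is the adapter between the feature transform engine and the trainer: it unpacks the materialized_data artifact into separate train, eval and test file-pattern outputs. A de-escaped sketch with tidied docstrings:

    import json

    import tensorflow as tf
    from kfp.dsl import Dataset, Input, OutputPath


    def _split_materialized_data(
        materialized_data: Input[Dataset],
        materialized_train_split: OutputPath('MaterializedSplit'),
        materialized_eval_split: OutputPath('MaterializedSplit'),
        materialized_test_split: OutputPath('MaterializedSplit'),
    ):
        """Splits the FTE materialized_data artifact into train, eval and test splits."""
        # The artifact is a pointer file: its contents are the GCS path of the
        # JSON manifest written by the feature transform engine.
        with tf.io.gfile.GFile(materialized_data.path, 'r') as f:
            artifact_path = f.read()

        with tf.io.gfile.GFile(artifact_path, 'r') as f:
            materialized_data_json = json.load(f)

        if 'tf_record_data_source' in materialized_data_json:
            file_patterns = materialized_data_json['tf_record_data_source']['file_patterns']
        elif 'avro_data_source' in materialized_data_json:
            file_patterns = materialized_data_json['avro_data_source']['file_patterns']
        elif 'parquet_data_source' in materialized_data_json:
            file_patterns = materialized_data_json['parquet_data_source']['file_patterns']
        else:
            raise ValueError(
                f'Unsupported training data source: {materialized_data_json}')

        # Indices map to splits by insertion order: 0 = train, 1 = eval, 2 = test.
        with tf.io.gfile.GFile(materialized_train_split, 'w') as f:
            f.write(file_patterns[0])
        with tf.io.gfile.GFile(materialized_eval_split, 'w') as f:
            f.write(file_patterns[1])
        with tf.io.gfile.GFile(materialized_test_split, 'w') as f:
            f.write(file_patterns[2])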
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. - name: time-series-dense-encoder-forecasting -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--available_at_forecast_columns: - componentInputParameter: available_at_forecast_columns - pipelinechannel--context_window: - componentInputParameter: context_window - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluated_examples_bigquery_path: - componentInputParameter: evaluated_examples_bigquery_path - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: 
evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: - componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--group_columns: - componentInputParameter: group_columns - pipelinechannel--group_temporal_total_weight: - componentInputParameter: group_temporal_total_weight - pipelinechannel--group_total_weight: - componentInputParameter: group_total_weight - pipelinechannel--holiday_regions: - componentInputParameter: holiday_regions - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--num_selected_trials: - componentInputParameter: num_selected_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - 
pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_trainer_worker_pool_specs_override: - componentInputParameter: stage_2_trainer_worker_pool_specs_override - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--temporal_total_weight: - componentInputParameter: temporal_total_weight - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_attribute_columns: - componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--unavailable_at_forecast_columns: - componentInputParameter: unavailable_at_forecast_columns - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_predefined_column: - componentInputParameter: window_predefined_column - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - available_at_forecast_columns: - description: 'The columns that are available at the - - forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: 0.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: The full service account name. 
- isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: The dataflow subnetwork. - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: '`True` to enable dataflow public IPs.' - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluated_examples_bigquery_path: - defaultValue: '' - description: 'The bigquery dataset to write the - - predicted examples into for evaluation, in the format - - `bq://project.dataset`. Only necessary if evaluation is enabled.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 22.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 22.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the batch prediction - - job in evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The maximum count of replicas - - the batch prediction job can scale to.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'Number of replicas to use - - in the batch prediction cluster at startup time.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: The disk space in GB for dataflow. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'Machine type for the dataflow job in - - evaluation, such as ''n1-standard-16''.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 25.0 - description: Maximum number of dataflow workers. - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 22.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_transform_engine_bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The full id of - - the feature transform engine staging dataset.' 
- isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size of the - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type of - - the feature transform engine.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of - - dataflow workers of the feature transform engine.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - defaultValue: 0.0 - description: The length of the horizon. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - description: 'A list of time series attribute column names that define the - - time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions - - aggregated over both the horizon and time series in the same hierarchy - - group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated over - - time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - holiday_regions: - description: 'The geographical regions where the holiday effect is - - applied in modeling.' - isOptional: true - parameterType: LIST - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_description: - defaultValue: '' - description: Optional description. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - description: Optional display name for model. - isOptional: true - parameterType: STRING - num_selected_trials: - defaultValue: 10.0 - description: Number of selected trails. - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", - - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - - "minimize-quantile-loss".' - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: '`True` to evaluate the ensembled model on the test split.' - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' 
- isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 2 trainer worker pool spec.' - isOptional: true - parameterType: LIST - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for predictions aggregated - - over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: The column that indicates the time. - parameterType: STRING - time_series_attribute_columns: - description: 'The columns that are invariant across the - - same time series.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different - - time series.' - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Dict mapping auto and/or type-resolutions to feature - - columns. The supported types are: auto, categorical, numeric, text, and - - timestamp.' - parameterType: STRUCT - unavailable_at_forecast_columns: - description: 'The columns that are unavailable at the - - forecast time.' - isOptional: true - parameterType: LIST - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: 0.0 - description: The maximum number of windows that will be generated. - isOptional: true - parameterType: NUMBER_INTEGER - window_predefined_column: - defaultValue: '' - description: The column that indicate the start of each window. - isOptional: true - parameterType: STRING - window_stride_length: - defaultValue: 0.0 - description: The stride length to generate the window. 
- isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py deleted file mode 100644 index 2cf4444e5a..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py +++ /dev/null @@ -1,1023 +0,0 @@ -"""Util functions for Vertex Forecasting pipelines.""" - -import os -import pathlib -from typing import Any, Dict, FrozenSet, List, Optional, Tuple - -_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() - -_RETAIL_MODEL_DISABLED_OPTIONS = frozenset([ - 'quantiles', - 'enable_probabilistic_inference', -]) - - -def _get_base_forecasting_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_column: str, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: 
Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, - fields_to_exclude: FrozenSet[str] = frozenset(), -) -> Dict[str, Any]: - """Formats a set of parameters common across Vertex forecasting pipelines.""" - if not study_spec_parameters_override: - study_spec_parameters_override = [] - if not stage_1_tuner_worker_pool_specs_override: - stage_1_tuner_worker_pool_specs_override = [] - if not stage_2_trainer_worker_pool_specs_override: - stage_2_trainer_worker_pool_specs_override = [] - - parameter_values = {} - parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'dataflow_service_account': dataflow_service_account, - 'evaluated_examples_bigquery_path': evaluated_examples_bigquery_path, - 'target_column': target_column, - 'optimization_objective': optimization_objective, - 'transformations': transformations, - 'train_budget_milli_node_hours': train_budget_milli_node_hours, - 'time_column': time_column, - 'time_series_identifier_column': time_series_identifier_column, - 'time_series_attribute_columns': time_series_attribute_columns, - 'available_at_forecast_columns': available_at_forecast_columns, - 'unavailable_at_forecast_columns': unavailable_at_forecast_columns, - 'forecast_horizon': forecast_horizon, - 'context_window': context_window, - 'window_predefined_column': window_predefined_column, - 'window_stride_length': window_stride_length, - 'window_max_count': window_max_count, - 'holiday_regions': holiday_regions, - 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, - 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, - 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, - 'num_selected_trials': num_selected_trials, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'weight_column': weight_column, - 'dataflow_subnetwork': dataflow_subnetwork, - 'feature_transform_engine_dataflow_machine_type': ( - feature_transform_engine_dataflow_machine_type - ), - 'feature_transform_engine_dataflow_max_num_workers': ( - feature_transform_engine_dataflow_max_num_workers - ), - 'feature_transform_engine_dataflow_disk_size_gb': ( - feature_transform_engine_dataflow_disk_size_gb - ), - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'feature_transform_engine_bigquery_staging_full_dataset_id': ( - feature_transform_engine_bigquery_staging_full_dataset_id - ), - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'study_spec_parameters_override': study_spec_parameters_override, - 'stage_1_tuner_worker_pool_specs_override': ( - 
stage_1_tuner_worker_pool_specs_override - ), - 'stage_2_trainer_worker_pool_specs_override': ( - stage_2_trainer_worker_pool_specs_override - ), - 'quantiles': quantiles, - 'encryption_spec_key_name': encryption_spec_key_name, - 'enable_probabilistic_inference': enable_probabilistic_inference, - 'model_display_name': model_display_name, - 'model_description': model_description, - 'run_evaluation': run_evaluation, - 'group_columns': group_columns, - 'group_total_weight': group_total_weight, - 'temporal_total_weight': temporal_total_weight, - 'group_temporal_total_weight': group_temporal_total_weight, - } - - # Filter out empty values and those excluded from the particular pipeline. - # (example: TFT and Seq2Seq don't support `quantiles`.) - parameter_values.update( - { - param: value - for param, value in parameters.items() - if value is not None and param not in fields_to_exclude - } - ) - return parameter_values - - -def get_learn_to_learn_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_column: str, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = 
True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -) -> Tuple[str, Dict[str, Any]]: - """Returns l2l_forecasting pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature - columns. The supported types are: auto, categorical, numeric, text, and - timestamp. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time - series. - time_series_attribute_columns: The columns that are invariant across the - same time series. - available_at_forecast_columns: The columns that are available at the - forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the - forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The bigquery dataset to write the - predicted examples into for evaluation, in the format - `bq://project.dataset`. - window_predefined_column: The column that indicate the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is - applied in modeling. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - num_selected_trials: Number of selected trails. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of - the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of - the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of - dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the - dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction - job in evaluation, such as 'n1-standard-16'. 
- evaluation_batch_predict_starting_replica_count: Number of replicas to use - in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas - the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in - evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding - stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding - stage 2 trainer worker pool spec. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. If quantiles are specified, then the quantiles of the - distribution are also returned. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - group_columns: A list of time series attribute column names that define the - time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over - time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated - over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions - aggregated over both the horizon and time series in the same hierarchy - group. 
- """ - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - num_selected_trials=num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - quantiles=quantiles, - encryption_spec_key_name=encryption_spec_key_name, - enable_probabilistic_inference=enable_probabilistic_inference, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - group_columns=group_columns, - group_total_weight=group_total_weight, - temporal_total_weight=temporal_total_weight, - group_temporal_total_weight=group_temporal_total_weight, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'learn_to_learn_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: 
str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_column: str, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - enable_probabilistic_inference: bool = False, - quantiles: Optional[List[float]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, - group_columns: Optional[List[str]] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -) -> Tuple[str, Dict[str, Any]]: - """Returns timeseries_dense_encoder_forecasting pipeline and parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature - columns. The supported types are: auto, categorical, numeric, text, and - timestamp. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - time_column: The column that indicates the time. 
- time_series_identifier_column: The column which distinguishes different time - series. - time_series_attribute_columns: The columns that are invariant across the - same time series. - available_at_forecast_columns: The columns that are available at the - forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the - forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The bigquery dataset to write the - predicted examples into for evaluation, in the format - `bq://project.dataset`. - window_predefined_column: The column that indicate the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is - applied in modeling. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - num_selected_trials: Number of selected trails. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of - the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of - the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of - dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the - dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction - job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use - in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas - the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in - evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding - stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding - stage 2 trainer worker pool spec. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. If quantiles are specified, then the quantiles of the - distribution are also returned. 
- quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - group_columns: A list of time series attribute column names that define the - time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over - time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated - over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions - aggregated over both the horizon and time series in the same hierarchy - group. - """ - - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - num_selected_trials=num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - 
stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - quantiles=quantiles, - encryption_spec_key_name=encryption_spec_key_name, - enable_probabilistic_inference=enable_probabilistic_inference, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - group_columns=group_columns, - group_total_weight=group_total_weight, - temporal_total_weight=temporal_total_weight, - group_temporal_total_weight=group_temporal_total_weight, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'time_series_dense_encoder_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_column: str, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, -): - """Returns tft_forecasting pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. 
- root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature - columns. The supported types are: auto, categorical, numeric, text, and - timestamp. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time - series. - time_series_attribute_columns: The columns that are invariant across the - same time series. - available_at_forecast_columns: The columns that are available at the - forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the - forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The bigquery dataset to write the - predicted examples into for evaluation, in the format - `bq://project.dataset`. - window_predefined_column: The column that indicate the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is - applied in modeling. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of - the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of - the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of - dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the - dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction - job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use - in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas - the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in - evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. 
- evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding - stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding - stage 2 trainer worker pool spec. - encryption_spec_key_name: The KMS key name. - model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - """ - # TFT should only have 1 selected trial to freeze the ensemble size at 1. - excluded_parameters = _RETAIL_MODEL_DISABLED_OPTIONS.union({ - 'num_selected_trials', - }) - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - encryption_spec_key_name=encryption_spec_key_name, - 
model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - fields_to_exclude=excluded_parameters, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'temporal_fusion_transformer_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_sequence_to_sequence_forecasting_pipeline_and_parameters( - *, - project: str, - location: str, - root_dir: str, - target_column: str, - optimization_objective: str, - transformations: Dict[str, List[str]], - train_budget_milli_node_hours: float, - time_column: str, - time_series_identifier_column: str, - time_series_attribute_columns: Optional[List[str]] = None, - available_at_forecast_columns: Optional[List[str]] = None, - unavailable_at_forecast_columns: Optional[List[str]] = None, - forecast_horizon: Optional[int] = None, - context_window: Optional[int] = None, - evaluated_examples_bigquery_path: Optional[str] = None, - window_predefined_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - stage_1_num_parallel_trials: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - stage_2_num_parallel_trials: Optional[int] = None, - num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - feature_transform_engine_bigquery_staging_full_dataset_id: str = '', - feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', - feature_transform_engine_dataflow_max_num_workers: int = 10, - feature_transform_engine_dataflow_disk_size_gb: int = 40, - evaluation_batch_predict_machine_type: str = 'n1-standard-16', - evaluation_batch_predict_starting_replica_count: int = 25, - evaluation_batch_predict_max_replica_count: int = 25, - evaluation_dataflow_machine_type: str = 'n1-standard-16', - evaluation_dataflow_max_num_workers: int = 25, - evaluation_dataflow_disk_size_gb: int = 50, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - encryption_spec_key_name: Optional[str] = None, - model_display_name: Optional[str] = None, - model_description: Optional[str] = None, - run_evaluation: bool = True, -): - """Returns seq2seq forecasting pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", - "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or - "minimize-quantile-loss". - transformations: Dict mapping auto and/or type-resolutions to feature - columns. The supported types are: auto, categorical, numeric, text, and - timestamp. 
- train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time - series. - time_series_attribute_columns: The columns that are invariant across the - same time series. - available_at_forecast_columns: The columns that are available at the - forecast time. - unavailable_at_forecast_columns: The columns that are unavailable at the - forecast time. - forecast_horizon: The length of the horizon. - context_window: The length of the context window. - evaluated_examples_bigquery_path: The bigquery dataset to write the - predicted examples into for evaluation, in the format - `bq://project.dataset`. - window_predefined_column: The column that indicate the start of each window. - window_stride_length: The stride length to generate the window. - window_max_count: The maximum number of windows that will be generated. - holiday_regions: The geographical regions where the holiday effect is - applied in modeling. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - num_selected_trials: Number of selected trails. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: The test fraction. - weight_column: The weight column name. - dataflow_service_account: The full service account name. - dataflow_subnetwork: The dataflow subnetwork. - dataflow_use_public_ips: `True` to enable dataflow public IPs. - feature_transform_engine_bigquery_staging_full_dataset_id: The full id of - the feature transform engine staging dataset. - feature_transform_engine_dataflow_machine_type: The dataflow machine type of - the feature transform engine. - feature_transform_engine_dataflow_max_num_workers: The max number of - dataflow workers of the feature transform engine. - feature_transform_engine_dataflow_disk_size_gb: The disk size of the - dataflow workers of the feature transform engine. - evaluation_batch_predict_machine_type: Machine type for the batch prediction - job in evaluation, such as 'n1-standard-16'. - evaluation_batch_predict_starting_replica_count: Number of replicas to use - in the batch prediction cluster at startup time. - evaluation_batch_predict_max_replica_count: The maximum count of replicas - the batch prediction job can scale to. - evaluation_dataflow_machine_type: Machine type for the dataflow job in - evaluation, such as 'n1-standard-16'. - evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. - evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. - study_spec_parameters_override: The list for overriding study spec. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding - stage 1 tuner worker pool spec. - stage_2_trainer_worker_pool_specs_override: The dictionary for overriding - stage 2 trainer worker pool spec. - encryption_spec_key_name: The KMS key name. 
- model_display_name: Optional display name for model. - model_description: Optional description. - run_evaluation: `True` to evaluate the ensembled model on the test split. - """ - - parameter_values = _get_base_forecasting_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - time_column=time_column, - dataflow_service_account=dataflow_service_account, - time_series_identifier_column=time_series_identifier_column, - time_series_attribute_columns=time_series_attribute_columns, - available_at_forecast_columns=available_at_forecast_columns, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - forecast_horizon=forecast_horizon, - context_window=context_window, - window_predefined_column=window_predefined_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - num_selected_trials=num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - dataflow_use_public_ips=dataflow_use_public_ips, - dataflow_subnetwork=dataflow_subnetwork, - feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, - feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, - feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, - feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - study_spec_parameters_override=study_spec_parameters_override, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, - encryption_spec_key_name=encryption_spec_key_name, - model_display_name=model_display_name, - model_description=model_description, - run_evaluation=run_evaluation, - fields_to_exclude=_RETAIL_MODEL_DISABLED_OPTIONS, - ) - - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, - 'sequence_to_sequence_forecasting_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py deleted file 
mode 100644
index 764539056a..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Preview AutoML tabular components."""
-
-from google_cloud_pipeline_components.preview.automl.tabular.feature_selection import tabular_feature_ranking_and_selection as FeatureSelectionOp
-from google_cloud_pipeline_components.preview.automl.tabular.feature_transform_engine import feature_transform_engine as FeatureTransformEngineOp
-from google_cloud_pipeline_components.preview.automl.tabular.tabnet_hyperparameter_tuning_job import tabnet_hyperparameter_tuning_job as TabNetHyperparameterTuningJobOp
-from google_cloud_pipeline_components.preview.automl.tabular.tabnet_trainer import tabnet_trainer as TabNetTrainerOp
-from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_hyperparameter_tuning_job import wide_and_deep_hyperparameter_tuning_job as WideAndDeepHyperparameterTuningJobOp
-from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_trainer import wide_and_deep_trainer as WideAndDeepTrainerOp
-from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import xgboost_hyperparameter_tuning_job as XGBoostHyperparameterTuningJobOp
-from google_cloud_pipeline_components.preview.automl.tabular.xgboost_trainer import xgboost_trainer as XGBoostTrainerOp
-
-__all__ = [
-    'FeatureSelectionOp',
-    'WideAndDeepHyperparameterTuningJobOp',
-    'WideAndDeepTrainerOp',
-    'TabNetHyperparameterTuningJobOp',
-    'TabNetTrainerOp',
-    'FeatureTransformEngineOp',
-    'XGBoostHyperparameterTuningJobOp',
-    'XGBoostTrainerOp',
-]
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml
deleted file mode 100644
index b10b4b421a..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml
+++ /dev/null
@@ -1,11427 +0,0 @@
-# PIPELINE DEFINITION
-# Name: automl-tabular-feature-selection-pipeline
-# Description: The AutoML Tabular pipeline.
-# Inputs:
-#    additional_experiments: dict
-#    apply_feature_selection_tuning: bool [Default: False]
-#    cv_trainer_worker_pool_specs_override: list
-#    data_source_bigquery_table_path: str [Default: '']
-#    data_source_csv_filenames: str [Default: '']
-#    dataflow_service_account: str [Default: '']
-#    dataflow_subnetwork: str [Default: '']
-#    dataflow_use_public_ips: bool [Default: True]
-#    disable_early_stopping: bool [Default: False]
-#    distill_batch_predict_machine_type: str [Default: 'n1-standard-16']
-#    distill_batch_predict_max_replica_count: int [Default: 25.0]
-#    distill_batch_predict_starting_replica_count: int [Default: 25.0]
-#    enable_probabilistic_inference: bool [Default: False]
-#    encryption_spec_key_name: str [Default: '']
-#    evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8']
-#    evaluation_batch_explain_max_replica_count: int [Default: 10.0]
-#    evaluation_batch_explain_starting_replica_count: int [Default: 10.0]
-#    evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8']
-#    evaluation_batch_predict_max_replica_count: int [Default: 20.0]
-#    evaluation_batch_predict_starting_replica_count: int [Default: 20.0]
-#    evaluation_dataflow_disk_size_gb: int [Default: 50.0]
-#    evaluation_dataflow_machine_type: str [Default: 'n1-standard-4']
-#    evaluation_dataflow_max_num_workers: int [Default: 100.0]
-#    evaluation_dataflow_starting_num_workers: int [Default: 10.0]
-#    export_additional_model_without_custom_ops: bool [Default: False]
-#    fast_testing: bool [Default: False]
-#    location: str
-#    max_selected_features: int [Default: 1000.0]
-#    model_description: str [Default: '']
-#    model_display_name: str [Default: '']
-#    optimization_objective: str
-#    optimization_objective_precision_value: float [Default: -1.0]
-#    optimization_objective_recall_value: float [Default: -1.0]
-#    predefined_split_key: str [Default: '']
-#    prediction_type: str
-#    project: str
-#    quantiles: list
-#    root_dir: str
-#    run_distillation: bool [Default: False]
-#    run_evaluation: bool [Default: False]
-#    stage_1_num_parallel_trials: int [Default: 35.0]
-#    stage_1_tuner_worker_pool_specs_override: list
-#    stage_2_num_parallel_trials: int [Default: 35.0]
-#    stage_2_num_selected_trials: int [Default: 5.0]
-#    stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0]
-#    stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16']
-#    stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0]
-#    stratified_split_key: str [Default: '']
-#    study_spec_parameters_override: list
-#    target_column: str
-#    test_fraction: float [Default: -1.0]
-#    timestamp_split_key: str [Default: '']
-#    train_budget_milli_node_hours: float
-#    training_fraction: float [Default: -1.0]
-#    transform_dataflow_disk_size_gb: int [Default: 40.0]
-#    transform_dataflow_machine_type: str [Default: 'n1-standard-16']
-#    transform_dataflow_max_num_workers: int [Default: 25.0]
-#    transformations: str
-#    validation_fraction: float [Default: -1.0]
-#    weight_column: str [Default: '']
-# Outputs:
-#    feature-attribution-2-feature_attributions: system.Metrics
-#    feature-attribution-3-feature_attributions: system.Metrics
-#    feature-attribution-feature_attributions: system.Metrics
-#    model-evaluation-2-evaluation_metrics: system.Metrics
-#    model-evaluation-3-evaluation_metrics: system.Metrics
-#    model-evaluation-evaluation_metrics: system.Metrics
-components:
-  comp-automl-tabular-cv-trainer:
-    executorLabel: exec-automl-tabular-cv-trainer
-    inputDefinitions:
-      artifacts:
-        materialized_cv_splits:
-          artifactType:
schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-cv-trainer-2: - executorLabel: exec-automl-tabular-cv-trainer-2 - inputDefinitions: - artifacts: - materialized_cv_splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble: - executorLabel: exec-automl-tabular-ensemble - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
- parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble-2: - executorLabel: exec-automl-tabular-ensemble-2 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. 
- model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble-3: - executorLabel: exec-automl-tabular-ensemble-3 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. 
- parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-infra-validator-2: - executorLabel: exec-automl-tabular-infra-validator-2 - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-infra-validator-3: - executorLabel: exec-automl-tabular-infra-validator-3 - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-stage-1-tuner: - executorLabel: exec-automl-tabular-stage-1-tuner - inputDefinitions: - artifacts: - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - disable_early_stopping: - defaultValue: false - description: 'True if disable early stopping. Default - - value is false.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. 
- parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "model_type","categorical_value_spec": {"values": - - ["nn"]}}]' - isOptional: true - parameterType: LIST - tune_feature_selection_rate: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-stage-1-tuner-2: - executorLabel: exec-automl-tabular-stage-1-tuner-2 - inputDefinitions: - artifacts: - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - disable_early_stopping: - defaultValue: false - description: 'True if disable early stopping. Default - - value is false.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "model_type","categorical_value_spec": {"values": - - ["nn"]}}]' - isOptional: true - parameterType: LIST - tune_feature_selection_rate: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-transform: - executorLabel: exec-automl-tabular-transform - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The eval split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The test split. - train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The train split. - parameters: - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' 
- isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized test split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-transform-2: - executorLabel: exec-automl-tabular-transform-2 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The eval split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The test split. - train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The train split. - parameters: - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. 
If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized test split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-2: - executorLabel: exec-bool-identity-2 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-3: - executorLabel: exec-bool-identity-3 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. 
- parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-check-if-binary-classification: - executorLabel: exec-check-if-binary-classification - inputDefinitions: - artifacts: - example_gen_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: metadata generated by example gen. 
- outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-3 - tasks: - automl-tabular-cv-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer - automl-tabular-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble - dependentTasks: - - automl-tabular-cv-trainer - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - 
outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-tabular-ensemble - - bool-identity - - model-upload - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - 
pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - runtimeValue: - constant: '' - taskInfo: - name: importer - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-transform-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--purge-unused-features-output_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-metadata: - artifactType: - schemaTitle: system.Artifact - 
schemaVersion: 0.0.1 - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - 
dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - 
predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - 
pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-7 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-5 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-7 - tasks: - automl-tabular-cv-trainer-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer-2 - dependentTasks: - - automl-tabular-stage-1-tuner - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-stage-1-tuner - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: 
pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters-2 - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer-2 - automl-tabular-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble-2 - dependentTasks: - - automl-tabular-cv-trainer-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer-2 - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble-2 - automl-tabular-infra-validator-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator-2 - dependentTasks: - - automl-tabular-ensemble-2 - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - taskInfo: - name: automl-tabular-infra-validator-2 - automl-tabular-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - feature_ranking: - componentInputArtifact: pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking - materialized_eval_split: - componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - 
num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - taskOutputParameter: - outputParameterKey: stage_1_num_selected_trials - producerTask: calculate-training-parameters-2 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - taskOutputParameter: - outputParameterKey: reduce_search_space_mode - producerTask: calculate-training-parameters-2 - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - tune_feature_selection_rate: - componentInputParameter: pipelinechannel--apply_feature_selection_tuning - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-tabular-stage-1-tuner - bool-identity-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-2 - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity-2 - bool-identity-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-3 - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_distillation - taskInfo: - name: bool-identity-3 - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-tabular-ensemble-2 - - bool-identity-2 - - bool-identity-3 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-2 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-2 - pipelinechannel--bool-identity-2-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-2 - pipelinechannel--bool-identity-3-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-3 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: 
pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: no-distill - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] - == 'false' - condition-7: - componentRef: - name: comp-condition-7 - dependentTasks: - - automl-tabular-ensemble-2 - - bool-identity-2 - - bool-identity-3 - - calculate-training-parameters-2 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - pipelinechannel--purge-unused-features-output_metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - 
pipelinechannel--tabular-stats-and-example-gen-eval_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - pipelinechannel--tabular-stats-and-example-gen-test_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split - pipelinechannel--tabular-stats-and-example-gen-train_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split - parameters: - pipelinechannel--bool-identity-2-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-2 - pipelinechannel--bool-identity-3-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-3 - pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: - taskOutputParameter: - outputParameterKey: distill_stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: - taskOutputParameter: - outputParameterKey: reduce_search_space_mode - producerTask: calculate-training-parameters-2 - pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - taskInfo: - name: is-distill - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] - == 'true' - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-transform-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-transform-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-transform-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--purge-unused-features-output_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 
- pipelinechannel--tabular-stats-and-example-gen-train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - parameters: - pipelinechannel--apply_feature_selection_tuning: - parameterType: BOOLEAN - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - 
schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-6 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-6 - tasks: - condition-6: - componentRef: - name: comp-condition-6 - dependentTasks: - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - pipelinechannel--bool-identity-2-Output: - componentInputParameter: pipelinechannel--bool-identity-2-Output - pipelinechannel--bool-identity-3-Output: - componentInputParameter: pipelinechannel--bool-identity-3-Output - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] - == 'true' - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - 
pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-6: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - 
componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: 
pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - 
parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-7: - dag: - outputs: - artifacts: - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-8 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-8 - tasks: - automl-tabular-ensemble-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble-3 - dependentTasks: - - automl-tabular-stage-1-tuner-2 - - automl-tabular-transform-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform-2 - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-stage-1-tuner-2 - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble-3 - automl-tabular-infra-validator-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator-3 - dependentTasks: - - automl-tabular-ensemble-3 - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - taskInfo: - name: automl-tabular-infra-validator-3 - automl-tabular-stage-1-tuner-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-stage-1-tuner-2 - dependentTasks: - - automl-tabular-transform-2 - inputs: - artifacts: - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform-2 - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform-2 - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform-2 - parameters: - deadline_hours: - componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours - disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - encryption_spec_key_name: - 
componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_distillation: - runtimeValue: - constant: 1.0 - single_run_max_secs: - componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-tabular-stage-1-tuner-2 - automl-tabular-transform-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-transform-2 - dependentTasks: - - write-bp-result-path - - write-bp-result-path-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - eval_split: - taskOutputArtifact: - outputArtifactKey: result - producerTask: write-bp-result-path-2 - metadata: - componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata - test_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split - train_split: - taskOutputArtifact: - outputArtifactKey: result - producerTask: write-bp-result-path - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-transform-2 - condition-8: - componentRef: - name: comp-condition-8 - dependentTasks: - - automl-tabular-ensemble-3 - - model-upload-3 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-3 - pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - pipelinechannel--model-upload-3-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-3 - parameters: - pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-3 - pipelinechannel--bool-identity-2-Output: - componentInputParameter: pipelinechannel--bool-identity-2-Output - pipelinechannel--bool-identity-3-Output: - componentInputParameter: pipelinechannel--bool-identity-3-Output - pipelinechannel--dataflow_service_account: - 
componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] - == 'true' - model-batch-predict-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-3 - dependentTasks: - - read-input-uri - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - taskOutputParameter: - outputParameterKey: Output - producerTask: read-input-uri - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - 
runtimeValue: - constant: batch-predict-train-split - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: tf-record - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-3 - model-batch-predict-4: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-4 - dependentTasks: - - read-input-uri-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - taskOutputParameter: - outputParameterKey: Output - producerTask: read-input-uri-2 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-eval-split - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: tf-record - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-4 - model-upload-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-3 - dependentTasks: - - automl-tabular-ensemble-3 - - automl-tabular-infra-validator-3 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-3 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - parameters: - display_name: - runtimeValue: - constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-3 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-3 - read-input-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-read-input-uri - inputs: - artifacts: - split_uri: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split - taskInfo: - name: read-input-uri - read-input-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-read-input-uri-2 - inputs: - artifacts: - split_uri: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - taskInfo: - name: read-input-uri-2 - write-bp-result-path: - cachingOptions: - enableCache: true - componentRef: - name: comp-write-bp-result-path - 
dependentTasks: - - model-batch-predict-3 - inputs: - artifacts: - bp_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-3 - taskInfo: - name: write-bp-result-path - write-bp-result-path-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-write-bp-result-path-2 - dependentTasks: - - model-batch-predict-4 - inputs: - artifacts: - bp_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-4 - taskInfo: - name: write-bp-result-path-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--purge-unused-features-output_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: - parameterType: STRING - pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - 
pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-8: - dag: - outputs: - artifacts: - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-3 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation-3 - tasks: - feature-attribution-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-3 - dependentTasks: - - model-batch-explanation-3 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-3 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-3 - model-batch-explanation-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-3 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - 
componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-3 - model-batch-predict-5: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-5 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-5 - model-evaluation-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-3 - dependentTasks: - - model-batch-predict-5 - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-5 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: 
pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation-3 - model-evaluation-import-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-3 - dependentTasks: - - feature-attribution-3 - - model-evaluation-3 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-3 - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-3 - model: - componentInputArtifact: pipelinechannel--model-upload-3-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import-3 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-3-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - 
pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-transform: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-transform - dependentTasks: - - purge-unused-features - - tabular-stats-and-example-gen - inputs: - artifacts: - dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - metadata: - taskOutputArtifact: - outputArtifactKey: output_metadata - producerTask: purge-unused-features - test_split: - taskOutputArtifact: - outputArtifactKey: test_split - producerTask: tabular-stats-and-example-gen - train_split: - taskOutputArtifact: - outputArtifactKey: train_split - producerTask: tabular-stats-and-example-gen - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-transform - check-if-binary-classification: - cachingOptions: - enableCache: true - componentRef: - name: comp-check-if-binary-classification - dependentTasks: - - tabular-stats-and-example-gen - inputs: - artifacts: - example_gen_metadata: - 
taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - taskInfo: - name: check-if-binary-classification - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - automl-tabular-transform - - merge-materialized-splits - - purge-unused-features - - string-not-empty - - tabular-stats-and-example-gen - inputs: - artifacts: - pipelinechannel--automl-tabular-transform-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--purge-unused-features-output_metadata: - taskOutputArtifact: - outputArtifactKey: output_metadata - producerTask: purge-unused-features - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - taskOutputParameter: - outputParameterKey: downsampled_test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - taskOutputParameter: - outputParameterKey: test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - automl-tabular-transform - - merge-materialized-splits - - purge-unused-features - - string-not-empty - - tabular-feature-ranking-and-selection - - tabular-stats-and-example-gen - inputs: - artifacts: - pipelinechannel--automl-tabular-transform-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform - pipelinechannel--automl-tabular-transform-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform - pipelinechannel--automl-tabular-transform-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--purge-unused-features-output_metadata: - taskOutputArtifact: - outputArtifactKey: output_metadata - producerTask: purge-unused-features - pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: - taskOutputArtifact: - outputArtifactKey: feature_ranking - producerTask: tabular-feature-ranking-and-selection - 
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-test_split: - taskOutputArtifact: - outputArtifactKey: test_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-train_split: - taskOutputArtifact: - outputArtifactKey: train_split - producerTask: tabular-stats-and-example-gen - parameters: - pipelinechannel--apply_feature_selection_tuning: - componentInputParameter: pipelinechannel--apply_feature_selection_tuning - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - 
pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--model_display_name: - componentInputParameter: pipelinechannel--model_display_name - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - taskOutputParameter: - outputParameterKey: downsampled_test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - taskOutputParameter: - outputParameterKey: test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - merge-materialized-splits: - cachingOptions: - enableCache: true - componentRef: - name: comp-merge-materialized-splits - dependentTasks: - - automl-tabular-transform - inputs: - artifacts: - split_0: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform - split_1: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform - taskInfo: - name: merge-materialized-splits - purge-unused-features: - cachingOptions: - enableCache: true - componentRef: - name: comp-purge-unused-features - dependentTasks: - - tabular-feature-ranking-and-selection - - 
tabular-stats-and-example-gen - inputs: - artifacts: - selected_features: - taskOutputArtifact: - outputArtifactKey: selected_features - producerTask: tabular-feature-ranking-and-selection - unpurged_metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - taskInfo: - name: purge-unused-features - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - runtimeValue: - constant: '' - taskInfo: - name: string-not-empty - tabular-feature-ranking-and-selection: - cachingOptions: - enableCache: true - componentRef: - name: comp-tabular-feature-ranking-and-selection - dependentTasks: - - check-if-binary-classification - - tabular-stats-and-example-gen - inputs: - artifacts: - data_source: - taskOutputArtifact: - outputArtifactKey: train_split - producerTask: tabular-stats-and-example-gen - parameters: - binary_classification: - taskOutputParameter: - outputParameterKey: Output - producerTask: check-if-binary-classification - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column_name: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: tabular-feature-ranking-and-selection - tabular-stats-and-example-gen: - cachingOptions: - enableCache: true - componentRef: - name: comp-tabular-stats-and-example-gen - inputs: - parameters: - additional_experiments_json: - componentInputParameter: pipelinechannel--additional_experiments - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - encryption_spec_key_name: - componentInputParameter: 
pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - quantiles: - componentInputParameter: pipelinechannel--quantiles - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column_name: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - transformations: - runtimeValue: - constant: '[]' - transformations_path: - componentInputParameter: pipelinechannel--transformations - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column_name: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: tabular-stats-and-example-gen - inputDefinitions: - parameters: - pipelinechannel--additional_experiments: - parameterType: STRUCT - pipelinechannel--apply_feature_selection_tuning: - parameterType: BOOLEAN - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--data_source_csv_filenames: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - 
pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--model_display_name: - parameterType: STRING - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--stats_and_example_gen_dataflow_machine_type: - parameterType: STRING - pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--transformations: - parameterType: STRING - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - 
schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. 
- parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' 
- isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-3: - executorLabel: exec-feature-attribution-3 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' 
- isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-merge-materialized-splits: - executorLabel: exec-merge-materialized-splits - inputDefinitions: - artifacts: - split_0: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The first materialized split. - split_1: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The second materialized split. 
- outputDefinitions: - artifacts: - splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-3: - executorLabel: exec-model-batch-explanation-3 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. 
- - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. 
This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-3: - executorLabel: exec-model-batch-predict-3 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. 
In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. 
If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' 
- isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-4: - executorLabel: exec-model-batch-predict-4 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-5: - executorLabel: exec-model-batch-predict-5 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. 
- - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. 
This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - 
schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-2: - executorLabel: exec-model-evaluation-2 - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-3: - executorLabel: exec-model-evaluation-3 - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - 
parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' 
- isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-3: - executorLabel: exec-model-evaluation-import-3 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. 
- isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - 
parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-3: - executorLabel: exec-model-upload-3 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-purge-unused-features: - executorLabel: exec-purge-unused-features - inputDefinitions: - artifacts: - selected_features: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: selected feature names separated by comma. - unpurged_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: metadata generated by example gen. - outputDefinitions: - artifacts: - output_metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-read-input-uri: - executorLabel: exec-read-input-uri - inputDefinitions: - artifacts: - split_uri: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: Tbe path to the file that contains Dataset data. - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-read-input-uri-2: - executorLabel: exec-read-input-uri-2 - inputDefinitions: - artifacts: - split_uri: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: Tbe path to the file that contains Dataset data. - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-tabular-feature-ranking-and-selection: - executorLabel: exec-tabular-feature-ranking-and-selection - inputDefinitions: - artifacts: - data_source: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - parameters: - algorithm: - defaultValue: AMI - isOptional: true - parameterType: STRING - binary_classification: - defaultValue: 'false' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' 
-          isOptional: true
-          parameterType: STRING
-        dataflow_max_num_workers:
-          defaultValue: 25.0
-          description: 'The number of workers to run the
-
-            dataflow job. If not set, default to 25.'
-          isOptional: true
-          parameterType: NUMBER_INTEGER
-        dataflow_service_account:
-          defaultValue: ''
-          description: 'Custom service account to run
-
-            dataflow jobs.'
-          isOptional: true
-          parameterType: STRING
-        dataflow_subnetwork:
-          defaultValue: ''
-          description: 'Dataflow''s fully qualified subnetwork
-
-            name, when empty the default subnetwork will be used. More
-
-            details:
-
-            https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
-          isOptional: true
-          parameterType: STRING
-        dataflow_use_public_ips:
-          defaultValue: true
-          description: 'Specifies whether Dataflow
-
-            workers use public IP addresses.'
-          isOptional: true
-          parameterType: BOOLEAN
-        encryption_spec_key_name:
-          defaultValue: ''
-          description: 'Customer-managed encryption key.
-
-            If this is set, then all resources will be encrypted with the provided
-
-            encryption key. data_source(Dataset): The input dataset artifact which
-
-            references csv, BigQuery, or TF Records. target_column_name(str): Target
-
-            column name of the input dataset.'
-          isOptional: true
-          parameterType: STRING
-        location:
-          description: 'Location for running the feature selection. If not set,
-
-            default to us-central1.'
-          parameterType: STRING
-        max_selected_features:
-          defaultValue: 1000.0
-          description: 'number of features to select by the
-
-            algorithm. If not set, default to 1000.'
-          isOptional: true
-          parameterType: NUMBER_INTEGER
-        prediction_type:
-          defaultValue: unknown
-          isOptional: true
-          parameterType: STRING
-        project:
-          description: Project to run feature selection.
-          parameterType: STRING
-        root_dir:
-          description: The Cloud Storage location to store the output.
-          parameterType: STRING
-        target_column_name:
-          parameterType: STRING
-    outputDefinitions:
-      artifacts:
-        feature_ranking:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: the dictionary of feature names and feature ranking values.
-        selected_features:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: A json array of selected feature names.
-      parameters:
-        gcp_resources:
-          description: 'GCP resources created by this component.
-
-            For more details, see
-
-            https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
-          parameterType: STRING
-  comp-tabular-stats-and-example-gen:
-    executorLabel: exec-tabular-stats-and-example-gen
-    inputDefinitions:
-      parameters:
-        additional_experiments:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        additional_experiments_json:
-          defaultValue: {}
-          isOptional: true
-          parameterType: STRUCT
-        data_source_bigquery_table_path:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        data_source_csv_filenames:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        dataflow_disk_size_gb:
-          defaultValue: 40.0
-          description: 'The disk size, in gigabytes, to use
-
-            on each Dataflow worker instance. If not set, default to 40.'
-          isOptional: true
-          parameterType: NUMBER_INTEGER
-        dataflow_machine_type:
-          defaultValue: n1-standard-16
-          description: 'The machine type used for dataflow
-
-            jobs. If not set, default to n1-standard-16.'
-          isOptional: true
-          parameterType: STRING
-        dataflow_max_num_workers:
-          defaultValue: 25.0
-          description: 'The number of workers to run the
-
-            dataflow job. If not set, default to 25.'
-          isOptional: true
-          parameterType: NUMBER_INTEGER
-        dataflow_service_account:
-          defaultValue: ''
-          description: 'Custom service account to run
-
-            dataflow jobs.'
-          isOptional: true
-          parameterType: STRING
-        dataflow_subnetwork:
-          defaultValue: ''
-          description: 'Dataflow''s fully qualified subnetwork
-
-            name, when empty the default subnetwork will be used. More
-
-            details:
-
-            https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
-          isOptional: true
-          parameterType: STRING
-        dataflow_use_public_ips:
-          defaultValue: true
-          description: 'Specifies whether Dataflow
-
-            workers use public IP addresses.'
-          isOptional: true
-          parameterType: BOOLEAN
-        enable_probabilistic_inference:
-          defaultValue: false
-          isOptional: true
-          parameterType: BOOLEAN
-        encryption_spec_key_name:
-          defaultValue: ''
-          description: Customer-managed encryption key.
-          isOptional: true
-          parameterType: STRING
-        location:
-          description: 'Location for running dataset statistics and example
-
-            generation.'
-          parameterType: STRING
-        optimization_objective:
-          defaultValue: ''
-          description: "Objective function the model is optimizing\ntowards. The training\
-            \ process creates a model that maximizes/minimizes\nthe value of the objective\
-            \ function over the validation set. The\nsupported optimization objectives\
-            \ depend on the prediction type. If the\nfield is not set, a default objective\
-            \ function is used.\n classification: \"maximize-au-roc\" (default) -\
-            \ Maximize the\n area under the receiver operating characteristic (ROC)\
-            \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\
-            \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\
-            \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\
-            \ - Maximize recall for a\n specified precision value.\n classification\
-            \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\
-            \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\
-            \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\
-            \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)."
-          isOptional: true
-          parameterType: STRING
-        optimization_objective_precision_value:
-          defaultValue: -1.0
-          description: 'Required when
-
-            optimization_objective is "maximize-recall-at-precision". Must be
-
-            between 0 and 1, inclusive.'
-          isOptional: true
-          parameterType: NUMBER_DOUBLE
-        optimization_objective_recall_value:
-          defaultValue: -1.0
-          description: 'Required when
-
-            optimization_objective is "maximize-precision-at-recall". Must be
-
-            between 0 and 1, inclusive.'
-          isOptional: true
-          parameterType: NUMBER_DOUBLE
-        predefined_split_key:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        prediction_type:
-          description: 'The prediction type. Supported values:
-
-            "classification", "regression".'
-          parameterType: STRING
-        project:
-          description: 'Project to run dataset statistics and example
-
-            generation.'
-          parameterType: STRING
-        quantiles:
-          defaultValue: []
-          isOptional: true
-          parameterType: LIST
-        request_type:
-          defaultValue: COLUMN_STATS_ONLY
-          isOptional: true
-          parameterType: STRING
-        root_dir:
-          description: The Cloud Storage location to store the output.
-          parameterType: STRING
-        run_distillation:
-          defaultValue: false
-          description: 'True if in distillation mode. The default value
-
-            is false.'
-          isOptional: true
-          parameterType: BOOLEAN
-        stratified_split_key:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        target_column_name:
-          description: The target column name.
-          parameterType: STRING
-        test_fraction:
-          defaultValue: -1.0
-          isOptional: true
-          parameterType: NUMBER_DOUBLE
-        timestamp_split_key:
-          defaultValue: ''
-          isOptional: true
-          parameterType: STRING
-        training_fraction:
-          defaultValue: -1.0
-          isOptional: true
-          parameterType: NUMBER_DOUBLE
-        transformations:
-          description: 'Quote escaped JSON string for transformations. Each
-
-            transformation will apply transform function to given input column. And
-
-            the result will be used for training. When creating transformation for
-
-            BigQuery Struct column, the column should be flattened using "." as the
-
-            delimiter.'
-          parameterType: STRING
-        transformations_path:
-          defaultValue: ''
-          description: 'Path to a GCS file containing JSON
-
-            string for transformations.'
-          isOptional: true
-          parameterType: STRING
-        validation_fraction:
-          defaultValue: -1.0
-          isOptional: true
-          parameterType: NUMBER_DOUBLE
-        weight_column_name:
-          defaultValue: ''
-          description: The weight column name.
-          isOptional: true
-          parameterType: STRING
-    outputDefinitions:
-      artifacts:
-        dataset_schema:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The schema of the dataset.
-        dataset_stats:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The stats of the dataset.
-        eval_split:
-          artifactType:
-            schemaTitle: system.Dataset
-            schemaVersion: 0.0.1
-          description: The eval split.
-        instance_baseline:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The instance baseline used to calculate explanations.
-        metadata:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The tabular example gen metadata.
-        test_split:
-          artifactType:
-            schemaTitle: system.Dataset
-            schemaVersion: 0.0.1
-          description: The test split.
-        train_split:
-          artifactType:
-            schemaTitle: system.Dataset
-            schemaVersion: 0.0.1
-          description: The train split.
-      parameters:
-        downsampled_test_split_json:
-          description: The downsampled test split JSON object.
-          parameterType: LIST
-        gcp_resources:
-          description: 'GCP resources created by this component. For more details,
-            see
-
-            https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
-          parameterType: STRING
-        test_split_json:
-          description: The test split JSON object.
-          parameterType: LIST
-  comp-write-bp-result-path:
-    executorLabel: exec-write-bp-result-path
-    inputDefinitions:
-      artifacts:
-        bp_job:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The batch prediction job artifact.
-    outputDefinitions:
-      artifacts:
-        result:
-          artifactType:
-            schemaTitle: system.Dataset
-            schemaVersion: 0.0.1
-  comp-write-bp-result-path-2:
-    executorLabel: exec-write-bp-result-path-2
-    inputDefinitions:
-      artifacts:
-        bp_job:
-          artifactType:
-            schemaTitle: system.Artifact
-            schemaVersion: 0.0.1
-          description: The batch prediction job artifact.
- outputDefinitions: - artifacts: - result: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 -deploymentSpec: - executors: - exec-automl-tabular-cv-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-cv-trainer-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - 
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble-3: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-infra-validator-2: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-infra-validator-3: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", - "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", - "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", 
"{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", - "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", - "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", - "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", - \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-stage-1-tuner-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", - "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", - "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", - "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", - "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", - "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", - \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-transform: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", - "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", - \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", - "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", - "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", - "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", - \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", - "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-transform-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", - "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", - \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", - "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", - "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", - "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", - \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", - "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. 
Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-check-if-binary-classification: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _check_if_binary_classification - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _check_if_binary_classification(\n example_gen_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ - ) -> str:\n \"\"\"Construct Dataset based on the batch prediction job.\n\ - \n Args:\n example_gen_metadata: metadata generated by example gen.\n\ - \n Returns:\n \"true\" if binary classification, \"false\" otherwise.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(example_gen_metadata, 'r') as f:\n metadata_path = f.read()\n\ - \ metadata = json.loads(metadata_path)\n return str(metadata['objective']\ - \ == 'binary_classification').lower()\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - 
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - 
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-3: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-importer: - importer: - artifactUri: - constant: '' - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-merge-materialized-splits: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _merge_materialized_splits - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ - \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ - ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ - \ first materialized split.\n split_1: The second materialized split.\n\ - \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ - \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ - \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ - \ f.write(','.join([split_0_content, split_1_content]))\n\n" - image: python:3.7 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-3: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", 
"{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", 
"{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-3: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-4: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - 
google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-5: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - 
'{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-3: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": 
["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - 
--dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-3: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", 
"{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-3: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-purge-unused-features: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _purge_unused_features - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _purge_unused_features(\n unpurged_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ - \ selected_features: dsl.InputPath('SelectedFeatures'),\n output_metadata:\ - \ dsl.OutputPath('TabularExampleGenMetadata'),\n):\n \"\"\"Purge features\ - \ from metadata if not included in selected features.\n\n Args:\n unpurged_metadata:\ - \ metadata generated by example gen.\n selected_features: selected feature\ - \ names separated by comma.\n output_metadata: purged metadata.\n \"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(unpurged_metadata, 'r') as f:\n metadata_path = f.read()\n\ - \ metadata = json.loads(metadata_path)\n\n with open(selected_features,\ - \ 'r') as f:\n selected_features_path = f.read()\n features = json.loads(selected_features_path)\n\ - \n train_spec = metadata['train_spec']\n\n features_set = set(features)\n\ - \n purged_transformation_list = []\n for transformation in train_spec['transformations']:\n\ - \ if 'numeric' in transformation:\n if transformation['numeric']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'categorical' in transformation:\n if transformation['categorical']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'timestamp' in transformation:\n if transformation['timestamp']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'text' in transformation:\n if transformation['text']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'repeated_numeric' in transformation:\n if transformation['repeated_numeric']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'repeated_categorical' in transformation:\n if transformation['repeated_categorical']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ elif 'repeated_text' in transformation:\n if transformation['repeated_text']['column_name']\ - \ in features_set:\n purged_transformation_list.append(transformation)\n\ - \ else:\n raise ValueError(f'unsupported transformation: {transformation}')\n\ - \n train_spec['transformations'] = purged_transformation_list\n metadata['train_spec']\ - \ = train_spec\n\n with open(output_metadata, 'w') as f:\n f.write(json.dumps(metadata))\n\ - \n" - image: python:3.7 - exec-read-input-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _read_input_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ - ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ - \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ - \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ - \ list of string that represents the batch prediction input files.\n \"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ - \ return data_source['tf_record_data_source']['file_patterns']\n\n" - image: python:3.7 - exec-read-input-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _read_input_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ - ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ - \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ - \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ - \ list of string that represents the batch prediction input files.\n \"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ - \ return data_source['tf_record_data_source']['file_patterns']\n\n" - image: python:3.7 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-tabular-feature-ranking-and-selection: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", - "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", - "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", - "\", \"--binary_classification=", "{{$.inputs.parameters[''binary_classification'']}}", - "\", \"--algorithm=", "{{$.inputs.parameters[''algorithm'']}}", "\", \"--feature_selection_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection/\", - \"--job_name=tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_service_account=", 
"{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}", - "\", \"--feature_selection_result_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}", - "\", \"--selected_features_path=", "{{$.outputs.artifacts[''selected_features''].uri}}", - "\", \"--parse_json=true\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-tabular-stats-and-example-gen: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": - \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": - \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": - \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": - \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": - ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": - ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": - ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", - \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", - "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", - "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", - "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", - "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", - "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", - "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", - "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", - "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", - "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", - "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", - \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", - "\", \"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", - "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", - \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", - \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", - "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", - "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", - "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", - "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", - \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", - \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", - "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", - "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", - "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", - "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", - \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-write-bp-result-path: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _write_bp_result_path - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ - \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ - \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ - \ job artifact.\n result: Tbe path to the file that contains Dataset\ - \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ - \ 'tf_record_data_source': {\n 'file_patterns': [\n \ - \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ - \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ - \n" - image: python:3.7 - exec-write-bp-result-path-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _write_bp_result_path - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ - \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ - \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ - \ job artifact.\n result: Tbe path to the file that contains Dataset\ - \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ - \ 'tf_record_data_source': {\n 'file_patterns': [\n \ - \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ - \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ - \n" - image: python:3.7 -pipelineInfo: - description: The AutoML Tabular pipeline. 
- name: automl-tabular-feature-selection-pipeline -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: exit-handler-1 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: exit-handler-1 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--additional_experiments: - componentInputParameter: additional_experiments - pipelinechannel--apply_feature_selection_tuning: - componentInputParameter: apply_feature_selection_tuning - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: cv_trainer_worker_pool_specs_override - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - pipelinechannel--data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: distill_batch_predict_starting_replica_count - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: 
evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--model_display_name: - componentInputParameter: model_display_name - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: optimization_objective_recall_value - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_distillation: - componentInputParameter: run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: stage_2_num_selected_trials - pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: - componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb - pipelinechannel--stats_and_example_gen_dataflow_machine_type: - componentInputParameter: stats_and_example_gen_dataflow_machine_type - pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: - componentInputParameter: stats_and_example_gen_dataflow_max_num_workers - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: 
train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - taskInfo: - name: exit-handler-1 - inputDefinitions: - parameters: - additional_experiments: - description: Use this field to config private preview features. - isOptional: true - parameterType: STRUCT - apply_feature_selection_tuning: - defaultValue: false - description: tuning feature selection rate if true. - isOptional: true - parameterType: BOOLEAN - cv_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding stage - - cv trainer worker pool spec.' - isOptional: true - parameterType: LIST - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - disable_early_stopping: - defaultValue: false - description: If disable easly stopping. - isOptional: true - parameterType: BOOLEAN - distill_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'The prediction server machine type for - - batch predict component in the model distillation.' - isOptional: true - parameterType: STRING - distill_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The max number of prediction server - - for batch predict component in the model distillation.' - isOptional: true - parameterType: NUMBER_INTEGER - distill_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'The initial number of - - prediction server for batch predict component in the model distillation.' - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. At inference time, the predictive distribution is used to make - - a point prediction that minimizes the optimization objective. For example, - - the mean of a predictive distribution is the point prediction that - - minimizes RMSE loss. 
If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 10.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 10.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - export_additional_model_without_custom_ops: - defaultValue: false - description: 'Whether to export additional - - model without custom TensorFlow operators.' - isOptional: true - parameterType: BOOLEAN - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: number of features to select for training. - isOptional: true - parameterType: NUMBER_INTEGER - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - optimization_objective: - description: 'For binary classification, "maximize-au-roc", - - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", - or - - "maximize-recall-at-precision". For multi class classification, - - "minimize-log-loss". 
For regression, "minimize-rmse", "minimize-mae", or - - "minimize-rmsle".' - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when optimization_objective - - is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when optimization_objective is - - ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_num_selected_trials: - defaultValue: 5.0 - description: Number of selected trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stats_and_example_gen_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in - - GB for stats_and_example_gen component.' - isOptional: true - parameterType: NUMBER_INTEGER - stats_and_example_gen_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for - - stats_and_example_gen component.' - isOptional: true - parameterType: STRING - stats_and_example_gen_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow - - workers for stats_and_example_gen component.' - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: The stratified_split column name. - isOptional: true - parameterType: STRING - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: float = The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. 
- isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transformations: - description: 'The path to a GCS file containing the transformations to - - apply.' - parameterType: STRING - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml deleted file mode 100644 index c625e042bc..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +++ /dev/null @@ -1,8327 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-v2 -# Description: The AutoML Tabular pipeline v2. 
-# Inputs: -# apply_feature_selection_tuning: bool [Default: False] -# bigquery_staging_full_dataset_id: str [Default: ''] -# cv_trainer_worker_pool_specs_override: list -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# disable_early_stopping: bool [Default: False] -# enable_probabilistic_inference: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 10.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# export_additional_model_without_custom_ops: bool [Default: False] -# fast_testing: bool [Default: False] -# feature_selection_algorithm: str [Default: 'AMI'] -# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] -# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] -# feature_transform_engine_dataflow_max_num_workers: int [Default: 25.0] -# legacy_transformations_path: str [Default: ''] -# location: str -# max_selected_features: int [Default: 1000.0] -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# num_selected_features: int [Default: 0.0] -# optimization_objective: str -# optimization_objective_precision_value: float [Default: -1.0] -# optimization_objective_recall_value: float [Default: -1.0] -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# quantiles: list -# root_dir: str -# run_distillation: bool [Default: False] -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_num_selected_trials: int [Default: 5.0] -# stratified_split_key: str [Default: ''] -# study_spec_parameters_override: list -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transformations_path: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -# model-evaluation-2-evaluation_metrics: system.Metrics -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-cv-trainer: - executorLabel: exec-automl-tabular-cv-trainer - inputDefinitions: - artifacts: - materialized_cv_splits: - artifactType: - schemaTitle: system.Artifact 
- schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-cv-trainer-2: - executorLabel: exec-automl-tabular-cv-trainer-2 - inputDefinitions: - artifacts: - materialized_cv_splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble: - executorLabel: exec-automl-tabular-ensemble - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
- parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble-2: - executorLabel: exec-automl-tabular-ensemble-2 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. 
- model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-infra-validator-2: - executorLabel: exec-automl-tabular-infra-validator-2 - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-stage-1-tuner: - executorLabel: exec-automl-tabular-stage-1-tuner - inputDefinitions: - artifacts: - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - disable_early_stopping: - defaultValue: false - description: 'True if disable early stopping. Default - - value is false.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. 
- isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "model_type","categorical_value_spec": {"values": - - ["nn"]}}]' - isOptional: true - parameterType: LIST - tune_feature_selection_rate: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-2: - executorLabel: exec-bool-identity-2 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-3: - executorLabel: exec-bool-identity-3 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. 
- isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' 
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-3 - tasks: - automl-tabular-cv-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer - dependentTasks: - - calculate-training-parameters - - importer - - training-configurator-and-validator - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer - automl-tabular-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble - dependentTasks: - - automl-tabular-cv-trainer - - training-configurator-and-validator - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - 
componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: check-if-is-eval - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-tabular-ensemble - - bool-identity - - model-upload - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: 
pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: importer - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - calculate-training-parameters - inputs: - artifacts: - dataset_stats: - componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats - instance_schema: - 
componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - training_schema: - componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema - parameters: - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - quantiles: - componentInputParameter: pipelinechannel--quantiles - run_distill: - componentInputParameter: pipelinechannel--run_distillation - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts - stage_1_deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters - stage_2_deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: 
NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-split_example_counts: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - 
componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - 
dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - 
pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-5 - tasks: - automl-tabular-cv-trainer-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer-2 - dependentTasks: - - automl-tabular-stage-1-tuner - - calculate-training-parameters-2 - - training-configurator-and-validator-2 - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator-2 - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-stage-1-tuner - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_features: - componentInputParameter: pipelinechannel--num_selected_features - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters-2 - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer-2 - automl-tabular-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble-2 - dependentTasks: - - automl-tabular-cv-trainer-2 - - 
training-configurator-and-validator-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator-2 - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator-2 - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer-2 - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble-2 - automl-tabular-infra-validator-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator-2 - dependentTasks: - - automl-tabular-ensemble-2 - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - taskInfo: - name: automl-tabular-infra-validator-2 - automl-tabular-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - - training-configurator-and-validator-2 - inputs: - artifacts: - feature_ranking: - componentInputArtifact: pipelinechannel--feature-transform-engine-feature_ranking - materialized_eval_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator-2 - transform_output: - componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_features: - componentInputParameter: pipelinechannel--num_selected_features - num_selected_trials: - taskOutputParameter: - outputParameterKey: stage_1_num_selected_trials - producerTask: calculate-training-parameters-2 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - taskOutputParameter: - outputParameterKey: reduce_search_space_mode - producerTask: calculate-training-parameters-2 - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - 
componentInputParameter: pipelinechannel--study_spec_parameters_override - tune_feature_selection_rate: - componentInputParameter: pipelinechannel--apply_feature_selection_tuning - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-tabular-stage-1-tuner - bool-identity-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-2 - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: check-if-is-eval - bool-identity-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-3 - inputs: - parameters: - value: - runtimeValue: - constant: 0.0 - taskInfo: - name: check-if-is-distillation - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-tabular-ensemble-2 - - bool-identity-2 - - bool-identity-3 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-2 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-2 - pipelinechannel--bool-identity-2-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-2 - pipelinechannel--bool-identity-3-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-3 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - 
pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: no-distill - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] - == 'false' - training-configurator-and-validator-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator-2 - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - dataset_stats: - componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats - instance_schema: - componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema - training_schema: - componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema - parameters: - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - quantiles: - componentInputParameter: pipelinechannel--quantiles - run_distill: - componentInputParameter: pipelinechannel--run_distillation - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts - stage_1_deadline_hours: - 
taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - stage_2_deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters-2 - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator-2 - inputDefinitions: - artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--feature-transform-engine-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--split-materialized-data-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--apply_feature_selection_tuning: - parameterType: BOOLEAN - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-split_example_counts: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - 
pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--num_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-6 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-6 - tasks: - condition-6: - componentRef: - name: comp-condition-6 - dependentTasks: - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - pipelinechannel--bool-identity-2-Output: - componentInputParameter: pipelinechannel--bool-identity-2-Output - pipelinechannel--bool-identity-3-Output: - componentInputParameter: pipelinechannel--bool-identity-3-Output - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - 
pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] - == 'true' - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - 
schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-6: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: 
pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - 
pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - parameterType: STRING - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - feature-transform-engine - - merge-materialized-splits - - string-not-empty - inputs: - artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - 
parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - 
pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - feature-transform-engine - - merge-materialized-splits - - split-materialized-data - - string-not-empty - inputs: - artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-feature_ranking: - taskOutputArtifact: - outputArtifactKey: feature_ranking - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--split-materialized-data-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - parameters: - pipelinechannel--apply_feature_selection_tuning: - componentInputParameter: pipelinechannel--apply_feature_selection_tuning - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: 
pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--num_selected_features: - componentInputParameter: pipelinechannel--num_selected_features - pipelinechannel--optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - 
pipelinechannel--optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - 
feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - legacy_transformations_path: - componentInputParameter: pipelinechannel--legacy_transformations_path - location: - componentInputParameter: pipelinechannel--location - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_distill: - componentInputParameter: pipelinechannel--run_distillation - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - merge-materialized-splits: - cachingOptions: - enableCache: true - componentRef: - name: comp-merge-materialized-splits - dependentTasks: - - split-materialized-data - inputs: - artifacts: - split_0: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - split_1: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - taskInfo: - name: merge-materialized-splits - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: check-if-is-stage-1-tuning-result-artifact-uri-not-empty - inputDefinitions: - parameters: - pipelinechannel--apply_feature_selection_tuning: - parameterType: BOOLEAN - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - 
parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_transform_engine_dataflow_machine_type: - parameterType: STRING - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--legacy_transformations_path: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--num_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - 
pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' 
- isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. 
For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' 
- isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. 
For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' 
- isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. 
- isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. 
For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. 
Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. 
Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. 
Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' 
- parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' - parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-merge-materialized-splits: - executorLabel: exec-merge-materialized-splits - inputDefinitions: - artifacts: - split_0: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The first materialized split. - split_1: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The second materialized split. - outputDefinitions: - artifacts: - splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. 
Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. 
- - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. 
The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. 
For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
- isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' 
- isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. 
Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. 
Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-2: - executorLabel: exec-model-evaluation-2 - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - 
isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' 
- isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. 
Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. 
- parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path pattern to materialized train split. - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.'
- isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - comp-training-configurator-and-validator-2: - executorLabel: exec-training-configurator-and-validator-2 - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' 
- isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-tabular-cv-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", 
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-cv-trainer-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-infra-validator-2: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", 
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", - "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", - "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", - "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", - "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", - "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", - \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. 
Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - 
/main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", 
"{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": 
["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", 
"{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-merge-materialized-splits: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _merge_materialized_splits - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ - \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ - ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ - \ first materialized split.\n split_1: The second materialized split.\n\ - \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ - \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ - \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ - \ f.write(','.join([split_0_content, split_1_content]))\n\n" - image: python:3.7 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": 
- \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - 
'{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - 
'{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - 
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": 
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' 
data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": 
["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-training-configurator-and-validator-2: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": 
["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The AutoML Tabular pipeline v2. - name: automl-tabular-v2 -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: exit-handler-1 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--apply_feature_selection_tuning: - componentInputParameter: apply_feature_selection_tuning - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: 
dataset_level_transformations - pipelinechannel--disable_early_stopping: - componentInputParameter: disable_early_stopping - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: - componentInputParameter: feature_transform_engine_dataflow_disk_size_gb - pipelinechannel--feature_transform_engine_dataflow_machine_type: - componentInputParameter: feature_transform_engine_dataflow_machine_type - pipelinechannel--feature_transform_engine_dataflow_max_num_workers: - componentInputParameter: feature_transform_engine_dataflow_max_num_workers - pipelinechannel--legacy_transformations_path: - componentInputParameter: legacy_transformations_path - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--num_selected_features: - componentInputParameter: num_selected_features - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: optimization_objective_recall_value - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_distillation: - componentInputParameter: run_distillation - 
pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: stage_2_num_selected_trials - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - apply_feature_selection_tuning: - defaultValue: false - description: tuning feature selection rate if true. - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The BigQuery staging full dataset id for - - storing intermediate tables.' - isOptional: true - parameterType: STRING - cv_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding stage - - cv trainer worker pool spec.' 
- isOptional: true - parameterType: LIST - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - disable_early_stopping: - defaultValue: false - description: If disable easly stopping. - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. At inference time, the predictive distribution is used to make - - a point prediction that minimizes the optimization objective. For example, - - the mean of a predictive distribution is the point prediction that - - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 10.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 10.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' 
- isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - export_additional_model_without_custom_ops: - defaultValue: false - description: 'Whether to export additional - - model without custom TensorFlow operators.' - isOptional: true - parameterType: BOOLEAN - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size - - in GB for feature transform engine component.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_transform_engine_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type - - for feature transform engine component.' - isOptional: true - parameterType: STRING - feature_transform_engine_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of - - Dataflow workers for feature transform engine component.' - isOptional: true - parameterType: NUMBER_INTEGER - legacy_transformations_path: - defaultValue: '' - description: Path to train spec transformations json. - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features for feature selection, - - defaults to None, in which case all features are used.' - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: 'For binary classification, "maximize-au-roc", - - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", - or - - "maximize-recall-at-precision". For multi class classification, - - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - - "minimize-rmsle".' - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when optimization_objective - - is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when optimization_objective is - - ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to apply feature selection or not. - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_num_selected_trials: - defaultValue: 5.0 - description: Number of selected trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: The stratified_split column name. - isOptional: true - parameterType: STRING - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: float = The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: 'List of auto transform features in the - - comma-separated string format.' - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. 
- isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json deleted file mode 100644 index 65e64d953d..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [50000, 70000, 90000, 110000, 130000, 150000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [4096, 8192, 16384, 32768, 65536] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.00007, - "max_value": 0.03 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [3, 5, 10] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [5, 10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 100, - "max_value": 700 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 3, - "max_value": 8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.05, - "max_value": 3.2 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.0000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 4, - "max_value": 10 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - 
"parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", "focal_loss"] - } - }, - { - "parameter_id": "alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false", "true"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json deleted file mode 100644 index e7346ea9ae..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [50000, 60000, 70000, 80000, 90000, 100000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [4096, 8192, 16384, 32768] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.03 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 200, - "max_value": 500 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 3, - "max_value": 7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.5 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 4, - "max_value": 8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - 
"double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", "focal_loss"] - } - }, - { - "parameter_id": "alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json deleted file mode 100644 index 90ed01db8f..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json +++ /dev/null @@ -1,146 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [50000, 60000, 70000, 80000, 90000, 100000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [8192, 16384, 32768] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0002, - "max_value": 0.02 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 100, - "max_value": 400 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 3, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.5 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.3, - "max_value": 0.7 - }, - 
"scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 10.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy"] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json deleted file mode 100644 index b9350f33b6..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [50000, 60000, 70000, 80000, 90000, 100000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [1024, 2048, 4096, 8192, 16384] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.00007, - "max_value": 0.03 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [3, 5, 10] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [5, 10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 50, - "max_value": 500 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.05, - "max_value": 3.2 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.0000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 4, - "max_value": 10 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", 
"focal_loss"] - } - }, - { - "parameter_id": "alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false", "true"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json deleted file mode 100644 index e7143fae84..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [5000, 10000, 20000, 30000, 40000, 50000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [1024, 2048, 4096, 8192, 16384] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.00007, - "max_value": 0.02 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 50, - "max_value": 400 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.5 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.0000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 4, - "max_value": 10 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", "focal_loss"] - } - }, - { - "parameter_id": 
"alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json deleted file mode 100644 index 46968c00c8..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json +++ /dev/null @@ -1,146 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [10000, 20000, 30000, 40000, 50000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [1024, 4096, 8192, 16384] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.02 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 100, - "max_value": 300 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.3, - "max_value": 0.7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.5 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 10000, - "max_value": 50000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.0000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.3, - "max_value": 0.7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy"] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } 
-] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json deleted file mode 100644 index 40d2e7f85b..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [10000, 20000, 30000, 40000, 50000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [512, 1024, 2048, 4096] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.00007, - "max_value": 0.03 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [3, 5, 10] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [5, 10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 50, - "max_value": 300 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.05, - "max_value": 3.2 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 1000, - "max_value": 10000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.0000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", "focal_loss"] - } - }, - { - "parameter_id": "alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false", "true"] - } - } -] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json deleted file mode 100644 index 3a75145edf..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json +++ /dev/null @@ -1,158 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [5000, 10000, 20000, 30000, 40000, 50000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [512, 1024, 2048, 4096] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.03 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 50, - "max_value": 200 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 1000, - "max_value": 10000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.0625, 0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.2, - "max_value": 0.8 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy", "focal_loss"] - } - }, - { - "parameter_id": "alpha_focal_loss", - "discrete_value_spec": { - "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] - } - }, - { - "parameter_id": "gamma_focal_loss", - "discrete_value_spec": { - "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } -] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json deleted file mode 100644 index eb7a4c99f7..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json +++ /dev/null @@ -1,146 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [5000, 10000, 15000, 20000, 25000, 30000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [512, 1024, 2048, 4096] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.02 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "large_category_dim", - "discrete_value_spec": { - "values": [5] - } - }, - { - "parameter_id": "large_category_thresh", - "discrete_value_spec": { - "values": [10] - } - }, - { - "parameter_id": "feature_dim", - "integer_value_spec": { - "min_value": 50, - "max_value": 200 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "feature_dim_ratio", - "double_value_spec": { - "min_value": 0.3, - "max_value": 0.7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_decision_steps", - "integer_value_spec": { - "min_value": 2, - "max_value": 6 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "relaxation_factor", - "double_value_spec": { - "min_value": 1.2, - "max_value": 2.5 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "decay_rate", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.999 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "decay_every", - "integer_value_spec": { - "min_value": 1000, - "max_value": 5000 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "sparsity_loss_weight", - "double_value_spec": { - "min_value": 0.000001, - "max_value": 0.001 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - { - "parameter_id": "batch_momentum", - "double_value_spec": { - "min_value": 0.5, - "max_value": 0.95 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "batch_size_ratio", - "discrete_value_spec": { - "values": [0.125, 0.25, 0.5] - } - }, - { - "parameter_id": "num_transformer_layers", - "integer_value_spec": { - "min_value": 2, - "max_value": 4 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "num_transformer_layers_ratio", - "double_value_spec": { - "min_value": 0.3, - "max_value": 0.7 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "class_weight", - "double_value_spec": { - "min_value": 1.0, - "max_value": 100.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "loss_function_type", - "categorical_value_spec": { - "values": ["weighted_cross_entropy"] - } - }, - { - "parameter_id": "yeo_johnson_transform", - "categorical_value_spec": { - "values": ["false"] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json deleted file mode 100644 index 6458b992d0..0000000000 --- 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json +++ /dev/null @@ -1,132 +0,0 @@ -[ - { - "parameter_id": "max_steps", - "discrete_value_spec": { - "values": [5000, 10000, 20000, 30000, 40000, 50000] - } - }, - { - "parameter_id": "max_train_secs", - "discrete_value_spec": { - "values": [-1] - } - }, - { - "parameter_id": "learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.0005 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "optimizer_type", - "categorical_value_spec": { - "values": ["adam", "ftrl", "sgd"] - } - }, - { - "parameter_id": "l1_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "l2_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "l2_shrinkage_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "beta_1", - "discrete_value_spec": { - "values": [0.7, 0.8, 0.9] - } - }, - { - "parameter_id": "beta_2", - "discrete_value_spec": { - "values": [0.8, 0.9, 0.999] - } - }, - { - "parameter_id": "hidden_units", - "categorical_value_spec": { - "values": ["30,30,30"] - } - }, - { - "parameter_id": "use_wide", - "categorical_value_spec": { - "values": ["true", "false"] - } - }, - { - "parameter_id": "embed_categories", - "categorical_value_spec": { - "values": ["true", "false"] - } - }, - { - "parameter_id": "dnn_dropout", - "discrete_value_spec": { - "values": [0, 0.1, 0.2] - } - }, - { - "parameter_id": "dnn_learning_rate", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 0.0005 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - { - "parameter_id": "dnn_optimizer_type", - "categorical_value_spec": { - "values": ["adam", "ftrl", "sgd"] - } - }, - { - "parameter_id": "dnn_l1_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "dnn_l2_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "dnn_l2_shrinkage_regularization_strength", - "discrete_value_spec": { - "values": [0, 0.01, 0.02] - } - }, - { - "parameter_id": "dnn_beta_1", - "discrete_value_spec": { - "values": [0.7, 0.8, 0.9] - } - }, - { - "parameter_id": "dnn_beta_2", - "discrete_value_spec": { - "values": [0.8, 0.9, 0.999] - } - }, - { - "parameter_id": "batch_size", - "discrete_value_spec": { - "values": [1024, 2048, 4096, 8192, 16384] - } - } -] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json deleted file mode 100644 index 245a738beb..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json +++ /dev/null @@ -1,309 +0,0 @@ -[{ - "parameter_id": "num_boost_round", - "discrete_value_spec": { - "values": [1, 5, 10, 15, 20] - } -}, { - "parameter_id": "early_stopping_rounds", - "discrete_value_spec": { - "values": [3, 5, 10] - } -}, { - "parameter_id": "base_score", - "discrete_value_spec": { - "values": [0.5] - } -}, { - "parameter_id": "booster", - "categorical_value_spec": { - "values": ["gbtree", "gblinear", "dart"] - }, - "conditional_parameter_specs": [{ - "parameter_spec": { - "parameter_id": "eta", - "double_value_spec": { - "min_value": 0.0001, - 
"max_value": 1.0 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "gamma", - "discrete_value_spec": { - "values": [0, 10, 50, 100, 500, 1000] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "max_depth", - "integer_value_spec": { - "min_value": 6, - "max_value": 10 - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "min_child_weight", - "double_value_spec": { - "min_value": 0.0, - "max_value": 10.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "max_delta_step", - "discrete_value_spec": { - "values": [0.0, 1.0, 3.0, 5.0, 7.0, 9.0] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "subsample", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "colsample_bytree", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "colsample_bylevel", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "colsample_bynode", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_LINEAR_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "lambda", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_REVERSE_LOG_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart", "gblinear"] - } - }, { - "parameter_spec": { - "parameter_id": "alpha", - "double_value_spec": { - "min_value": 0.0001, - "max_value": 1.0 - }, - "scale_type": "UNIT_LOG_SCALE" - }, - "parent_categorical_values": { - "values": ["gbtree", "dart", "gblinear"] - } - }, { - "parameter_spec": { - "parameter_id": "tree_method", - "categorical_value_spec": { - "values": ["auto"] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "scale_pos_weight", - "discrete_value_spec": { - "values": [1.0] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "refresh_leaf", - "discrete_value_spec": { - "values": [1] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "process_type", - "categorical_value_spec": { - "values": ["default"] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "grow_policy", - "categorical_value_spec": { - "values": ["depthwise"] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "sampling_method", - "categorical_value_spec": { - "values": ["uniform"] - } - }, - 
"parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "sample_type", - "categorical_value_spec": { - "values": ["uniform"] - } - }, - "parent_categorical_values": { - "values": ["dart"] - } - }, { - "parameter_spec": { - "parameter_id": "normalize_type", - "categorical_value_spec": { - "values": ["tree"] - } - }, - "parent_categorical_values": { - "values": ["dart"] - } - }, { - "parameter_spec": { - "parameter_id": "rate_drop", - "discrete_value_spec": { - "values": [0.0] - } - }, - "parent_categorical_values": { - "values": ["dart"] - } - }, { - "parameter_spec": { - "parameter_id": "one_drop", - "discrete_value_spec": { - "values": [0] - } - }, - "parent_categorical_values": { - "values": ["dart"] - } - }, { - "parameter_spec": { - "parameter_id": "skip_drop", - "discrete_value_spec": { - "values": [0.0] - } - }, - "parent_categorical_values": { - "values": ["dart"] - } - }, { - "parameter_spec": { - "parameter_id": "num_parallel_tree", - "discrete_value_spec": { - "values": [1] - } - }, - "parent_categorical_values": { - "values": ["gblinear"] - } - }, { - "parameter_spec": { - "parameter_id": "feature_selector", - "categorical_value_spec": { - "values": ["cyclic"] - } - }, - "parent_categorical_values": { - "values": ["gblinear"] - } - }, { - "parameter_spec": { - "parameter_id": "top_k", - "discrete_value_spec": { - "values": [0] - } - }, - "parent_categorical_values": { - "values": ["gblinear"] - } - }, { - "parameter_spec": { - "parameter_id": "max_leaves", - "discrete_value_spec": { - "values": [0] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }, { - "parameter_spec": { - "parameter_id": "max_bin", - "discrete_value_spec": { - "values": [256] - } - }, - "parent_categorical_values": { - "values": ["gbtree", "dart"] - } - }] -}] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py deleted file mode 100644 index c1f753bd03..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""AutoML Feature Ranking and Selection component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Input -from kfp.dsl import Output - - -# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument -@dsl.container_component -def tabular_feature_ranking_and_selection( - project: str, - location: str, - root_dir: str, - data_source: Input[Dataset], - target_column_name: str, - feature_ranking: Output[Artifact], - selected_features: Output[Artifact], - gcp_resources: dsl.OutputPath(str), - dataflow_machine_type: Optional[str] = 'n1-standard-16', - dataflow_max_num_workers: Optional[int] = 25, - dataflow_disk_size_gb: Optional[int] = 40, - dataflow_subnetwork: Optional[str] = '', - dataflow_use_public_ips: Optional[bool] = True, - dataflow_service_account: Optional[str] = '', - encryption_spec_key_name: Optional[str] = '', - algorithm: Optional[str] = 'AMI', - prediction_type: Optional[str] = 'unknown', - binary_classification: Optional[str] = 'false', - max_selected_features: Optional[int] = 1000, -): - # fmt: off - """Launches a feature selection task to pick top features. - - Args: - project: Project to run feature selection. - location: Location for running the feature selection. If not set, - default to us-central1. - root_dir: The Cloud Storage location to store the output. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. - encryption_spec_key_name: Customer-managed encryption key. - If this is set, then all resources will be encrypted with the provided - encryption key. data_source(Dataset): The input dataset artifact which - references csv, BigQuery, or TF Records. target_column_name(str): Target - column name of the input dataset. - max_selected_features: number of features to select by the - algorithm. If not set, default to 1000. - - Returns: - feature_ranking: the dictionary of feature names and feature ranking values. - selected_features: A json array of selected feature names. - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["feature_selection", "--data_source=', - data_source.uri, - '", "--target_column=', - target_column_name, - '", "--prediction_type=', - prediction_type, - '", "--binary_classification=', - binary_classification, - '", "--algorithm=', - algorithm, - '", "--feature_selection_dir=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection/",' - f' "--job_name=tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' - ), - '", "--dataflow_project=', - project, - '", "--error_file_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--dataflow_staging_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' - ' "--dataflow_tmp_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' - ' "--dataflow_max_num_workers=' - ), - dataflow_max_num_workers, - '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - '", "--dataflow_machine_type=', - dataflow_machine_type, - '", "--dataflow_disk_size_gb=', - dataflow_disk_size_gb, - '", "--dataflow_subnetwork_fully_qualified=', - dataflow_subnetwork, - '", "--dataflow_use_public_ips=', - dataflow_use_public_ips, - '", "--dataflow_service_account=', - dataflow_service_account, - '", "--dataflow_kms_key=', - encryption_spec_key_name, - '", "--max_selected_features=', - max_selected_features, - '", "--feature_selection_result_path=', - feature_ranking.uri, - '", "--selected_features_path=', - selected_features.uri, - '", "--parse_json=true"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py deleted file mode 100644 index 4f93bbf285..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +++ /dev/null @@ -1,976 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Feature Transform Engine component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Output - - -@dsl.container_component -def feature_transform_engine( - root_dir: str, - project: str, - location: str, - dataset_stats: Output[Artifact], - materialized_data: Output[Dataset], - transform_output: Output[Artifact], - split_example_counts: dsl.OutputPath(str), - instance_schema: Output[Artifact], - training_schema: Output[Artifact], - bigquery_train_split_uri: dsl.OutputPath(str), - bigquery_validation_split_uri: dsl.OutputPath(str), - bigquery_test_split_uri: dsl.OutputPath(str), - bigquery_downsampled_test_split_uri: dsl.OutputPath(str), - feature_ranking: Output[Artifact], - gcp_resources: dsl.OutputPath(str), - dataset_level_custom_transformation_definitions: Optional[list] = [], - dataset_level_transformations: Optional[list] = [], - forecasting_time_column: Optional[str] = '', - forecasting_time_series_identifier_column: Optional[str] = '', - forecasting_time_series_attribute_columns: Optional[list] = [], - forecasting_unavailable_at_forecast_columns: Optional[list] = [], - forecasting_available_at_forecast_columns: Optional[list] = [], - forecasting_forecast_horizon: Optional[int] = -1, - forecasting_context_window: Optional[int] = -1, - forecasting_predefined_window_column: Optional[str] = '', - forecasting_window_stride_length: Optional[int] = -1, - forecasting_window_max_count: Optional[int] = -1, - forecasting_holiday_regions: Optional[list] = [], - forecasting_apply_windowing: Optional[bool] = True, - predefined_split_key: Optional[str] = '', - stratified_split_key: Optional[str] = '', - timestamp_split_key: Optional[str] = '', - training_fraction: Optional[float] = -1, - validation_fraction: Optional[float] = -1, - test_fraction: Optional[float] = -1, - tf_transform_execution_engine: Optional[str] = 'dataflow', - tf_auto_transform_features: Optional[dict] = {}, - tf_custom_transformation_definitions: Optional[list] = [], - tf_transformations_path: Optional[str] = '', - legacy_transformations_path: Optional[str] = '', - target_column: Optional[str] = '', - weight_column: Optional[str] = '', - prediction_type: Optional[str] = '', - model_type: Optional[str] = None, - multimodal_image_columns: Optional[list] = [], - multimodal_text_columns: Optional[list] = [], - run_distill: Optional[bool] = False, - run_feature_selection: Optional[bool] = False, - feature_selection_algorithm: Optional[str] = 'AMI', - materialized_examples_format: Optional[str] = 'tfrecords_gzip', - max_selected_features: Optional[int] = 1000, - data_source_csv_filenames: Optional[str] = '', - data_source_bigquery_table_path: Optional[str] = '', - bigquery_staging_full_dataset_id: Optional[str] = '', - dataflow_machine_type: Optional[str] = 'n1-standard-16', - dataflow_max_num_workers: Optional[int] = 25, - dataflow_disk_size_gb: Optional[int] = 40, - dataflow_subnetwork: Optional[str] = '', - dataflow_use_public_ips: Optional[bool] = True, - dataflow_service_account: Optional[str] = '', - 
encryption_spec_key_name: Optional[str] = '', - autodetect_csv_schema: Optional[bool] = False, - group_columns: Optional[list] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -): - # fmt: off - """Transforms raw data to engineered features. - - FTE performs dataset level transformations, data splitting, data statistic - generation, and TensorFlow-based row level transformations on the input - dataset based on the provided transformation configuration. - - Args: - root_dir: The Cloud Storage location to store the output. - project: Project to run feature transform engine. - location: Location for the created GCP services. - dataset_level_custom_transformation_definitions: List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE's built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - Example: .. code-block:: python [ { "transformation": "ConcatCols", - "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using custom transform function - together with FTE's built-in transformations: .. code-block:: - python [ { "transformation": "Join", "right_table_uri": - "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": - "ConcatCols", "cols": ["feature_1", "feature_2"], "output_col": - "feature_1_2" } ] - dataset_level_transformations: List of dataset-level - transformations. - Example: .. code-block:: python [ { "transformation": "Join", - "right_table_uri": "bq://test-project.dataset_test.table", - "join_keys": [["join_key_col", "join_key_col"]] }, ... ] Additional - information about FTE's currently supported built-in - transformations: - Join: Joins features from right_table_uri. For each join key, the - left table keys will be included and the right table keys will - be dropped. - Example: .. code-block:: python { "transformation": "Join", - "right_table_uri": "bq://test-project.dataset_test.table", - "join_keys": [["join_key_col", "join_key_col"]] } - Arguments: - right_table_uri: Right table BigQuery uri to join - with input_full_table_id. - join_keys: Features to join on. For each - nested list, the first element is a left table column - and the second is its corresponding right table column. - TimeAggregate: Creates a new feature composed of values of an - existing feature from a fixed time period ago or in the future. - Ex: A feature for sales by store 1 year ago. - Example: .. code-block:: python { "transformation": - "TimeAggregate", "time_difference": 40, - "time_difference_units": "DAY", - "time_series_identifier_columns": ["store_id"], - "time_column": "time_col", "time_difference_target_column": - "target_col", "output_column": "output_col" } - Arguments: - time_difference: Number of time_difference_units to - look back or into the future on our - time_difference_target_column. - time_difference_units: Units of time_difference to - look back or into the future on our - time_difference_target_column. Must be one of * 'DAY' * - 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * - 'YEAR' - time_series_identifier_columns: Names of the - time series identifier columns. - time_column: Name of the time column. 
- time_difference_target_column: Column we wish to get - the value of time_difference time_difference_units in - the past or future. - output_column: Name of our new time aggregate - feature. - is_future: Whether we wish to look - forward in time. Defaults to False. - PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum: - Performs a partition by reduce operation (one of max, - min, avg, or sum) with a fixed historic time period. Ex: - Getting avg sales (the reduce column) for each store - (partition_by_column) over the previous 5 days - (time_column, time_ago_units, and time_ago). - Example: .. code-block:: python { "transformation": - "PartitionByMax", "reduce_column": "sell_price", - "partition_by_columns": ["store_id", "state_id"], - "time_column": "date", "time_ago": 1, "time_ago_units": - "WEEK", "output_column": "partition_by_reduce_max_output" } - Arguments: - reduce_column: Column to apply the reduce operation - on. Reduce operations include the - following: Max, Min, Avg, Sum. - partition_by_columns: List of columns to - partition by. - time_column: Time column for the partition by - operation's window function. - time_ago: Number of time_ago_units to look back on - our target_column, starting from time_column - (inclusive). - time_ago_units: Units of time_ago to look back on - our target_column. Must be one of * 'DAY' * 'WEEK' - output_column: Name of our output feature. - forecasting_time_column: Forecasting time column. - forecasting_time_series_identifier_column: Forecasting - time series identifier column. - forecasting_time_series_attribute_columns: Forecasting - time series attribute columns. - forecasting_unavailable_at_forecast_columns: Forecasting - unavailable at forecast columns. - forecasting_available_at_forecast_columns: Forecasting - available at forecast columns. - forecasting_forecast_horizon: Forecasting horizon. - forecasting_context_window: Forecasting context window. - forecasting_predefined_window_column: Forecasting predefined window column. - forecasting_window_stride_length: Forecasting window stride length. - forecasting_window_max_count: Forecasting window max count. - forecasting_holiday_regions: The geographical region based on which the - holiday effect is applied in modeling by adding holiday categorical - array feature that include all holidays matching the date. This option - only allowed when data granularity is day. By default, holiday effect - modeling is disabled. To turn it on, specify the holiday region using - this option. - Top level: * 'GLOBAL' - Second level: continental regions: * 'NA': North America - * 'JAPAC': Japan and Asia Pacific - * 'EMEA': Europe, the Middle East and Africa - * 'LAC': Latin America and the Caribbean - Third level: countries from ISO 3166-1 Country codes. - Valid regions: * 'GLOBAL' * 'NA' * 'JAPAC' * 'EMEA' * 'LAC' * 'AE' - * 'AR' * 'AT' * 'AU' * 'BE' * 'BR' * 'CA' * 'CH' * 'CL' * 'CN' * 'CO' - * 'CZ' * 'DE' * 'DK' * 'DZ' * 'EC' * 'EE' * 'EG' * 'ES' * 'FI' * 'FR' - * 'GB' * 'GR' * 'HK' * 'HU' * 'ID' * 'IE' * 'IL' * 'IN' * 'IR' * 'IT' - * 'JP' * 'KR' * 'LV' * 'MA' * 'MX' * 'MY' * 'NG' * 'NL' * 'NO' * 'NZ' - * 'PE' * 'PH' * 'PK' * 'PL' * 'PT' * 'RO' * 'RS' * 'RU' * 'SA' * 'SE' - * 'SG' * 'SI' * 'SK' * 'TH' * 'TR' * 'TW' * 'UA' * 'US' * 'VE' * 'VN' - * 'ZA' - forecasting_apply_windowing: Whether to apply window strategy. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - timestamp_split_key: Timestamp split key. 
- training_fraction: Fraction of input data for training. - validation_fraction: Fraction of input data for validation. - test_fraction: Fraction of input data for testing. - tf_transform_execution_engine: Execution engine to perform - row-level TF transformations. Can be one of: "dataflow" (by default) or - "bigquery". Using "bigquery" as the execution engine is experimental and - is for allowlisted customers only. In addition, executing on "bigquery" - only supports auto transformations (i.e., specified by - tf_auto_transform_features) and will raise an error when - tf_custom_transformation_definitions or tf_transformations_path is set. - tf_auto_transform_features: Dict mapping auto and/or type-resolutions to - TF transform features. FTE will automatically configure a set of - built-in transformations for each feature based on its data statistics. - If users do not want auto type resolution, but want the set of - transformations for a given type to be automatically generated, they - may specify pre-resolved transformations types. The following type hint - dict keys are supported: * 'auto' * 'categorical' * 'numeric' * 'text' - * 'timestamp' - Example: .. code-block:: python { "auto": ["feature1"], - "categorical": ["feature2", "feature3"], } Note that the target and - weight column may not be included as an auto transformation unless - users are running forecasting. - tf_custom_transformation_definitions: List of - TensorFlow-based custom transformation definitions. Custom, - bring-your-own transform functions, where users can define and import - their own transform function and use it with FTE's built-in - transformations. - Example: .. code-block:: python [ { "transformation": "PlusOne", - "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" }, { "transformation": - "MultiplyTwo", "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "multiply_two_transform" } ] Using custom - transform function together with FTE's built-in transformations: .. - code-block:: python [ { "transformation": "CastToFloat", - "input_columns": ["feature_1"], "output_columns": ["feature_1"] },{ - "transformation": "PlusOne", "input_columns": ["feature_1"] - "output_columns": ["feature_1_plused_one"] },{ "transformation": - "MultiplyTwo", "input_columns": ["feature_1"] "output_columns": - ["feature_1_multiplied_two"] } ] - tf_transformations_path: Path to TensorFlow-based - transformation configuration. Path to a JSON file used to specified - FTE's TF transformation configurations. In the following, we provide - some sample transform configurations to demonstrate FTE's capabilities. - All transformations on input columns are explicitly specified with FTE's - built-in transformations. Chaining of multiple transformations on a - single column is also supported. For example: .. code-block:: python [ - { "transformation": "ZScale", "input_columns": ["feature_1"] }, { - "transformation": "ZScale", "input_columns": ["feature_2"] } ] - Additional information about FTE's currently supported built-in - transformations: - Datetime: Extracts datetime featues from a column containing - timestamp strings. - Example: .. code-block:: python { "transformation": - "Datetime", "input_columns": ["feature_1"], "time_format": - "%Y-%m-%d" } - Arguments: - input_columns: A list with a single column to - perform the datetime transformation on. - output_columns: Names of output - columns, one for each datetime_features element. - time_format: Datetime format string. 
Time format is - a combination of Date + Time Delimiter (optional) + Time - (optional) directives. Valid date directives are as - follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # - 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' # - 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' # - 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # - 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' # - 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y' - # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # - 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' # - 11302018 * '%Y%m%d' # 20181130 Valid time delimiters - are as follows * 'T' * ' ' Valid time directives are as - follows * '%H:%M' # 23:59 * '%H:%M:%S' # - 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * - '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 * - '%H:%M:%S%z', # 23:59:58+0000 - datetime_features: List of datetime - features to be extract. Each entry must be one of * - 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR' - * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * - 'SECOND' Defaults to ['YEAR', 'MONTH', 'DAY', - 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR'] - Log: Performs the natural log on a numeric column. - Example: .. code-block:: python { "transformation": "Log", - "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the log transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - ZScale: Performs Z-scale normalization on a numeric column. - Example: .. code-block:: python { "transformation": - "ZScale", "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the z-scale transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - Vocabulary: Converts strings to integers, where each unique string - gets a unique integer representation. - Example: .. code-block:: python { "transformation": - "Vocabulary", "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the vocabulary transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the vocabulary - only to words whose number of occurrences in the input - exceeds frequency_threshold. If not specified, all words - in the vocabulary will be included. If both top_k and - frequency_threshold are specified, a word must satisfy - both conditions to be included. Defaults to None. - Categorical: Transforms categorical columns to integer columns. - Example: .. code-block:: python { "transformation": - "Categorical", "input_columns": ["feature_1"], "top_k": 10 } - Arguments: - input_columns: A list with a single column to - perform the categorical transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. - frequency_threshold: Limit the vocabulary - only to words whose number of occurrences in the input - exceeds frequency_threshold. 
If not specified, all words - in the vocabulary will be included. If both top_k and - frequency_threshold are specified, a word must satisfy - both conditions to be included. - Reduce: Given a column where each entry is a numeric array, - reduces arrays according to our reduce_mode. - Example: .. code-block:: python { "transformation": - "Reduce", "input_columns": ["feature_1"], "reduce_mode": - "MEAN", "output_columns": ["feature_1_mean"] } - Arguments: - input_columns: A list with a single column to - perform the reduce transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - reduce_mode: One of * 'MAX' * 'MIN' * - 'MEAN' * 'LAST_K' Defaults to 'MEAN'. - last_k: The number of last k elements when - 'LAST_K' reduce mode is used. Defaults to 1. - SplitString: Given a column of strings, splits strings into token - arrays. - Example: .. code-block:: python { "transformation": - "SplitString", "input_columns": ["feature_1"], "separator": - "$" } - Arguments: - input_columns: A list with a single column to - perform the split string transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - separator: Separator to split input string - into tokens. Defaults to ' '. - missing_token: Missing token to use when - no string is included. Defaults to ' _MISSING_ '. - NGram: Given a column of strings, splits strings into token arrays - where each token is an integer. - Example: .. code-block:: python { "transformation": "NGram", - "input_columns": ["feature_1"], "min_ngram_size": 1, - "max_ngram_size": 2, "separator": " " } - Arguments: - input_columns: A list with a single column to - perform the n-gram transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - min_ngram_size: Minimum n-gram size. Must - be a positive number and <= max_ngram_size. Defaults to - 1. - max_ngram_size: Maximum n-gram size. Must - be a positive number and >= min_ngram_size. Defaults to - 2. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the - dictionary's vocabulary only to words whose number of - occurrences in the input exceeds frequency_threshold. If - not specified, all words in the vocabulary will be - included. If both top_k and frequency_threshold are - specified, a word must satisfy both conditions to be - included. Defaults to None. - separator: Separator to split input string - into tokens. Defaults to ' '. - missing_token: Missing token to use when - no string is included. Defaults to ' _MISSING_ '. - Clip: Given a numeric column, clips elements such that elements < - min_value are assigned min_value, and elements > max_value are - assigned max_value. - Example: .. code-block:: python { "transformation": "Clip", - "input_columns": ["col1"], "output_columns": - ["col1_clipped"], "min_value": 1., "max_value": 10., } - Arguments: - input_columns: A list with a single column to - perform the n-gram transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - min_value: Number where all values below - min_value are set to min_value. If no min_value is - provided, min clipping will not occur. Defaults to None. 
- max_value: Number where all values above - max_value are set to max_value If no max_value is - provided, max clipping will not occur. Defaults to None. - MultiHotEncoding: Performs multi-hot encoding on a categorical - array column. - Example: .. code-block:: python { "transformation": - "MultiHotEncoding", "input_columns": ["col1"], } The number - of classes is determened by the largest number included in - the input if it is numeric or the total number of unique - values of the input if it is type str. If the input is has - type str and an element contians separator tokens, the input - will be split at separator indices, and the each element of - the split list will be considered a seperate class. For - example, - Input: .. code-block:: python [ ["foo bar"], # Example - 0 ["foo", "bar"], # Example 1 ["foo"], # Example - 2 ["bar"], # Example 3 ] - Output (with default separator=" "): .. code-block:: python [ - [1, 1], # Example 0 [1, 1], # Example 1 - [1, 0], # Example 2 [0, 1], # Example 3 ] - Arguments: - input_columns: A list with a single column to - perform the multi-hot-encoding on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the - dictionary's vocabulary only to words whose number of - occurrences in the input exceeds frequency_threshold. If - not specified, all words in the vocabulary will be - included. If both top_k and frequency_threshold are - specified, a word must satisfy both conditions to be - included. Defaults to None. - separator: Separator to split input string - into tokens. Defaults to ' '. - MaxAbsScale: Performs maximum absolute scaling on a numeric - column. - Example: .. code-block:: python { "transformation": - "MaxAbsScale", "input_columns": ["col1"], "output_columns": - ["col1_max_abs_scaled"] } - Arguments: - input_columns: A list with a single column to - perform max-abs-scale on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - Custom: Transformations defined in - tf_custom_transformation_definitions are included here in the - TensorFlow-based transformation configuration. For example, - given the following tf_custom_transformation_definitions: .. - code-block:: python [ { "transformation": "PlusX", - "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" } ] We can include the - following transformation: .. code-block:: python { - "transformation": "PlusX", "input_columns": ["col1"], - "output_columns": ["col1_max_abs_scaled"] "x": 5 } Note that - input_columns must still be included in our arguments and - output_columns is optional. All other arguments are those - defined in custom_transform_fn.py, which includes `"x"` in this - case. See tf_custom_transformation_definitions above. - legacy_transformations_path (Optional[str]) Deprecated. Prefer - tf_auto_transform_features. Path to a GCS file containing JSON - string for legacy style transformations. Note that - legacy_transformations_path and tf_auto_transform_features - cannot both be specified. - target_column: Target column of input data. - weight_column: Weight column of input data. - prediction_type: Model prediction type. One of - "classification", "regression", "time_series". 
- run_distill: Whether the distillation should be applied - to the training. - run_feature_selection: Whether the feature selection - should be applied to the dataset. - feature_selection_algorithm: The algorithm of feature - selection. One of "AMI", "CMIM", "JMIM", "MRMR", default to be "AMI". - The algorithms available are: AMI(Adjusted Mutual Information): - Reference: - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html - Arrays are not yet supported in this algorithm. CMIM(Conditional - Mutual Information Maximization): Reference paper: Mohamed - Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using - Joint Mutual Information Maximisation,” Expert Systems with - Applications, vol. 42, issue 22, 1 December 2015, Pages - 8520-8532. JMIM(Joint Mutual Information Maximization): Reference - paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature - selection using Joint Mutual Information Maximisation,” Expert - Systems with Applications, vol. 42, issue 22, 1 December 2015, - Pages 8520-8532. MRMR(MIQ Minimum-redundancy - Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long, - and Chris Ding. "Feature selection based on mutual information - criteria of max-dependency, max-relevance, and min-redundancy." - IEEE Transactions on pattern analysis and machine intelligence - 27, no. - 8: 1226-1238. - materialized_examples_format: The format to use for the - materialized examples. Should be either 'tfrecords_gzip' (default) or - 'parquet'. - max_selected_features: Maximum number of features to - select. If specified, the transform config will be purged by only using - the selected features that ranked top in the feature ranking, which has - the ranking value for all supported features. If the number of input - features is smaller than max_selected_features specified, we will still - run the feature selection process and generate the feature ranking, no - features will be excluded. The value will be set to 1000 by default if - run_feature_selection is enabled. - data_source_csv_filenames: CSV input data source to run - feature transform on. - data_source_bigquery_table_path: BigQuery input data - source to run feature transform on. - bigquery_staging_full_dataset_id: Dataset in - "projectId.datasetId" format for storing intermediate-FTE BigQuery - tables. If the specified dataset does not exist in BigQuery, FTE will - create the dataset. If no bigquery_staging_full_dataset_id is specified, - all intermediate tables will be stored in a dataset created under the - provided project in the input data source's location during FTE - execution called - "vertex_feature_transform_engine_staging_{location.replace('-', '_')}". - All tables generated by FTE will have a 30 day TTL. - model_type: Model type, which we wish to engineer features - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or - tide. Defaults to the empty value, `None`. - multimodal_image_columns: List of multimodal image - columns. Defaults to an empty list. - multimodal_text_columns: List of multimodal text - columns. Defaults to an empty list. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. 
- dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - Dataflow jobs. - encryption_spec_key_name: Customer-managed encryption key. - autodetect_csv_schema: If True, infers the column types - when importing CSVs into BigQuery. - - Returns: - dataset_stats: The stats of the dataset. - materialized_data: The materialized dataset. - transform_output: The transform output artifact. - split_example_counts: JSON string of data split example counts for train, - validate, and test splits. - bigquery_train_split_uri: BigQuery URI for the train split to pass to the - batch prediction component during distillation. - bigquery_validation_split_uri: BigQuery URI for the validation split to - pass to the batch prediction component during distillation. - bigquery_test_split_uri: BigQuery URI for the test split to pass to the - batch prediction component during evaluation. - bigquery_downsampled_test_split_uri: BigQuery URI for the downsampled test - split to pass to the batch prediction component during batch explain. - instance_schema_path: Schema of input data to the tf_model at serving - time. - training_schema_path: Schema of input data to the tf_model at training - time. - feature_ranking: The ranking of features, all features supported in the - dataset will be included. For "AMI" algorithm, array features won't be - available in the ranking as arrays are not supported yet. - gcp_resources: GCP resources created by this component. For more details, - see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - group_columns: A list of time series attribute column names that define - the time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over - time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated - over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions - aggregated over both the horizon and time series in the same hierarchy - group. 
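[Editor's note, not part of the patch: the transformation entries documented in the docstring above are plain dictionaries keyed by "transformation". A minimal sketch of such a config, using the field names from the docstring with hypothetical column names, written out as the JSON file that --tf_transformations_path would point to:]

# Illustrative sketch only; column names ("sales", "title", "tags") are made up.
import json

tf_transformations = [
    {"transformation": "Clip", "input_columns": ["sales"],
     "output_columns": ["sales_clipped"], "min_value": 0.0, "max_value": 100.0},
    {"transformation": "NGram", "input_columns": ["title"],
     "output_columns": ["title_ngrams"], "min_ngram_size": 1,
     "max_ngram_size": 2, "separator": " "},
    {"transformation": "MultiHotEncoding", "input_columns": ["tags"],
     "output_columns": ["tags_encoded"], "separator": " "},
]

# The component reads the list from a GCS file, so it would typically be
# serialized to JSON and uploaded before the pipeline run.
with open("transform_config.json", "w") as f:
    json.dump(tf_transformations, f, indent=2)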
- """ - # fmt: on - - return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', - command=[], - args=[ - 'feature_transform_engine', - dsl.ConcatPlaceholder(items=['--project=', project]), - dsl.ConcatPlaceholder(items=['--location=', location]), - dsl.ConcatPlaceholder( - items=[ - '--dataset_level_custom_transformation_definitions=', - dataset_level_custom_transformation_definitions, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--dataset_level_transformations=', - dataset_level_transformations, - ] - ), - dsl.ConcatPlaceholder( - items=['--forecasting_time_column=', forecasting_time_column] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_time_series_identifier_column=', - forecasting_time_series_identifier_column, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_time_series_attribute_columns=', - forecasting_time_series_attribute_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_unavailable_at_forecast_columns=', - forecasting_unavailable_at_forecast_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_available_at_forecast_columns=', - forecasting_available_at_forecast_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_forecast_horizon=', - forecasting_forecast_horizon, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_context_window=', - forecasting_context_window, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_predefined_window_column=', - forecasting_predefined_window_column, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_window_stride_length=', - forecasting_window_stride_length, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_window_max_count=', - forecasting_window_max_count, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_holiday_regions=', - forecasting_holiday_regions, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_apply_windowing=', - forecasting_apply_windowing, - ] - ), - dsl.ConcatPlaceholder( - items=['--predefined_split_key=', predefined_split_key] - ), - dsl.ConcatPlaceholder( - items=['--stratified_split_key=', stratified_split_key] - ), - dsl.ConcatPlaceholder( - items=['--timestamp_split_key=', timestamp_split_key] - ), - dsl.ConcatPlaceholder( - items=['--training_fraction=', training_fraction] - ), - dsl.ConcatPlaceholder( - items=['--validation_fraction=', validation_fraction] - ), - dsl.ConcatPlaceholder(items=['--test_fraction=', test_fraction]), - dsl.ConcatPlaceholder( - items=[ - '--tf_transform_execution_engine=', - tf_transform_execution_engine, - ] - ), - dsl.IfPresentPlaceholder( - input_name='tf_auto_transform_features', - then=dsl.ConcatPlaceholder( - items=[ - '--tf_auto_transform_features=', - tf_auto_transform_features, - ] - ), - ), - dsl.ConcatPlaceholder( - items=[ - '--tf_custom_transformation_definitions=', - tf_custom_transformation_definitions, - ] - ), - dsl.ConcatPlaceholder( - items=['--tf_transformations_path=', tf_transformations_path] - ), - dsl.ConcatPlaceholder( - items=[ - '--legacy_transformations_path=', - legacy_transformations_path, - ] - ), - dsl.ConcatPlaceholder( - items=['--data_source_csv_filenames=', data_source_csv_filenames] - ), - dsl.ConcatPlaceholder( - items=[ - '--data_source_bigquery_table_path=', - data_source_bigquery_table_path, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--bigquery_staging_full_dataset_id=', - bigquery_staging_full_dataset_id, - ] - ), - 
dsl.ConcatPlaceholder(items=['--target_column=', target_column]), - dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), - dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), - dsl.IfPresentPlaceholder( - input_name='model_type', - then=dsl.ConcatPlaceholder(items=['--model_type=', model_type]), - ), - dsl.ConcatPlaceholder( - items=[ - '--multimodal_image_columns=', - multimodal_image_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--multimodal_text_columns=', - multimodal_text_columns, - ] - ), - dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), - dsl.ConcatPlaceholder( - items=['--run_feature_selection=', run_feature_selection] - ), - dsl.ConcatPlaceholder( - items=[ - '--materialized_examples_format=', - materialized_examples_format, - ] - ), - dsl.ConcatPlaceholder( - items=['--max_selected_features=', max_selected_features] - ), - dsl.ConcatPlaceholder( - items=[ - '--feature_selection_staging_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection_staging_dir', - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--feature_selection_algorithm=', - feature_selection_algorithm, - ] - ), - dsl.ConcatPlaceholder( - items=['--feature_ranking_path=', feature_ranking.uri] - ), - dsl.ConcatPlaceholder( - items=[ - '--error_file_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.txt', - ] - ), - dsl.ConcatPlaceholder( - items=['--stats_result_path=', dataset_stats.uri] - ), - dsl.ConcatPlaceholder( - items=['--transform_output_artifact_path=', transform_output.uri] - ), - dsl.ConcatPlaceholder( - items=[ - '--transform_output_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform', - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--materialized_examples_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized', - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--export_data_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/export', - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--materialized_data_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized_data', - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--materialized_data_artifact_path=', - materialized_data.uri, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--bigquery_train_split_uri_path=', - bigquery_train_split_uri, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--bigquery_validation_split_uri_path=', - bigquery_validation_split_uri, - ] - ), - dsl.ConcatPlaceholder( - items=['--bigquery_test_split_uri_path=', bigquery_test_split_uri] - ), - dsl.ConcatPlaceholder( - items=[ - '--bigquery_downsampled_test_split_uri_path=', - bigquery_downsampled_test_split_uri, - ] - ), - dsl.ConcatPlaceholder( - items=['--split_example_counts_path=', split_example_counts] - ), - dsl.ConcatPlaceholder( - items=['--instance_schema_path=', instance_schema.path] - ), - dsl.ConcatPlaceholder( - items=['--training_schema_path=', training_schema.path] - ), - f'--job_name=feature-transform-engine-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}', - dsl.ConcatPlaceholder(items=['--dataflow_project=', project]), - dsl.ConcatPlaceholder( - items=[ - '--dataflow_staging_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging', - ] - ), - 
dsl.ConcatPlaceholder( - items=[ - '--dataflow_tmp_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp', - ] - ), - dsl.ConcatPlaceholder( - items=['--dataflow_max_num_workers=', dataflow_max_num_workers] - ), - dsl.ConcatPlaceholder( - items=['--dataflow_machine_type=', dataflow_machine_type] - ), - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', - dsl.ConcatPlaceholder( - items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] - ), - dsl.ConcatPlaceholder( - items=[ - '--dataflow_subnetwork_fully_qualified=', - dataflow_subnetwork, - ] - ), - dsl.ConcatPlaceholder( - items=['--dataflow_use_public_ips=', dataflow_use_public_ips] - ), - dsl.ConcatPlaceholder( - items=['--dataflow_service_account=', dataflow_service_account] - ), - dsl.ConcatPlaceholder( - items=['--dataflow_kms_key=', encryption_spec_key_name] - ), - dsl.ConcatPlaceholder( - items=['--autodetect_csv_schema=', autodetect_csv_schema] - ), - dsl.ConcatPlaceholder(items=['--gcp_resources_path=', gcp_resources]), - dsl.IfPresentPlaceholder( - input_name='group_columns', - then=dsl.ConcatPlaceholder( - items=['--group_columns=', group_columns] - ), - ), - dsl.IfPresentPlaceholder( - input_name='group_total_weight', - then=dsl.ConcatPlaceholder( - items=['--group_total_weight=', group_total_weight] - ), - ), - dsl.IfPresentPlaceholder( - input_name='temporal_total_weight', - then=dsl.ConcatPlaceholder( - items=['--temporal_total_weight=', temporal_total_weight] - ), - ), - dsl.IfPresentPlaceholder( - input_name='group_temporal_total_weight', - then=dsl.ConcatPlaceholder( - items=[ - '--group_temporal_total_weight=', - group_temporal_total_weight, - ] - ), - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py deleted file mode 100644 index a9b09479a8..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
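[Editor's note, not part of the patch: the feature_transform_engine spec above builds its entire command line from dsl.ConcatPlaceholder and dsl.IfPresentPlaceholder. A minimal sketch of the same pattern, reduced to a toy container component; the image and flags are hypothetical:]

from typing import Optional

from kfp import dsl


@dsl.container_component
def echo_flags(project: str, model_type: Optional[str] = None):
    return dsl.ContainerSpec(
        image='alpine:3.19',
        command=['echo'],
        args=[
            # ConcatPlaceholder glues a literal flag prefix to a runtime value.
            dsl.ConcatPlaceholder(items=['--project=', project]),
            # IfPresentPlaceholder emits its 'then' branch only when the
            # optional input was actually provided at runtime.
            dsl.IfPresentPlaceholder(
                input_name='model_type',
                then=dsl.ConcatPlaceholder(items=['--model_type=', model_type]),
            ),
        ],
    )

[At compile time these placeholders are resolved into the IR command line, which is why the component above can forward every optional argument without branching in Python.]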
- -"""AutoML Tabnet Hyperparameter Tuning component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input - - -@dsl.container_component -def tabnet_hyperparameter_tuning_job( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: list, - max_trial_count: int, - parallel_trial_count: int, - instance_baseline: Input[Artifact], - metadata: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - transform_output: Input[Artifact], - training_schema_uri: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - instance_schema_uri: dsl.OutputPath(str), - prediction_schema_uri: dsl.OutputPath(str), - trials: dsl.OutputPath(str), - prediction_docker_uri_output: dsl.OutputPath(str), - execution_metrics: dsl.OutputPath(dict), - weight_column: Optional[str] = '', - enable_profiler: Optional[bool] = False, - cache_data: Optional[str] = 'auto', - seed: Optional[int] = 1, - eval_steps: Optional[int] = 0, - eval_frequency_secs: Optional[int] = 600, - max_failed_trial_count: Optional[int] = 0, - study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', - training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, - training_disk_spec: Optional[dict] = { - 'boot_disk_type': 'pd-ssd', - 'boot_disk_size_gb': 100, - }, - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Tunes TabNet hyperparameters using Vertex HyperparameterTuningJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". - weight_column: The weight column name. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - study_spec_metric_id: Metric to optimize, possible - values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. - study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. 
- study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. - training_disk_spec: The training disk spec. - instance_baseline: The path to a JSON file for baseline values. - metadata: Amount of time in seconds to run the trainer for. - materialized_train_split: The path to the materialized train split. - materialized_eval_split: The path to the materialized validation split. - transform_output: The path to transform output. - training_schema_uri: The path to the training schema. - encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training job. - instance_schema_uri: The path to the instance schema. - prediction_schema_uri: The path to the prediction schema. - trials: The path to the hyperparameter tuning trials - prediction_docker_uri_output: The URI of the prediction container. - execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', - ], - args=[ - '--type', - 'HyperparameterTuningJobWithMetrics', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--execution_metrics', - execution_metrics, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "tabnet-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "study_spec": {"metrics": [{"metric_id": "', - study_spec_metric_id, - '", "goal": "', - study_spec_metric_goal, - '"}], "parameters": ', - study_spec_parameters_override, - ', "algorithm": "', - study_spec_algorithm, - '", "measurement_selection_type": "', - study_spec_measurement_selection_type, - '"}, "max_trial_count": ', - max_trial_count, - ', "parallel_trial_count": ', - parallel_trial_count, - ', "max_failed_trial_count": ', - max_failed_trial_count, - ( - ', "trial_job_spec": {"worker_pool_specs":' - ' [{"replica_count":"' - ), - '1', - '", "machine_spec": ', - training_machine_spec, - ', "disk_spec": ', - training_disk_spec, - ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', - '", "args": ["--target_column=', - target_column, - '", "--weight_column=', - weight_column, - '", "--model_type=', - prediction_type, - '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - '", "--prediction_docker_uri_artifact_path=', - prediction_docker_uri_output, - '", "--baseline_path=', - instance_baseline.uri, - '", "--metadata_path=', - metadata.uri, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_schema_path=', - training_schema_uri.uri, - '", "--instance_schema_path=', - instance_schema_uri, - '", "--prediction_schema_path=', - prediction_schema_uri, - '", 
"--trials_path=', - trials, - '", "--job_dir=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--training_data_path=' - ), - materialized_train_split.uri, - '", "--validation_data_path=', - materialized_eval_split.uri, - '", "--enable_profiler=', - enable_profiler, - '", "--cache_data=', - cache_data, - '", "--seed=', - seed, - '", "--measurement_selection_type=', - study_spec_measurement_selection_type, - '", "--metric_goal=', - study_spec_metric_goal, - '", "--eval_steps=', - eval_steps, - '", "--eval_frequency_secs=', - eval_frequency_secs, - '", "--generate_feature_importance=true"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml deleted file mode 100644 index e687acd6bf..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +++ /dev/null @@ -1,4661 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-tabnet-hyperparameter-tuning-job -# Description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. -# Inputs: -# bigquery_staging_full_dataset_id: str [Default: ''] -# cache_data: str [Default: 'auto'] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# enable_profiler: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# eval_frequency_secs: int [Default: 600.0] -# eval_steps: int [Default: 0.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_selection_algorithm: str [Default: 'AMI'] -# location: str -# materialized_examples_format: str [Default: 'tfrecords_gzip'] -# max_failed_trial_count: int [Default: 0.0] -# max_selected_features: int [Default: -1.0] -# max_trial_count: int -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# parallel_trial_count: int -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# seed: int [Default: 1.0] -# stratified_split_key: str [Default: ''] -# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] -# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] -# study_spec_metric_goal: str -# study_spec_metric_id: str -# study_spec_parameters_override: list -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: ''] -# tf_transformations_path: str [Default: ''] -# training_fraction: float [Default: -1.0] -# 
transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# worker_pool_specs_override: list -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: 
batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - model-evaluation - inputs: - artifacts: - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: TabNet Hyperparameter Tuning - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import - inputDefinitions: - artifacts: - pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - 
parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - get-best-hyperparameter-tuning-job-trial - - model-upload - inputs: - artifacts: - pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - 
producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: pipelinechannel--location - materialized_examples_format: - componentInputParameter: pipelinechannel--materialized_examples_format - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - model_type: - runtimeValue: - constant: neural_network - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transform_execution_engine: - componentInputParameter: pipelinechannel--tf_transform_execution_engine - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - 
componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - get-best-hyperparameter-tuning-job-trial: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-best-hyperparameter-tuning-job-trial - dependentTasks: - - tabnet-hyperparameter-tuning-job - inputs: - parameters: - gcp_resources: - taskOutputParameter: - outputParameterKey: gcp_resources - producerTask: tabnet-hyperparameter-tuning-job - instance_schema_uri: - taskOutputParameter: - outputParameterKey: instance_schema_uri - producerTask: tabnet-hyperparameter-tuning-job - prediction_docker_uri: - taskOutputParameter: - outputParameterKey: prediction_docker_uri_output - producerTask: tabnet-hyperparameter-tuning-job - prediction_schema_uri: - taskOutputParameter: - outputParameterKey: prediction_schema_uri - producerTask: tabnet-hyperparameter-tuning-job - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - trials_dir: - taskOutputParameter: - outputParameterKey: trials - producerTask: tabnet-hyperparameter-tuning-job - taskInfo: - name: get-best-hyperparameter-tuning-job-trial - get-tabnet-study-spec-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-tabnet-study-spec-parameters - dependentTasks: - - training-configurator-and-validator - inputs: - artifacts: - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - parameters: - max_trial_count: - componentInputParameter: pipelinechannel--max_trial_count - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - taskInfo: - name: get-tabnet-study-spec-parameters - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-infra-validator - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - parse-worker-pool-specs-override: - cachingOptions: - enableCache: true - componentRef: - name: comp-parse-worker-pool-specs-override - inputs: - parameters: - worker_pool_specs_override: - componentInputParameter: pipelinechannel--worker_pool_specs_override - taskInfo: - name: parse-worker-pool-specs-override - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - tabnet-hyperparameter-tuning-job: - cachingOptions: - enableCache: true - 
componentRef: - name: comp-tabnet-hyperparameter-tuning-job - dependentTasks: - - feature-transform-engine - - get-tabnet-study-spec-parameters - - parse-worker-pool-specs-override - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - cache_data: - componentInputParameter: pipelinechannel--cache_data - enable_profiler: - componentInputParameter: pipelinechannel--enable_profiler - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - eval_frequency_secs: - componentInputParameter: pipelinechannel--eval_frequency_secs - eval_steps: - componentInputParameter: pipelinechannel--eval_steps - location: - componentInputParameter: pipelinechannel--location - max_failed_trial_count: - componentInputParameter: pipelinechannel--max_failed_trial_count - max_trial_count: - componentInputParameter: pipelinechannel--max_trial_count - parallel_trial_count: - componentInputParameter: pipelinechannel--parallel_trial_count - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - seed: - componentInputParameter: pipelinechannel--seed - study_spec_algorithm: - componentInputParameter: pipelinechannel--study_spec_algorithm - study_spec_measurement_selection_type: - componentInputParameter: pipelinechannel--study_spec_measurement_selection_type - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - study_spec_metric_id: - componentInputParameter: pipelinechannel--study_spec_metric_id - study_spec_parameters_override: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-tabnet-study-spec-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - training_disk_spec: - taskOutputParameter: - outputParameterKey: training_disk_spec - producerTask: parse-worker-pool-specs-override - training_machine_spec: - taskOutputParameter: - outputParameterKey: training_machine_spec - producerTask: parse-worker-pool-specs-override - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: tabnet-hyperparameter-tuning-job - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - 
outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--cache_data: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--enable_profiler: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eval_frequency_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--eval_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--materialized_examples_format: - parameterType: STRING - pipelinechannel--max_failed_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--max_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--parallel_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_algorithm: - parameterType: STRING - pipelinechannel--study_spec_measurement_selection_type: - parameterType: STRING - pipelinechannel--study_spec_metric_goal: - parameterType: STRING - pipelinechannel--study_spec_metric_id: - 
parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transform_execution_engine: - parameterType: STRING - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--worker_pool_specs_override: - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. 
More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. 
Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. 
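For reference, the two dataset-level inputs documented above are plain Python lists that can be assembled before compiling the pipeline. This is a minimal sketch built from the Join, TimeAggregate and ConcatCols examples embedded in the descriptions; the table URI, module path and column names are placeholders.

# Dataset-level transformation inputs for the feature-transform-engine
# component, based on the examples in the parameter descriptions above.

# Built-in dataset-level transformations: a Join followed by a TimeAggregate.
dataset_level_transformations = [
    {
        "transformation": "Join",
        "right_table_uri": "bq://test-project.dataset_test.table",
        "join_keys": [["join_key_col", "join_key_col"]],
    },
    {
        "transformation": "TimeAggregate",
        "time_difference": 40,
        "time_difference_units": "DAY",
        "time_series_identifier_columns": ["store_id"],
        "time_column": "time_col",
        "time_difference_target_column": "target_col",
        "output_column": "output_col",
    },
]

# A custom, bring-your-own dataset-level transform function used together
# with the built-in transformations above (path and names are placeholders).
dataset_level_custom_transformation_definitions = [
    {
        "transformation": "ConcatCols",
        "module_path": "gs://bucket/custom_transform_fn_dlt.py",
        "function_name": "concat_cols",
    },
]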
- isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. 
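The forecasting-related inputs listed above are left at their defaults in this TabNet spec. The following hypothetical dictionary only illustrates how they fit together when the same feature-transform-engine component is used for a forecasting run; every column name and value is a placeholder.

# Hypothetical forecasting-related keyword arguments for the
# feature-transform-engine component described above. All names and values
# are placeholders; this TabNet pipeline leaves them at their defaults.
forecasting_kwargs = {
    "forecasting_time_column": "date",
    "forecasting_time_series_identifier_column": "store_id",
    "forecasting_forecast_horizon": 28,      # predict 28 steps ahead
    "forecasting_context_window": 56,        # look back 56 steps
    "forecasting_window_max_count": 10000,   # cap the number of generated windows
    "forecasting_holiday_regions": ["US"],   # only valid for day-granularity data
    "forecasting_available_at_forecast_columns": ["promo_planned"],
    "forecasting_unavailable_at_forecast_columns": ["sales"],
}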
- parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
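A small sketch tying together the three row-level transformation inputs of the feature-transform-engine component described above, assembled from the examples in the parameter descriptions. Bucket paths and feature names are placeholders, and in practice tf_transformations_path points at a JSON file on Cloud Storage rather than a local file.

import json

# Auto / pre-resolved type transformations keyed by type hint.
tf_auto_transform_features = {
    "auto": ["feature1"],
    "categorical": ["feature2", "feature3"],
}

# Custom TensorFlow-based transform functions used alongside the built-ins.
tf_custom_transformation_definitions = [
    {
        "transformation": "PlusOne",
        "module_path": "gs://bucket/custom_transform_fn.py",
        "function_name": "plus_one_transform",
    },
]

# Fully explicit, per-column configuration; the serialized file is what
# tf_transformations_path refers to (mutually exclusive with the legacy path).
explicit_transformations = [
    {"transformation": "ZScale", "input_columns": ["feature_1"]},
    {"transformation": "Datetime", "input_columns": ["feature_2"],
     "time_format": "%Y-%m-%d"},
    {"transformation": "Vocabulary", "input_columns": ["feature_3"]},
]

with open("transform_config.json", "w") as f:
    json.dump(explicit_transformations, f)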
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-get-best-hyperparameter-tuning-job-trial: - executorLabel: exec-get-best-hyperparameter-tuning-job-trial - inputDefinitions: - parameters: - gcp_resources: - description: Proto tracking the hyperparameter tuning job. - parameterType: STRING - instance_schema_uri: - defaultValue: '' - description: The instance schema uri. - isOptional: true - parameterType: STRING - prediction_docker_uri: - defaultValue: '' - description: The prediction docker container uri. - isOptional: true - parameterType: STRING - prediction_schema_uri: - defaultValue: '' - description: The prediction schema_uri. - isOptional: true - parameterType: STRING - read_value_from_file: - defaultValue: false - description: If true, read file to get the relevant value. - isOptional: true - parameterType: BOOLEAN - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - trials_dir: - defaultValue: '' - description: The path to the hyperparameter tuning trials. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-get-tabnet-study-spec-parameters: - executorLabel: exec-get-tabnet-study-spec-parameters - inputDefinitions: - artifacts: - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Metadata generated by example gen. - parameters: - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - prediction_type: - description: 'The type of prediction the model is to produce. - - ''classification'' or ''regression''.' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. 
- - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. 
This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
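The comp-model-batch-predict inputs above correspond to the GCPC ModelBatchPredictOp wrapper. The sketch below assumes the standard google_cloud_pipeline_components.v1.batch_predict_job import path, uses placeholder URIs, and omits the model/unmanaged_container_model input that a real run must wire in from an upstream step (in this spec it would come from the tuning-job outputs).

from kfp import dsl
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


@dsl.pipeline(name="batch-predict-sketch")
def batch_predict_sketch(project: str, location: str = "us-central1"):
    # Argument names mirror the comp-model-batch-predict inputDefinitions
    # above; URIs and the display name are placeholders. Either model= or
    # unmanaged_container_model= must also be supplied at runtime.
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name="tabnet-batch-predict",
        gcs_source_uris=["gs://my-bucket/batch_inputs/*.jsonl"],
        instances_format="jsonl",
        gcs_destination_output_uri_prefix="gs://my-bucket/batch_outputs",
        predictions_format="jsonl",
        machine_type="n1-standard-4",
        starting_replica_count=1,
        max_replica_count=10,
    )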
- isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - 
schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. 
The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-parse-worker-pool-specs-override: - executorLabel: exec-parse-worker-pool-specs-override - inputDefinitions: - parameters: - worker_pool_specs_override: - description: 'The list of dictionaries for overriding training - - and evaluation worker pool specs.' 
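The unmanaged_container_model docstring above already carries an importer example; tidied into a runnable sketch it looks roughly like the following, assuming the usual google_cloud_pipeline_components.types and v1.model import paths (the doubled package path in the docstring appears to be a typo).

from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.model import ModelUploadOp


@dsl.pipeline(name="model-upload-sketch")
def model_upload_sketch(project: str, location: str = "us-central1"):
    # Import a model produced outside Vertex AI as an UnmanagedContainerModel
    # artifact; URIs are taken from the docstring example above.
    importer = dsl.importer(
        artifact_uri="gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model",
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": ("us-docker.pkg.dev/vertex-ai/automl-tabular/"
                             "prediction-server:prod"),
            }
        },
    )

    # Upload it as a Vertex AI Model, mirroring comp-model-upload above.
    ModelUploadOp(
        project=project,
        location=location,
        display_name="tabnet-model",
        unmanaged_container_model=importer.outputs["artifact"],
    )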
- parameterType: LIST - outputDefinitions: - parameters: - eval_machine_spec: - description: The eval machine spec. - parameterType: STRUCT - eval_replica_count: - description: The replica count for eval. - parameterType: NUMBER_INTEGER - training_disk_spec: - description: The training disk spec. - parameterType: STRUCT - training_machine_spec: - description: The training machine spec. - parameterType: STRUCT - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-tabnet-hyperparameter-tuning-job: - executorLabel: exec-tabnet-hyperparameter-tuning-job - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to a JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized validation split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Amount of time in seconds to run the trainer for. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to transform output. - parameters: - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to - - ''auto'', caching is determined based on the dataset size.' - isOptional: true - parameterType: STRING - enable_profiler: - defaultValue: false - description: 'Enables profiling and saves a trace - - during evaluation.' 
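A hypothetical worker_pool_specs_override for the parse-worker-pool-specs-override component above, following the Vertex AI WorkerPoolSpec layout. The machine and disk values mirror the training defaults shown in the tuning-job inputs further down; the mapping of list positions to the training and evaluation pools is an assumption, so treat the indices as illustrative.

# Hypothetical override list; which entry maps to training vs. evaluation is
# assumed here, not taken from the spec.
worker_pool_specs_override = [
    {   # training worker pool (assumed position 0)
        "machine_spec": {"machine_type": "c2-standard-16"},
        "disk_spec": {"boot_disk_size_gb": 100, "boot_disk_type": "pd-ssd"},
    },
    {},  # unused pools left at their defaults
    {},
    {   # evaluation worker pool (assumed position 3)
        "machine_spec": {"machine_type": "n1-standard-8"},
        "replica_count": 1,
    },
]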
- isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and - - checkpointing will take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not - - specified or negative, it means run evaluation on the whole validation - - dataset. If set to 0, it means run evaluation for a fixed number of - - samples.' - isOptional: true - parameterType: NUMBER_INTEGER - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_failed_trial_count: - defaultValue: 0.0 - description: 'The number of failed trials that - - need to be seen before failing the HyperparameterTuningJob. If set to - 0, - - Vertex AI decides how many trials must fail before the whole job fails.' - isOptional: true - parameterType: NUMBER_INTEGER - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - parallel_trial_count: - description: 'The desired number of trials to run - - in parallel.' - parameterType: NUMBER_INTEGER - prediction_type: - description: 'The type of prediction the model is to - - produce. "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - study_spec_algorithm: - defaultValue: ALGORITHM_UNSPECIFIED - description: 'The search algorithm specified for - - the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or - - ''RANDOM_SEARCH''.' - isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement - - to use if/when the service automatically selects the final measurement - - from previously reported intermediate measurements. One of - - "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, - - possible values: "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize, possible - - values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', - ''auc'', ''precision'', ''recall''].' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries - - representing parameters to optimize. The dictionary key is the - - parameter_id, which is passed to training job as a command line - - argument, and the dictionary value is the parameter specification of the - - metric.' - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - training_disk_spec: - defaultValue: - boot_disk_size_gb: 100.0 - boot_disk_type: pd-ssd - description: The training disk spec. - isOptional: true - parameterType: STRUCT - training_machine_spec: - defaultValue: - machine_type: c2-standard-16 - description: 'The training machine - - spec. See https://cloud.google.com/compute/docs/machine-types for - - options.' 
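The study_spec_parameters_override input described above takes Vizier-style parameter specs keyed by parameter_id; entries with a matching parameter_id replace the defaults that get_tabnet_study_spec_parameters selects later in this spec. A minimal illustrative value, following the dictionary shape used by those search spaces (the particular ids and ranges here are placeholders, not recommendations):

# Each entry mirrors a Vizier ParameterSpec dict: a parameter_id plus one
# value-spec field, optionally with a scale_type.
study_spec_parameters_override = [
    {
        'parameter_id': 'learning_rate',
        'double_value_spec': {'min_value': 0.001, 'max_value': 0.01},
        'scale_type': 'UNIT_LOG_SCALE',
    },
    {
        'parameter_id': 'batch_size',
        'discrete_value_spec': {'values': [1024, 2048]},
    },
]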
- isOptional: true - parameterType: STRUCT - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - execution_metrics: - description: Core metrics in dictionary of hyperparameter tuning job execution. - parameterType: STRUCT - gcp_resources: - description: Serialized gcp_resources proto tracking the custom training - job. - parameterType: STRING - instance_schema_uri: - description: The path to the instance schema. - parameterType: STRING - prediction_docker_uri_output: - description: The URI of the prediction container. - parameterType: STRING - prediction_schema_uri: - description: The path to the prediction schema. - parameterType: STRING - trials: - description: The path to the hyperparameter tuning trials - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. 
The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' 
- isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-get-best-hyperparameter-tuning-job-trial: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_best_hyperparameter_tuning_job_trial - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ - \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ - \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ - \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ - \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ - \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ - \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ - \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ - .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ - \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ - \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ - \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ - \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ - \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ - \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ - \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ - \ provided, read the file before continuing.\n if read_value_from_file:\n\ - \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ - \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ - \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ - \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ - \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ - \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ - \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ - \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ - \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ - \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ - \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ - \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ - \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ - \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ - \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ - \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ - \ for trial in response.trials:\n if trial.final_measurement:\n \ - \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ - \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ - \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ - \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ - \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ - \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ - \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ - \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ - \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ - \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ - \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ - \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim - exec-get-tabnet-study-spec-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_tabnet_study_spec_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_tabnet_study_spec_parameters(\n metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ - \ max_trial_count: int,\n prediction_type: str,\n study_spec_parameters_override:\ - \ list, # Required for KFP validation; pylint:disable=g-bare-generic\n\ - ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ - \ study_spec_parameters for a TabNet hyperparameter tuning job.\n\n Args:\n\ - \ metadata: Metadata generated by example gen.\n max_trial_count:\ - \ The desired total number of trials.\n prediction_type: The type of\ - \ prediction the model is to produce.\n 'classification' or 'regression'.\n\ - \ study_spec_parameters_override: List of dictionaries representing parameters\n\ - \ to optimize. 
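The _get_best_hyperparameter_tuning_job_trial function above reduces the job's trials to id/objective pairs and then picks one with max or min according to the study goal. The selection step in isolation, as a small self-contained sketch with made-up trial values:

def pick_best_trial(trials, study_spec_metric_goal):
    # 'MAXIMIZE' keeps the highest objective value, 'MINIMIZE' the lowest;
    # any other goal is rejected, mirroring the component above.
    if study_spec_metric_goal == 'MAXIMIZE':
        best_fn = max
    elif study_spec_metric_goal == 'MINIMIZE':
        best_fn = min
    else:
        raise ValueError(
            f'Unexpected study spec metric goal: {study_spec_metric_goal}')
    return best_fn(trials, key=lambda trial: trial['objective_value'])


trials = [
    {'id': '1', 'objective_value': 0.91},
    {'id': '2', 'objective_value': 0.87},
]
assert pick_best_trial(trials, 'MAXIMIZE')['id'] == '1'
assert pick_best_trial(trials, 'MINIMIZE')['id'] == '2'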
The dictionary key is the parameter_id, which is passed\ - \ to\n training job as a command line argument, and the dictionary\ - \ value is the\n parameter specification of the metric.\n\n Returns:\n\ - \ List of final Vizier study_spec_parameters of type ParameterSpec.\n\ - \ \"\"\"\n # Define different search space constants\n tabnet_params_small_data_small_search_space\ - \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [5000, 10000, 15000, 20000, 25000, 30000]\n\ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ - \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ - \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 1000, 'max_value': 5000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ - \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'batch_momentum',\n \ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.125,\ - \ 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 4},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'class_weight',\n \ - \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'loss_function_type',\n 'categorical_value_spec': {'values':\ - \ ['weighted_cross_entropy']},\n },\n {\n 'parameter_id':\ - \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ - \ ['false']},\n },\n ]\n tabnet_params_small_data_medium_search_space\ - \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n\ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ 
{'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ - \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ - \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ - \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'batch_momentum',\n \ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ - \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'class_weight',\n \ - \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'loss_function_type',\n 'categorical_value_spec': {\n \ - \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ - \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ - \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ - \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ - \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ - \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ - \ 'categorical_value_spec': {'values': ['false']},\n },\n\ - \ ]\n tabnet_params_small_data_large_search_space = [\n {\n \ - \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ - \ 'values': [10000, 20000, 30000, 40000, 50000]\n \ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n \ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ - \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.00007,\ - \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n {\n\ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [5, 
10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 50, 'max_value': 300},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 7},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ - \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ - \ 0.0000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'batch_momentum',\n \ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ - \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'class_weight',\n \ - \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'loss_function_type',\n 'categorical_value_spec': {\n \ - \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ - \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ - \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ - \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ - \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ - \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ - \ 'categorical_value_spec': {'values': ['false', 'true']},\n \ - \ },\n ]\n tabnet_params_medium_data_small_search_space = [\n \ - \ {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [10000, 20000, 30000, 40000, 50000]\n \ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [1024, 4096, 8192, 16384]},\n },\n {\n 'parameter_id':\ - \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ - \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 100, 'max_value': 300},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 
'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ - \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ - \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ - \ 'categorical_value_spec': {'values': ['false']},\n },\n\ - \ ]\n tabnet_params_medium_data_medium_search_space = [\n {\n \ - \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ - \ 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n \ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ - \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ - \ {'min_value': 0.00007, 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 50, 'max_value': 400},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 
0.0000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ - \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ - \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ - \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ - \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ - \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ - \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ - \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ - \ ['false']},\n },\n ]\n tabnet_params_medium_data_large_search_space\ - \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ - \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ - \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ - \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 50, 'max_value': 500},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 2, 'max_value': 8},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ - \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 
'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ - \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ - \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ - \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ - \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ - \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ - \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ - \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ - \ ['false', 'true']},\n },\n ]\n tabnet_params_large_data_small_search_space\ - \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [8192, 16384, 32768]},\n },\n {\n 'parameter_id':\ - \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0002,\ - \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 100, 'max_value': 400},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 3, 'max_value': 6},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': 
{'min_value':\ - \ 1.0, 'max_value': 10.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ - \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ - \ 'categorical_value_spec': {'values': ['false']},\n },\n\ - \ ]\n tabnet_params_large_data_medium_search_space = [\n {\n \ - \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ - \ 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n \ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [4096, 8192, 16384, 32768]},\n },\n {\n \ - \ 'parameter_id': 'learning_rate',\n 'double_value_spec': {'min_value':\ - \ 0.0001, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ - \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 200, 'max_value': 500},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 3, 'max_value': 7},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ - \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 4, 'max_value': 8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ - \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ - \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ - \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ - \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ - \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ - \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ - \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ - \ ['false']},\n },\n ]\n tabnet_params_large_data_large_search_space\ - \ = [\n 
{\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ - \ {\n 'values': [50000, 70000, 90000, 110000, 130000, 150000]\n\ - \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ - \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ - \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ - \ {'values': [4096, 8192, 16384, 32768, 65536]},\n },\n {\n \ - \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ - \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'large_category_dim',\n \ - \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ - \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ - \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ - \ 'integer_value_spec': {'min_value': 100, 'max_value': 700},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ - \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ - \ 'integer_value_spec': {'min_value': 3, 'max_value': 8},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ - \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ - \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ - \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n {\n 'parameter_id': 'decay_every',\n \ - \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ - \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ - \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ - \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ - \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ - \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ - \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ - \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ - \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'loss_function_type',\n \ - \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ - \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ - \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ - \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ - \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ - \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ - \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ - \ ['false', 'true']},\n },\n ]\n search_spaces = {\n 'tabnet_params_small_data_small_search_space':\ - \ (\n tabnet_params_small_data_small_search_space\n ),\n \ - \ 'tabnet_params_small_data_medium_search_space': (\n tabnet_params_small_data_medium_search_space\n\ - \ ),\n 'tabnet_params_small_data_large_search_space': (\n \ - \ tabnet_params_small_data_large_search_space\n ),\n 'tabnet_params_medium_data_small_search_space':\ - \ (\n 
tabnet_params_medium_data_small_search_space\n ),\n\ - \ 'tabnet_params_medium_data_medium_search_space': (\n tabnet_params_medium_data_medium_search_space\n\ - \ ),\n 'tabnet_params_medium_data_large_search_space': (\n \ - \ tabnet_params_medium_data_large_search_space\n ),\n 'tabnet_params_large_data_small_search_space':\ - \ (\n tabnet_params_large_data_small_search_space\n ),\n \ - \ 'tabnet_params_large_data_medium_search_space': (\n tabnet_params_large_data_medium_search_space\n\ - \ ),\n 'tabnet_params_large_data_large_search_space': (\n \ - \ tabnet_params_large_data_large_search_space\n ),\n }\n\n #\ - \ pylint: disable=g-import-not-at-top,import-outside-toplevel\n import\ - \ json\n import warnings\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \ with open(metadata, 'r') as f:\n metadata_path = f.read()\n metadata\ - \ = json.loads(metadata_path)\n # Calculate dataset size bucket. One of\ - \ 'small' (< 1M rows),\n # 'medium' (1M - 100M rows), or 'large' (> 100M\ - \ rows)\n num_rows = (\n metadata['num_examples']['train']\n \ - \ + metadata['num_examples']['valid']\n + metadata['num_examples']['test']\n\ - \ )\n dataset_size_bucket = 'medium'\n if num_rows < 10000000:\n dataset_size_bucket\ - \ = 'small'\n elif num_rows > 100000000:\n dataset_size_bucket = 'large'\n\ - \n # Calculate search space bucket using max_trial_count.\n # One of 'small'\ - \ (< 10), medium (1 - 100), and large (> 100).\n search_space = 'medium'\n\ - \ if max_trial_count < 10:\n search_space = 'small'\n elif max_trial_count\ - \ > 100:\n search_space = 'large'\n\n # Get params for classification.\n\ - \ params = search_spaces[\n f'tabnet_params_{dataset_size_bucket}_data_{search_space}_search_space'\n\ - \ ]\n\n # Format for regression. To get regression study_spec_parameters,\ - \ we need\n # to set `loss_function_type` to \u2018mae\u2019 (\u2018mae\u2019\ - \ and \u2018mse\u2019 for 'large'\n # search space), remove the `alpha_focal_loss`,\ - \ `gamma_focal_loss`\n # and `class_weight` parameters and increase the\ - \ max for\n # `sparsity_loss_weight` to 100.\n if prediction_type == 'regression':\n\ - \ formatted_params = []\n for param in params:\n if param['parameter_id']\ - \ in [\n 'alpha_focal_loss',\n 'gamma_focal_loss',\n \ - \ 'class_weight',\n ]:\n continue\n elif param['parameter_id']\ - \ == 'sparsity_loss_weight':\n param['double_value_spec']['max_value']\ - \ = 100\n elif param['parameter_id'] == 'loss_function_type':\n \ - \ if search_space == 'large':\n param['categorical_value_spec']['values']\ - \ = ['mae', 'mse']\n else:\n param['categorical_value_spec']['values']\ - \ = ['mae']\n formatted_params.append(param)\n else:\n formatted_params\ - \ = params\n\n # Create parameter_id -> parameter_config dictionary for\ - \ params to override\n # and override parameters.\n override_params =\ - \ {}\n for param in study_spec_parameters_override:\n override_params[param['parameter_id']]\ - \ = param\n\n study_spec_parameters = []\n for param in formatted_params:\n\ - \ study_spec_parameters.append(\n override_params.get(param['parameter_id'],\ - \ param)\n )\n\n extra_overrides = set(override_params) - set(\n \ - \ p['parameter_id'] for p in params\n )\n if extra_overrides:\n extra_override_str\ - \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ - \ {extra_override_str} were not found in the params and '\n 'will\ - \ be ignored.'\n )\n\n return study_spec_parameters\n\n" - image: python:3.7-slim - exec-model-batch-predict: - container: - args: - - --type - 
- BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - 
'{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - 
'{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-parse-worker-pool-specs-override: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _parse_worker_pool_specs_override - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ - \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ - \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ - \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ - \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ - \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ - \ The list of dictionaries for overriding training\n and evaluation\ - \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ - \ machine spec.\n training_disk_spec: The training disk spec.\n \ - \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ - \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ - \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ - \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ - \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ - \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ - \ training_machine_spec = worker_pool_specs_override[0].get(\n \ - \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ - \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ - \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ - \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ - \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ - \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ - \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'training_machine_spec',\n \ - \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ - \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ - \ eval_machine_spec,\n eval_replica_count,\n )\n\n" - image: python:3.7 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-tabnet-hyperparameter-tuning-job: - container: - args: - - --type - - HyperparameterTuningJobWithMetrics - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --execution_metrics - - '{{$.outputs.parameters[''execution_metrics''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"tabnet-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", - "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", - "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", - ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", - "\", \"measurement_selection_type\": \"", 
"{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", - "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", - ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", - ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", - ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", - "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", - ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", - "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", - "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", - "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", - "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", - "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", - "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", - "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", - "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", - "{{$.inputs.parameters[''seed'']}}", "\", \"--measurement_selection_type=", - "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", - \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", - "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", - "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": 
["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. 
- name: automl-tabular-tabnet-hyperparameter-tuning-job -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--cache_data: - componentInputParameter: cache_data - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--enable_profiler: - componentInputParameter: enable_profiler - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eval_frequency_secs: - componentInputParameter: eval_frequency_secs - pipelinechannel--eval_steps: - componentInputParameter: eval_steps - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--materialized_examples_format: - componentInputParameter: materialized_examples_format - pipelinechannel--max_failed_trial_count: - componentInputParameter: max_failed_trial_count - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--max_trial_count: - componentInputParameter: max_trial_count - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--parallel_trial_count: - componentInputParameter: parallel_trial_count - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - 
pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_algorithm: - componentInputParameter: study_spec_algorithm - pipelinechannel--study_spec_measurement_selection_type: - componentInputParameter: study_spec_measurement_selection_type - pipelinechannel--study_spec_metric_goal: - componentInputParameter: study_spec_metric_goal - pipelinechannel--study_spec_metric_id: - componentInputParameter: study_spec_metric_id - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transform_execution_engine: - componentInputParameter: tf_transform_execution_engine - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--worker_pool_specs_override: - componentInputParameter: worker_pool_specs_override - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 
The Vertex dataset artifact. - parameters: - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Staging directory for BigQuery tables. - isOptional: true - parameterType: STRING - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to ''auto'', caching is - - determined based on the dataset size.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - enable_profiler: - defaultValue: false - description: Enables profiling and saves a trace during evaluation. - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and checkpointing will - - take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not specified or - - negative, it means run evaluation on the whole validation dataset. If set - - to 0, it means run evaluation for a fixed number of samples.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' 
- isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format for the materialized examples. - isOptional: true - parameterType: STRING - max_failed_trial_count: - defaultValue: 0.0 - description: 'The number of failed trials that need to be seen - - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - - how many trials must fail before the whole job fails.' - isOptional: true - parameterType: NUMBER_INTEGER - max_selected_features: - defaultValue: -1.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - parallel_trial_count: - description: The desired number of trials to run in parallel. - parameterType: NUMBER_INTEGER - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to enable feature selection. - isOptional: true - parameterType: BOOLEAN - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - study_spec_algorithm: - defaultValue: ALGORITHM_UNSPECIFIED - description: 'The search algorithm specified for the study. One of - - ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' - isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: ' Which measurement to use if/when the - - service automatically selects the final measurement from previously - - reported intermediate measurements. One of "BEST_MEASUREMENT" or - - "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' 
- parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize, possible values: [ ''loss'', - - ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', - - ''recall''].' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: List of auto transform features. - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: '' - description: 'Execution engine to run TF-based - - transformations. Currently supports "dataflow" or "bigquery"' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - worker_pool_specs_override: - description: 'The dictionary for overriding training and - - evaluation worker pool specs. The dictionary should be of format - - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py deleted file mode 100644 index e0ceeb08f9..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Tabnet Trainer component spec.""" - -from typing import Optional - -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def tabnet_trainer( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - learning_rate: float, - instance_baseline: Input[Artifact], - metadata: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - transform_output: Input[Artifact], - training_schema_uri: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument - weight_column: Optional[str] = '', - max_steps: Optional[int] = -1, - max_train_secs: Optional[int] = -1, - large_category_dim: Optional[int] = 1, - large_category_thresh: Optional[int] = 300, - yeo_johnson_transform: Optional[bool] = True, - feature_dim: Optional[int] = 64, - feature_dim_ratio: Optional[float] = 0.5, - num_decision_steps: Optional[int] = 6, - relaxation_factor: Optional[float] = 1.5, - decay_every: Optional[float] = 100, - decay_rate: Optional[float] = 0.95, - gradient_thresh: Optional[float] = 2000, - sparsity_loss_weight: Optional[float] = 1e-05, - batch_momentum: Optional[float] = 0.95, - batch_size_ratio: Optional[float] = 0.25, - num_transformer_layers: Optional[int] = 4, - num_transformer_layers_ratio: Optional[float] = 0.25, - class_weight: Optional[float] = 1.0, - loss_function_type: Optional[str] = 'default', - alpha_focal_loss: Optional[float] = 0.25, - gamma_focal_loss: Optional[float] = 2.0, - enable_profiler: Optional[bool] = False, - cache_data: Optional[str] = 'auto', - seed: Optional[int] = 1, - eval_steps: Optional[int] = 0, - batch_size: Optional[int] = 100, - measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', - optimization_metric: Optional[str] = '', - eval_frequency_secs: Optional[int] = 600, - training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, - training_disk_spec: Optional[dict] = { - 'boot_disk_type': 'pd-ssd', - 'boot_disk_size_gb': 100, - }, - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Trains a TabNet model using Vertex CustomJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". - weight_column: The weight column name. - max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the - trainer for. - learning_rate: The learning rate used by the linear optimizer. - large_category_dim: Embedding dimension for categorical - feature with large number of categories. 
- large_category_thresh: Threshold for number of categories - to apply large_category_dim embedding dimension to. - yeo_johnson_transform: Enables trainable Yeo-Johnson - power transform. - feature_dim: Dimensionality of the hidden representation - in feature transformation block. - feature_dim_ratio: The ratio of output dimension - (dimensionality of the outputs of each decision step) to feature - dimension. - num_decision_steps: Number of sequential decision steps. - relaxation_factor: Relaxation factor that promotes the - reuse of each feature at different decision steps. When it is 1, a - feature is enforced to be used only at one decision step and as it - increases, more flexibility is provided to use a feature at multiple - decision steps. - decay_every: Number of iterations for periodically - applying learning rate decaying. - decay_rate: Learning rate decaying. - gradient_thresh: Threshold for the norm of gradients for clipping. - sparsity_loss_weight: Weight of the loss for sparsity - regularization (increasing it will yield more sparse feature selection). - batch_momentum: Momentum in ghost batch normalization. - batch_size_ratio: The ratio of virtual batch size (size - of the ghost batch normalization) to batch size. - num_transformer_layers: The number of transformer layers - for each decision step. used only at one decision step and as it - increases, more flexibility is provided to use a feature at multiple - decision steps. - num_transformer_layers_ratio: The ratio of shared - transformer layer to transformer layers. - class_weight: The class weight is used to computes a - weighted cross entropy which is helpful in classify imbalanced dataset. - Only used for classification. - loss_function_type: Loss function type. Loss function in - classification [cross_entropy, weighted_cross_entropy, focal_loss], - default is cross_entropy. Loss function in regression: [rmse, mae, mse], - default is mse. - alpha_focal_loss: Alpha value (balancing factor) in - focal_loss function. Only used for classification. - gamma_focal_loss: Gamma value (modulating factor) for - focal loss for focal loss. Only used for classification. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - batch_size: Batch size for training. - measurement_selection_type: Which measurement to use - if/when the service automatically selects the final measurement from - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - or "LAST_MEASUREMENT". - optimization_metric: Optimization metric used for - `measurement_selection_type`. Default is "rmse" for regression and "auc" - for classification. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. - training_disk_spec: The training disk spec. - instance_baseline: The path to a JSON file for baseline values. - metadata: Amount of time in seconds to run the trainer for. - materialized_train_split: The path to the materialized train split. - materialized_eval_split: The path to the materialized validation split. 
- transform_output: The path to transform output. - training_schema_uri: The path to the training schema. - encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training job. - unmanaged_container_model: The UnmanagedContainerModel artifact. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "tabnet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', - '1', - '", "machine_spec": ', - training_machine_spec, - ', "disk_spec": ', - training_disk_spec, - ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', - '", "args": ["--target_column=', - target_column, - '", "--weight_column=', - weight_column, - '", "--model_type=', - prediction_type, - '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - '", "--baseline_path=', - instance_baseline.uri, - '", "--metadata_path=', - metadata.uri, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_schema_path=', - training_schema_uri.uri, - '", "--job_dir=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--training_data_path=' - ), - materialized_train_split.uri, - '", "--validation_data_path=', - materialized_eval_split.uri, - '", "--max_steps=', - max_steps, - '", "--max_train_secs=', - max_train_secs, - '", "--learning_rate=', - learning_rate, - '", "--large_category_dim=', - large_category_dim, - '", "--large_category_thresh=', - large_category_thresh, - '", "--yeo_johnson_transform=', - yeo_johnson_transform, - '", "--feature_dim=', - feature_dim, - '", "--feature_dim_ratio=', - feature_dim_ratio, - '", "--num_decision_steps=', - num_decision_steps, - '", "--relaxation_factor=', - relaxation_factor, - '", "--decay_every=', - decay_every, - '", "--decay_rate=', - decay_rate, - '", "--gradient_thresh=', - gradient_thresh, - '", "--sparsity_loss_weight=', - sparsity_loss_weight, - '", "--batch_momentum=', - batch_momentum, - '", "--batch_size_ratio=', - batch_size_ratio, - '", "--num_transformer_layers=', - num_transformer_layers, - '", "--num_transformer_layers_ratio=', - num_transformer_layers_ratio, - '", "--class_weight=', - class_weight, - '", "--loss_function_type=', - loss_function_type, - '", "--alpha_focal_loss=', - alpha_focal_loss, - '", "--gamma_focal_loss=', - gamma_focal_loss, - '", "--enable_profiler=', - enable_profiler, - '", "--cache_data=', - cache_data, - '", "--seed=', - seed, - '", "--eval_steps=', - eval_steps, - '", "--batch_size=', - batch_size, - '", "--measurement_selection_type=', - measurement_selection_type, - '", "--optimization_metric=', - optimization_metric, - '", "--eval_frequency_secs=', - eval_frequency_secs, - ( - '", "--generate_feature_importance=true",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml deleted file mode 100644 index 32f5b41c9e..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +++ /dev/null @@ -1,4302 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-tabnet-trainer -# Description: The TabNet training pipeline. -# Inputs: -# alpha_focal_loss: float [Default: 0.25] -# batch_momentum: float [Default: 0.95] -# batch_size: int [Default: 100.0] -# batch_size_ratio: float [Default: 0.25] -# bigquery_staging_full_dataset_id: str [Default: ''] -# cache_data: str [Default: 'auto'] -# class_weight: float [Default: 1.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# decay_every: float [Default: 100.0] -# decay_rate: float [Default: 0.95] -# enable_profiler: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# eval_frequency_secs: int [Default: 600.0] -# eval_steps: int [Default: 0.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_dim: int [Default: 64.0] -# feature_dim_ratio: float [Default: 0.5] -# feature_selection_algorithm: str [Default: 'AMI'] -# gamma_focal_loss: float [Default: 2.0] -# gradient_thresh: float [Default: 2000.0] -# large_category_dim: int [Default: 1.0] -# large_category_thresh: int [Default: 300.0] -# learning_rate: float -# location: str -# loss_function_type: str [Default: 'default'] -# materialized_examples_format: str [Default: 'tfrecords_gzip'] -# max_selected_features: int [Default: -1.0] -# max_steps: int [Default: -1.0] -# max_train_secs: int [Default: -1.0] -# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# num_decision_steps: int [Default: 6.0] -# num_transformer_layers: int [Default: 4.0] -# num_transformer_layers_ratio: float [Default: 0.25] -# optimization_metric: str [Default: ''] -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# relaxation_factor: float [Default: 1.5] -# root_dir: str -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# seed: int [Default: 1.0] -# sparsity_loss_weight: float [Default: 1e-05] -# stratified_split_key: str [Default: ''] -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: ''] -# tf_transformations_path: str [Default: ''] -# training_fraction: float [Default: -1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# 
transform_dataflow_max_num_workers: int [Default: 25.0] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# worker_pool_specs_override: list -# yeo_johnson_transform: bool [Default: True] -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--tabnet-trainer-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - 
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - model-evaluation - inputs: - artifacts: - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_path: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - dataset_type: - runtimeValue: - constant: bigquery - display_name: - runtimeValue: - constant: TabNet Trainer - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import - inputDefinitions: - artifacts: - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - pipelinechannel--tabnet-trainer-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - 
parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - tabnet-trainer - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: tabnet-trainer - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - model-upload - - tabnet-trainer - inputs: - artifacts: - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - pipelinechannel--tabnet-trainer-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: tabnet-trainer - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - 
componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: pipelinechannel--location - materialized_examples_format: - componentInputParameter: pipelinechannel--materialized_examples_format - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - model_type: - runtimeValue: - constant: neural_network - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transform_execution_engine: - componentInputParameter: pipelinechannel--tf_transform_execution_engine - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: 
feature-transform-engine - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-infra-validator - - tabnet-trainer - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: tabnet-trainer - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - parse-worker-pool-specs-override: - cachingOptions: - enableCache: true - componentRef: - name: comp-parse-worker-pool-specs-override - inputs: - parameters: - worker_pool_specs_override: - componentInputParameter: pipelinechannel--worker_pool_specs_override - taskInfo: - name: parse-worker-pool-specs-override - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - tabnet-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-tabnet-trainer - dependentTasks: - - feature-transform-engine - - parse-worker-pool-specs-override - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - alpha_focal_loss: - componentInputParameter: pipelinechannel--alpha_focal_loss - batch_momentum: - componentInputParameter: pipelinechannel--batch_momentum - batch_size: - componentInputParameter: pipelinechannel--batch_size - batch_size_ratio: - componentInputParameter: pipelinechannel--batch_size_ratio - cache_data: - componentInputParameter: pipelinechannel--cache_data - class_weight: - componentInputParameter: pipelinechannel--class_weight - decay_every: - componentInputParameter: pipelinechannel--decay_every - decay_rate: - componentInputParameter: pipelinechannel--decay_rate - enable_profiler: - componentInputParameter: pipelinechannel--enable_profiler - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - eval_frequency_secs: - componentInputParameter: pipelinechannel--eval_frequency_secs - eval_steps: - componentInputParameter: pipelinechannel--eval_steps - feature_dim: - componentInputParameter: pipelinechannel--feature_dim - feature_dim_ratio: - 
componentInputParameter: pipelinechannel--feature_dim_ratio - gamma_focal_loss: - componentInputParameter: pipelinechannel--gamma_focal_loss - gradient_thresh: - componentInputParameter: pipelinechannel--gradient_thresh - large_category_dim: - componentInputParameter: pipelinechannel--large_category_dim - large_category_thresh: - componentInputParameter: pipelinechannel--large_category_thresh - learning_rate: - componentInputParameter: pipelinechannel--learning_rate - location: - componentInputParameter: pipelinechannel--location - loss_function_type: - componentInputParameter: pipelinechannel--loss_function_type - max_steps: - componentInputParameter: pipelinechannel--max_steps - max_train_secs: - componentInputParameter: pipelinechannel--max_train_secs - measurement_selection_type: - componentInputParameter: pipelinechannel--measurement_selection_type - num_decision_steps: - componentInputParameter: pipelinechannel--num_decision_steps - num_transformer_layers: - componentInputParameter: pipelinechannel--num_transformer_layers - num_transformer_layers_ratio: - componentInputParameter: pipelinechannel--num_transformer_layers_ratio - optimization_metric: - componentInputParameter: pipelinechannel--optimization_metric - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - relaxation_factor: - componentInputParameter: pipelinechannel--relaxation_factor - root_dir: - componentInputParameter: pipelinechannel--root_dir - seed: - componentInputParameter: pipelinechannel--seed - sparsity_loss_weight: - componentInputParameter: pipelinechannel--sparsity_loss_weight - target_column: - componentInputParameter: pipelinechannel--target_column - training_disk_spec: - taskOutputParameter: - outputParameterKey: training_disk_spec - producerTask: parse-worker-pool-specs-override - training_machine_spec: - taskOutputParameter: - outputParameterKey: training_machine_spec - producerTask: parse-worker-pool-specs-override - weight_column: - componentInputParameter: pipelinechannel--weight_column - yeo_johnson_transform: - componentInputParameter: pipelinechannel--yeo_johnson_transform - taskInfo: - name: tabnet-trainer - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - inputDefinitions: - parameters: - pipelinechannel--alpha_focal_loss: - parameterType: NUMBER_DOUBLE - pipelinechannel--batch_momentum: - parameterType: NUMBER_DOUBLE - pipelinechannel--batch_size: - parameterType: NUMBER_INTEGER - 
pipelinechannel--batch_size_ratio: - parameterType: NUMBER_DOUBLE - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--cache_data: - parameterType: STRING - pipelinechannel--class_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--decay_every: - parameterType: NUMBER_DOUBLE - pipelinechannel--decay_rate: - parameterType: NUMBER_DOUBLE - pipelinechannel--enable_profiler: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eval_frequency_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--eval_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_dim: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_dim_ratio: - parameterType: NUMBER_DOUBLE - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--gamma_focal_loss: - parameterType: NUMBER_DOUBLE - pipelinechannel--gradient_thresh: - parameterType: NUMBER_DOUBLE - pipelinechannel--large_category_dim: - parameterType: NUMBER_INTEGER - pipelinechannel--large_category_thresh: - parameterType: NUMBER_INTEGER - pipelinechannel--learning_rate: - parameterType: NUMBER_DOUBLE - pipelinechannel--location: - parameterType: STRING - pipelinechannel--loss_function_type: - parameterType: STRING - pipelinechannel--materialized_examples_format: - parameterType: STRING - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--max_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--max_train_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--measurement_selection_type: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--num_decision_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--num_transformer_layers: - parameterType: NUMBER_INTEGER - pipelinechannel--num_transformer_layers_ratio: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_metric: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--relaxation_factor: - parameterType: NUMBER_DOUBLE - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - 
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--sparsity_loss_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transform_execution_engine: - parameterType: STRING - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--worker_pool_specs_override: - parameterType: LIST - pipelinechannel--yeo_johnson_transform: - parameterType: BOOLEAN - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' 
- isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. 
Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. 
- isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. 
- parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
- isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. 
If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. 
The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. 
If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' 
- question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
- isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-parse-worker-pool-specs-override: - executorLabel: exec-parse-worker-pool-specs-override - inputDefinitions: - parameters: - worker_pool_specs_override: - description: 'The list of dictionaries for overriding training - - and evaluation worker pool specs.' - parameterType: LIST - outputDefinitions: - parameters: - eval_machine_spec: - description: The eval machine spec. - parameterType: STRUCT - eval_replica_count: - description: The replica count for eval. - parameterType: NUMBER_INTEGER - training_disk_spec: - description: The training disk spec. - parameterType: STRUCT - training_machine_spec: - description: The training machine spec. - parameterType: STRUCT - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' 
- outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-tabnet-trainer: - executorLabel: exec-tabnet-trainer - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to a JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized validation split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Amount of time in seconds to run the trainer for. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to transform output. - parameters: - alpha_focal_loss: - defaultValue: 0.25 - description: 'Alpha value (balancing factor) in - - focal_loss function. Only used for classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - batch_momentum: - defaultValue: 0.95 - description: Momentum in ghost batch normalization. - isOptional: true - parameterType: NUMBER_DOUBLE - batch_size: - defaultValue: 100.0 - description: Batch size for training. - isOptional: true - parameterType: NUMBER_INTEGER - batch_size_ratio: - defaultValue: 0.25 - description: 'The ratio of virtual batch size (size - - of the ghost batch normalization) to batch size.' - isOptional: true - parameterType: NUMBER_DOUBLE - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to - - ''auto'', caching is determined based on the dataset size.' - isOptional: true - parameterType: STRING - class_weight: - defaultValue: 1.0 - description: 'The class weight is used to computes a - - weighted cross entropy which is helpful in classify imbalanced dataset. - - Only used for classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - decay_every: - defaultValue: 100.0 - description: 'Number of iterations for periodically - - applying learning rate decaying.' - isOptional: true - parameterType: NUMBER_DOUBLE - decay_rate: - defaultValue: 0.95 - description: Learning rate decaying. - isOptional: true - parameterType: NUMBER_DOUBLE - enable_profiler: - defaultValue: false - description: 'Enables profiling and saves a trace - - during evaluation.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and - - checkpointing will take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. 
If not - - specified or negative, it means run evaluation on the whole validation - - dataset. If set to 0, it means run evaluation for a fixed number of - - samples.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_dim: - defaultValue: 64.0 - description: 'Dimensionality of the hidden representation - - in feature transformation block.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_dim_ratio: - defaultValue: 0.5 - description: 'The ratio of output dimension - - (dimensionality of the outputs of each decision step) to feature - - dimension.' - isOptional: true - parameterType: NUMBER_DOUBLE - gamma_focal_loss: - defaultValue: 2.0 - description: 'Gamma value (modulating factor) for - - focal loss for focal loss. Only used for classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - gradient_thresh: - defaultValue: 2000.0 - description: Threshold for the norm of gradients for clipping. - isOptional: true - parameterType: NUMBER_DOUBLE - large_category_dim: - defaultValue: 1.0 - description: 'Embedding dimension for categorical - - feature with large number of categories.' - isOptional: true - parameterType: NUMBER_INTEGER - large_category_thresh: - defaultValue: 300.0 - description: 'Threshold for number of categories - - to apply large_category_dim embedding dimension to.' - isOptional: true - parameterType: NUMBER_INTEGER - learning_rate: - description: The learning rate used by the linear optimizer. - parameterType: NUMBER_DOUBLE - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - loss_function_type: - defaultValue: default - description: 'Loss function type. Loss function in - - classification [cross_entropy, weighted_cross_entropy, focal_loss], - - default is cross_entropy. Loss function in regression: [rmse, mae, mse], - - default is mse.' - isOptional: true - parameterType: STRING - max_steps: - defaultValue: -1.0 - description: Number of steps to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - max_train_secs: - defaultValue: -1.0 - description: 'Amount of time in seconds to run the - - trainer for.' - isOptional: true - parameterType: NUMBER_INTEGER - measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement to use - - if/when the service automatically selects the final measurement from - - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - - or "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - num_decision_steps: - defaultValue: 6.0 - description: Number of sequential decision steps. - isOptional: true - parameterType: NUMBER_INTEGER - num_transformer_layers: - defaultValue: 4.0 - description: 'The number of transformer layers - - for each decision step. used only at one decision step and as it - - increases, more flexibility is provided to use a feature at multiple - - decision steps.' - isOptional: true - parameterType: NUMBER_INTEGER - num_transformer_layers_ratio: - defaultValue: 0.25 - description: 'The ratio of shared - - transformer layer to transformer layers.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_metric: - defaultValue: '' - description: 'Optimization metric used for - - `measurement_selection_type`. Default is "rmse" for regression and "auc" - - for classification.' - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to - - produce. "classification" or "regression".' 
- parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - relaxation_factor: - defaultValue: 1.5 - description: 'Relaxation factor that promotes the - - reuse of each feature at different decision steps. When it is 1, a - - feature is enforced to be used only at one decision step and as it - - increases, more flexibility is provided to use a feature at multiple - - decision steps.' - isOptional: true - parameterType: NUMBER_DOUBLE - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - sparsity_loss_weight: - defaultValue: 1.0e-05 - description: 'Weight of the loss for sparsity - - regularization (increasing it will yield more sparse feature selection).' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - description: The target column name. - parameterType: STRING - training_disk_spec: - defaultValue: - boot_disk_size_gb: 100.0 - boot_disk_type: pd-ssd - description: The training disk spec. - isOptional: true - parameterType: STRUCT - training_machine_spec: - defaultValue: - machine_type: c2-standard-16 - description: 'The training machine - - spec. See https://cloud.google.com/compute/docs/machine-types for - - options.' - isOptional: true - parameterType: STRUCT - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - yeo_johnson_transform: - defaultValue: true - description: 'Enables trainable Yeo-Johnson - - power transform.' - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: The UnmanagedContainerModel artifact. - parameters: - gcp_resources: - description: Serialized gcp_resources proto tracking the custom training - job. - parameterType: STRING - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' 
- isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' 
- isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - 
instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-parse-worker-pool-specs-override: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _parse_worker_pool_specs_override - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ - \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ - \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ - \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ - \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ - \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ - \ The list of dictionaries for overriding training\n and evaluation\ - \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ - \ machine spec.\n training_disk_spec: The training disk spec.\n \ - \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ - \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ - \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ - \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ - \ 
worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ - \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ - \ training_machine_spec = worker_pool_specs_override[0].get(\n \ - \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ - \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ - \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ - \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ - \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ - \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ - \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'training_machine_spec',\n \ - \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ - \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ - \ eval_machine_spec,\n eval_replica_count,\n )\n\n" - image: python:3.7 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = 
dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-tabnet-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"tabnet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", - "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", - ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", - "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", - "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", - "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", - "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", - "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--large_category_dim=", - "{{$.inputs.parameters[''large_category_dim'']}}", "\", \"--large_category_thresh=", - "{{$.inputs.parameters[''large_category_thresh'']}}", "\", \"--yeo_johnson_transform=", - "{{$.inputs.parameters[''yeo_johnson_transform'']}}", "\", \"--feature_dim=", - "{{$.inputs.parameters[''feature_dim'']}}", "\", \"--feature_dim_ratio=", - "{{$.inputs.parameters[''feature_dim_ratio'']}}", "\", \"--num_decision_steps=", - "{{$.inputs.parameters[''num_decision_steps'']}}", "\", \"--relaxation_factor=", - "{{$.inputs.parameters[''relaxation_factor'']}}", "\", \"--decay_every=", - "{{$.inputs.parameters[''decay_every'']}}", "\", \"--decay_rate=", "{{$.inputs.parameters[''decay_rate'']}}", - "\", \"--gradient_thresh=", "{{$.inputs.parameters[''gradient_thresh'']}}", - "\", \"--sparsity_loss_weight=", "{{$.inputs.parameters[''sparsity_loss_weight'']}}", - "\", \"--batch_momentum=", "{{$.inputs.parameters[''batch_momentum'']}}", - "\", \"--batch_size_ratio=", "{{$.inputs.parameters[''batch_size_ratio'']}}", - "\", \"--num_transformer_layers=", "{{$.inputs.parameters[''num_transformer_layers'']}}", - "\", \"--num_transformer_layers_ratio=", "{{$.inputs.parameters[''num_transformer_layers_ratio'']}}", - "\", \"--class_weight=", "{{$.inputs.parameters[''class_weight'']}}", "\", - \"--loss_function_type=", "{{$.inputs.parameters[''loss_function_type'']}}", - "\", \"--alpha_focal_loss=", 
"{{$.inputs.parameters[''alpha_focal_loss'']}}", - "\", \"--gamma_focal_loss=", "{{$.inputs.parameters[''gamma_focal_loss'']}}", - "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", - "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", - "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", - "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", - "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", - "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", - "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\", - \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", 
"{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 -pipelineInfo: - description: The TabNet training pipeline. - name: automl-tabular-tabnet-trainer -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--alpha_focal_loss: - componentInputParameter: alpha_focal_loss - pipelinechannel--batch_momentum: - componentInputParameter: batch_momentum - pipelinechannel--batch_size: - componentInputParameter: batch_size - pipelinechannel--batch_size_ratio: - componentInputParameter: batch_size_ratio - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--cache_data: - componentInputParameter: cache_data - pipelinechannel--class_weight: - componentInputParameter: class_weight - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--decay_every: - componentInputParameter: decay_every - pipelinechannel--decay_rate: - componentInputParameter: decay_rate - pipelinechannel--enable_profiler: - componentInputParameter: enable_profiler - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eval_frequency_secs: - 
componentInputParameter: eval_frequency_secs - pipelinechannel--eval_steps: - componentInputParameter: eval_steps - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_dim: - componentInputParameter: feature_dim - pipelinechannel--feature_dim_ratio: - componentInputParameter: feature_dim_ratio - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--gamma_focal_loss: - componentInputParameter: gamma_focal_loss - pipelinechannel--gradient_thresh: - componentInputParameter: gradient_thresh - pipelinechannel--large_category_dim: - componentInputParameter: large_category_dim - pipelinechannel--large_category_thresh: - componentInputParameter: large_category_thresh - pipelinechannel--learning_rate: - componentInputParameter: learning_rate - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--loss_function_type: - componentInputParameter: loss_function_type - pipelinechannel--materialized_examples_format: - componentInputParameter: materialized_examples_format - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--max_steps: - componentInputParameter: max_steps - pipelinechannel--max_train_secs: - componentInputParameter: max_train_secs - pipelinechannel--measurement_selection_type: - componentInputParameter: measurement_selection_type - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--num_decision_steps: - componentInputParameter: num_decision_steps - pipelinechannel--num_transformer_layers: - componentInputParameter: num_transformer_layers - pipelinechannel--num_transformer_layers_ratio: - componentInputParameter: num_transformer_layers_ratio - pipelinechannel--optimization_metric: - componentInputParameter: optimization_metric - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--relaxation_factor: - componentInputParameter: relaxation_factor - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: 
- taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--sparsity_loss_weight: - componentInputParameter: sparsity_loss_weight - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transform_execution_engine: - componentInputParameter: tf_transform_execution_engine - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--worker_pool_specs_override: - componentInputParameter: worker_pool_specs_override - pipelinechannel--yeo_johnson_transform: - componentInputParameter: yeo_johnson_transform - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - alpha_focal_loss: - defaultValue: 0.25 - description: 'Alpha value (balancing factor) in focal_loss function. - - Only used for classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - batch_momentum: - defaultValue: 0.95 - description: Momentum in ghost batch normalization. - isOptional: true - parameterType: NUMBER_DOUBLE - batch_size: - defaultValue: 100.0 - description: Batch size for training. - isOptional: true - parameterType: NUMBER_INTEGER - batch_size_ratio: - defaultValue: 0.25 - description: 'The ratio of virtual batch size (size of the ghost batch - - normalization) to batch size.' - isOptional: true - parameterType: NUMBER_DOUBLE - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Staging directory for BigQuery tables. - isOptional: true - parameterType: STRING - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. 
If set to ''auto'', caching is - - determined based on the dataset size.' - isOptional: true - parameterType: STRING - class_weight: - defaultValue: 1.0 - description: 'The class weight is used to computes a weighted cross entropy - - which is helpful in classify imbalanced dataset. Only used for - - classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - decay_every: - defaultValue: 100.0 - description: 'Number of iterations for periodically applying learning rate - - decaying.' - isOptional: true - parameterType: NUMBER_DOUBLE - decay_rate: - defaultValue: 0.95 - description: Learning rate decaying. - isOptional: true - parameterType: NUMBER_DOUBLE - enable_profiler: - defaultValue: false - description: Enables profiling and saves a trace during evaluation. - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and checkpointing will - - take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not specified or - - negative, it means run evaluation on the whole validation dataset. If set - - to 0, it means run evaluation for a fixed number of samples.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' 
- isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_dim: - defaultValue: 64.0 - description: 'Dimensionality of the hidden representation in feature - - transformation block.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_dim_ratio: - defaultValue: 0.5 - description: 'The ratio of output dimension (dimensionality of the - - outputs of each decision step) to feature dimension.' - isOptional: true - parameterType: NUMBER_DOUBLE - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - gamma_focal_loss: - defaultValue: 2.0 - description: 'Gamma value (modulating factor) for focal loss for focal - - loss. Only used for classification.' - isOptional: true - parameterType: NUMBER_DOUBLE - gradient_thresh: - defaultValue: 2000.0 - description: Threshold for the norm of gradients for clipping. - isOptional: true - parameterType: NUMBER_DOUBLE - large_category_dim: - defaultValue: 1.0 - description: 'Embedding dimension for categorical feature with large - - number of categories.' - isOptional: true - parameterType: NUMBER_INTEGER - large_category_thresh: - defaultValue: 300.0 - description: 'Threshold for number of categories to apply - - large_category_dim embedding dimension to.' - isOptional: true - parameterType: NUMBER_INTEGER - learning_rate: - description: The learning rate used by the linear optimizer. - parameterType: NUMBER_DOUBLE - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - loss_function_type: - defaultValue: default - description: 'Loss function type. Loss function in classification - - [cross_entropy, weighted_cross_entropy, focal_loss], default is - - cross_entropy. Loss function in regression: [rmse, mae, mse], default is - - mse.' - isOptional: true - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format for the materialized examples. - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: -1.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - max_steps: - defaultValue: -1.0 - description: Number of steps to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - max_train_secs: - defaultValue: -1.0 - description: Amount of time in seconds to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement to use if/when the service - - automatically selects the final measurement from previously reported - - intermediate measurements. One of "BEST_MEASUREMENT" or - - "LAST_MEASUREMENT".' 
- isOptional: true - parameterType: STRING - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - num_decision_steps: - defaultValue: 6.0 - description: Number of sequential decision steps. - isOptional: true - parameterType: NUMBER_INTEGER - num_transformer_layers: - defaultValue: 4.0 - description: 'The number of transformer layers for each decision - - step. used only at one decision step and as it increases, more flexibility - - is provided to use a feature at multiple decision steps.' - isOptional: true - parameterType: NUMBER_INTEGER - num_transformer_layers_ratio: - defaultValue: 0.25 - description: 'The ratio of shared transformer layer to - - transformer layers.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_metric: - defaultValue: '' - description: 'Optimization metric used for - - `measurement_selection_type`. Default is "rmse" for regression and "auc" - - for classification.' - isOptional: true - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - relaxation_factor: - defaultValue: 1.5 - description: 'Relaxation factor that promotes the reuse of each feature - - at different decision steps. When it is 1, a feature is enforced to be - - used only at one decision step and as it increases, more flexibility is - - provided to use a feature at multiple decision steps.' - isOptional: true - parameterType: NUMBER_DOUBLE - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to enable feature selection. - isOptional: true - parameterType: BOOLEAN - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - sparsity_loss_weight: - defaultValue: 1.0e-05 - description: 'Weight of the loss for sparsity regularization - - (increasing it will yield more sparse feature selection).' - isOptional: true - parameterType: NUMBER_DOUBLE - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: List of auto transform features. - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: '' - description: 'Execution engine to run TF-based - - transformations. 
Currently supports "dataflow" or "bigquery"' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - worker_pool_specs_override: - description: 'The dictionary for overriding training and - - evaluation worker pool specs. The dictionary should be of format - - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' - isOptional: true - parameterType: LIST - yeo_johnson_transform: - defaultValue: true - description: Enables trainable Yeo-Johnson power transform. - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py deleted file mode 100644 index 096c5e378c..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py +++ /dev/null @@ -1,3360 +0,0 @@ -"""Util functions for AutoML Tabular pipeline.""" - -import json -import os -import pathlib -from typing import Any, Dict, List, Optional, Tuple, Union -import uuid -import warnings - -_DEFAULT_NUM_PARALLEL_TRAILS = 35 -_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 -_NUM_FOLDS = 5 -_DISTILL_TOTAL_TRIALS = 100 -_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' -_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 -_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 -_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' -_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 -_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 -_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' -_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 -_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 -_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 - -# Needed because we reference the AutoML Tabular V1 pipeline. 
-_GCPC_STAGING_PATH = pathlib.Path( - __file__ -).parent.parent.parent.parent.resolve() -_GCPC_GA_TABULAR_PATH = str(_GCPC_STAGING_PATH / 'v1' / 'automl' / 'tabular') - - -def _update_parameters( - parameter_values: Dict[str, Any], new_params: Dict[str, Any] -): - parameter_values.update( - {param: value for param, value in new_params.items() if value is not None} - ) - - -def _generate_model_display_name() -> str: - """Automatically generates a model_display_name. - - Returns: - model_display_name. - """ - return f'tabular-workflow-model-{uuid.uuid4()}' - - -# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag -# to signify FTE usage instead of the presence of num_selected_features. -def _get_default_pipeline_params( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: Optional[int] = None, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[float] = None, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - max_selected_features: Optional[int] = None, - apply_feature_selection_tuning: bool = False, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - run_distillation: bool = False, - distill_batch_predict_machine_type: Optional[str] = None, - distill_batch_predict_starting_replica_count: Optional[int] = None, - distill_batch_predict_max_replica_count: 
Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - quantiles: Optional[List[float]] = None, - enable_probabilistic_inference: bool = False, - num_selected_features: Optional[int] = None, - model_display_name: str = '', - model_description: str = '', -) -> Dict[str, Any]: - """Get the AutoML Tabular v1 default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The path to a GCS file containing the transformations to - apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - study_spec_parameters_override: The list for overriding study spec. The list - should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. 
- transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - max_selected_features: number of features to select for training, - apply_feature_selection_tuning: tuning feature selection rate if true. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. At inference time, the predictive distribution is used to make - a point prediction that minimizes the optimization objective. For example, - the mean of a predictive distribution is the point prediction that - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - distribution are also returned. 
- num_selected_features: Number of selected features for feature selection, - defaults to None, in which case all features are used. If specified, - enable_probabilistic_inference and run_distillation cannot be enabled. - model_display_name: The display name of the uploaded Vertex model. - model_description: The description for the uploaded model. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if not study_spec_parameters_override: - study_spec_parameters_override = [] - if not stage_1_tuner_worker_pool_specs_override: - stage_1_tuner_worker_pool_specs_override = [] - if not cv_trainer_worker_pool_specs_override: - cv_trainer_worker_pool_specs_override = [] - if not quantiles: - quantiles = [] - - parameter_values = {} - parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'optimization_objective': optimization_objective, - 'train_budget_milli_node_hours': train_budget_milli_node_hours, - 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, - 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, - 'stage_2_num_selected_trials': stage_2_num_selected_trials, - 'weight_column': weight_column, - 'optimization_objective_recall_value': ( - optimization_objective_recall_value - ), - 'optimization_objective_precision_value': ( - optimization_objective_precision_value - ), - 'study_spec_parameters_override': study_spec_parameters_override, - 'stage_1_tuner_worker_pool_specs_override': ( - stage_1_tuner_worker_pool_specs_override - ), - 'cv_trainer_worker_pool_specs_override': ( - cv_trainer_worker_pool_specs_override - ), - 'export_additional_model_without_custom_ops': ( - export_additional_model_without_custom_ops - ), - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'dataflow_service_account': dataflow_service_account, - 'encryption_spec_key_name': encryption_spec_key_name, - 'max_selected_features': max_selected_features, - 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, - 'quantiles': quantiles, - 'enable_probabilistic_inference': enable_probabilistic_inference, - 'model_display_name': model_display_name, - 'model_description': model_description, - } - parameter_values.update( - {param: value for param, value in parameters.items() if value is not None} - ) - - if run_evaluation: - eval_parameters = { - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_batch_explain_machine_type': ( - evaluation_batch_explain_machine_type - ), - 'evaluation_batch_explain_starting_replica_count': ( - evaluation_batch_explain_starting_replica_count - ), - 'evaluation_batch_explain_max_replica_count': ( - evaluation_batch_explain_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 
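The update calls above copy only entries whose value is not None, so optional arguments that were never set do not reach parameter_values and the defaults compiled into the pipeline YAML stay in effect. A self-contained illustration of that filtering pattern, with made-up values:

parameter_values = {}
parameters = {
    'project': 'example-project',            # explicitly set by the caller
    'weight_column': None,                   # left at its None default
    'train_budget_milli_node_hours': 1000.0,
}
parameter_values.update(
    {param: value for param, value in parameters.items() if value is not None}
)
print(parameter_values)
# {'project': 'example-project', 'train_budget_milli_node_hours': 1000.0}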
'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'run_evaluation': run_evaluation, - } - parameter_values.update( - { - param: value - for param, value in eval_parameters.items() - if value is not None - } - ) - - # V1 pipeline without FTE - if num_selected_features is None: - if not additional_experiments: - additional_experiments = {} - - parameters = { - 'transformations': transformations, - 'stats_and_example_gen_dataflow_machine_type': ( - stats_and_example_gen_dataflow_machine_type - ), - 'stats_and_example_gen_dataflow_max_num_workers': ( - stats_and_example_gen_dataflow_max_num_workers - ), - 'stats_and_example_gen_dataflow_disk_size_gb': ( - stats_and_example_gen_dataflow_disk_size_gb - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': ( - transform_dataflow_max_num_workers - ), - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'additional_experiments': additional_experiments, - } - parameter_values.update( - { - param: value - for param, value in parameters.items() - if value is not None - } - ) - - if apply_feature_selection_tuning: - parameter_values.update({ - 'apply_feature_selection_tuning': apply_feature_selection_tuning, - }) - - if run_distillation: - distillation_parameters = { - 'distill_batch_predict_machine_type': ( - distill_batch_predict_machine_type - ), - 'distill_batch_predict_starting_replica_count': ( - distill_batch_predict_starting_replica_count - ), - 'distill_batch_predict_max_replica_count': ( - distill_batch_predict_max_replica_count - ), - 'run_distillation': run_distillation, - } - parameter_values.update( - { - param: value - for param, value in distillation_parameters.items() - if value is not None - } - ) - - # V2 pipeline (with FTE) - else: - if run_distillation: - raise ValueError( - 'Distillation is currently not supported' - ' when num_selected_features is specified.' 
- ) - - parameters = { - 'num_selected_features': num_selected_features, - 'dataset_level_custom_transformation_definitions': [], - 'dataset_level_transformations': [], - 'tf_auto_transform_features': {}, - 'tf_custom_transformation_definitions': [], - 'legacy_transformations_path': transformations, - 'feature_transform_engine_dataflow_machine_type': ( - transform_dataflow_machine_type - ), - 'feature_transform_engine_dataflow_max_num_workers': ( - transform_dataflow_max_num_workers - ), - 'feature_transform_engine_dataflow_disk_size_gb': ( - transform_dataflow_disk_size_gb - ), - } - parameter_values.update( - { - param: value - for param, value in parameters.items() - if value is not None - } - ) - - return parameter_values - - -def get_automl_tabular_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: Optional[int] = None, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - run_distillation: bool = False, - distill_batch_predict_machine_type: Optional[str] = None, - distill_batch_predict_starting_replica_count: Optional[int] = None, - distill_batch_predict_max_replica_count: Optional[int] = None, - stage_1_tuning_result_artifact_uri: 
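num_selected_features is the switch between the two branches shown above: when it is None the classic (non-FTE) parameter set is assembled, and when it is set the Feature Transform Engine parameters are assembled and distillation is rejected. The sketch below condenses that decision into a stand-in function for illustration; it does not reproduce the real parameter assembly.

from typing import Optional


def _select_variant(num_selected_features: Optional[int],
                    run_distillation: bool) -> str:
    # Condensed stand-in for the branching in _get_default_pipeline_params.
    if num_selected_features is None:
        return 'v1-without-fte'
    if run_distillation:
        raise ValueError(
            'Distillation is currently not supported'
            ' when num_selected_features is specified.'
        )
    return 'v2-with-fte'


print(_select_variant(None, run_distillation=True))    # v1-without-fte
print(_select_variant(100, run_distillation=False))    # v2-with-fte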
Optional[str] = None, - quantiles: Optional[List[float]] = None, - enable_probabilistic_inference: bool = False, - num_selected_features: Optional[int] = None, - model_display_name: str = '', - model_description: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular v1 default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The path to a GCS file containing the transformations to - apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - study_spec_parameters_override: The list for overriding study spec. The list - should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. 
- transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. At inference time, the predictive distribution is used to make - a point prediction that minimizes the optimization objective. For example, - the mean of a predictive distribution is the point prediction that - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - distribution are also returned. - num_selected_features: Number of selected features for feature selection, - defaults to None, in which case all features are used. - model_display_name: The display name of the uploaded Vertex model. 
- model_description: The description for the uploaded model. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = _get_default_pipeline_params( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - study_spec_parameters_override=study_spec_parameters_override, - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - dataflow_service_account=dataflow_service_account, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, - evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, - evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - run_distillation=run_distillation, - distill_batch_predict_machine_type=distill_batch_predict_machine_type, - distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, - distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - quantiles=quantiles, - 
enable_probabilistic_inference=enable_probabilistic_inference, - num_selected_features=num_selected_features, - model_display_name=model_display_name, - model_description=model_description, - ) - - # V1 pipeline without FTE - if num_selected_features is None: - pipeline_definition_path = os.path.join( - _GCPC_GA_TABULAR_PATH, 'automl_tabular_pipeline.yaml' - ) - - # V2 pipeline with FTE - else: - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'automl_tabular_v2_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_automl_tabular_feature_selection_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: Optional[int] = None, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - max_selected_features: int = 1000, - apply_feature_selection_tuning: bool = False, - run_distillation: bool = False, - distill_batch_predict_machine_type: Optional[str] = None, - distill_batch_predict_starting_replica_count: Optional[int] = None, - distill_batch_predict_max_replica_count: Optional[int] = None, - model_display_name: str = '', - model_description: str = '', -) -> Tuple[str, Dict[str, Any]]: - 
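The returned pair from get_automl_tabular_pipeline_and_parameters is typically handed straight to a Vertex AI pipeline run. The sketch below assumes the helper is imported from google_cloud_pipeline_components.v1.automl.tabular.utils (the usual location in the GCPC package) and uses placeholder project, region, and GCS paths; it illustrates the calling pattern only.

from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.automl.tabular import utils  # assumed path

template_path, parameter_values = utils.get_automl_tabular_pipeline_and_parameters(
    project='example-project',                       # placeholder
    location='us-central1',
    root_dir='gs://example-bucket/pipeline_root',    # placeholder bucket
    target_column='tips',
    prediction_type='regression',
    optimization_objective='minimize-rmse',
    transformations='gs://example-bucket/transform_config.json',
    train_budget_milli_node_hours=1000,
    data_source_csv_filenames='gs://example-bucket/train.csv',
)

aiplatform.init(project='example-project', location='us-central1')
aiplatform.PipelineJob(
    display_name='automl-tabular-example',
    template_path=template_path,
    parameter_values=parameter_values,
    pipeline_root='gs://example-bucket/pipeline_root',
).run()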
"""Get the AutoML Tabular v1 default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The path to a GCS file containing the transformations to - apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - study_spec_parameters_override: The list for overriding study spec. The list - should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. 
- dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - max_selected_features: number of features to select for training, - apply_feature_selection_tuning: tuning feature selection rate if true. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - model_display_name: The display name of the uploaded Vertex model. - model_description: The description for the uploaded model. - - Returns: - Tuple of pipeline_definition_path and parameter_values. 
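A call sketch for the feature-selection variant, highlighting the two selection knobs documented above; every value is a placeholder and the import path is assumed to match the GCPC package layout.

from google_cloud_pipeline_components.v1.automl.tabular import utils  # assumed path

template_path, parameter_values = (
    utils.get_automl_tabular_feature_selection_pipeline_and_parameters(
        project='example-project',
        location='us-central1',
        root_dir='gs://example-bucket/pipeline_root',
        target_column='tips',
        prediction_type='regression',
        optimization_objective='minimize-rmse',
        transformations='gs://example-bucket/transform_config.json',
        train_budget_milli_node_hours=1000,
        data_source_csv_filenames='gs://example-bucket/train.csv',
        max_selected_features=500,            # keep the 500 strongest features
        apply_feature_selection_tuning=True,  # also tune the selection rate
    )
)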
- """ - model_display_name = ( - model_display_name - if model_display_name - else _generate_model_display_name() - ) - - parameter_values = _get_default_pipeline_params( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - study_spec_parameters_override=study_spec_parameters_override, - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - dataflow_service_account=dataflow_service_account, - max_selected_features=max_selected_features, - apply_feature_selection_tuning=apply_feature_selection_tuning, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, - evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, - evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - run_distillation=run_distillation, - distill_batch_predict_machine_type=distill_batch_predict_machine_type, - distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, - distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, - model_display_name=model_display_name, - 
-      model_description=model_description,
-  )
-
-  pipeline_definition_path = os.path.join(
-      pathlib.Path(__file__).parent.resolve(),
-      'automl_tabular_feature_selection_pipeline.yaml',
-  )
-  return pipeline_definition_path, parameter_values
-
-
-def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str:
-  """Convert json input dict to encoded parameter string.
-
-  This function is required due to the limitation on YAML component definition
-  that YAML definition does not have a keyword for apply quote escape, so the
-  JSON argument's quote must be manually escaped using this function.
-
-  Args:
-    input_dict: The input json dictionary.
-
-  Returns:
-    The encoded string used for parameter.
-  """
-  if not input_dict:
-    return ''
-  out = json.dumps(json.dumps(input_dict))
-  return out[1:-1]  # remove the outside quotes, e.g., "foo" -> foo
-
-
-def get_skip_architecture_search_pipeline_and_parameters(
-    project: str,
-    location: str,
-    root_dir: str,
-    target_column: str,
-    prediction_type: str,
-    optimization_objective: str,
-    transformations: str,
-    train_budget_milli_node_hours: float,
-    stage_1_tuning_result_artifact_uri: str,
-    stage_2_num_parallel_trials: Optional[int] = None,
-    stage_2_num_selected_trials: Optional[int] = None,
-    data_source_csv_filenames: Optional[str] = None,
-    data_source_bigquery_table_path: Optional[str] = None,
-    predefined_split_key: Optional[str] = None,
-    timestamp_split_key: Optional[str] = None,
-    stratified_split_key: Optional[str] = None,
-    training_fraction: Optional[float] = None,
-    validation_fraction: Optional[float] = None,
-    test_fraction: Optional[float] = None,
-    weight_column: Optional[str] = None,
-    optimization_objective_recall_value: Optional[float] = None,
-    optimization_objective_precision_value: Optional[float] = None,
-    cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None,
-    export_additional_model_without_custom_ops: bool = False,
-    stats_and_example_gen_dataflow_machine_type: Optional[str] = None,
-    stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None,
-    stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None,
-    transform_dataflow_machine_type: Optional[str] = None,
-    transform_dataflow_max_num_workers: Optional[int] = None,
-    transform_dataflow_disk_size_gb: Optional[int] = None,
-    dataflow_subnetwork: Optional[str] = None,
-    dataflow_use_public_ips: bool = True,
-    encryption_spec_key_name: Optional[str] = None,
-    additional_experiments: Optional[Dict[str, Any]] = None,
-    dataflow_service_account: Optional[str] = None,
-    run_evaluation: bool = True,
-    evaluation_batch_predict_machine_type: Optional[str] = None,
-    evaluation_batch_predict_starting_replica_count: Optional[int] = None,
-    evaluation_batch_predict_max_replica_count: Optional[int] = None,
-    evaluation_batch_explain_machine_type: Optional[str] = None,
-    evaluation_batch_explain_starting_replica_count: Optional[int] = None,
-    evaluation_batch_explain_max_replica_count: Optional[int] = None,
-    evaluation_dataflow_machine_type: Optional[str] = None,
-    evaluation_dataflow_starting_num_workers: Optional[int] = None,
-    evaluation_dataflow_max_num_workers: Optional[int] = None,
-    evaluation_dataflow_disk_size_gb: Optional[int] = None,
-) -> Tuple[str, Dict[str, Any]]:
-  """Get the AutoML Tabular training pipeline that skips architecture search.
-
-  Args:
-    project: The GCP project that runs the pipeline components.
-    location: The GCP region that runs the pipeline components.
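Because a YAML component definition has no way to apply quote escaping itself, input_dictionary_to_parameter encodes the dictionary twice and strips the outer quotes. A short demonstration of the resulting string; the helper body is repeated from the code above so the snippet runs on its own.

import json
from typing import Any, Dict, Optional


def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str:
    # Same logic as the helper shown above, repeated so this snippet is standalone.
    if not input_dict:
        return ''
    out = json.dumps(json.dumps(input_dict))
    return out[1:-1]  # remove the outside quotes, e.g., "foo" -> foo


print(input_dictionary_to_parameter({'model_type': 'large'}))
# prints: {\"model_type\": \"large\"}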
- root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The transformations to apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. 
- evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - - return get_automl_tabular_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=None, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - study_spec_parameters_override=[], - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override={}, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - dataflow_service_account=dataflow_service_account, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - 
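Skipping architecture search presupposes a stage 1 tuning result produced by an earlier run; the wrapper simply forwards that artifact URI, with empty overrides and distillation disabled, to the main builder, as the delegation that follows shows. A hedged call sketch with placeholder values and an assumed import path:

from google_cloud_pipeline_components.v1.automl.tabular import utils  # assumed path

template_path, parameter_values = (
    utils.get_skip_architecture_search_pipeline_and_parameters(
        project='example-project',
        location='us-central1',
        root_dir='gs://example-bucket/pipeline_root',
        target_column='tips',
        prediction_type='regression',
        optimization_objective='minimize-rmse',
        transformations='gs://example-bucket/transform_config.json',
        train_budget_milli_node_hours=1000,
        # Artifact written by the stage 1 tuner of a previous run (placeholder URI).
        stage_1_tuning_result_artifact_uri=(
            'gs://example-bucket/previous_run/stage_1_tuning_result'
        ),
        data_source_csv_filenames='gs://example-bucket/train.csv',
    )
)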
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, - evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, - evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - run_distillation=None, - distill_batch_predict_machine_type=None, - distill_batch_predict_starting_replica_count=None, - distill_batch_predict_max_replica_count=None, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - quantiles=[], - enable_probabilistic_inference=False, - ) - - -def get_wide_and_deep_trainer_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - learning_rate: float, - dnn_learning_rate: float, - transform_config: Optional[str] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: bool = False, - feature_selection_algorithm: Optional[str] = None, - materialized_examples_format: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_transform_execution_engine: Optional[str] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - optimizer_type: str = 'adam', - max_steps: int = -1, - max_train_secs: int = -1, - l1_regularization_strength: float = 0, - l2_regularization_strength: float = 0, - l2_shrinkage_regularization_strength: float = 0, - beta_1: float = 0.9, - beta_2: float = 0.999, - hidden_units: str = '30,30,30', - use_wide: bool = True, - embed_categories: bool = True, - dnn_dropout: float = 0, - dnn_optimizer_type: str = 'adam', - dnn_l1_regularization_strength: float = 0, - dnn_l2_regularization_strength: float = 0, - dnn_l2_shrinkage_regularization_strength: float = 0, - dnn_beta_1: float = 0.9, - dnn_beta_2: float = 0.999, - enable_profiler: bool = False, - cache_data: str = 'auto', - seed: int = 1, - eval_steps: int = 0, - batch_size: int = 100, - measurement_selection_type: Optional[str] = None, - optimization_metric: Optional[str] = None, - eval_frequency_secs: int = 600, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: str = '', - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - worker_pool_specs_override: Optional[Dict[str, Any]] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the Wide & Deep training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - 'classification' or 'regression'. - learning_rate: The learning rate used by the linear optimizer. - dnn_learning_rate: The learning rate for training the deep part of the - model. - transform_config: Path to v1 TF transformation configuration. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - materialized_examples_format: The format for the materialized examples. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_transform_execution_engine: The execution engine used to execute TF-based - transformations. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - optimizer_type: The type of optimizer to use. Choices are "adam", "ftrl" and - "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively. - max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the trainer for. - l1_regularization_strength: L1 regularization strength for - optimizer_type="ftrl". - l2_regularization_strength: L2 regularization strength for - optimizer_type="ftrl". - l2_shrinkage_regularization_strength: L2 shrinkage regularization strength - for optimizer_type="ftrl". - beta_1: Beta 1 value for optimizer_type="adam". - beta_2: Beta 2 value for optimizer_type="adam". - hidden_units: Hidden layer sizes to use for DNN feature columns, provided in - comma-separated layers. - use_wide: If set to true, the categorical columns will be used in the wide - part of the DNN model. - embed_categories: If set to true, the categorical columns will be used - embedded and used in the deep part of the model. Embedding size is the - square root of the column cardinality. - dnn_dropout: The probability we will drop out a given coordinate. - dnn_optimizer_type: The type of optimizer to use for the deep part of the - model. Choices are "adam", "ftrl" and "sgd". for the Adam, FTRL, and - Gradient Descent Optimizers, respectively. 
- dnn_l1_regularization_strength: L1 regularization strength for - dnn_optimizer_type="ftrl". - dnn_l2_regularization_strength: L2 regularization strength for - dnn_optimizer_type="ftrl". - dnn_l2_shrinkage_regularization_strength: L2 shrinkage regularization - strength for dnn_optimizer_type="ftrl". - dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". - dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". - enable_profiler: Enables profiling and saves a trace during evaluation. - cache_data: Whether to cache data or not. If set to 'auto', caching is - determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not specified or - negative, it means run evaluation on the whole validation dataset. If set - to 0, it means run evaluation for a fixed number of samples. - batch_size: Batch size for training. - measurement_selection_type: Which measurement to use if/when the service - automatically selects the final measurement from previously reported - intermediate measurements. One of "BEST_MEASUREMENT" or - "LAST_MEASUREMENT". - optimization_metric: Optimization metric used for - `measurement_selection_type`. Default is "rmse" for regression and "auc" - for classification. - eval_frequency_secs: Frequency at which evaluation and checkpointing will - take place. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - worker_pool_specs_override: The dictionary for overriding training and - evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. 
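The worker_pool_specs_override value referenced above follows the WorkerPoolSpec shape of the Vertex AI CustomJob API; the field names below come from that API and the machine choices are placeholders, so treat this as an assumed example rather than a required layout.

# Override for the first (chief) worker pool only; later entries may be omitted.
worker_pool_specs_override = [
    {
        'machine_spec': {
            'machine_type': 'n1-highmem-32',
            'accelerator_type': 'NVIDIA_TESLA_T4',
            'accelerator_count': 2,
        },
        'replica_count': 1,
    },
]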
- - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - if transform_config and tf_transformations_path: - raise ValueError( - 'Only one of transform_config and tf_transformations_path can ' - 'be specified.' - ) - - elif transform_config: - warnings.warn( - 'transform_config parameter is deprecated. ' - 'Please use the flattened transform config arguments instead.' - ) - tf_transformations_path = transform_config - - if not worker_pool_specs_override: - worker_pool_specs_override = [] - - parameter_values = {} - training_and_eval_parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'learning_rate': learning_rate, - 'dnn_learning_rate': dnn_learning_rate, - 'optimizer_type': optimizer_type, - 'max_steps': max_steps, - 'max_train_secs': max_train_secs, - 'l1_regularization_strength': l1_regularization_strength, - 'l2_regularization_strength': l2_regularization_strength, - 'l2_shrinkage_regularization_strength': ( - l2_shrinkage_regularization_strength - ), - 'beta_1': beta_1, - 'beta_2': beta_2, - 'hidden_units': hidden_units, - 'use_wide': use_wide, - 'embed_categories': embed_categories, - 'dnn_dropout': dnn_dropout, - 'dnn_optimizer_type': dnn_optimizer_type, - 'dnn_l1_regularization_strength': dnn_l1_regularization_strength, - 'dnn_l2_regularization_strength': dnn_l2_regularization_strength, - 'dnn_l2_shrinkage_regularization_strength': ( - dnn_l2_shrinkage_regularization_strength - ), - 'dnn_beta_1': dnn_beta_1, - 'dnn_beta_2': dnn_beta_2, - 'enable_profiler': enable_profiler, - 'cache_data': cache_data, - 'seed': seed, - 'eval_steps': eval_steps, - 'batch_size': batch_size, - 'measurement_selection_type': measurement_selection_type, - 'optimization_metric': optimization_metric, - 'eval_frequency_secs': eval_frequency_secs, - 'weight_column': weight_column, - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'worker_pool_specs_override': worker_pool_specs_override, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - _update_parameters(parameter_values, training_and_eval_parameters) - - fte_params = { - 'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if 
dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - 'materialized_examples_format': ( - materialized_examples_format - if materialized_examples_format - else 'tfrecords_gzip' - ), - 'tf_transform_execution_engine': ( - tf_transform_execution_engine - if tf_transform_execution_engine - else 'dataflow' - ), - } - _update_parameters(parameter_values, fte_params) - - data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'wide_and_deep_trainer_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: List[Dict[str, Any]], - max_trial_count: int, - parallel_trial_count: int, - algorithm: str, - enable_profiler: bool = False, - seed: int = 1, - eval_steps: int = 0, - eval_frequency_secs: int = 600, - transform_config: Optional[str] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_transform_execution_engine: Optional[str] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: str = '', - max_failed_trial_count: int = 0, - study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - worker_pool_specs_override: Optional[Dict[str, Any]] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - 
evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the built-in algorithm HyperparameterTuningJob pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - study_spec_metric_id: Metric to optimize, possible values: [ 'loss', - 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', - 'recall']. - study_spec_metric_goal: Optimization goal of the metric, possible values: - "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries representing parameters - to optimize. The dictionary key is the parameter_id, which is passed to - training job as a command line argument, and the dictionary value is the - parameter specification of the metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run in parallel. - algorithm: Algorithm to train. One of "tabnet" and "wide_and_deep". - enable_profiler: Enables profiling and saves a trace during evaluation. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not specified or - negative, it means run evaluation on the whole validation dataset. If set - to 0, it means run evaluation for a fixed number of samples. - eval_frequency_secs: Frequency at which evaluation and checkpointing will - take place. - transform_config: Path to v1 TF transformation configuration. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_transform_execution_engine: The execution engine used to execute TF-based - transformations. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. - max_failed_trial_count: The number of failed trials that need to be seen - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for the study. 
One of - "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". - study_spec_measurement_selection_type: Which measurement to use if/when the - service automatically selects the final measurement from previously - reported intermediate measurements. One of "BEST_MEASUREMENT" or - "LAST_MEASUREMENT". - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - worker_pool_specs_override: The dictionary for overriding training and - evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - warnings.warn( - 'This method is deprecated. Please use' - ' get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters or' - ' get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters' - ' instead.' 
- ) - - if algorithm == 'tabnet': - return get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - study_spec_metric_id=study_spec_metric_id, - study_spec_metric_goal=study_spec_metric_goal, - study_spec_parameters_override=study_spec_parameters_override, - max_trial_count=max_trial_count, - parallel_trial_count=parallel_trial_count, - transform_config=transform_config, - dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, - dataset_level_transformations=dataset_level_transformations, - predefined_split_key=predefined_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - tf_transform_execution_engine=tf_transform_execution_engine, - tf_auto_transform_features=tf_auto_transform_features, - tf_custom_transformation_definitions=tf_custom_transformation_definitions, - tf_transformations_path=tf_transformations_path, - enable_profiler=enable_profiler, - seed=seed, - eval_steps=eval_steps, - eval_frequency_secs=eval_frequency_secs, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, - weight_column=weight_column, - max_failed_trial_count=max_failed_trial_count, - study_spec_algorithm=study_spec_algorithm, - study_spec_measurement_selection_type=study_spec_measurement_selection_type, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - worker_pool_specs_override=worker_pool_specs_override, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - ) - elif algorithm == 'wide_and_deep': - return get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - study_spec_metric_id=study_spec_metric_id, - study_spec_metric_goal=study_spec_metric_goal, - study_spec_parameters_override=study_spec_parameters_override, - max_trial_count=max_trial_count, - parallel_trial_count=parallel_trial_count, - transform_config=transform_config, - dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, - dataset_level_transformations=dataset_level_transformations, - predefined_split_key=predefined_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - 
test_fraction=test_fraction, - tf_transform_execution_engine=tf_transform_execution_engine, - tf_auto_transform_features=tf_auto_transform_features, - tf_custom_transformation_definitions=tf_custom_transformation_definitions, - tf_transformations_path=tf_transformations_path, - enable_profiler=enable_profiler, - seed=seed, - eval_steps=eval_steps, - eval_frequency_secs=eval_frequency_secs, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, - weight_column=weight_column, - max_failed_trial_count=max_failed_trial_count, - study_spec_algorithm=study_spec_algorithm, - study_spec_measurement_selection_type=study_spec_measurement_selection_type, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - worker_pool_specs_override=worker_pool_specs_override, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - ) - else: - raise ValueError( - 'Invalid algorithm provided. Supported values are "tabnet" and' - ' "wide_and_deep".' 
- ) - - -def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: List[Dict[str, Any]], - max_trial_count: int, - parallel_trial_count: int, - transform_config: Optional[str] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: bool = False, - feature_selection_algorithm: Optional[str] = None, - materialized_examples_format: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_transform_execution_engine: Optional[str] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - enable_profiler: bool = False, - cache_data: str = 'auto', - seed: int = 1, - eval_steps: int = 0, - eval_frequency_secs: int = 600, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: str = '', - max_failed_trial_count: int = 0, - study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - worker_pool_specs_override: Optional[Dict[str, Any]] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the TabNet HyperparameterTuningJob pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - study_spec_metric_id: Metric to optimize, possible values: [ 'loss', - 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', - 'recall']. - study_spec_metric_goal: Optimization goal of the metric, possible values: - "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries representing parameters - to optimize. 
The dictionary key is the parameter_id, which is passed to - training job as a command line argument, and the dictionary value is the - parameter specification of the metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run in parallel. - transform_config: Path to v1 TF transformation configuration. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - materialized_examples_format: The format for the materialized examples. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_transform_execution_engine: The execution engine used to execute TF-based - transformations. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - enable_profiler: Enables profiling and saves a trace during evaluation. - cache_data: Whether to cache data or not. If set to 'auto', caching is - determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not specified or - negative, it means run evaluation on the whole validation dataset. If set - to 0, it means run evaluation for a fixed number of samples. - eval_frequency_secs: Frequency at which evaluation and checkpointing will - take place. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. - max_failed_trial_count: The number of failed trials that need to be seen - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for the study. One of - "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". - study_spec_measurement_selection_type: Which measurement to use if/when the - service automatically selects the final measurement from previously - reported intermediate measurements. One of "BEST_MEASUREMENT" or - "LAST_MEASUREMENT". - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - worker_pool_specs_override: The dictionary for overriding training and - evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. 
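Since several of these helpers accept the same study_spec_parameters_override argument, a hedged sketch of one plausible value follows; the parameter_id / double_value_spec / categorical_value_spec keys mirror the fields that the study-spec formatting helpers later in this file read, while the specific parameter names, ranges, and scale type are invented for illustration.

# Illustrative only: two ParameterSpec-style entries in the JSON/dict form the
# helpers in this module manipulate. The ids and ranges are arbitrary examples.
study_spec_parameters_override = [
    {
        'parameter_id': 'learning_rate',
        'double_value_spec': {'min_value': 1e-4, 'max_value': 1e-1},
        'scale_type': 'UNIT_LOG_SCALE',
    },
    {
        'parameter_id': 'loss_function_type',
        'categorical_value_spec': {'values': ['cross_entropy', 'focal_loss']},
    },
]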
- evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - if transform_config and tf_transformations_path: - raise ValueError( - 'Only one of transform_config and tf_transformations_path can ' - 'be specified.' - ) - - elif transform_config: - warnings.warn( - 'transform_config parameter is deprecated. ' - 'Please use the flattened transform config arguments instead.' - ) - tf_transformations_path = transform_config - - if not worker_pool_specs_override: - worker_pool_specs_override = [] - - parameter_values = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'study_spec_metric_id': study_spec_metric_id, - 'study_spec_metric_goal': study_spec_metric_goal, - 'study_spec_parameters_override': study_spec_parameters_override, - 'max_trial_count': max_trial_count, - 'parallel_trial_count': parallel_trial_count, - 'enable_profiler': enable_profiler, - 'cache_data': cache_data, - 'seed': seed, - 'eval_steps': eval_steps, - 'eval_frequency_secs': eval_frequency_secs, - 'weight_column': weight_column, - 'max_failed_trial_count': max_failed_trial_count, - 'study_spec_algorithm': study_spec_algorithm, - 'study_spec_measurement_selection_type': ( - study_spec_measurement_selection_type - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'worker_pool_specs_override': worker_pool_specs_override, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': 
dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - - fte_params = { - 'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - 'materialized_examples_format': ( - materialized_examples_format - if materialized_examples_format - else 'tfrecords_gzip' - ), - 'tf_transform_execution_engine': ( - tf_transform_execution_engine - if tf_transform_execution_engine - else 'dataflow' - ), - } - _update_parameters(parameter_values, fte_params) - - data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'tabnet_hyperparameter_tuning_job_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: List[Dict[str, Any]], - max_trial_count: int, - parallel_trial_count: int, - transform_config: Optional[str] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: bool = False, - feature_selection_algorithm: Optional[str] = None, - materialized_examples_format: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_transform_execution_engine: Optional[str] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - enable_profiler: bool = False, - cache_data: str = 'auto', - seed: int = 1, - eval_steps: int = 0, - eval_frequency_secs: int = 600, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - 
weight_column: str = '', - max_failed_trial_count: int = 0, - study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - worker_pool_specs_override: Optional[Dict[str, Any]] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the Wide & Deep algorithm HyperparameterTuningJob pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - study_spec_metric_id: Metric to optimize, possible values: [ 'loss', - 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', - 'recall']. - study_spec_metric_goal: Optimization goal of the metric, possible values: - "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries representing parameters - to optimize. The dictionary key is the parameter_id, which is passed to - training job as a command line argument, and the dictionary value is the - parameter specification of the metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run in parallel. - transform_config: Path to v1 TF transformation configuration. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - materialized_examples_format: The format for the materialized examples. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_transform_execution_engine: The execution engine used to execute TF-based - transformations. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - enable_profiler: Enables profiling and saves a trace during evaluation. - cache_data: Whether to cache data or not. 
If set to 'auto', caching is - determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not specified or - negative, it means run evaluation on the whole validation dataset. If set - to 0, it means run evaluation for a fixed number of samples. - eval_frequency_secs: Frequency at which evaluation and checkpointing will - take place. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. - max_failed_trial_count: The number of failed trials that need to be seen - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for the study. One of - "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". - study_spec_measurement_selection_type: Which measurement to use if/when the - service automatically selects the final measurement from previously - reported intermediate measurements. One of "BEST_MEASUREMENT" or - "LAST_MEASUREMENT". - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - worker_pool_specs_override: The dictionary for overriding training and - evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - if transform_config and tf_transformations_path: - raise ValueError( - 'Only one of transform_config and tf_transformations_path can ' - 'be specified.' 
- ) - - elif transform_config: - warnings.warn( - 'transform_config parameter is deprecated. ' - 'Please use the flattened transform config arguments instead.' - ) - tf_transformations_path = transform_config - - if not worker_pool_specs_override: - worker_pool_specs_override = [] - - parameter_values = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'study_spec_metric_id': study_spec_metric_id, - 'study_spec_metric_goal': study_spec_metric_goal, - 'study_spec_parameters_override': study_spec_parameters_override, - 'max_trial_count': max_trial_count, - 'parallel_trial_count': parallel_trial_count, - 'enable_profiler': enable_profiler, - 'cache_data': cache_data, - 'seed': seed, - 'eval_steps': eval_steps, - 'eval_frequency_secs': eval_frequency_secs, - 'weight_column': weight_column, - 'max_failed_trial_count': max_failed_trial_count, - 'study_spec_algorithm': study_spec_algorithm, - 'study_spec_measurement_selection_type': ( - study_spec_measurement_selection_type - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'worker_pool_specs_override': worker_pool_specs_override, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - - fte_params = { - 'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - 'materialized_examples_format': ( - materialized_examples_format - if materialized_examples_format - else 'tfrecords_gzip' - ), - 'tf_transform_execution_engine': ( - tf_transform_execution_engine - if tf_transform_execution_engine - else 'dataflow' - ), - } - _update_parameters(parameter_values, fte_params) - - 
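The dictionaries above and below are merged into parameter_values through the module's private _update_parameters helper, which is defined earlier in the file and not visible in this hunk. A minimal sketch of what such a helper plausibly does (copy only non-None entries, so the compiled pipeline's defaults apply to anything the caller left unset) is given here, followed by a commented, hedged example of how the returned (template path, parameter values) pair is typically handed to Vertex AI Pipelines; names such as my-project and gs://my-bucket are placeholders.

# Plausible shape of the private helper used throughout this module: merge only
# entries whose value is not None, so omitted arguments fall back to the
# defaults baked into the compiled pipeline YAML. The real helper may differ.
def _update_parameters(parameter_values, new_params):
    parameter_values.update(
        {k: v for k, v in new_params.items() if v is not None}
    )

# Hedged usage sketch (assumes google-cloud-aiplatform is installed; all names
# and values below are placeholders, not taken from this file):
#
# from google.cloud import aiplatform
#
# template_path, parameter_values = (
#     get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters(
#         project='my-project', location='us-central1',
#         root_dir='gs://my-bucket/pipeline_root', target_column='label',
#         prediction_type='classification', study_spec_metric_id='auc',
#         study_spec_metric_goal='MAXIMIZE', study_spec_parameters_override=[],
#         max_trial_count=10, parallel_trial_count=2,
#     )
# )
# aiplatform.PipelineJob(
#     display_name='wide-and-deep-hpt',
#     template_path=template_path,
#     parameter_values=parameter_values,
#     pipeline_root='gs://my-bucket/pipeline_root',
# ).run()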
data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'wide_and_deep_hyperparameter_tuning_job_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def get_tabnet_trainer_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - learning_rate: float, - transform_config: Optional[str] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: bool = False, - feature_selection_algorithm: Optional[str] = None, - materialized_examples_format: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_transform_execution_engine: Optional[str] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - max_steps: int = -1, - max_train_secs: int = -1, - large_category_dim: int = 1, - large_category_thresh: int = 300, - yeo_johnson_transform: bool = True, - feature_dim: int = 64, - feature_dim_ratio: float = 0.5, - num_decision_steps: int = 6, - relaxation_factor: float = 1.5, - decay_every: float = 100, - decay_rate: float = 0.95, - gradient_thresh: float = 2000, - sparsity_loss_weight: float = 0.00001, - batch_momentum: float = 0.95, - batch_size_ratio: float = 0.25, - num_transformer_layers: int = 4, - num_transformer_layers_ratio: float = 0.25, - class_weight: float = 1.0, - loss_function_type: str = 'default', - alpha_focal_loss: float = 0.25, - gamma_focal_loss: float = 2.0, - enable_profiler: bool = False, - cache_data: str = 'auto', - seed: int = 1, - eval_steps: int = 0, - batch_size: int = 100, - measurement_selection_type: Optional[str] = None, - optimization_metric: Optional[str] = None, - eval_frequency_secs: int = 600, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: str = '', - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - worker_pool_specs_override: Optional[Dict[str, Any]] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - 
evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the TabNet training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - learning_rate: The learning rate used by the linear optimizer. - transform_config: Path to v1 TF transformation configuration. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - materialized_examples_format: The format for the materialized examples. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_transform_execution_engine: The execution engine used to execute TF-based - transformations. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the trainer for. - large_category_dim: Embedding dimension for categorical feature with large - number of categories. - large_category_thresh: Threshold for number of categories to apply - large_category_dim embedding dimension to. - yeo_johnson_transform: Enables trainable Yeo-Johnson power transform. - feature_dim: Dimensionality of the hidden representation in feature - transformation block. - feature_dim_ratio: The ratio of output dimension (dimensionality of the - outputs of each decision step) to feature dimension. - num_decision_steps: Number of sequential decision steps. - relaxation_factor: Relaxation factor that promotes the reuse of each feature - at different decision steps. When it is 1, a feature is enforced to be - used only at one decision step and as it increases, more flexibility is - provided to use a feature at multiple decision steps. - decay_every: Number of iterations for periodically applying learning rate - decaying. - decay_rate: Learning rate decaying. - gradient_thresh: Threshold for the norm of gradients for clipping. - sparsity_loss_weight: Weight of the loss for sparsity regularization - (increasing it will yield more sparse feature selection). - batch_momentum: Momentum in ghost batch normalization. - batch_size_ratio: The ratio of virtual batch size (size of the ghost batch - normalization) to batch size. - num_transformer_layers: The number of transformer layers for each decision - step. used only at one decision step and as it increases, more flexibility - is provided to use a feature at multiple decision steps. 
-    num_transformer_layers_ratio: The ratio of shared transformer layers to
-      transformer layers.
-    class_weight: The class weight is used to compute a weighted cross
-      entropy, which is helpful for classifying imbalanced datasets. Only used
-      for classification.
-    loss_function_type: Loss function type. Loss function in classification:
-      [cross_entropy, weighted_cross_entropy, focal_loss], default is
-      cross_entropy. Loss function in regression: [rmse, mae, mse], default is
-      mse.
-    alpha_focal_loss: Alpha value (balancing factor) in focal_loss function.
-      Only used for classification.
-    gamma_focal_loss: Gamma value (modulating factor) for focal loss. Only
-      used for classification.
-    enable_profiler: Enables profiling and saves a trace during evaluation.
-    cache_data: Whether to cache data or not. If set to 'auto', caching is
-      determined based on the dataset size.
-    seed: Seed to be used for this run.
-    eval_steps: Number of steps to run evaluation for. If not specified or
-      negative, it means run evaluation on the whole validation dataset. If
-      set to 0, it means run evaluation for a fixed number of samples.
-    batch_size: Batch size for training.
-    measurement_selection_type: Which measurement to use if/when the service
-      automatically selects the final measurement from previously reported
-      intermediate measurements. One of "BEST_MEASUREMENT" or
-      "LAST_MEASUREMENT".
-    optimization_metric: Optimization metric used for
-      `measurement_selection_type`. Default is "rmse" for regression and
-      "auc" for classification.
-    eval_frequency_secs: Frequency at which evaluation and checkpointing will
-      take place.
-    data_source_csv_filenames: The CSV data source.
-    data_source_bigquery_table_path: The BigQuery data source.
-    bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for
-      storing intermediate tables.
-    weight_column: The weight column name.
-    transform_dataflow_machine_type: The dataflow machine type for transform
-      component.
-    transform_dataflow_max_num_workers: The max number of Dataflow workers for
-      transform component.
-    transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for
-      transform component.
-    worker_pool_specs_override: The dictionary for overriding training and
-      evaluation worker pool specs. The dictionary should be of format
-      https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
-    run_evaluation: Whether to run evaluation steps during training.
-    evaluation_batch_predict_machine_type: The prediction server machine type
-      for batch predict components during evaluation.
-    evaluation_batch_predict_starting_replica_count: The initial number of
-      prediction server for batch predict components during evaluation.
-    evaluation_batch_predict_max_replica_count: The max number of prediction
-      server for batch predict components during evaluation.
-    evaluation_dataflow_machine_type: The dataflow machine type for evaluation
-      components.
-    evaluation_dataflow_starting_num_workers: The initial number of Dataflow
-      workers for evaluation components.
-    evaluation_dataflow_max_num_workers: The max number of Dataflow workers
-      for evaluation components.
-    evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for
-      evaluation components.
-    dataflow_service_account: Custom service account to run dataflow jobs.
-    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when
-      empty the default subnetwork will be used.
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - if transform_config and tf_transformations_path: - raise ValueError( - 'Only one of transform_config and tf_transformations_path can ' - 'be specified.' - ) - - elif transform_config: - warnings.warn( - 'transform_config parameter is deprecated. ' - 'Please use the flattened transform config arguments instead.' - ) - tf_transformations_path = transform_config - - if not worker_pool_specs_override: - worker_pool_specs_override = [] - - parameter_values = {} - training_and_eval_parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'learning_rate': learning_rate, - 'max_steps': max_steps, - 'max_train_secs': max_train_secs, - 'large_category_dim': large_category_dim, - 'large_category_thresh': large_category_thresh, - 'yeo_johnson_transform': yeo_johnson_transform, - 'feature_dim': feature_dim, - 'feature_dim_ratio': feature_dim_ratio, - 'num_decision_steps': num_decision_steps, - 'relaxation_factor': relaxation_factor, - 'decay_every': decay_every, - 'decay_rate': decay_rate, - 'gradient_thresh': gradient_thresh, - 'sparsity_loss_weight': sparsity_loss_weight, - 'batch_momentum': batch_momentum, - 'batch_size_ratio': batch_size_ratio, - 'num_transformer_layers': num_transformer_layers, - 'num_transformer_layers_ratio': num_transformer_layers_ratio, - 'class_weight': class_weight, - 'loss_function_type': loss_function_type, - 'alpha_focal_loss': alpha_focal_loss, - 'gamma_focal_loss': gamma_focal_loss, - 'enable_profiler': enable_profiler, - 'cache_data': cache_data, - 'seed': seed, - 'eval_steps': eval_steps, - 'batch_size': batch_size, - 'measurement_selection_type': measurement_selection_type, - 'optimization_metric': optimization_metric, - 'eval_frequency_secs': eval_frequency_secs, - 'weight_column': weight_column, - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'worker_pool_specs_override': worker_pool_specs_override, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - _update_parameters(parameter_values, training_and_eval_parameters) - - fte_params = { - 
'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - 'materialized_examples_format': ( - materialized_examples_format - if materialized_examples_format - else 'tfrecords_gzip' - ), - 'tf_transform_execution_engine': ( - tf_transform_execution_engine - if tf_transform_execution_engine - else 'dataflow' - ), - } - _update_parameters(parameter_values, fte_params) - - data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), 'tabnet_trainer_pipeline.yaml' - ) - - return pipeline_definition_path, parameter_values - - -def get_tabnet_study_spec_parameters_override( - dataset_size_bucket: str, prediction_type: str, training_budget_bucket: str -) -> List[Dict[str, Any]]: - """Get study_spec_parameters_override for a TabNet hyperparameter tuning job. - - Args: - dataset_size_bucket: Size of the dataset. One of "small" (< 1M rows), - "medium" (1M - 100M rows), or "large" (> 100M rows). - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - training_budget_bucket: Bucket of the estimated training budget. One of - "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This - parameter is only used as a hint for the hyperparameter search space, - unrelated to the real cost. - - Returns: - List of study_spec_parameters_override. - """ - - if dataset_size_bucket not in ['small', 'medium', 'large']: - raise ValueError( - 'Invalid dataset_size_bucket provided. Supported values ' - ' are "small", "medium" or "large".' - ) - if training_budget_bucket not in ['small', 'medium', 'large']: - raise ValueError( - 'Invalid training_budget_bucket provided. Supported values ' - 'are "small", "medium" or "large".' 
- ) - - param_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - f'configs/tabnet_params_{dataset_size_bucket}_data_{training_budget_bucket}_search_space.json', - ) - with open(param_path, 'r') as f: - param_content = f.read() - params = json.loads(param_content) - - if prediction_type == 'regression': - return _format_tabnet_regression_study_spec_parameters_override( - params, training_budget_bucket - ) - return params - - -def _format_tabnet_regression_study_spec_parameters_override( - params: List[Dict[str, Any]], training_budget_bucket: str -) -> List[Dict[str, Any]]: - """Get regression study_spec_parameters_override for a TabNet hyperparameter tuning job. - - Args: - params: List of dictionaries representing parameters to optimize. The - dictionary key is the parameter_id, which is passed to training job as a - command line argument, and the dictionary value is the parameter - specification of the metric. - training_budget_bucket: Bucket of the estimated training budget. One of - "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This - parameter is only used as a hint for the hyperparameter search space, - unrelated to the real cost. - - Returns: - List of study_spec_parameters_override for regression. - """ - - # To get regression study_spec_parameters, we need to set - # `loss_function_type` to ‘mae’ (‘mae’ and ‘mse’ for "large" search space), - # remove the `alpha_focal_loss`, `gamma_focal_loss` - # and `class_weight` parameters and increase the max for - # `sparsity_loss_weight` to 100. - formatted_params = [] - for param in params: - if param['parameter_id'] in [ - 'alpha_focal_loss', - 'gamma_focal_loss', - 'class_weight', - ]: - continue - elif param['parameter_id'] == 'sparsity_loss_weight': - param['double_value_spec']['max_value'] = 100 - elif param['parameter_id'] == 'loss_function_type': - if training_budget_bucket == 'large': - param['categorical_value_spec']['values'] = ['mae', 'mse'] - else: - param['categorical_value_spec']['values'] = ['mae'] - - formatted_params.append(param) - - return formatted_params - - -def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: - """Get study_spec_parameters_override for a Wide & Deep hyperparameter tuning job. - - Returns: - List of study_spec_parameters_override. - """ - param_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'configs/wide_and_deep_params.json', - ) - with open(param_path, 'r') as f: - param_content = f.read() - params = json.loads(param_content) - - return params - - -def get_xgboost_study_spec_parameters_override() -> List[Dict[str, Any]]: - """Get study_spec_parameters_override for an XGBoost hyperparameter tuning job. - - Returns: - List of study_spec_parameters_override. 
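To make the regression reformatting in _format_tabnet_regression_study_spec_parameters_override above concrete, here is a hedged before/after illustration; the entry structure mirrors the fields that function reads, while the parameter ranges are invented.

# Illustrative input for the regression formatting above (values are made up;
# only the structure matters).
params_in = [
    {'parameter_id': 'class_weight',
     'double_value_spec': {'min_value': 1.0, 'max_value': 100.0}},
    {'parameter_id': 'sparsity_loss_weight',
     'double_value_spec': {'min_value': 1e-6, 'max_value': 1e-3}},
    {'parameter_id': 'loss_function_type',
     'categorical_value_spec': {'values': ['cross_entropy', 'focal_loss']}},
]
# _format_tabnet_regression_study_spec_parameters_override(params_in, 'small')
# would drop the class_weight entry, raise the sparsity_loss_weight max_value
# to 100, and restrict the loss_function_type values to ['mae'].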
- """ - param_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), 'configs/xgboost_params.json' - ) - with open(param_path, 'r') as f: - param_content = f.read() - params = json.loads(param_content) - - return params - - -def get_xgboost_trainer_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - objective: str, - eval_metric: Optional[str] = None, - num_boost_round: Optional[int] = None, - early_stopping_rounds: Optional[int] = None, - base_score: Optional[float] = None, - disable_default_eval_metric: Optional[int] = None, - seed: Optional[int] = None, - seed_per_iteration: Optional[bool] = None, - booster: Optional[str] = None, - eta: Optional[float] = None, - gamma: Optional[float] = None, - max_depth: Optional[int] = None, - min_child_weight: Optional[float] = None, - max_delta_step: Optional[float] = None, - subsample: Optional[float] = None, - colsample_bytree: Optional[float] = None, - colsample_bylevel: Optional[float] = None, - colsample_bynode: Optional[float] = None, - reg_lambda: Optional[float] = None, - reg_alpha: Optional[float] = None, - tree_method: Optional[str] = None, - scale_pos_weight: Optional[float] = None, - updater: Optional[str] = None, - refresh_leaf: Optional[int] = None, - process_type: Optional[str] = None, - grow_policy: Optional[str] = None, - sampling_method: Optional[str] = None, - monotone_constraints: Optional[str] = None, - interaction_constraints: Optional[str] = None, - sample_type: Optional[str] = None, - normalize_type: Optional[str] = None, - rate_drop: Optional[float] = None, - one_drop: Optional[int] = None, - skip_drop: Optional[float] = None, - num_parallel_tree: Optional[int] = None, - feature_selector: Optional[str] = None, - top_k: Optional[int] = None, - max_cat_to_onehot: Optional[int] = None, - max_leaves: Optional[int] = None, - max_bin: Optional[int] = None, - tweedie_variance_power: Optional[float] = None, - huber_slope: Optional[float] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: Optional[bool] = None, - feature_selection_algorithm: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: Optional[str] = None, - training_machine_type: Optional[str] = None, - training_total_replica_count: Optional[int] = None, - training_accelerator_type: Optional[str] = None, - training_accelerator_count: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - run_evaluation: Optional[bool] = None, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - 
evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: Optional[bool] = None, - encryption_spec_key_name: Optional[str] = None, -): - """Get the XGBoost training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - objective: Specifies the learning task and the learning objective. Must be - one of [reg:squarederror, reg:squaredlogerror, - reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, - binary:logistic, multi:softprob]. - eval_metric: Evaluation metrics for validation data represented as a - comma-separated string. - num_boost_round: Number of boosting iterations. - early_stopping_rounds: Activates early stopping. Validation error needs to - decrease at least every early_stopping_rounds round(s) to continue - training. - base_score: The initial prediction score of all instances, global bias. - disable_default_eval_metric: Flag to disable default metric. Set to >0 to - disable. Default to 0. - seed: Random seed. - seed_per_iteration: Seed PRNG determnisticly via iterator number. - booster: Which booster to use, can be gbtree, gblinear or dart. gbtree and - dart use tree based model while gblinear uses linear function. - eta: Learning rate. - gamma: Minimum loss reduction required to make a further partition on a leaf - node of the tree. - max_depth: Maximum depth of a tree. - min_child_weight: Minimum sum of instance weight(hessian) needed in a child. - max_delta_step: Maximum delta step we allow each tree's weight estimation to - be. - subsample: Subsample ratio of the training instance. - colsample_bytree: Subsample ratio of columns when constructing each tree. - colsample_bylevel: Subsample ratio of columns for each split, in each level. - colsample_bynode: Subsample ratio of columns for each node (split). - reg_lambda: L2 regularization term on weights. - reg_alpha: L1 regularization term on weights. - tree_method: The tree construction algorithm used in XGBoost. Choices: - ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"]. - scale_pos_weight: Control the balance of positive and negative weights. - updater: A comma separated string defining the sequence of tree updaters to - run. - refresh_leaf: Refresh updater plugin. Update tree leaf and nodes's stats if - True. When it is False, only node stats are updated. - process_type: A type of boosting process to run. Choices:["default", - "update"] - grow_policy: Controls a way new nodes are added to the tree. Only supported - if tree_method is hist. Choices:["depthwise", "lossguide"] - sampling_method: The method to use to sample the training instances. - monotone_constraints: Constraint of variable monotonicity. - interaction_constraints: Constraints for interaction representing permitted - interactions. - sample_type: [dart booster only] Type of sampling algorithm. - Choices:["uniform", "weighted"] - normalize_type: [dart booster only] Type of normalization algorithm, - Choices:["tree", "forest"] - rate_drop: [dart booster only] Dropout rate.' 
- one_drop: [dart booster only] When this flag is enabled, at least one tree - is always dropped during the dropout (allows Binomial-plus-one or - epsilon-dropout from the original DART paper). - skip_drop: [dart booster only] Probability of skipping the dropout procedure - during a boosting iteration. - num_parallel_tree: Number of parallel trees constructed during each - iteration. This option is used to support boosted random forest. - feature_selector: [linear booster only] Feature selection and ordering - method. - top_k: The number of top features to select in greedy and thrifty feature - selector. The value of 0 means using all the features. - max_cat_to_onehot: A threshold for deciding whether XGBoost should use - one-hot encoding based split for categorical data. - max_leaves: Maximum number of nodes to be added. - max_bin: Maximum number of discrete bins to bucket continuous features. - tweedie_variance_power: Parameter that controls the variance of the Tweedie - distribution. - huber_slope: A parameter used for Pseudo-Huber loss to define the delta - term. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. - training_machine_type: Machine type. - training_total_replica_count: Number of workers. - training_accelerator_type: Accelerator type. - training_accelerator_count: Accelerator count. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. 
- evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = {} - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - training_and_eval_parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'objective': objective, - 'eval_metric': eval_metric, - 'num_boost_round': num_boost_round, - 'early_stopping_rounds': early_stopping_rounds, - 'base_score': base_score, - 'disable_default_eval_metric': disable_default_eval_metric, - 'seed': seed, - 'seed_per_iteration': seed_per_iteration, - 'booster': booster, - 'eta': eta, - 'gamma': gamma, - 'max_depth': max_depth, - 'min_child_weight': min_child_weight, - 'max_delta_step': max_delta_step, - 'subsample': subsample, - 'colsample_bytree': colsample_bytree, - 'colsample_bylevel': colsample_bylevel, - 'colsample_bynode': colsample_bynode, - 'reg_lambda': reg_lambda, - 'reg_alpha': reg_alpha, - 'tree_method': tree_method, - 'scale_pos_weight': scale_pos_weight, - 'updater': updater, - 'refresh_leaf': refresh_leaf, - 'process_type': process_type, - 'grow_policy': grow_policy, - 'sampling_method': sampling_method, - 'monotone_constraints': monotone_constraints, - 'interaction_constraints': interaction_constraints, - 'sample_type': sample_type, - 'normalize_type': normalize_type, - 'rate_drop': rate_drop, - 'one_drop': one_drop, - 'skip_drop': skip_drop, - 'num_parallel_tree': num_parallel_tree, - 'feature_selector': feature_selector, - 'top_k': top_k, - 'max_cat_to_onehot': max_cat_to_onehot, - 'max_leaves': max_leaves, - 'max_bin': max_bin, - 'tweedie_variance_power': tweedie_variance_power, - 'huber_slope': huber_slope, - 'weight_column': weight_column, - 'training_machine_type': training_machine_type, - 'training_total_replica_count': training_total_replica_count, - 'training_accelerator_type': training_accelerator_type, - 'training_accelerator_count': training_accelerator_count, - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 
'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - _update_parameters(parameter_values, training_and_eval_parameters) - - fte_params = { - 'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - } - _update_parameters(parameter_values, fte_params) - - data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), 'xgboost_trainer_pipeline.yaml' - ) - - return pipeline_definition_path, parameter_values - - -def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - objective: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - max_trial_count: int, - parallel_trial_count: int, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - eval_metric: Optional[str] = None, - disable_default_eval_metric: Optional[int] = None, - seed: Optional[int] = None, - seed_per_iteration: Optional[bool] = None, - dataset_level_custom_transformation_definitions: Optional[ - List[Dict[str, Any]] - ] = None, - dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, - run_feature_selection: Optional[bool] = None, - feature_selection_algorithm: Optional[str] = None, - max_selected_features: Optional[int] = None, - predefined_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - tf_auto_transform_features: Optional[ - Union[List[str], Dict[str, List[str]]] - ] = None, - tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, - tf_transformations_path: Optional[str] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - bigquery_staging_full_dataset_id: Optional[str] = None, - weight_column: Optional[str] = None, - max_failed_trial_count: Optional[int] = None, - training_machine_type: Optional[str] = None, - training_total_replica_count: Optional[int] = None, - training_accelerator_type: Optional[str] = None, - training_accelerator_count: Optional[int] = None, - study_spec_algorithm: Optional[str] = None, - study_spec_measurement_selection_type: Optional[str] = 
None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - run_evaluation: Optional[bool] = None, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - dataflow_service_account: Optional[str] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: Optional[bool] = None, - encryption_spec_key_name: Optional[str] = None, -): - """Get the XGBoost HyperparameterTuningJob pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - objective: Specifies the learning task and the learning objective. Must be - one of [reg:squarederror, reg:squaredlogerror, - reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, - binary:logistic, multi:softprob]. - study_spec_metric_id: Metric to optimize. For options, please look under - 'eval_metric' at - https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. - study_spec_metric_goal: Optimization goal of the metric, possible values: - "MAXIMIZE", "MINIMIZE". - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run in parallel. - study_spec_parameters_override: List of dictionaries representing parameters - to optimize. The dictionary key is the parameter_id, which is passed to - training job as a command line argument, and the dictionary value is the - parameter specification of the metric. - eval_metric: Evaluation metrics for validation data represented as a - comma-separated string. - disable_default_eval_metric: Flag to disable default metric. Set to >0 to - disable. Default to 0. - seed: Random seed. - seed_per_iteration: Seed PRNG determnisticly via iterator number. - dataset_level_custom_transformation_definitions: Dataset-level custom - transformation definitions in string format. - dataset_level_transformations: Dataset-level transformation configuration in - string format. - run_feature_selection: Whether to enable feature selection. - feature_selection_algorithm: Feature selection algorithm. - max_selected_features: Maximum number of features to select. - predefined_split_key: Predefined split key. - stratified_split_key: Stratified split key. - training_fraction: Training fraction. - validation_fraction: Validation fraction. - test_fraction: Test fraction. - tf_auto_transform_features: List of auto transform features in the - comma-separated string format. - tf_custom_transformation_definitions: TF custom transformation definitions - in string format. - tf_transformations_path: Path to TF transformation configuration. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for - storing intermediate tables. - weight_column: The weight column name. 
- max_failed_trial_count: The number of failed trials that need to be seen - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - how many trials must fail before the whole job fails. - training_machine_type: Machine type. - training_total_replica_count: Number of workers. - training_accelerator_type: Accelerator type. - training_accelerator_count: Accelerator count. - study_spec_algorithm: The search algorithm specified for the study. One of - 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement to use if/when the - service automatically selects the final measurement from previously - reported intermediate measurements. One of "BEST_MEASUREMENT" or - "LAST_MEASUREMENT". - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - run_evaluation: Whether to run evaluation steps during training. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - - Returns: - Tuple of pipeline_definition_path and parameter_values. 
- """ - parameter_values = {} - if isinstance(tf_auto_transform_features, list): - tf_auto_transform_features = {'auto': tf_auto_transform_features} - - training_and_eval_parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'objective': objective, - 'eval_metric': eval_metric, - 'study_spec_metric_id': study_spec_metric_id, - 'study_spec_metric_goal': study_spec_metric_goal, - 'max_trial_count': max_trial_count, - 'parallel_trial_count': parallel_trial_count, - 'study_spec_parameters_override': ( - study_spec_parameters_override - if study_spec_parameters_override - else [] - ), - 'disable_default_eval_metric': disable_default_eval_metric, - 'seed': seed, - 'seed_per_iteration': seed_per_iteration, - 'weight_column': weight_column, - 'max_failed_trial_count': max_failed_trial_count, - 'training_machine_type': training_machine_type, - 'training_total_replica_count': training_total_replica_count, - 'training_accelerator_type': training_accelerator_type, - 'training_accelerator_count': training_accelerator_count, - 'study_spec_algorithm': study_spec_algorithm, - 'study_spec_measurement_selection_type': ( - study_spec_measurement_selection_type - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'run_evaluation': run_evaluation, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - _update_parameters(parameter_values, training_and_eval_parameters) - - fte_params = { - 'dataset_level_custom_transformation_definitions': ( - dataset_level_custom_transformation_definitions - if dataset_level_custom_transformation_definitions - else [] - ), - 'dataset_level_transformations': ( - dataset_level_transformations if dataset_level_transformations else [] - ), - 'run_feature_selection': run_feature_selection, - 'feature_selection_algorithm': feature_selection_algorithm, - 'max_selected_features': max_selected_features, - 'predefined_split_key': predefined_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'tf_auto_transform_features': ( - tf_auto_transform_features if tf_auto_transform_features else {} - ), - 'tf_custom_transformation_definitions': ( - tf_custom_transformation_definitions - if tf_custom_transformation_definitions - else [] - ), - 'tf_transformations_path': tf_transformations_path, - } - _update_parameters(parameter_values, fte_params) - - data_source_and_split_parameters = { - 'data_source_csv_filenames': data_source_csv_filenames, - 
'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, - } - _update_parameters(parameter_values, data_source_and_split_parameters) - - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'xgboost_hyperparameter_tuning_job_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py deleted file mode 100644 index 6f76075d48..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +++ /dev/null @@ -1,236 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Wide and Deep Hyperparameter Tuning component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input - - -@dsl.container_component -def wide_and_deep_hyperparameter_tuning_job( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: list, - max_trial_count: int, - parallel_trial_count: int, - instance_baseline: Input[Artifact], - metadata: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - transform_output: Input[Artifact], - training_schema_uri: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - instance_schema_uri: dsl.OutputPath(str), - prediction_schema_uri: dsl.OutputPath(str), - trials: dsl.OutputPath(str), - prediction_docker_uri_output: dsl.OutputPath(str), - execution_metrics: dsl.OutputPath(dict), - weight_column: Optional[str] = '', - enable_profiler: Optional[bool] = False, - cache_data: Optional[str] = 'auto', - seed: Optional[int] = 1, - eval_steps: Optional[int] = 0, - eval_frequency_secs: Optional[int] = 600, - max_failed_trial_count: Optional[int] = 0, - study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', - training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, - training_disk_spec: Optional[dict] = { - 'boot_disk_type': 'pd-ssd', - 'boot_disk_size_gb': 100, - }, - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Tunes Wide & Deep hyperparameters using Vertex HyperparameterTuningJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. 
"classification" or "regression". - weight_column: The weight column name. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - study_spec_metric_id: Metric to optimize, , possible - values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. - study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. - training_disk_spec: The training disk spec. - instance_baseline: The path to a JSON file for baseline values. - metadata: Amount of time in seconds to run the trainer for. - materialized_train_split: The path to the materialized train split. - materialized_eval_split: The path to the materialized validation split. - transform_output: The path to transform output. - training_schema_uri: The path to the training schema. - encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training job. - instance_schema_uri: The path to the instance schema. - prediction_schema_uri: The path to the prediction schema. - trials: The path to the hyperparameter tuning trials - prediction_docker_uri_output: The URI of the prediction container. - execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', - ], - args=[ - '--type', - 'HyperparameterTuningJobWithMetrics', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--execution_metrics', - execution_metrics, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "wide-and-deep-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "study_spec": {"metrics": [{"metric_id": "', - study_spec_metric_id, - '", "goal": "', - study_spec_metric_goal, - '"}], "parameters": ', - study_spec_parameters_override, - ', "algorithm": "', - study_spec_algorithm, - '", "measurement_selection_type": "', - study_spec_measurement_selection_type, - '"}, "max_trial_count": ', - max_trial_count, - ', "parallel_trial_count": ', - parallel_trial_count, - ', "max_failed_trial_count": ', - max_failed_trial_count, - ( - ', "trial_job_spec": {"worker_pool_specs":' - ' [{"replica_count":"' - ), - '1', - '", "machine_spec": ', - training_machine_spec, - ', "disk_spec": ', - training_disk_spec, - ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', - '", "args": ["--target_column=', - target_column, - '", "--weight_column=', - weight_column, - '", "--model_type=', - prediction_type, - '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - '", "--prediction_docker_uri_artifact_path=', - prediction_docker_uri_output, - '", "--baseline_path=', - instance_baseline.uri, - '", "--metadata_path=', - metadata.uri, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_schema_path=', - training_schema_uri.uri, - '", "--instance_schema_path=', - instance_schema_uri, - '", "--prediction_schema_path=', - prediction_schema_uri, - '", "--trials_path=', - trials, - '", "--job_dir=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--training_data_path=' - ), - materialized_train_split.uri, - '", "--validation_data_path=', - materialized_eval_split.uri, - '", "--enable_profiler=', - enable_profiler, - '", "--cache_data=', - cache_data, - '", "--measurement_selection_type=', - study_spec_measurement_selection_type, - '", "--metric_goal=', - study_spec_metric_goal, - '", "--seed=', - seed, - '", "--eval_steps=', - eval_steps, - '", "--eval_frequency_secs=', - eval_frequency_secs, - '"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml deleted file mode 100644 index f6c3308c7f..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ /dev/null @@ -1,4018 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-wide-and-deep-hyperparameter-tuning-job -# Description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
-# Inputs: -# bigquery_staging_full_dataset_id: str [Default: ''] -# cache_data: str [Default: 'auto'] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# enable_profiler: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# eval_frequency_secs: int [Default: 600.0] -# eval_steps: int [Default: 0.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_selection_algorithm: str [Default: 'AMI'] -# location: str -# materialized_examples_format: str [Default: 'tfrecords_gzip'] -# max_failed_trial_count: int [Default: 0.0] -# max_selected_features: int [Default: -1.0] -# max_trial_count: int -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# parallel_trial_count: int -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# seed: int [Default: 1.0] -# stratified_split_key: str [Default: ''] -# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] -# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] -# study_spec_metric_goal: str -# study_spec_metric_id: str -# study_spec_parameters_override: list -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: ''] -# tf_transformations_path: str [Default: ''] -# training_fraction: float [Default: -1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# worker_pool_specs_override: list -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - 
constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - inputDefinitions: - artifacts: - pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - 
pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: 
pipelinechannel--location - materialized_examples_format: - componentInputParameter: pipelinechannel--materialized_examples_format - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - model_type: - runtimeValue: - constant: neural_network - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transform_execution_engine: - componentInputParameter: pipelinechannel--tf_transform_execution_engine - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - get-best-hyperparameter-tuning-job-trial: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-best-hyperparameter-tuning-job-trial - dependentTasks: - - wide-and-deep-hyperparameter-tuning-job - inputs: - parameters: - gcp_resources: - taskOutputParameter: - outputParameterKey: gcp_resources - producerTask: wide-and-deep-hyperparameter-tuning-job - instance_schema_uri: - taskOutputParameter: - outputParameterKey: instance_schema_uri - producerTask: wide-and-deep-hyperparameter-tuning-job - prediction_docker_uri: - taskOutputParameter: - outputParameterKey: prediction_docker_uri_output - producerTask: wide-and-deep-hyperparameter-tuning-job - prediction_schema_uri: - taskOutputParameter: - outputParameterKey: prediction_schema_uri - producerTask: wide-and-deep-hyperparameter-tuning-job - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - trials_dir: - taskOutputParameter: - outputParameterKey: trials - producerTask: wide-and-deep-hyperparameter-tuning-job - taskInfo: - name: get-best-hyperparameter-tuning-job-trial - get-wide-and-deep-study-spec-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-wide-and-deep-study-spec-parameters - inputs: - parameters: - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - taskInfo: - name: get-wide-and-deep-study-spec-parameters - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-infra-validator - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - 
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - parse-worker-pool-specs-override: - cachingOptions: - enableCache: true - componentRef: - name: comp-parse-worker-pool-specs-override - inputs: - parameters: - worker_pool_specs_override: - componentInputParameter: pipelinechannel--worker_pool_specs_override - taskInfo: - name: parse-worker-pool-specs-override - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - wide-and-deep-hyperparameter-tuning-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-wide-and-deep-hyperparameter-tuning-job - dependentTasks: - - feature-transform-engine - - get-wide-and-deep-study-spec-parameters - - parse-worker-pool-specs-override - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - cache_data: - componentInputParameter: pipelinechannel--cache_data - enable_profiler: - componentInputParameter: pipelinechannel--enable_profiler - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - eval_frequency_secs: - componentInputParameter: 
pipelinechannel--eval_frequency_secs - eval_steps: - componentInputParameter: pipelinechannel--eval_steps - location: - componentInputParameter: pipelinechannel--location - max_failed_trial_count: - componentInputParameter: pipelinechannel--max_failed_trial_count - max_trial_count: - componentInputParameter: pipelinechannel--max_trial_count - parallel_trial_count: - componentInputParameter: pipelinechannel--parallel_trial_count - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - seed: - componentInputParameter: pipelinechannel--seed - study_spec_algorithm: - componentInputParameter: pipelinechannel--study_spec_algorithm - study_spec_measurement_selection_type: - componentInputParameter: pipelinechannel--study_spec_measurement_selection_type - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - study_spec_metric_id: - componentInputParameter: pipelinechannel--study_spec_metric_id - study_spec_parameters_override: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-wide-and-deep-study-spec-parameters - target_column: - componentInputParameter: pipelinechannel--target_column - training_disk_spec: - taskOutputParameter: - outputParameterKey: training_disk_spec - producerTask: parse-worker-pool-specs-override - training_machine_spec: - taskOutputParameter: - outputParameterKey: training_machine_spec - producerTask: parse-worker-pool-specs-override - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: wide-and-deep-hyperparameter-tuning-job - inputDefinitions: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--cache_data: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--enable_profiler: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eval_frequency_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--eval_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--materialized_examples_format: - parameterType: STRING - pipelinechannel--max_failed_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--max_trial_count: - parameterType: NUMBER_INTEGER - 
pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--parallel_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_algorithm: - parameterType: STRING - pipelinechannel--study_spec_measurement_selection_type: - parameterType: STRING - pipelinechannel--study_spec_metric_goal: - parameterType: STRING - pipelinechannel--study_spec_metric_id: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transform_execution_engine: - parameterType: STRING - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--worker_pool_specs_override: - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' 
- isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. 
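The dataset_level_custom_transformation_definitions and dataset_level_transformations parameters above are plain lists of dictionaries. The sketch below reassembles the Join and ConcatCols examples from those descriptions as Python literals before they would be passed as pipeline parameters; the BigQuery table URI, GCS module path, and column names are the sample values from the descriptions, not assets belonging to this pipeline.

.. code-block:: python

    # Sketch only: illustrative values for the dataset-level transformation
    # parameters described above. Table URI, module path, and column names
    # are sample/placeholder values.
    dataset_level_custom_transformation_definitions = [
        {
            "transformation": "ConcatCols",
            "module_path": "gs://my-bucket/custom_transform_fn_dlt.py",  # placeholder
            "function_name": "concat_cols",
        },
    ]

    dataset_level_transformations = [
        {
            "transformation": "Join",
            "right_table_uri": "bq://test-project.dataset_test.table",
            "join_keys": [["join_key_col", "join_key_col"]],
        },
        {
            "transformation": "ConcatCols",
            "cols": ["feature_1", "feature_2"],
            "output_col": "feature_1_2",
        },
    ]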
For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. 
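The TimeAggregate and PartitionBy* entries follow the same dictionary shape. The sketch below writes out the two examples from the descriptions above as Python literals; every column name and time window is the sample value given in those descriptions.

.. code-block:: python

    # Sketch only: the TimeAggregate and PartitionByMax examples from the
    # descriptions above, expressed as Python dictionaries.
    time_aggregate = {
        "transformation": "TimeAggregate",
        "time_difference": 40,
        "time_difference_units": "DAY",
        "time_series_identifier_columns": ["store_id"],
        "time_column": "time_col",
        "time_difference_target_column": "target_col",
        "output_column": "output_col",
    }

    partition_by_max = {
        "transformation": "PartitionByMax",
        "reduce_column": "sell_price",
        "partition_by_columns": ["store_id", "state_id"],
        "time_column": "date",
        "time_ago": 1,
        "time_ago_units": "WEEK",
        "output_column": "partition_by_reduce_max_output",
    }

    dataset_level_transformations = [time_aggregate, partition_by_max]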
CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' 
- isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. 
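Feature selection is controlled by three of the parameters above: run_feature_selection, feature_selection_algorithm, and max_selected_features. A minimal sketch of how those knobs might be collected before launching the pipeline; whether they are passed as function arguments or runtime parameters depends on how the pipeline is invoked.

.. code-block:: python

    # Sketch only: the feature-selection knobs described above, gathered as
    # keyword arguments. Values shown are the documented options/defaults.
    feature_selection_kwargs = {
        "run_feature_selection": True,
        # One of "AMI", "CMIM", "JMIM", "MRMR"; "AMI" is the default.
        "feature_selection_algorithm": "AMI",
        # Default cap on selected features when selection is enabled.
        "max_selected_features": 1000,
    }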
- isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. 
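The tf_auto_transform_features mapping and tf_custom_transformation_definitions list above are ordinary Python structures. The sketch below reuses the examples embedded in those descriptions; the feature names and GCS paths are the sample values from the examples, not real assets.

.. code-block:: python

    # Sketch only: structures mirroring the examples in the descriptions above.
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    tf_custom_transformation_definitions = [
        {
            "transformation": "PlusOne",
            "module_path": "gs://bucket/custom_transform_fn.py",  # sample path
            "function_name": "plus_one_transform",
        },
        {
            "transformation": "MultiplyTwo",
            "module_path": "gs://bucket/custom_transform_fn.py",  # sample path
            "function_name": "multiply_two_transform",
        },
    ]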
For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. 
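tf_transformations_path points at a JSON file listing built-in transformations such as the Datetime, Log, ZScale, and Vocabulary entries described above. The sketch below writes such a file locally; in practice the file would be staged to a GCS URI and that URI passed as the parameter value, and the feature names here are placeholders.

.. code-block:: python

    import json

    # Sketch only: a transformation config using the built-ins described
    # above. The local filename is a placeholder; the real file would be
    # uploaded to GCS and referenced via tf_transformations_path.
    transform_config = [
        {"transformation": "Datetime", "input_columns": ["feature_1"],
         "time_format": "%Y-%m-%d"},
        {"transformation": "ZScale", "input_columns": ["feature_2"]},
        {"transformation": "Log", "input_columns": ["feature_3"]},
        {"transformation": "Vocabulary", "input_columns": ["feature_4"]},
    ]

    with open("transform_config.json", "w") as f:
        json.dump(transform_config, f, indent=2)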
Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. 
Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. 
Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' 
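Custom transformations declared in tf_custom_transformation_definitions are referenced from the same configuration by name, as in the PlusX example above. A sketch combining a built-in entry with that custom entry; the module path and the extra "x" argument are the sample values from the description, and the output column name is a clearer placeholder chosen here.

.. code-block:: python

    # Sketch only: mixing a built-in transformation with the custom "PlusX"
    # transformation from the example above. "x" is an argument defined in
    # the user's custom_transform_fn.py, not a built-in FTE field.
    tf_custom_transformation_definitions = [
        {
            "transformation": "PlusX",
            "module_path": "gs://bucket/custom_transform_fn.py",  # sample path
            "function_name": "plus_one_transform",
        },
    ]

    transform_config = [
        {"transformation": "CastToFloat",
         "input_columns": ["feature_1"], "output_columns": ["feature_1"]},
        {"transformation": "PlusX",
         "input_columns": ["col1"],
         "output_columns": ["col1_plus_x"],  # placeholder output name
         "x": 5},
    ]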
- parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' - parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-get-best-hyperparameter-tuning-job-trial: - executorLabel: exec-get-best-hyperparameter-tuning-job-trial - inputDefinitions: - parameters: - gcp_resources: - description: Proto tracking the hyperparameter tuning job. - parameterType: STRING - instance_schema_uri: - defaultValue: '' - description: The instance schema uri. - isOptional: true - parameterType: STRING - prediction_docker_uri: - defaultValue: '' - description: The prediction docker container uri. - isOptional: true - parameterType: STRING - prediction_schema_uri: - defaultValue: '' - description: The prediction schema_uri. - isOptional: true - parameterType: STRING - read_value_from_file: - defaultValue: false - description: If true, read file to get the relevant value. - isOptional: true - parameterType: BOOLEAN - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - trials_dir: - defaultValue: '' - description: The path to the hyperparameter tuning trials. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-get-wide-and-deep-study-spec-parameters: - executorLabel: exec-get-wide-and-deep-study-spec-parameters - inputDefinitions: - parameters: - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' 
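study_spec_parameters_override, consumed by get-wide-and-deep-study-spec-parameters above, is a list of parameter specifications. The rough sketch below assumes the entries follow the Vertex AI StudySpec.ParameterSpec JSON shape; the field names, casing, and parameter ids here are assumptions and should be checked against the component version in use.

.. code-block:: python

    # Sketch only, under an assumption: entries shaped like Vertex AI
    # StudySpec.ParameterSpec JSON. Verify the exact field names against the
    # get-wide-and-deep-study-spec-parameters component before relying on it.
    study_spec_parameters_override = [
        {
            "parameterId": "learning_rate",            # assumed id
            "doubleValueSpec": {"minValue": 1e-4, "maxValue": 1e-1},
            "scaleType": "UNIT_LOG_SCALE",
        },
        {
            "parameterId": "dnn_learning_rate",        # assumed id
            "doubleValueSpec": {"minValue": 1e-4, "maxValue": 1e-1},
            "scaleType": "UNIT_LOG_SCALE",
        },
    ]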
- isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. 
Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. 
- - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. 
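The model-batch-predict component above is configured almost entirely through scalar parameters. A sketch of the values a caller might supply for a GCS-based JSONL job; the parameter names are the ones listed in this inputDefinitions block, while the URIs, project, and display name are invented placeholders.

.. code-block:: python

    # Sketch only: placeholder values for the batch-predict inputs described
    # above. Names match the inputDefinitions; values are illustrative.
    batch_predict_args = {
        "job_display_name": "wide-and-deep-batch-predict",        # placeholder
        "gcs_source_uris": ["gs://my-bucket/instances/*.jsonl"],   # placeholder
        "instances_format": "jsonl",                               # default
        "predictions_format": "jsonl",                             # default
        "gcs_destination_output_uri_prefix": "gs://my-bucket/predictions",
        "generate_explanation": False,
        "machine_type": "n1-standard-4",                           # placeholder
        "starting_replica_count": 1,
        "max_replica_count": 10,
        "location": "us-central1",
        "project": "my-project",                                   # placeholder
    }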
- parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - 
schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. 
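The unmanaged_container_model input above is normally produced with the KFP importer pattern its docstring sketches. A minimal, cleaned-up version of that sketch (assuming the kfp v2 SDK and the google_cloud_pipeline_components artifact types; the GCS URI and prediction image are the placeholder values from the docstring, not requirements of this pipeline):

# Sketch only: the importer pattern described in the unmanaged_container_model
# docstring above. All literal values are the docstring's own placeholders.
from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types

@dsl.pipeline(name='import-unmanaged-model')  # hypothetical wrapper pipeline
def import_unmanaged_model():
    # Import a model directory as an UnmanagedContainerModel artifact,
    # attaching the serving container image via metadata.
    dsl.importer(
        artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            'containerSpec': {
                'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
            }
        },
    )

The resulting artifact can then be wired into the model upload step's unmanaged_container_model input.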
- parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-parse-worker-pool-specs-override: - executorLabel: exec-parse-worker-pool-specs-override - inputDefinitions: - parameters: - worker_pool_specs_override: - description: 'The list of dictionaries for overriding training - - and evaluation worker pool specs.' - parameterType: LIST - outputDefinitions: - parameters: - eval_machine_spec: - description: The eval machine spec. - parameterType: STRUCT - eval_replica_count: - description: The replica count for eval. - parameterType: NUMBER_INTEGER - training_disk_spec: - description: The training disk spec. - parameterType: STRUCT - training_machine_spec: - description: The training machine spec. - parameterType: STRUCT - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' 
- isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - comp-wide-and-deep-hyperparameter-tuning-job: - executorLabel: exec-wide-and-deep-hyperparameter-tuning-job - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to a JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized validation split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Amount of time in seconds to run the trainer for. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to transform output. 
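The hyperparameter tuning component whose inputs continue below takes a study_spec_parameters_override list; each entry follows the Vizier ParameterSpec shape that get_wide_and_deep_study_spec_parameters (defined further down in this file) merges over its defaults, keyed by parameter_id. A rough illustration with assumed values:

# Illustrative only: two override entries in the shape the component expects.
study_spec_parameters_override = [
    {
        'parameter_id': 'learning_rate',
        # assumed search range; the built-in default is 0.0001-0.0005
        'double_value_spec': {'min_value': 0.0001, 'max_value': 0.001},
        'scale_type': 'UNIT_LINEAR_SCALE',
    },
    {
        'parameter_id': 'batch_size',
        'discrete_value_spec': {'values': [1024, 2048]},
    },
]

Entries whose parameter_id does not match a known default are ignored with a warning, as the merging helper later in this file shows.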
- parameters: - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to - - ''auto'', caching is determined based on the dataset size.' - isOptional: true - parameterType: STRING - enable_profiler: - defaultValue: false - description: 'Enables profiling and saves a trace - - during evaluation.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and - - checkpointing will take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not - - specified or negative, it means run evaluation on the whole validation - - dataset. If set to 0, it means run evaluation for a fixed number of - - samples.' - isOptional: true - parameterType: NUMBER_INTEGER - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_failed_trial_count: - defaultValue: 0.0 - description: 'The number of failed trials that - - need to be seen before failing the HyperparameterTuningJob. If set to - 0, - - Vertex AI decides how many trials must fail before the whole job fails.' - isOptional: true - parameterType: NUMBER_INTEGER - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - parallel_trial_count: - description: 'The desired number of trials to run - - in parallel.' - parameterType: NUMBER_INTEGER - prediction_type: - description: 'The type of prediction the model is to - - produce. "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - study_spec_algorithm: - defaultValue: ALGORITHM_UNSPECIFIED - description: 'The search algorithm specified for - - the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or - - ''RANDOM_SEARCH''.' - isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement - - to use if/when the service automatically selects the final measurement - - from previously reported intermediate measurements. One of - - "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, - - possible values: "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize, , possible - - values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', - ''auc'', ''precision'', ''recall''].' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries - - representing parameters to optimize. The dictionary key is the - - parameter_id, which is passed to training job as a command line - - argument, and the dictionary value is the parameter specification of the - - metric.' - parameterType: LIST - target_column: - description: The target column name. 
- parameterType: STRING - training_disk_spec: - defaultValue: - boot_disk_size_gb: 100.0 - boot_disk_type: pd-ssd - description: The training disk spec. - isOptional: true - parameterType: STRUCT - training_machine_spec: - defaultValue: - machine_type: c2-standard-16 - description: 'The training machine - - spec. See https://cloud.google.com/compute/docs/machine-types for - - options.' - isOptional: true - parameterType: STRUCT - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - execution_metrics: - description: Core metrics in dictionary of hyperparameter tuning job execution. - parameterType: STRUCT - gcp_resources: - description: Serialized gcp_resources proto tracking the custom training - job. - parameterType: STRING - instance_schema_uri: - description: The path to the instance schema. - parameterType: STRING - prediction_docker_uri_output: - description: The URI of the prediction container. - parameterType: STRING - prediction_schema_uri: - description: The path to the prediction schema. - parameterType: STRING - trials: - description: The path to the hyperparameter tuning trials - parameterType: STRING -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-get-best-hyperparameter-tuning-job-trial: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_best_hyperparameter_tuning_job_trial - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ - \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ - \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ - \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ - \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ - \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ - \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ - \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ - .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ - \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ - \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ - \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ - \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ - \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ - \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ - \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ - \ provided, read the file before continuing.\n if read_value_from_file:\n\ - \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ - \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ - \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ - \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ - \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ - \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ - \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ - \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ - \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ - \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ - \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ - \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ - \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ - \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ - \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ - \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ - \ for trial in response.trials:\n if trial.final_measurement:\n \ - \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ - \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ - \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ - \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ - \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ - \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ - \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ - \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ - \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ - \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ - \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ - \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim - exec-get-wide-and-deep-study-spec-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_wide_and_deep_study_spec_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_wide_and_deep_study_spec_parameters(\n study_spec_parameters_override:\ - \ list # Required for KFP validation; pylint:disable=g-bare-generic\n)\ - \ -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ - \ study_spec_parameters for a Wide & Deep hyperparameter tuning job.\n\n\ - \ Args:\n study_spec_parameters_override: List of dictionaries representing\ - \ parameters\n to optimize. 
The dictionary key is the parameter_id,\ - \ which is passed to\n training job as a command line argument, and\ - \ the dictionary value is the\n parameter specification of the metric.\n\ - \n Returns:\n List of final Vizier study_spec_parameters of type ParameterSpec.\n\ - \ \"\"\"\n default_params = [\n {\n 'parameter_id': 'max_steps',\n\ - \ 'discrete_value_spec': {\n 'values': [5000, 10000,\ - \ 20000, 30000, 40000, 50000]\n },\n },\n {\n \ - \ 'parameter_id': 'max_train_secs',\n 'discrete_value_spec':\ - \ {'values': [-1]},\n },\n {\n 'parameter_id': 'learning_rate',\n\ - \ 'double_value_spec': {'min_value': 0.0001, 'max_value': 0.0005},\n\ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ - \ 'parameter_id': 'optimizer_type',\n 'categorical_value_spec':\ - \ {'values': ['adam', 'ftrl', 'sgd']},\n },\n {\n 'parameter_id':\ - \ 'l1_regularization_strength',\n 'discrete_value_spec': {'values':\ - \ [0, 0.01, 0.02]},\n },\n {\n 'parameter_id': 'l2_regularization_strength',\n\ - \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ - \ {\n 'parameter_id': 'l2_shrinkage_regularization_strength',\n\ - \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ - \ {\n 'parameter_id': 'beta_1',\n 'discrete_value_spec':\ - \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ - \ 'beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9, 0.999]},\n\ - \ },\n {\n 'parameter_id': 'hidden_units',\n \ - \ 'categorical_value_spec': {'values': ['30,30,30']},\n },\n \ - \ {\n 'parameter_id': 'use_wide',\n 'categorical_value_spec':\ - \ {'values': ['true', 'false']},\n },\n {\n 'parameter_id':\ - \ 'embed_categories',\n 'categorical_value_spec': {'values': ['true',\ - \ 'false']},\n },\n {\n 'parameter_id': 'dnn_dropout',\n\ - \ 'discrete_value_spec': {'values': [0, 0.1, 0.2]},\n },\n\ - \ {\n 'parameter_id': 'dnn_learning_rate',\n 'double_value_spec':\ - \ {'min_value': 0.0001, 'max_value': 0.0005},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n {\n 'parameter_id': 'dnn_optimizer_type',\n \ - \ 'categorical_value_spec': {'values': ['adam', 'ftrl', 'sgd']},\n\ - \ },\n {\n 'parameter_id': 'dnn_l1_regularization_strength',\n\ - \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ - \ {\n 'parameter_id': 'dnn_l2_regularization_strength',\n\ - \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ - \ {\n 'parameter_id': 'dnn_l2_shrinkage_regularization_strength',\n\ - \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ - \ {\n 'parameter_id': 'dnn_beta_1',\n 'discrete_value_spec':\ - \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ - \ 'dnn_beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9,\ - \ 0.999]},\n },\n {\n 'parameter_id': 'batch_size',\n\ - \ 'discrete_value_spec': {'values': [1024, 2048, 4096, 8192, 16384]},\n\ - \ },\n ]\n # pylint:disable=g-import-not-at-top,redefined-outer-name\n\ - \ import warnings\n # pylint:enable=g-import-not-at-top,redefined-outer-name\n\ - \n override_params = {}\n for param in study_spec_parameters_override:\n\ - \ override_params[param['parameter_id']] = param\n\n study_spec_parameters\ - \ = []\n for param in default_params:\n study_spec_parameters.append(\n\ - \ override_params.get(param['parameter_id'], param)\n )\n\n extra_overrides\ - \ = set(override_params) - set(\n p['parameter_id'] for p in default_params\n\ - \ )\n if extra_overrides:\n extra_override_str = ', '.join(extra_overrides)\n\ - \ warnings.warn(\n f'The overrides {extra_override_str} were not\ - \ found in the params and '\n 'will be 
ignored.'\n )\n\n return\ - \ study_spec_parameters\n\n" - image: python:3.7 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - 
'{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-parse-worker-pool-specs-override: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _parse_worker_pool_specs_override - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ - \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ - \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ - \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ - \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ - \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ - \ The list of dictionaries for overriding training\n and evaluation\ - \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ - \ machine spec.\n training_disk_spec: The training disk spec.\n \ - \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ - \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ - \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ - \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ - \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ - \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ - \ training_machine_spec = worker_pool_specs_override[0].get(\n \ - \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ - \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ - \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ - \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ - \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ - \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ - \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'training_machine_spec',\n \ - \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ - \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ - \ eval_machine_spec,\n eval_replica_count,\n )\n\n" - image: python:3.7 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-wide-and-deep-hyperparameter-tuning-job: - container: - args: - - --type - - HyperparameterTuningJobWithMetrics - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --execution_metrics - - '{{$.outputs.parameters[''execution_metrics''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"wide-and-deep-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", - "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", - "\"}], 
\"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", - ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", - "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", - "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", - ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", - ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", - ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", - "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", - ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", - "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", - "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", - "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", - "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", - "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", - "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", - "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", - "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--measurement_selection_type=", - "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", - \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", - "\", \"--seed=", "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", - "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", - "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 -pipelineInfo: - description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
- name: automl-tabular-wide-and-deep-hyperparameter-tuning-job -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--cache_data: - componentInputParameter: cache_data - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--enable_profiler: - componentInputParameter: enable_profiler - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eval_frequency_secs: - componentInputParameter: eval_frequency_secs - pipelinechannel--eval_steps: - componentInputParameter: eval_steps - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--materialized_examples_format: - componentInputParameter: materialized_examples_format - pipelinechannel--max_failed_trial_count: - componentInputParameter: max_failed_trial_count - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--max_trial_count: - componentInputParameter: max_trial_count - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--parallel_trial_count: - componentInputParameter: parallel_trial_count - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - 
pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_algorithm: - componentInputParameter: study_spec_algorithm - pipelinechannel--study_spec_measurement_selection_type: - componentInputParameter: study_spec_measurement_selection_type - pipelinechannel--study_spec_metric_goal: - componentInputParameter: study_spec_metric_goal - pipelinechannel--study_spec_metric_id: - componentInputParameter: study_spec_metric_id - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transform_execution_engine: - componentInputParameter: tf_transform_execution_engine - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--worker_pool_specs_override: - componentInputParameter: worker_pool_specs_override - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 
The Vertex dataset artifact. - parameters: - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Staging directory for BigQuery tables. - isOptional: true - parameterType: STRING - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to ''auto'', caching is - - determined based on the dataset size.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - enable_profiler: - defaultValue: false - description: Enables profiling and saves a trace during evaluation. - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and checkpointing will - - take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not specified or - - negative, it means run evaluation on the whole validation dataset. If set - - to 0, it means run evaluation for a fixed number of samples.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' 
- isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format for the materialized examples. - isOptional: true - parameterType: STRING - max_failed_trial_count: - defaultValue: 0.0 - description: 'The number of failed trials that need to be seen - - before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides - - how many trials must fail before the whole job fails.' - isOptional: true - parameterType: NUMBER_INTEGER - max_selected_features: - defaultValue: -1.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - parallel_trial_count: - description: The desired number of trials to run in parallel. - parameterType: NUMBER_INTEGER - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to enable feature selection. - isOptional: true - parameterType: BOOLEAN - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - study_spec_algorithm: - defaultValue: ALGORITHM_UNSPECIFIED - description: 'The search algorithm specified for the study. One of - - ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' - isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: ' Which measurement to use if/when the - - service automatically selects the final measurement from previously - - reported intermediate measurements. One of "BEST_MEASUREMENT" or - - "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' 
- parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize, possible values: [ ''loss'', - - ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', - - ''recall''].' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: List of auto transform features. - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: '' - description: 'Execution engine to run TF-based - - transformations. Currently supports "dataflow" or "bigquery"' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - worker_pool_specs_override: - description: 'The dictionary for overriding training and - - evaluation worker pool specs. The dictionary should be of format - - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py deleted file mode 100644 index 19eaddb481..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Wide and Deep Trainer component spec.""" - -from typing import Optional - -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def wide_and_deep_trainer( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - learning_rate: float, - dnn_learning_rate: float, - instance_baseline: Input[Artifact], - metadata: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - transform_output: Input[Artifact], - training_schema_uri: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument - weight_column: Optional[str] = '', - max_steps: Optional[int] = -1, - max_train_secs: Optional[int] = -1, - optimizer_type: Optional[str] = 'adam', - l1_regularization_strength: Optional[float] = 0, - l2_regularization_strength: Optional[float] = 0, - l2_shrinkage_regularization_strength: Optional[float] = 0, - beta_1: Optional[float] = 0.9, - beta_2: Optional[float] = 0.999, - hidden_units: Optional[str] = '30,30,30', - use_wide: Optional[bool] = True, - embed_categories: Optional[bool] = True, - dnn_dropout: Optional[float] = 0, - dnn_optimizer_type: Optional[str] = 'ftrl', - dnn_l1_regularization_strength: Optional[float] = 0, - dnn_l2_regularization_strength: Optional[float] = 0, - dnn_l2_shrinkage_regularization_strength: Optional[float] = 0, - dnn_beta_1: Optional[float] = 0.9, - dnn_beta_2: Optional[float] = 0.999, - enable_profiler: Optional[bool] = False, - cache_data: Optional[str] = 'auto', - seed: Optional[int] = 1, - eval_steps: Optional[int] = 0, - batch_size: Optional[int] = 100, - measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', - optimization_metric: Optional[str] = '', - eval_frequency_secs: Optional[int] = 600, - training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, - training_disk_spec: Optional[dict] = { - 'boot_disk_type': 'pd-ssd', - 'boot_disk_size_gb': 100, - }, - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Trains a Wide & Deep model using Vertex CustomJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". - weight_column: The weight column name. - max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the - trainer for. - learning_rate: The learning rate used by the linear optimizer. - optimizer_type: The type of optimizer to use. Choices are - "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent - Optimizers, respectively. 
- l1_regularization_strength: L1 regularization strength - for optimizer_type="ftrl". - l2_regularization_strength: L2 regularization strength - for optimizer_type="ftrl". - l2_shrinkage_regularization_strength: L2 shrinkage - regularization strength for optimizer_type="ftrl". - beta_1: Beta 1 value for optimizer_type="adam". - beta_2: Beta 2 value for optimizer_type="adam". - hidden_units: Hidden layer sizes to use for DNN feature - columns, provided in comma-separated layers. - use_wide: If set to true, the categorical columns will be - used in the wide part of the DNN model. - embed_categories: If set to true, the categorical columns - will be embedded and used in the deep part of the model. Embedding - size is the square root of the column cardinality. - dnn_dropout: The probability we will drop out a given - coordinate. - dnn_learning_rate: The learning rate for training the - deep part of the model. - dnn_optimizer_type: The type of optimizer to use for the - deep part of the model. Choices are "adam", "ftrl" and "sgd" for the - Adam, FTRL, and Gradient Descent Optimizers, respectively. - dnn_l1_regularization_strength: L1 regularization - strength for dnn_optimizer_type="ftrl". - dnn_l2_regularization_strength: L2 regularization - strength for dnn_optimizer_type="ftrl". - dnn_l2_shrinkage_regularization_strength: L2 shrinkage - regularization strength for dnn_optimizer_type="ftrl". - dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". - dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. - seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - batch_size: Batch size for training. - measurement_selection_type: Which measurement to use - if/when the service automatically selects the final measurement from - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - or "LAST_MEASUREMENT". - optimization_metric: Optimization metric used for - `measurement_selection_type`. Default is "rmse" for regression and "auc" - for classification. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. - training_disk_spec: The training disk spec. - instance_baseline: The path to a JSON file for baseline values. - metadata: The path to the metadata artifact. - materialized_train_split: The path to the materialized train split. - materialized_eval_split: The path to the materialized validation split. - transform_output: The path to transform output. - training_schema_uri: The path to the training schema. - encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training job. - unmanaged_container_model: The UnmanagedContainerModel artifact.
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "wide-and-deep-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', - '1', - '", "machine_spec": ', - training_machine_spec, - ', "disk_spec": ', - training_disk_spec, - ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', - '", "args": ["--target_column=', - target_column, - '", "--weight_column=', - weight_column, - '", "--model_type=', - prediction_type, - '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - '", "--baseline_path=', - instance_baseline.uri, - '", "--metadata_path=', - metadata.uri, - '", "--transform_output_path=', - transform_output.uri, - '", "--training_schema_path=', - training_schema_uri.uri, - '", "--job_dir=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--training_data_path=' - ), - materialized_train_split.uri, - '", "--validation_data_path=', - materialized_eval_split.uri, - '", "--max_steps=', - max_steps, - '", "--max_train_secs=', - max_train_secs, - '", "--learning_rate=', - learning_rate, - '", "--optimizer_type=', - optimizer_type, - '", "--l1_regularization_strength=', - l1_regularization_strength, - '", "--l2_regularization_strength=', - l2_regularization_strength, - '", "--l2_shrinkage_regularization_strength=', - l2_shrinkage_regularization_strength, - '", "--beta_1=', - beta_1, - '", "--beta_2=', - beta_2, - '", "--hidden_units=', - hidden_units, - '", "--use_wide=', - use_wide, - '", "--embed_categories=', - embed_categories, - '", "--dnn_dropout=', - dnn_dropout, - '", "--dnn_learning_rate=', - dnn_learning_rate, - '", "--dnn_optimizer_type=', - dnn_optimizer_type, - '", "--dnn_l1_regularization_strength=', - dnn_l1_regularization_strength, - '", "--dnn_l2_regularization_strength=', - dnn_l2_regularization_strength, - '", "--dnn_l2_shrinkage_regularization_strength=', - dnn_l2_shrinkage_regularization_strength, - '", "--dnn_beta_1=', - dnn_beta_1, - '", "--dnn_beta_2=', - dnn_beta_2, - '", "--enable_profiler=', - enable_profiler, - '", "--cache_data=', - cache_data, - '", "--seed=', - seed, - '", "--eval_steps=', - eval_steps, - '", "--batch_size=', - batch_size, - '", "--measurement_selection_type=', - measurement_selection_type, - '", "--optimization_metric=', - optimization_metric, - '", "--eval_frequency_secs=', - eval_frequency_secs, - '", "--executor_input={{$.json_escape[1]}}"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml deleted file mode 100644 index 748711a0dd..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ /dev/null @@ -1,4048 +0,0 
@@ -# PIPELINE DEFINITION -# Name: automl-tabular-wide-and-deep-trainer -# Description: The Wide & Deep training pipeline. -# Inputs: -# batch_size: int [Default: 100.0] -# beta_1: float [Default: 0.9] -# beta_2: float [Default: 0.999] -# bigquery_staging_full_dataset_id: str [Default: ''] -# cache_data: str [Default: 'auto'] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# dnn_beta_1: float [Default: 0.9] -# dnn_beta_2: float [Default: 0.999] -# dnn_dropout: float [Default: 0.0] -# dnn_l1_regularization_strength: float [Default: 0.0] -# dnn_l2_regularization_strength: float [Default: 0.0] -# dnn_l2_shrinkage_regularization_strength: float [Default: 0.0] -# dnn_learning_rate: float -# dnn_optimizer_type: str [Default: 'adam'] -# embed_categories: bool [Default: True] -# enable_profiler: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# eval_frequency_secs: int [Default: 600.0] -# eval_steps: int [Default: 0.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_selection_algorithm: str [Default: 'AMI'] -# hidden_units: str [Default: '30,30,30'] -# l1_regularization_strength: float [Default: 0.0] -# l2_regularization_strength: float [Default: 0.0] -# l2_shrinkage_regularization_strength: float [Default: 0.0] -# learning_rate: float -# location: str -# materialized_examples_format: str [Default: 'tfrecords_gzip'] -# max_selected_features: int [Default: -1.0] -# max_steps: int [Default: -1.0] -# max_train_secs: int [Default: -1.0] -# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# optimization_metric: str [Default: ''] -# optimizer_type: str [Default: 'adam'] -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# seed: int [Default: 1.0] -# stratified_split_key: str [Default: ''] -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transform_execution_engine: str [Default: ''] -# tf_transformations_path: str [Default: ''] -# training_fraction: float [Default: -1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# use_wide: bool [Default: True] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# worker_pool_specs_override: list -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - 
defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--wide-and-deep-trainer-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - 
dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - inputDefinitions: - artifacts: - pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - wide-and-deep-trainer - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: wide-and-deep-trainer - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - wide-and-deep-trainer - 
inputs: - artifacts: - pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: wide-and-deep-trainer - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - 
componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: pipelinechannel--location - materialized_examples_format: - componentInputParameter: pipelinechannel--materialized_examples_format - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - model_type: - runtimeValue: - constant: neural_network - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transform_execution_engine: - componentInputParameter: pipelinechannel--tf_transform_execution_engine - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-infra-validator - - wide-and-deep-trainer - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: wide-and-deep-trainer - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - parse-worker-pool-specs-override: - cachingOptions: - enableCache: true - componentRef: - name: comp-parse-worker-pool-specs-override - inputs: - parameters: - worker_pool_specs_override: - componentInputParameter: pipelinechannel--worker_pool_specs_override - taskInfo: - name: parse-worker-pool-specs-override - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - 
producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - wide-and-deep-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-wide-and-deep-trainer - dependentTasks: - - feature-transform-engine - - parse-worker-pool-specs-override - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: training-configurator-and-validator - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - batch_size: - componentInputParameter: pipelinechannel--batch_size - beta_1: - componentInputParameter: pipelinechannel--beta_1 - beta_2: - componentInputParameter: pipelinechannel--beta_2 - cache_data: - componentInputParameter: pipelinechannel--cache_data - dnn_beta_1: - componentInputParameter: pipelinechannel--dnn_beta_1 - dnn_beta_2: - componentInputParameter: pipelinechannel--dnn_beta_2 - dnn_dropout: - componentInputParameter: pipelinechannel--dnn_dropout - dnn_l1_regularization_strength: - componentInputParameter: pipelinechannel--dnn_l1_regularization_strength - dnn_l2_regularization_strength: - componentInputParameter: pipelinechannel--dnn_l2_regularization_strength - dnn_l2_shrinkage_regularization_strength: - componentInputParameter: pipelinechannel--dnn_l2_shrinkage_regularization_strength - dnn_learning_rate: - componentInputParameter: pipelinechannel--dnn_learning_rate - dnn_optimizer_type: - componentInputParameter: pipelinechannel--dnn_optimizer_type - embed_categories: - componentInputParameter: pipelinechannel--embed_categories - enable_profiler: - componentInputParameter: pipelinechannel--enable_profiler - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - eval_frequency_secs: - componentInputParameter: pipelinechannel--eval_frequency_secs - 
eval_steps: - componentInputParameter: pipelinechannel--eval_steps - hidden_units: - componentInputParameter: pipelinechannel--hidden_units - l1_regularization_strength: - componentInputParameter: pipelinechannel--l1_regularization_strength - l2_regularization_strength: - componentInputParameter: pipelinechannel--l2_regularization_strength - l2_shrinkage_regularization_strength: - componentInputParameter: pipelinechannel--l2_shrinkage_regularization_strength - learning_rate: - componentInputParameter: pipelinechannel--learning_rate - location: - componentInputParameter: pipelinechannel--location - max_steps: - componentInputParameter: pipelinechannel--max_steps - max_train_secs: - componentInputParameter: pipelinechannel--max_train_secs - measurement_selection_type: - componentInputParameter: pipelinechannel--measurement_selection_type - optimization_metric: - componentInputParameter: pipelinechannel--optimization_metric - optimizer_type: - componentInputParameter: pipelinechannel--optimizer_type - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - seed: - componentInputParameter: pipelinechannel--seed - target_column: - componentInputParameter: pipelinechannel--target_column - training_disk_spec: - taskOutputParameter: - outputParameterKey: training_disk_spec - producerTask: parse-worker-pool-specs-override - training_machine_spec: - taskOutputParameter: - outputParameterKey: training_machine_spec - producerTask: parse-worker-pool-specs-override - use_wide: - componentInputParameter: pipelinechannel--use_wide - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: wide-and-deep-trainer - inputDefinitions: - parameters: - pipelinechannel--batch_size: - parameterType: NUMBER_INTEGER - pipelinechannel--beta_1: - parameterType: NUMBER_DOUBLE - pipelinechannel--beta_2: - parameterType: NUMBER_DOUBLE - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--cache_data: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--dnn_beta_1: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_beta_2: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_dropout: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_l1_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_l2_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_l2_shrinkage_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_learning_rate: - parameterType: NUMBER_DOUBLE - pipelinechannel--dnn_optimizer_type: - parameterType: STRING - pipelinechannel--embed_categories: - parameterType: BOOLEAN - pipelinechannel--enable_profiler: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eval_frequency_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--eval_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - 
pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--hidden_units: - parameterType: STRING - pipelinechannel--l1_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--l2_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--l2_shrinkage_regularization_strength: - parameterType: NUMBER_DOUBLE - pipelinechannel--learning_rate: - parameterType: NUMBER_DOUBLE - pipelinechannel--location: - parameterType: STRING - pipelinechannel--materialized_examples_format: - parameterType: STRING - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--max_steps: - parameterType: NUMBER_INTEGER - pipelinechannel--max_train_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--measurement_selection_type: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--optimization_metric: - parameterType: STRING - pipelinechannel--optimizer_type: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transform_execution_engine: - parameterType: STRING - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--use_wide: - parameterType: BOOLEAN - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - pipelinechannel--worker_pool_specs_override: - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - 
autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. 
Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. 
- - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' 
- isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
- isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. 
If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. 
The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. 
If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
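The model-batch-predict component above exposes the same parameter names as the GCPC ModelBatchPredictOp, so a minimal KFP SDK sketch may help make the wiring concrete. The import path, bucket URIs, and machine type below are assumptions for illustration; the importer step mirrors the unmanaged-container-model example quoted in the model-upload description further down, and is only a sketch, not the pipeline's actual method.

# Minimal sketch: invoking the batch-predict component from a KFP pipeline.
# Assumes google-cloud-pipeline-components is installed; the import path,
# GCS URIs, and machine type are illustrative placeholders.
from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


@dsl.pipeline(name="batch-predict-example")
def batch_predict_pipeline(project: str, location: str = "us-central1"):
    # Import a model that is not registered in Vertex AI, mirroring the
    # importer example in the model-upload component description below.
    unmanaged_model = dsl.importer(
        artifact_uri="gs://example-bucket/model",  # placeholder URI
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod"
            }
        },
    )
    # Either `model` or `unmanaged_container_model` must be supplied; here the
    # imported unmanaged model is used.
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name="example-batch-predict",
        unmanaged_container_model=unmanaged_model.output,
        instances_format="jsonl",
        gcs_source_uris=["gs://example-bucket/instances/*.jsonl"],
        gcs_destination_output_uri_prefix="gs://example-bucket/predictions",
        machine_type="n1-standard-4",   # dedicated resources
        starting_replica_count=1,
        max_replica_count=2,
    )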
- parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. 
Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-parse-worker-pool-specs-override: - executorLabel: exec-parse-worker-pool-specs-override - inputDefinitions: - parameters: - worker_pool_specs_override: - description: 'The list of dictionaries for overriding training - - and evaluation worker pool specs.' - parameterType: LIST - outputDefinitions: - parameters: - eval_machine_spec: - description: The eval machine spec. - parameterType: STRUCT - eval_replica_count: - description: The replica count for eval. - parameterType: NUMBER_INTEGER - training_disk_spec: - description: The training disk spec. - parameterType: STRUCT - training_machine_spec: - description: The training machine spec. - parameterType: STRUCT - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. 
- parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' 
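The optimization_objective documentation above enumerates which objectives are valid for each prediction type. Restated as a small Python lookup table for quick reference (the content is taken directly from the description above; defaults are marked in comments):

# Supported optimization objectives per prediction type, per the
# training-configurator-and-validator input documented above.
OPTIMIZATION_OBJECTIVES = {
    "classification": [
        "maximize-au-roc",              # default
        "minimize-log-loss",
        "maximize-au-prc",
        "maximize-precision-at-recall",
        "maximize-recall-at-precision",
    ],
    "classification (multi-class)": [
        "minimize-log-loss",            # default
    ],
    "regression": [
        "minimize-rmse",                # default
        "minimize-mae",
        "minimize-rmsle",
    ],
}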
- isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - comp-wide-and-deep-trainer: - executorLabel: exec-wide-and-deep-trainer - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to a JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized validation split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the materialized train split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Amount of time in seconds to run the trainer for. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The path to transform output. - parameters: - batch_size: - defaultValue: 100.0 - description: Batch size for training. - isOptional: true - parameterType: NUMBER_INTEGER - beta_1: - defaultValue: 0.9 - description: Beta 1 value for optimizer_type="adam". - isOptional: true - parameterType: NUMBER_DOUBLE - beta_2: - defaultValue: 0.999 - description: Beta 2 value for optimizer_type="adam". - isOptional: true - parameterType: NUMBER_DOUBLE - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to - - ''auto'', caching is determined based on the dataset size.' - isOptional: true - parameterType: STRING - dnn_beta_1: - defaultValue: 0.9 - description: Beta 1 value for dnn_optimizer_type="adam". - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_beta_2: - defaultValue: 0.999 - description: Beta 2 value for dnn_optimizer_type="adam". - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_dropout: - defaultValue: 0.0 - description: 'The probability we will drop out a given - - coordinate.' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l1_regularization_strength: - defaultValue: 0.0 - description: 'L1 regularization - - strength for dnn_optimizer_type="ftrl".' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l2_regularization_strength: - defaultValue: 0.0 - description: 'L2 regularization - - strength for dnn_optimizer_type="ftrl".' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l2_shrinkage_regularization_strength: - defaultValue: 0.0 - description: 'L2 shrinkage - - regularization strength for dnn_optimizer_type="ftrl".' 
- isOptional: true - parameterType: NUMBER_DOUBLE - dnn_learning_rate: - description: 'The learning rate for training the - - deep part of the model.' - parameterType: NUMBER_DOUBLE - dnn_optimizer_type: - defaultValue: ftrl - description: 'The type of optimizer to use for the - - deep part of the model. Choices are "adam", "ftrl" and "sgd". for the - - Adam, FTRL, and Gradient Descent Optimizers, respectively.' - isOptional: true - parameterType: STRING - embed_categories: - defaultValue: true - description: 'If set to true, the categorical columns - - will be used embedded and used in the deep part of the model. Embedding - - size is the square root of the column cardinality.' - isOptional: true - parameterType: BOOLEAN - enable_profiler: - defaultValue: false - description: 'Enables profiling and saves a trace - - during evaluation.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and - - checkpointing will take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not - - specified or negative, it means run evaluation on the whole validation - - dataset. If set to 0, it means run evaluation for a fixed number of - - samples.' - isOptional: true - parameterType: NUMBER_INTEGER - hidden_units: - defaultValue: 30,30,30 - description: 'Hidden layer sizes to use for DNN feature - - columns, provided in comma-separated layers.' - isOptional: true - parameterType: STRING - l1_regularization_strength: - defaultValue: 0.0 - description: 'L1 regularization strength - - for optimizer_type="ftrl".' - isOptional: true - parameterType: NUMBER_DOUBLE - l2_regularization_strength: - defaultValue: 0.0 - description: 'L2 regularization strength - - for optimizer_type="ftrl"' - isOptional: true - parameterType: NUMBER_DOUBLE - l2_shrinkage_regularization_strength: - defaultValue: 0.0 - description: 'L2 shrinkage - - regularization strength for optimizer_type="ftrl".' - isOptional: true - parameterType: NUMBER_DOUBLE - learning_rate: - description: The learning rate used by the linear optimizer. - parameterType: NUMBER_DOUBLE - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_steps: - defaultValue: -1.0 - description: Number of steps to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - max_train_secs: - defaultValue: -1.0 - description: 'Amount of time in seconds to run the - - trainer for.' - isOptional: true - parameterType: NUMBER_INTEGER - measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement to use - - if/when the service automatically selects the final measurement from - - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - - or "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - optimization_metric: - defaultValue: '' - description: 'Optimization metric used for - - `measurement_selection_type`. Default is "rmse" for regression and "auc" - - for classification.' - isOptional: true - parameterType: STRING - optimizer_type: - defaultValue: adam - description: 'The type of optimizer to use. Choices are - - "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent - - Optimizers, respectively.' 
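The trainer inputs above map onto command-line flags of the training container shown later in deploymentSpec. As a rough sketch, a hyperparameter set for this trainer might look like the following; learning_rate and dnn_learning_rate carry no defaults and are set to illustrative values, while the rest simply mirror the defaults documented above:

# Illustrative Wide & Deep hyperparameters; only the first two values
# are required by the component, and both numbers are placeholders.
wide_and_deep_params = {
    "learning_rate": 0.01,        # required, illustrative value
    "dnn_learning_rate": 0.001,   # required, illustrative value
    "batch_size": 100,
    "optimizer_type": "adam",
    "dnn_optimizer_type": "ftrl",
    "hidden_units": "30,30,30",   # comma-separated DNN layer sizes
    "use_wide": True,
    "embed_categories": True,
    "cache_data": "auto",
    "max_steps": -1,              # -1 = no explicit step limit
    "seed": 1,
}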
- isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to - - produce. "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - target_column: - description: The target column name. - parameterType: STRING - training_disk_spec: - defaultValue: - boot_disk_size_gb: 100.0 - boot_disk_type: pd-ssd - description: The training disk spec. - isOptional: true - parameterType: STRUCT - training_machine_spec: - defaultValue: - machine_type: c2-standard-16 - description: 'The training machine - - spec. See https://cloud.google.com/compute/docs/machine-types for - - options.' - isOptional: true - parameterType: STRUCT - use_wide: - defaultValue: true - description: 'If set to true, the categorical columns will be - - used in the wide part of the DNN model.' - isOptional: true - parameterType: BOOLEAN - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: The UnmanagedContainerModel artifact. - parameters: - gcp_resources: - description: Serialized gcp_resources proto tracking the custom training - job. - parameterType: STRING -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - 
instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-parse-worker-pool-specs-override: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _parse_worker_pool_specs_override - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ - \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ - \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ - \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ - \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ - \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ - \ The list of dictionaries for overriding training\n and evaluation\ - \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ - \ machine spec.\n training_disk_spec: The training disk spec.\n \ - \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ - \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ - \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ - \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ - \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ - \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ - \ training_machine_spec = worker_pool_specs_override[0].get(\n \ - \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ - \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ - \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ - \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ - \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ - \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ - \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'training_machine_spec',\n \ - \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ - \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ - \ eval_machine_spec,\n eval_replica_count,\n )\n\n" - image: python:3.7 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-wide-and-deep-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"wide-and-deep-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", - "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", - ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", - "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", - "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", - "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", - "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", - "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--optimizer_type=", - "{{$.inputs.parameters[''optimizer_type'']}}", "\", \"--l1_regularization_strength=", - "{{$.inputs.parameters[''l1_regularization_strength'']}}", "\", \"--l2_regularization_strength=", - "{{$.inputs.parameters[''l2_regularization_strength'']}}", "\", \"--l2_shrinkage_regularization_strength=", - "{{$.inputs.parameters[''l2_shrinkage_regularization_strength'']}}", "\", - \"--beta_1=", "{{$.inputs.parameters[''beta_1'']}}", "\", \"--beta_2=", - "{{$.inputs.parameters[''beta_2'']}}", "\", \"--hidden_units=", "{{$.inputs.parameters[''hidden_units'']}}", - "\", \"--use_wide=", "{{$.inputs.parameters[''use_wide'']}}", "\", \"--embed_categories=", - "{{$.inputs.parameters[''embed_categories'']}}", "\", \"--dnn_dropout=", - "{{$.inputs.parameters[''dnn_dropout'']}}", "\", \"--dnn_learning_rate=", - "{{$.inputs.parameters[''dnn_learning_rate'']}}", "\", \"--dnn_optimizer_type=", - "{{$.inputs.parameters[''dnn_optimizer_type'']}}", "\", \"--dnn_l1_regularization_strength=", - "{{$.inputs.parameters[''dnn_l1_regularization_strength'']}}", "\", \"--dnn_l2_regularization_strength=", - "{{$.inputs.parameters[''dnn_l2_regularization_strength'']}}", "\", \"--dnn_l2_shrinkage_regularization_strength=", - "{{$.inputs.parameters[''dnn_l2_shrinkage_regularization_strength'']}}", - "\", \"--dnn_beta_1=", "{{$.inputs.parameters[''dnn_beta_1'']}}", "\", \"--dnn_beta_2=", - "{{$.inputs.parameters[''dnn_beta_2'']}}", "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", - "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", - "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", - "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", - "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", - "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", - "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 -pipelineInfo: - description: The Wide & Deep training pipeline. - name: automl-tabular-wide-and-deep-trainer -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--batch_size: - componentInputParameter: batch_size - pipelinechannel--beta_1: - componentInputParameter: beta_1 - pipelinechannel--beta_2: - componentInputParameter: beta_2 - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--cache_data: - componentInputParameter: cache_data - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--dnn_beta_1: - componentInputParameter: dnn_beta_1 - pipelinechannel--dnn_beta_2: - componentInputParameter: dnn_beta_2 - pipelinechannel--dnn_dropout: - componentInputParameter: dnn_dropout - pipelinechannel--dnn_l1_regularization_strength: - componentInputParameter: dnn_l1_regularization_strength - pipelinechannel--dnn_l2_regularization_strength: - componentInputParameter: dnn_l2_regularization_strength - pipelinechannel--dnn_l2_shrinkage_regularization_strength: - componentInputParameter: dnn_l2_shrinkage_regularization_strength - pipelinechannel--dnn_learning_rate: - componentInputParameter: dnn_learning_rate - pipelinechannel--dnn_optimizer_type: - componentInputParameter: dnn_optimizer_type - pipelinechannel--embed_categories: - componentInputParameter: embed_categories - pipelinechannel--enable_profiler: - componentInputParameter: enable_profiler - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eval_frequency_secs: - componentInputParameter: eval_frequency_secs - pipelinechannel--eval_steps: - componentInputParameter: eval_steps - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: 
evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--hidden_units: - componentInputParameter: hidden_units - pipelinechannel--l1_regularization_strength: - componentInputParameter: l1_regularization_strength - pipelinechannel--l2_regularization_strength: - componentInputParameter: l2_regularization_strength - pipelinechannel--l2_shrinkage_regularization_strength: - componentInputParameter: l2_shrinkage_regularization_strength - pipelinechannel--learning_rate: - componentInputParameter: learning_rate - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--materialized_examples_format: - componentInputParameter: materialized_examples_format - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--max_steps: - componentInputParameter: max_steps - pipelinechannel--max_train_secs: - componentInputParameter: max_train_secs - pipelinechannel--measurement_selection_type: - componentInputParameter: measurement_selection_type - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--optimization_metric: - componentInputParameter: optimization_metric - pipelinechannel--optimizer_type: - componentInputParameter: optimizer_type - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transform_execution_engine: - componentInputParameter: tf_transform_execution_engine - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: 
- componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--use_wide: - componentInputParameter: use_wide - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - pipelinechannel--worker_pool_specs_override: - componentInputParameter: worker_pool_specs_override - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - batch_size: - defaultValue: 100.0 - description: Batch size for training. - isOptional: true - parameterType: NUMBER_INTEGER - beta_1: - defaultValue: 0.9 - description: Beta 1 value for optimizer_type='adam'. - isOptional: true - parameterType: NUMBER_DOUBLE - beta_2: - defaultValue: 0.999 - description: Beta 2 value for optimizer_type='adam'. - isOptional: true - parameterType: NUMBER_DOUBLE - bigquery_staging_full_dataset_id: - defaultValue: '' - description: Staging directory for BigQuery tables. - isOptional: true - parameterType: STRING - cache_data: - defaultValue: auto - description: 'Whether to cache data or not. If set to ''auto'', caching is - - determined based on the dataset size.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - dnn_beta_1: - defaultValue: 0.9 - description: Beta 1 value for dnn_optimizer_type='adam'. 
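The root DAG above wires the actual work into exit-handler-1 and runs automl-tabular-finalizer with triggerPolicy ALL_UPSTREAM_TASKS_COMPLETED, which is how the KFP compiler lowers a dsl.ExitHandler block. A minimal sketch of that source-level pattern (component and pipeline names are invented, not taken from this spec):

from kfp import dsl


@dsl.component
def cleanup():
    print("cleanup runs after everything else, even on failure")


@dsl.component
def main_work():
    print("main work")


@dsl.pipeline(name="exit-handler-sketch")
def pipeline():
    exit_task = cleanup()
    # Everything inside the ExitHandler is compiled into an
    # 'exit-handler-1' sub-DAG; exit_task fires once it completes.
    with dsl.ExitHandler(exit_task):
        main_work()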
- isOptional: true - parameterType: NUMBER_DOUBLE - dnn_beta_2: - defaultValue: 0.999 - description: Beta 2 value for dnn_optimizer_type='adam'. - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_dropout: - defaultValue: 0.0 - description: The probability we will drop out a given coordinate. - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l1_regularization_strength: - defaultValue: 0.0 - description: 'L1 regularization strength for - - dnn_optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l2_regularization_strength: - defaultValue: 0.0 - description: 'L2 regularization strength for - - dnn_optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_l2_shrinkage_regularization_strength: - defaultValue: 0.0 - description: 'L2 shrinkage regularization - - strength for dnn_optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - dnn_learning_rate: - description: 'The learning rate for training the deep part of the - - model.' - parameterType: NUMBER_DOUBLE - dnn_optimizer_type: - defaultValue: adam - description: 'The type of optimizer to use for the deep part of the - - model. Choices are ''adam'', ''ftrl'' and ''sgd''. for the Adam, FTRL, and - - Gradient Descent Optimizers, respectively.' - isOptional: true - parameterType: STRING - embed_categories: - defaultValue: true - description: 'If set to true, the categorical columns will be used - - embedded and used in the deep part of the model. Embedding size is the - - square root of the column cardinality.' - isOptional: true - parameterType: BOOLEAN - enable_profiler: - defaultValue: false - description: Enables profiling and saves a trace during evaluation. - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_frequency_secs: - defaultValue: 600.0 - description: 'Frequency at which evaluation and checkpointing will - - take place.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_steps: - defaultValue: 0.0 - description: 'Number of steps to run evaluation for. If not specified or - - negative, it means run evaluation on the whole validation dataset. If set - - to 0, it means run evaluation for a fixed number of samples.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' 
- isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - hidden_units: - defaultValue: 30,30,30 - description: 'Hidden layer sizes to use for DNN feature columns, provided - in - - comma-separated layers.' - isOptional: true - parameterType: STRING - l1_regularization_strength: - defaultValue: 0.0 - description: 'L1 regularization strength for - - optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - l2_regularization_strength: - defaultValue: 0.0 - description: 'L2 regularization strength for - - optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - l2_shrinkage_regularization_strength: - defaultValue: 0.0 - description: 'L2 shrinkage regularization strength - - for optimizer_type=''ftrl''.' - isOptional: true - parameterType: NUMBER_DOUBLE - learning_rate: - description: The learning rate used by the linear optimizer. - parameterType: NUMBER_DOUBLE - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: The format for the materialized examples. - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: -1.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - max_steps: - defaultValue: -1.0 - description: Number of steps to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - max_train_secs: - defaultValue: -1.0 - description: Amount of time in seconds to run the trainer for. - isOptional: true - parameterType: NUMBER_INTEGER - measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement to use if/when the service - - automatically selects the final measurement from previously reported - - intermediate measurements. One of "BEST_MEASUREMENT" or - - "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - optimization_metric: - defaultValue: '' - description: 'Optimization metric used for - - `measurement_selection_type`. Default is "rmse" for regression and "auc" - - for classification.' - isOptional: true - parameterType: STRING - optimizer_type: - defaultValue: adam - description: 'The type of optimizer to use. Choices are "adam", "ftrl" and - - "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively.' - isOptional: true - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' 
- parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to enable feature selection. - isOptional: true - parameterType: BOOLEAN - seed: - defaultValue: 1.0 - description: Seed to be used for this run. - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: List of auto transform features. - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: '' - description: 'Execution engine to run TF-based - - transformations. Currently supports "dataflow" or "bigquery"' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - use_wide: - defaultValue: true - description: 'If set to true, the categorical columns will be used in the - wide - - part of the DNN model.' - isOptional: true - parameterType: BOOLEAN - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - worker_pool_specs_override: - description: 'The dictionary for overriding training and - - evaluation worker pool specs. The dictionary should be of format - - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' 
- isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py deleted file mode 100644 index a96e46d984..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML XGBoost Hyperparameter Tuning component spec.""" - -from typing import Optional - -from kfp import dsl - - -@dsl.container_component -def xgboost_hyperparameter_tuning_job( - project: str, - location: str, - study_spec_metric_id: str, - study_spec_metric_goal: str, - study_spec_parameters_override: list, - max_trial_count: int, - parallel_trial_count: int, - worker_pool_specs: list, - gcp_resources: dsl.OutputPath(str), - max_failed_trial_count: Optional[int] = 0, - study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', - study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Tunes XGBoost hyperparameters using Vertex HyperparameterTuningJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - study_spec_metric_id: Metric to optimize. For options, - please look under 'eval_metric' at - https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. - study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. - max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". - worker_pool_specs: The worker pool specs. 
- encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', - ], - args=[ - '--type', - 'HyperparameterTuningJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "xgboost-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "study_spec": {"metrics": [{"metric_id": "', - study_spec_metric_id, - '", "goal": "', - study_spec_metric_goal, - '"}], "parameters": ', - study_spec_parameters_override, - ', "algorithm": "', - study_spec_algorithm, - '", "measurement_selection_type": "', - study_spec_measurement_selection_type, - '"}, "max_trial_count": ', - max_trial_count, - ', "parallel_trial_count": ', - parallel_trial_count, - ', "max_failed_trial_count": ', - max_failed_trial_count, - ', "trial_job_spec": {"worker_pool_specs": ', - worker_pool_specs, - '}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml deleted file mode 100644 index 8c3017aa09..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +++ /dev/null @@ -1,4332 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-xgboost-hyperparameter-tuning-job -# Description: The XGBoost HyperparameterTuningJob pipeline. 
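The removed xgboost_hyperparameter_tuning_job component above wraps the Vertex HyperparameterTuningJob launcher behind a KFP container component whose arguments are documented in its docstring. The following sketch is not part of this patch; it is a minimal, illustrative example of how that component could have been invoked from a pipeline, assuming the pre-removal google_cloud_pipeline_components package layout and KFP v2 DSL. All argument values are placeholders, not values used anywhere in this patch.

# Hedged sketch only: assumes the pre-removal GCPC module path shown in the diff.
from kfp import compiler, dsl
from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import (
    xgboost_hyperparameter_tuning_job,
)

@dsl.pipeline(name='xgboost-hptuning-demo')
def demo_pipeline(
    project: str,
    location: str,
    worker_pool_specs: list,
    study_spec_parameters_override: list,
):
    # gcp_resources is declared as dsl.OutputPath(str), so the caller does not pass it.
    xgboost_hyperparameter_tuning_job(
        project=project,
        location=location,
        study_spec_metric_id='rmse',          # placeholder metric id
        study_spec_metric_goal='MINIMIZE',    # placeholder goal
        study_spec_parameters_override=study_spec_parameters_override,
        max_trial_count=10,                   # placeholder
        parallel_trial_count=2,               # placeholder
        worker_pool_specs=worker_pool_specs,
    )

if __name__ == '__main__':
    compiler.Compiler().compile(demo_pipeline, 'demo_pipeline.yaml')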
-# Inputs: -# bigquery_staging_full_dataset_id: str [Default: ''] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# disable_default_eval_metric: int [Default: 0.0] -# encryption_spec_key_name: str [Default: ''] -# eval_metric: str [Default: ''] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_selection_algorithm: str [Default: 'AMI'] -# location: str -# max_failed_trial_count: int [Default: 0.0] -# max_selected_features: int [Default: -1.0] -# max_trial_count: int -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# objective: str -# parallel_trial_count: int -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: False] -# run_feature_selection: bool [Default: False] -# seed: int [Default: 0.0] -# seed_per_iteration: bool [Default: False] -# stratified_split_key: str [Default: ''] -# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] -# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] -# study_spec_metric_goal: str -# study_spec_metric_id: str -# study_spec_parameters_override: list -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transformations_path: str [Default: ''] -# training_accelerator_count: int [Default: 0.0] -# training_accelerator_type: str [Default: ''] -# training_fraction: float [Default: -1.0] -# training_machine_type: str [Default: 'c2-standard-16'] -# training_total_replica_count: int [Default: 1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - inputDefinitions: - artifacts: - 
pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--get-prediction-type-for-xgboost-Output: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - get-best-hyperparameter-tuning-job-trial - - get-prediction-type-for-xgboost - inputs: - artifacts: - pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--get-prediction-type-for-xgboost-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - dependentTasks: - - get-prediction-type-for-xgboost - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: pipelinechannel--location - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - 
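The run-evaluation trigger above (a triggerPolicy whose condition compares the bool-identity output to 'true') is the compiled form of a condition block in the KFP DSL. Below is a minimal sketch, assuming KFP v2, of how such a gate is typically authored; the components here are purely illustrative placeholders, not the actual GCPC evaluation components removed by this patch.

# Hedged sketch only: shows how a triggerPolicy like the one above is emitted
# for a dsl.Condition block; component bodies are placeholders.
from kfp import dsl

@dsl.component
def bool_identity(value: bool) -> str:
    return 'true' if value else 'false'

@dsl.component
def run_evaluation_step() -> None:
    print('evaluation tasks would run here')

@dsl.pipeline(name='conditional-evaluation-demo')
def demo(run_evaluation: bool = False):
    gate = bool_identity(value=run_evaluation)
    # Compiles to a triggerPolicy.condition of the form:
    #   inputs.parameter_values['pipelinechannel--bool-identity-Output'] == 'true'
    with dsl.Condition(gate.output == 'true'):
        run_evaluation_step()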
componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - generate-xgboost-hyperparameter-tuning-worker-pool-specs: - cachingOptions: - enableCache: true - componentRef: - name: comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs - dependentTasks: - - feature-transform-engine - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - accelerator_count: - componentInputParameter: pipelinechannel--training_accelerator_count - accelerator_type: - componentInputParameter: pipelinechannel--training_accelerator_type - disable_default_eval_metric: - componentInputParameter: pipelinechannel--disable_default_eval_metric - eval_metric: - componentInputParameter: pipelinechannel--eval_metric - machine_type: - componentInputParameter: pipelinechannel--training_machine_type - objective: - componentInputParameter: pipelinechannel--objective - seed: - componentInputParameter: pipelinechannel--seed - seed_per_iteration: - componentInputParameter: pipelinechannel--seed_per_iteration - target_column: - componentInputParameter: pipelinechannel--target_column - total_replica_count: - componentInputParameter: pipelinechannel--training_total_replica_count - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: generate-xgboost-hyperparameter-tuning-worker-pool-specs - get-best-hyperparameter-tuning-job-trial: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-best-hyperparameter-tuning-job-trial - dependentTasks: - - generate-xgboost-hyperparameter-tuning-worker-pool-specs - - xgboost-hyperparameter-tuning-job - inputs: - parameters: - gcp_resources: - taskOutputParameter: - outputParameterKey: gcp_resources - producerTask: xgboost-hyperparameter-tuning-job - instance_schema_uri: - taskOutputParameter: - outputParameterKey: instance_schema_path - producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs - prediction_docker_uri: - taskOutputParameter: - outputParameterKey: prediction_docker_uri_artifact_path - 
producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs - prediction_schema_uri: - taskOutputParameter: - outputParameterKey: prediction_schema_path - producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs - read_value_from_file: - runtimeValue: - constant: 1.0 - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - trials_dir: - taskOutputParameter: - outputParameterKey: trials_path - producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs - taskInfo: - name: get-best-hyperparameter-tuning-job-trial - get-prediction-type-for-xgboost: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-type-for-xgboost - inputs: - parameters: - objective: - componentInputParameter: pipelinechannel--objective - taskInfo: - name: get-prediction-type-for-xgboost - get-xgboost-study-spec-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-xgboost-study-spec-parameters - inputs: - parameters: - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - taskInfo: - name: get-xgboost-study-spec-parameters - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - get-best-hyperparameter-tuning-job-trial - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: get-best-hyperparameter-tuning-job-trial - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - - get-prediction-type-for-xgboost - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - xgboost-hyperparameter-tuning-job: - cachingOptions: - enableCache: 
true - componentRef: - name: comp-xgboost-hyperparameter-tuning-job - dependentTasks: - - generate-xgboost-hyperparameter-tuning-worker-pool-specs - - get-xgboost-study-spec-parameters - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - max_failed_trial_count: - componentInputParameter: pipelinechannel--max_failed_trial_count - max_trial_count: - componentInputParameter: pipelinechannel--max_trial_count - parallel_trial_count: - componentInputParameter: pipelinechannel--parallel_trial_count - project: - componentInputParameter: pipelinechannel--project - study_spec_algorithm: - componentInputParameter: pipelinechannel--study_spec_algorithm - study_spec_measurement_selection_type: - componentInputParameter: pipelinechannel--study_spec_measurement_selection_type - study_spec_metric_goal: - componentInputParameter: pipelinechannel--study_spec_metric_goal - study_spec_metric_id: - componentInputParameter: pipelinechannel--study_spec_metric_id - study_spec_parameters_override: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-xgboost-study-spec-parameters - worker_pool_specs: - taskOutputParameter: - outputParameterKey: worker_pool_specs - producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs - taskInfo: - name: xgboost-hyperparameter-tuning-job - inputDefinitions: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--disable_default_eval_metric: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eval_metric: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_failed_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--max_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--objective: - parameterType: STRING - pipelinechannel--parallel_trial_count: - parameterType: NUMBER_INTEGER - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - 
parameterType: BOOLEAN - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - pipelinechannel--seed_per_iteration: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_algorithm: - parameterType: STRING - pipelinechannel--study_spec_measurement_selection_type: - parameterType: STRING - pipelinechannel--study_spec_metric_goal: - parameterType: STRING - pipelinechannel--study_spec_metric_id: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--training_accelerator_count: - parameterType: NUMBER_INTEGER - pipelinechannel--training_accelerator_type: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_machine_type: - parameterType: STRING - pipelinechannel--training_total_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' 
- isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' 
- isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. 
- isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. 
Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. 
Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. 
Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' 
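For orientation alongside the parameter documentation above: the configs that feature_transform_engine consumes through tf_custom_transformation_definitions and tf_transformations_path are plain JSON built from the documented transformation dicts. A minimal Python sketch, assuming hypothetical column names, a placeholder bucket, and a local output file (uploading that file to the GCS location passed as tf_transformations_path is left out):

# Minimal sketch: builds a transformation config in the shape documented above
# (ZScale, Vocabulary, Clip) plus one custom definition, and serializes it to
# JSON. Column names, the module path, and the output file are placeholders.
import json

tf_custom_transformation_definitions = [
    {
        "transformation": "PlusOne",
        "module_path": "gs://bucket/custom_transform_fn.py",  # placeholder bucket
        "function_name": "plus_one_transform",
    },
]

tf_transformations = [
    {"transformation": "ZScale", "input_columns": ["feature_1"]},
    {"transformation": "Vocabulary", "input_columns": ["feature_2"], "top_k": 100},
    {
        "transformation": "Clip",
        "input_columns": ["feature_3"],
        "output_columns": ["feature_3_clipped"],
        "min_value": 1.0,
        "max_value": 10.0,
    },
]

with open("transform_config.json", "w") as f:
    json.dump(tf_transformations, f)

The file written here is what tf_transformations_path would point at; tf_custom_transformation_definitions is passed directly as a LIST parameter.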
- parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' - parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs: - executorLabel: exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path to JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Required. The path to the materialized validation - - split.' - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Required. The path to the materialized train - - split.' - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Required. The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Required. The path to transform output. - parameters: - accelerator_count: - defaultValue: 0.0 - description: Accelerator count. - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: Accelerator type. - isOptional: true - parameterType: STRING - disable_default_eval_metric: - defaultValue: 0.0 - description: 'Flag to disable default metric. Set to >0 to - - disable. Default to 0.' - isOptional: true - parameterType: NUMBER_INTEGER - eval_metric: - defaultValue: '' - description: 'Evaluation metrics for validation data represented as a - - comma-separated string.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: c2-standard-16 - description: Machine type. - isOptional: true - parameterType: STRING - objective: - description: Required. Specifies the learning task and the learning objective. - parameterType: STRING - seed: - defaultValue: 0.0 - description: Random seed. - isOptional: true - parameterType: NUMBER_INTEGER - seed_per_iteration: - defaultValue: false - description: Seed PRNG determnisticly via iterator number. - isOptional: true - parameterType: BOOLEAN - target_column: - description: Required. Target column name. - parameterType: STRING - total_replica_count: - description: Number of workers. - parameterType: NUMBER_INTEGER - weight_column: - defaultValue: '' - description: Weight column name. 
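The generate-xgboost-hyperparameter-tuning-worker-pool-specs component above assembles a worker_pool_specs list for the Vertex AI hyperparameter-tuning job from these machine and accelerator inputs. The exact entries it emits are produced inside its container; purely for orientation, a Vertex AI worker pool spec generally has the following shape (image URI and args are placeholders, not this component's literal output):

# Illustrative shape of a Vertex AI worker_pool_specs entry; not the literal
# output of generate-xgboost-hyperparameter-tuning-worker-pool-specs.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": "c2-standard-16",  # the component's default machine_type
            # accelerator_type / accelerator_count would be added here when used
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": "us-docker.pkg.dev/example/xgboost-trainer:latest",  # placeholder
            "args": ["--objective=reg:squarederror", "--target_column=label"],  # placeholders
        },
    },
]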
- isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - job_dir: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - instance_schema_path: - parameterType: STRING - instance_schema_uri: - parameterType: STRING - prediction_docker_uri_artifact_path: - parameterType: STRING - prediction_docker_uri_output: - parameterType: STRING - prediction_schema_path: - parameterType: STRING - prediction_schema_uri: - parameterType: STRING - trials: - parameterType: STRING - trials_path: - parameterType: STRING - worker_pool_specs: - parameterType: LIST - comp-get-best-hyperparameter-tuning-job-trial: - executorLabel: exec-get-best-hyperparameter-tuning-job-trial - inputDefinitions: - parameters: - gcp_resources: - description: Proto tracking the hyperparameter tuning job. - parameterType: STRING - instance_schema_uri: - defaultValue: '' - description: The instance schema uri. - isOptional: true - parameterType: STRING - prediction_docker_uri: - defaultValue: '' - description: The prediction docker container uri. - isOptional: true - parameterType: STRING - prediction_schema_uri: - defaultValue: '' - description: The prediction schema_uri. - isOptional: true - parameterType: STRING - read_value_from_file: - defaultValue: false - description: If true, read file to get the relevant value. - isOptional: true - parameterType: BOOLEAN - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - trials_dir: - defaultValue: '' - description: The path to the hyperparameter tuning trials. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-get-prediction-type-for-xgboost: - executorLabel: exec-get-prediction-type-for-xgboost - inputDefinitions: - parameters: - objective: - description: The XGBoost training objective - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-xgboost-study-spec-parameters: - executorLabel: exec-get-xgboost-study-spec-parameters - inputDefinitions: - parameters: - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. 
Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. 
All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. 
- `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. 
In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. 
If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. 
- isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - comp-xgboost-hyperparameter-tuning-job: - executorLabel: exec-xgboost-hyperparameter-tuning-job - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_failed_trial_count: - defaultValue: 0.0 - description: 'The number of failed trials that - - need to be seen before failing the HyperparameterTuningJob. If set to - 0, - - Vertex AI decides how many trials must fail before the whole job fails.' - isOptional: true - parameterType: NUMBER_INTEGER - max_trial_count: - description: The desired total number of trials. - parameterType: NUMBER_INTEGER - parallel_trial_count: - description: 'The desired number of trials to run - - in parallel.' - parameterType: NUMBER_INTEGER - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - study_spec_algorithm: - defaultValue: ALGORITHM_UNSPECIFIED - description: 'The search algorithm specified for - - the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or - - ''RANDOM_SEARCH''.' - isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: 'Which measurement - - to use if/when the service automatically selects the final measurement - - from previously reported intermediate measurements. One of - - "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, - - possible values: "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize. For options, - - please look under ''eval_metric'' at - - https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.' 
- parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries - - representing parameters to optimize. The dictionary key is the - - parameter_id, which is passed to training job as a command line - - argument, and the dictionary value is the parameter specification of the - - metric.' - parameterType: LIST - worker_pool_specs: - description: The worker pool specs. - parameterType: LIST - outputDefinitions: - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the custom training - - job.' - parameterType: STRING -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _generate_xgboost_hyperparameter_tuning_worker_pool_specs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _generate_xgboost_hyperparameter_tuning_worker_pool_specs(\n\ - \ total_replica_count: int,\n target_column: str,\n objective:\ - \ str,\n materialized_train_split: dsl.InputPath('MaterializedSplit'),\n\ - \ materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n transform_output:\ - \ dsl.InputPath('TransformOutput'),\n training_schema_uri: dsl.InputPath('DatasetSchema'),\n\ - \ instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ - \ job_dir: dsl.OutputPath('JobDir'),\n instance_schema_uri: dsl.OutputPath(str),\n\ - \ prediction_schema_uri: dsl.OutputPath(str),\n trials: dsl.OutputPath(str),\n\ - \ prediction_docker_uri_output: dsl.OutputPath(str),\n machine_type:\ - \ str = 'c2-standard-16',\n accelerator_type: str = '',\n accelerator_count:\ - \ int = 0,\n weight_column: str = '',\n eval_metric: str = '',\n \ - \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('worker_pool_specs',\ - \ list), # pylint:disable=g-bare-generic\n ('instance_schema_path',\ - \ str),\n ('prediction_schema_path', str),\n ('trials_path',\ - \ str),\n ('prediction_docker_uri_artifact_path', str),\n ],\n\ - ):\n \"\"\"Generates worker pool specs for XGBoost hyperparameter tuning.\n\ - \n For single machine XGBoost training, returns one worker pool spec for\ - \ master.\n For distributed XGBoost training, returns two worker pool specs,\ - \ the first one\n for master and the second one for the remaining workers.\n\ - \n Args:\n total_replica_count: Number of workers.\n target_column:\ - \ Required. Target column name.\n objective: Required. Specifies the\ - \ learning task and the learning objective.\n materialized_train_split:\ - \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ - \ Required. The path to the materialized validation\n split.\n transform_output:\ - \ Required. The path to transform output.\n training_schema_uri: Required.\ - \ The path to the training schema.\n instance_baseline: Path to JSON\ - \ file for baseline values.\n job_dir: Job dir path.\n instance_schema_uri:\ - \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ - \ trials: The trials uri.\n prediction_docker_uri_output: The prediction\ - \ docker container uri.\n machine_type: Machine type.\n accelerator_type:\ - \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ - \ Weight column name.\n eval_metric: Evaluation metrics for validation\ - \ data represented as a\n comma-separated string.\n disable_default_eval_metric:\ - \ Flag to disable default metric. Set to >0 to\n disable. 
Default to\ - \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG determnisticly\ - \ via iterator number.\n\n Raises:\n ValueError: If accelerator_count\ - \ <= 0 and accelerator_type is specified.\n\n Returns:\n Output parameters.\n\ - \ \"\"\"\n import copy\n import collections\n import re\n\n def get_gcs_path(path):\n\ - \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\ - \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\ - \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\ - \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ - \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\ - \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\ - \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\ - \ f'--trials_path={get_gcs_path(trials)}',\n f'--prediction_docker_uri_artifact_path={get_gcs_path(prediction_docker_uri_output)}',\n\ - \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ - \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ - \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\ - \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\ - \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\ - \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ - \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ - \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ - \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325',\n\ - \ ],\n },\n }\n\n # Add optional arguments if set\n if\ - \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\ - \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\ - \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\ - \ <= 0:\n raise ValueError(\n 'Accelerator count must be greator\ - \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\ - \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\ - \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\ - \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\ - \ for distributed training.\n if total_replica_count > 1:\n additional_replica\ - \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\ - \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\ - \n return collections.namedtuple(\n 'Outputs',\n [\n \ - \ 'worker_pool_specs',\n 'instance_schema_path',\n 'prediction_schema_path',\n\ - \ 'trials_path',\n 'prediction_docker_uri_artifact_path',\n\ - \ ],\n )(\n worker_pool_specs_lst,\n get_gcs_path(instance_schema_uri),\n\ - \ get_gcs_path(prediction_schema_uri),\n get_gcs_path(trials),\n\ - \ get_gcs_path(prediction_docker_uri_output),\n )\n\n" - image: python:3.7 - exec-get-best-hyperparameter-tuning-job-trial: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_best_hyperparameter_tuning_job_trial - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ - \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ - \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ - \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ - \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ - \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ - \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ - \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ - .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ - \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ - \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ - \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ - \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ - \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ - \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ - \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ - \ provided, read the file before continuing.\n if read_value_from_file:\n\ - \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ - \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ - \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ - \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ - \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ - \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ - \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ - \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ - \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ - \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ - \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ - \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ - \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ - \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ - \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ - \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ - \ for trial in response.trials:\n if trial.final_measurement:\n \ - \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ - \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ - \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ - \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ - \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ - \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ - \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ - \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ - \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ - \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ - \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ - \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim - exec-get-prediction-type-for-xgboost: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_type_for_xgboost - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ - \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ - \ objective: The XGBoost training objective\n\n Returns:\n A string.\ - \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ - \ or objective.startswith('multi'):\n return 'classification'\n elif\ - \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ - \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ - \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ - \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ - \ ' multi:softprob].'\n )\n\n" - image: python:3.7 - exec-get-xgboost-study-spec-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_xgboost_study_spec_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_xgboost_study_spec_parameters(\n study_spec_parameters_override:\ - \ list, # Required for KFP validation; pylint:disable=g-bare-generic,unused-argument\n\ - ) -> list: # Required for KFP validation; pylint:disable=g-bare-generic\n\ - \ \"\"\"Get study_spec_parameters for an XGBoost hyperparameter tuning\ - \ job.\n\n Args:\n study_spec_parameters_override: List of dictionaries\ - \ representing parameters\n to optimize. 
The dictionary key is the\ - \ parameter_id, which is passed to\n training job as a command line\ - \ argument, and the dictionary value is the\n parameter specification\ - \ of the metric.\n\n Returns:\n List of final Vizier study_spec_parameters\ - \ of type ParameterSpec.\n \"\"\"\n # pylint:disable=g-import-not-at-top,redefined-outer-name,reimported\n\ - \ import functools\n import math\n from typing import Any, Dict, List,\ - \ Optional\n # pylint:enable=g-import-not-at-top,redefined-outer-name,reimported\n\ - \n # Need to define constants within the component function\n # pylint:disable=invalid-name\n\ - \ _GBTREE_BOOSTER = 'gbtree'\n _GBLINEAR_BOOSTER = 'gblinear'\n _DART_BOOSTER\ - \ = 'dart'\n _XGBOOST_BOOSTER_PARAMETERS_MAP = {\n 'eta': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER],\n 'gamma': [_GBTREE_BOOSTER, _DART_BOOSTER],\n \ - \ 'max_depth': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'min_child_weight':\ - \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_delta_step': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER],\n 'subsample': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ - \ 'colsample_bytree': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bylevel':\ - \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bynode': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER],\n 'lambda': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n\ - \ 'alpha': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n \ - \ 'tree_method': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'scale_pos_weight':\ - \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'updater': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER, _GBLINEAR_BOOSTER],\n 'refresh_leaf': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER],\n 'process_type': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ - \ 'grow_policy': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'sampling_method':\ - \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'monotone_constraints': [_GBTREE_BOOSTER,\ - \ _DART_BOOSTER],\n 'interaction_constraints': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ - \ 'sample_type': [_DART_BOOSTER],\n 'normalize_type': [_DART_BOOSTER],\n\ - \ 'rate_drop': [_DART_BOOSTER],\n 'one_drop': [_DART_BOOSTER],\n\ - \ 'skip_drop': [_DART_BOOSTER],\n 'num_parallel_tree': [_GBLINEAR_BOOSTER],\n\ - \ 'feature_selector': [_GBLINEAR_BOOSTER],\n 'top_k': [_GBLINEAR_BOOSTER],\n\ - \ 'max_leaves': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_bin':\ - \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n }\n _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS\ - \ = frozenset(\n ['updater', 'monotone_constraints', 'interaction_constraints']\n\ - \ )\n\n def _validate_float_spec(\n parameter_spec: Dict[str, Any],\ - \ lower_bound: float, upper_bound: float\n ) -> None:\n msg = (\n \ - \ f'Parameter spec for {parameter_spec[\"parameter_id\"]} must contain\ - \ '\n 'double_value_spec or discrete_value_spec with float values\ - \ within '\n f'the range of {lower_bound} and {upper_bound} (inclusive)'\n\ - \ )\n if 'double_value_spec' in parameter_spec:\n float_spec\ - \ = parameter_spec['double_value_spec']\n if float_spec['min_value']\ - \ < lower_bound:\n raise ValueError(\n f'{msg}, but got\ - \ {float_spec[\"min_value\"]} for min_value.'\n )\n if float_spec['max_value']\ - \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ - \ {float_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ - \ in parameter_spec:\n float_spec = parameter_spec['discrete_value_spec']\n\ - \ float_values = float_spec['values']\n for val in float_values:\n\ - \ if val < lower_bound or val > upper_bound:\n raise ValueError(f'{msg},\ - \ but got {val} in {float_values}.')\n else:\n raise ValueError(\n\ - \ 
f'Unexpected value spec for {parameter_spec[\"parameter_id\"\ - ]}. {msg}.'\n )\n\n def _validate_int_spec(\n parameter_spec:\ - \ Dict[str, Any],\n lower_bound: Optional[int],\n upper_bound:\ - \ Optional[int],\n ) -> None:\n msg = (\n f'Parameter spec for\ - \ {parameter_spec[\"parameter_id\"]} must contain '\n 'integer_value_spec\ - \ or discrete_value_spec with integer values within '\n f'the range\ - \ of {lower_bound} and {upper_bound} (inclusive)'\n )\n if 'integer_value_spec'\ - \ in parameter_spec:\n int_spec = parameter_spec['integer_value_spec']\n\ - \ if lower_bound is not None and int_spec['min_value'] < lower_bound:\n\ - \ raise ValueError(\n f'{msg}, but got {int_spec[\"min_value\"\ - ]} for min_value.'\n )\n if upper_bound is not None and int_spec['max_value']\ - \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ - \ {int_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ - \ in parameter_spec:\n int_values = parameter_spec['discrete_value_spec']['values']\n\ - \ for val in int_values:\n if not isinstance(val, int):\n \ - \ raise ValueError(\n f'{msg}, but got non-integer {val}\ - \ with '\n f'type {type(val)} in {int_values}.'\n \ - \ )\n if (lower_bound is not None and val < lower_bound) or (\n \ - \ upper_bound is not None and val > upper_bound\n ):\n\ - \ raise ValueError(f'{msg}, but got {val} in {int_values}.')\n\ - \ else:\n raise ValueError(\n f'Unexpected value spec for\ - \ {parameter_spec[\"parameter_id\"]}. {msg}.'\n )\n\n def _validate_categorical_spec(\n\ - \ parameter_spec: Dict[str, Any], valid_categories: Optional[List[str]]\n\ - \ ) -> None:\n msg = (\n f'Parameter spec for {parameter_spec[\"\ - parameter_id\"]} must contain '\n 'categorical_value_spec with unique\ - \ categories from '\n f'{valid_categories}'\n )\n if 'categorical_value_spec'\ - \ in parameter_spec:\n if valid_categories is None:\n # Any\ - \ category is valid.\n return\n categorical_values = parameter_spec['categorical_value_spec']['values']\n\ - \ valid_categorical_values = set(categorical_values).intersection(\n\ - \ set(valid_categories)\n )\n if len(valid_categorical_values)\ - \ != len(categorical_values):\n raise ValueError(f'{msg}, but got\ - \ {categorical_values}.')\n else:\n raise ValueError(\n \ - \ f'Unexpected value spec for {parameter_spec[\"parameter_id\"]}. 
{msg}.'\n\ - \ )\n\n _XGBOOST_PARAM_VALIDATIONS = {\n 'num_boost_round': functools.partial(\n\ - \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ - \ 'early_stopping_rounds': functools.partial(\n _validate_int_spec,\ - \ lower_bound=1, upper_bound=None\n ),\n 'base_score': functools.partial(\n\ - \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ - \ 'booster': functools.partial(\n _validate_categorical_spec,\n\ - \ valid_categories=['gbtree', 'gblinear', 'dart'],\n ),\n\ - \ 'eta': functools.partial(\n _validate_float_spec, lower_bound=0,\ - \ upper_bound=1\n ),\n 'gamma': functools.partial(\n \ - \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n ),\n\ - \ 'max_depth': functools.partial(\n _validate_int_spec, lower_bound=0,\ - \ upper_bound=None\n ),\n 'min_child_weight': functools.partial(\n\ - \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n \ - \ ),\n 'max_delta_step': functools.partial(\n _validate_float_spec,\ - \ lower_bound=0, upper_bound=math.inf\n ),\n 'subsample': functools.partial(\n\ - \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ - \ ),\n 'colsample_bytree': functools.partial(\n _validate_float_spec,\ - \ lower_bound=0.0001, upper_bound=1\n ),\n 'colsample_bylevel':\ - \ functools.partial(\n _validate_float_spec, lower_bound=0.0001,\ - \ upper_bound=1\n ),\n 'colsample_bynode': functools.partial(\n\ - \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ - \ ),\n 'lambda': functools.partial(\n _validate_float_spec,\ - \ lower_bound=0, upper_bound=1\n ),\n 'alpha': functools.partial(\n\ - \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ - \ 'tree_method': functools.partial(\n _validate_categorical_spec,\n\ - \ valid_categories=['auto', 'exact', 'approx', 'hist', 'gpu_hist'],\n\ - \ ),\n 'scale_pos_weight': functools.partial(\n _validate_float_spec,\ - \ lower_bound=0, upper_bound=math.inf\n ),\n 'updater': functools.partial(\n\ - \ _validate_categorical_spec, valid_categories=None\n ),\n\ - \ 'refresh_leaf': functools.partial(\n _validate_int_spec,\ - \ lower_bound=0, upper_bound=1\n ),\n 'process_type': functools.partial(\n\ - \ _validate_categorical_spec, valid_categories=['default', 'updated']\n\ - \ ),\n 'grow_policy': functools.partial(\n _validate_categorical_spec,\n\ - \ valid_categories=['depthwise', 'lossguide'],\n ),\n \ - \ 'sampling_method': functools.partial(\n _validate_categorical_spec,\n\ - \ valid_categories=['uniform', 'gradient_based'],\n ),\n \ - \ 'monotone_constraints': functools.partial(\n _validate_categorical_spec,\ - \ valid_categories=None\n ),\n 'interaction_constraints': functools.partial(\n\ - \ _validate_categorical_spec, valid_categories=None\n ),\n\ - \ 'sample_type': functools.partial(\n _validate_categorical_spec,\ - \ valid_categories=['uniform', 'weighted']\n ),\n 'normalize_type':\ - \ functools.partial(\n _validate_categorical_spec, valid_categories=['tree',\ - \ 'forest']\n ),\n 'rate_drop': functools.partial(\n \ - \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n 'one_drop':\ - \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=1\n\ - \ ),\n 'skip_drop': functools.partial(\n _validate_float_spec,\ - \ lower_bound=0, upper_bound=1\n ),\n 'num_parallel_tree': functools.partial(\n\ - \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ - \ 'feature_selector': functools.partial(\n _validate_categorical_spec,\n\ - \ valid_categories=['cyclic', 'shuffle', 'random', 'greedy', 'thrifty'],\n\ - \ ),\n 'top_k': 
functools.partial(\n _validate_int_spec,\ - \ lower_bound=0, upper_bound=None\n ),\n 'max_cat_to_onehot':\ - \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=None\n\ - \ ),\n 'max_leaves': functools.partial(\n _validate_int_spec,\ - \ lower_bound=0, upper_bound=None\n ),\n 'max_bin': functools.partial(\n\ - \ _validate_int_spec, lower_bound=0, upper_bound=None\n ),\n\ - \ }\n\n def _add_booster_param(\n override_booster_params: Dict[str,\ - \ Any],\n param: Dict[str, Any],\n override_boosters: List[str],\n\ - \ ) -> None:\n # Validate parameter spec.\n param_id = param['parameter_spec']['parameter_id']\n\ - \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param_id]\n validation_func(param['parameter_spec'])\n\ - \ # Add parameter spec for valid boosters.\n parent_boosters = param['parent_categorical_values']['values']\n\ - \ all_boosters = set(_XGBOOST_BOOSTER_PARAMETERS_MAP[param_id]).intersection(\n\ - \ set(override_boosters)\n )\n valid_parent_boosters = set(parent_boosters).intersection(all_boosters)\n\ - \ if valid_parent_boosters:\n override_booster_params[param_id]\ - \ = {}\n for booster in valid_parent_boosters:\n override_booster_params[param_id][booster]\ - \ = param['parameter_spec']\n\n def _get_booster_param_specs(\n override_booster_params:\ - \ Dict[str, Any],\n param_id: str,\n default_param_spec: Optional[Dict[str,\ - \ Any]],\n ) -> List[Dict[str, Any]]:\n if param_id not in override_booster_params:\n\ - \ if default_param_spec is None:\n return []\n return [default_param_spec]\n\ - \ override_param_specs = override_booster_params[param_id]\n if default_param_spec\ - \ is not None:\n for booster in default_param_spec['parent_categorical_values']['values']:\n\ - \ if booster not in override_param_specs:\n override_param_specs[booster]\ - \ = default_param_spec['parameter_spec']\n param_specs = []\n for\ - \ booster, override_spec in override_param_specs.items():\n included\ - \ = False\n for spec in param_specs:\n if spec['parameter_spec']\ - \ == override_spec:\n spec['parent_categorical_values']['values'].append(booster)\n\ - \ included = True\n break\n if not included:\n \ - \ param_specs.append({\n 'parameter_spec': override_spec,\n\ - \ 'parent_categorical_values': {'values': [booster]},\n \ - \ })\n return param_specs\n\n default_params = [\n {\n \ - \ 'parameter_id': 'num_boost_round',\n 'discrete_value_spec':\ - \ {'values': [1, 5, 10, 15, 20]},\n },\n {\n 'parameter_id':\ - \ 'early_stopping_rounds',\n 'discrete_value_spec': {'values':\ - \ [3, 5, 10]},\n },\n {'parameter_id': 'base_score', 'discrete_value_spec':\ - \ {'values': [0.5]}},\n {\n 'parameter_id': 'booster',\n \ - \ 'categorical_value_spec': {'values': ['gbtree', 'gblinear', 'dart']},\n\ - \ 'conditional_parameter_specs': [\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'eta',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_LOG_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'gamma',\n\ - \ 'discrete_value_spec': {\n \ - \ 'values': [0, 10, 50, 100, 500, 1000]\n },\n \ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'max_depth',\n\ - \ 'integer_value_spec': {'min_value': 6, 'max_value':\ - \ 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': 
{\n 'parameter_id': 'min_child_weight',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0,\n 'max_value': 10.0,\n \ - \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'max_delta_step',\n\ - \ 'discrete_value_spec': {\n \ - \ 'values': [0.0, 1.0, 3.0, 5.0, 7.0, 9.0]\n },\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'subsample',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'colsample_bytree',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'colsample_bylevel',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'colsample_bynode',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'lambda',\n\ - \ 'double_value_spec': {\n \ - \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ - \ },\n 'scale_type': 'UNIT_REVERSE_LOG_SCALE',\n\ - \ },\n 'parent_categorical_values': {\n\ - \ 'values': ['gbtree', 'dart', 'gblinear']\n \ - \ },\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'alpha',\n \ - \ 'double_value_spec': {\n 'min_value': 0.0001,\n\ - \ 'max_value': 1.0,\n },\n\ - \ 'scale_type': 'UNIT_LOG_SCALE',\n \ - \ },\n 'parent_categorical_values': {\n \ - \ 'values': ['gbtree', 'dart', 'gblinear']\n },\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'tree_method',\n \ - \ 'categorical_value_spec': {'values': ['auto']},\n \ - \ },\n 'parent_categorical_values': {'values': ['gbtree',\ - \ 'dart']},\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'scale_pos_weight',\n \ - \ 'discrete_value_spec': {'values': [1.0]},\n \ - \ },\n 'parent_categorical_values': {'values': ['gbtree',\ - \ 'dart']},\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'refresh_leaf',\n \ - \ 'discrete_value_spec': {'values': [1]},\n },\n\ - \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'process_type',\n \ - \ 'categorical_value_spec': {'values': ['default']},\n \ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'grow_policy',\n\ - \ 'categorical_value_spec': {'values': ['depthwise']},\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'sampling_method',\n\ - \ 'categorical_value_spec': {'values': ['uniform']},\n\ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gbtree', 'dart']},\n },\n {\n \ - \ 'parameter_spec': {\n 'parameter_id': 'sample_type',\n\ - \ 'categorical_value_spec': {'values': ['uniform']},\n\ - \ },\n 
'parent_categorical_values': {'values':\ - \ ['dart']},\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'normalize_type',\n \ - \ 'categorical_value_spec': {'values': ['tree']},\n \ - \ },\n 'parent_categorical_values': {'values':\ - \ ['dart']},\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'rate_drop',\n \ - \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ - \ 'parent_categorical_values': {'values': ['dart']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'one_drop',\n \ - \ 'discrete_value_spec': {'values': [0]},\n },\n\ - \ 'parent_categorical_values': {'values': ['dart']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'skip_drop',\n \ - \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ - \ 'parent_categorical_values': {'values': ['dart']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'num_parallel_tree',\n \ - \ 'discrete_value_spec': {'values': [1]},\n \ - \ },\n 'parent_categorical_values': {'values': ['gblinear']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'feature_selector',\n \ - \ 'categorical_value_spec': {'values': ['cyclic']},\n \ - \ },\n 'parent_categorical_values': {'values':\ - \ ['gblinear']},\n },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'top_k',\n \ - \ 'discrete_value_spec': {'values': [0]},\n },\n \ - \ 'parent_categorical_values': {'values': ['gblinear']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'max_leaves',\n \ - \ 'discrete_value_spec': {'values': [0]},\n },\n\ - \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ - \ },\n {\n 'parameter_spec':\ - \ {\n 'parameter_id': 'max_bin',\n \ - \ 'discrete_value_spec': {'values': [256]},\n },\n\ - \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ - \ },\n ],\n },\n ]\n\n # Construct dictionaries\ - \ so that parameter specs are accessible by id.\n override_params = {}\n\ - \ override_booster_params = {}\n for param in study_spec_parameters_override:\n\ - \ # Validate a study spec before adding to the override_params dictionary.\n\ - \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param['parameter_id']]\n\ - \ validation_func(param)\n override_params[param['parameter_id']]\ - \ = param\n\n # Add any param that does not have a default parameter\ - \ spec.\n if (\n param['parameter_id'] == 'max_cat_to_onehot'\n\ - \ and param['parameter_id'] not in default_params\n ):\n \ - \ default_params.append(param)\n if (\n param['parameter_id']\ - \ == 'booster'\n and 'conditional_parameter_specs' in param\n \ - \ ):\n for booster_param in param['conditional_parameter_specs']:\n\ - \ _add_booster_param(\n override_booster_params,\n \ - \ booster_param,\n override_boosters=param['categorical_value_spec']['values'],\n\ - \ )\n\n # Validate override params according to XGBoost param dependencies.\n\ - \ tree_method = override_booster_params.get('tree_method', None)\n if\ - \ tree_method is not None:\n for booster, tree_method_spec in tree_method.items():\n\ - \ if tree_method_spec['categorical_value_spec']['values'] != ['exact']:\n\ - \ continue\n # TODO(b/277777886): exact requires non-zero max_depth\ - \ value.\n # The below code is no longer necessary after raising min_value\ - \ to 6 in\n # the default spec. In the long run, we need to decide\ - \ the best\n # approach for max_depth. 
Keeping the code for now in\ - \ case the approach\n # involves overriding max_depth for 'exact' tree_method.\n\ - \ max_depth_spec = {\n 'parameter_id': 'max_depth',\n \ - \ 'integer_value_spec': {'min_value': 6, 'max_value': 10},\n \ - \ 'scale_type': 'UNIT_LINEAR_SCALE',\n }\n override_booster_params['max_depth']\ - \ = override_booster_params.get(\n 'max_depth', {booster: max_depth_spec}\n\ - \ )\n override_booster_params['max_depth'][booster] = override_booster_params[\n\ - \ 'max_depth'\n ].get(booster, max_depth_spec)\n try:\n\ - \ _validate_int_spec(\n override_booster_params['max_depth'][booster],\n\ - \ lower_bound=1,\n upper_bound=None,\n )\n\ - \ except ValueError as e:\n raise ValueError(\n 'max_depth\ - \ cannot be 0 (or < 0) when tree method is fixed to be '\n '\"\ - exact\".'\n ) from e\n\n # Construct the modified study specs study_spec_parameters.\n\ - \ study_spec_parameters = []\n for default_param in default_params:\n\ - \ override_param = override_params.get(\n default_param['parameter_id'],\ - \ default_param\n )\n study_spec_parameters.append(override_param)\n\ - \ # Override conditional parameters for booster.\n if default_param['parameter_id']\ - \ == 'booster':\n booster_param_specs = []\n override_booster_vals\ - \ = override_param['categorical_value_spec']['values']\n\n for booster_param\ - \ in default_param['conditional_parameter_specs']:\n override_parent_boosters\ - \ = set(\n booster_param['parent_categorical_values']['values']\n\ - \ ).intersection(override_booster_vals)\n if not override_parent_boosters:\n\ - \ # No need to include a booster param if no relevant booster will\n\ - \ # be used.\n continue\n # Update default booster\ - \ param boosters to exclude irrelevant boosters.\n booster_param['parent_categorical_values']['values']\ - \ = list(\n override_parent_boosters\n )\n booster_param_specs.extend(\n\ - \ _get_booster_param_specs(\n override_booster_params,\n\ - \ param_id=booster_param['parameter_spec']['parameter_id'],\n\ - \ default_param_spec=booster_param,\n )\n \ - \ )\n\n for booster_param_name in _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS:\n\ - \ booster_param_specs.extend(\n _get_booster_param_specs(\n\ - \ override_booster_params,\n param_id=booster_param_name,\n\ - \ default_param_spec=None,\n )\n )\n\n\ - \ # booster_param_specs combines the overriding booster parameter\n\ - \ # specs from user input and the default booster parameter specs.\n\ - \ override_param['conditional_parameter_specs'] = booster_param_specs\n\ - \n return study_spec_parameters\n\n" - image: python:3.7 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - 
'{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-training-configurator-and-validator: - container: - args: - - training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-xgboost-hyperparameter-tuning-job: - container: - args: - - --type - - HyperparameterTuningJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"xgboost-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", - "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", - "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", - ", \"algorithm\": \"", 
"{{$.inputs.parameters[''study_spec_algorithm'']}}", - "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", - "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", - ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", - ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", - ", \"trial_job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", - "}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 -pipelineInfo: - description: The XGBoost HyperparameterTuningJob pipeline. - name: automl-tabular-xgboost-hyperparameter-tuning-job -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--disable_default_eval_metric: - componentInputParameter: disable_default_eval_metric - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eval_metric: - componentInputParameter: eval_metric - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--location: - 
componentInputParameter: location - pipelinechannel--max_failed_trial_count: - componentInputParameter: max_failed_trial_count - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--max_trial_count: - componentInputParameter: max_trial_count - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--objective: - componentInputParameter: objective - pipelinechannel--parallel_trial_count: - componentInputParameter: parallel_trial_count - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--seed_per_iteration: - componentInputParameter: seed_per_iteration - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_algorithm: - componentInputParameter: study_spec_algorithm - pipelinechannel--study_spec_measurement_selection_type: - componentInputParameter: study_spec_measurement_selection_type - pipelinechannel--study_spec_metric_goal: - componentInputParameter: study_spec_metric_goal - pipelinechannel--study_spec_metric_id: - componentInputParameter: study_spec_metric_id - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--training_accelerator_count: - componentInputParameter: training_accelerator_count - pipelinechannel--training_accelerator_type: - componentInputParameter: training_accelerator_type - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--training_machine_type: - componentInputParameter: training_machine_type - pipelinechannel--training_total_replica_count: - componentInputParameter: training_total_replica_count - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - 
pipelinechannel--weight_column: - componentInputParameter: weight_column - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The BigQuery staging full dataset id for - - storing intermediate tables.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: The BigQuery data source. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: The CSV data source. - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - disable_default_eval_metric: - defaultValue: 0.0 - description: 'Flag to disable default metric. Set to >0 to - - disable. Default to 0.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eval_metric: - defaultValue: '' - description: 'Evaluation metrics for validation data represented as a - - comma-separated string.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' 
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_disk_size_gb:
- defaultValue: 50.0
- description: 'Dataflow worker''s disk size in GB for
-
- evaluation components.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_machine_type:
- defaultValue: n1-standard-4
- description: 'The dataflow machine type for evaluation
-
- components.'
- isOptional: true
- parameterType: STRING
- evaluation_dataflow_max_num_workers:
- defaultValue: 100.0
- description: 'The max number of Dataflow workers for
-
- evaluation components.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- evaluation_dataflow_starting_num_workers:
- defaultValue: 10.0
- description: 'The initial number of Dataflow
-
- workers for evaluation components.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- feature_selection_algorithm:
- defaultValue: AMI
- description: Feature selection algorithm.
- isOptional: true
- parameterType: STRING
- location:
- description: The GCP region that runs the pipeline components.
- parameterType: STRING
- max_failed_trial_count:
- defaultValue: 0.0
- description: 'The number of failed trials that need to be seen
-
- before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides
-
- how many trials must fail before the whole job fails.'
- isOptional: true
- parameterType: NUMBER_INTEGER
- max_selected_features:
- defaultValue: -1.0
- description: Maximum number of features to select.
- isOptional: true
- parameterType: NUMBER_INTEGER
- max_trial_count:
- description: The desired total number of trials.
- parameterType: NUMBER_INTEGER
- model_description:
- defaultValue: ''
- description: The description name of the uploaded Vertex model.
- isOptional: true
- parameterType: STRING
- model_display_name:
- defaultValue: ''
- description: The display name of the uploaded Vertex model.
- isOptional: true
- parameterType: STRING
- objective:
- description: 'Specifies the learning task and the learning objective. Must
- be
-
- one of [reg:squarederror, reg:squaredlogerror,
-
- reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror,
-
- binary:logistic, multi:softprob].'
- parameterType: STRING
- parallel_trial_count:
- description: The desired number of trials to run in parallel.
- parameterType: NUMBER_INTEGER
- predefined_split_key:
- defaultValue: ''
- description: Predefined split key.
- isOptional: true
- parameterType: STRING
- project:
- description: The GCP project that runs the pipeline components.
- parameterType: STRING
- root_dir:
- description: The root GCS directory for the pipeline components.
- parameterType: STRING
- run_evaluation:
- defaultValue: false
- description: Whether to run evaluation steps during training.
- isOptional: true
- parameterType: BOOLEAN
- run_feature_selection:
- defaultValue: false
- description: Whether to enable feature selection.
- isOptional: true
- parameterType: BOOLEAN
- seed:
- defaultValue: 0.0
- description: Random seed.
- isOptional: true
- parameterType: NUMBER_INTEGER
- seed_per_iteration:
- defaultValue: false
- description: Seed PRNG deterministically via iteration number.
- isOptional: true
- parameterType: BOOLEAN
- stratified_split_key:
- defaultValue: ''
- description: Stratified split key.
- isOptional: true
- parameterType: STRING
- study_spec_algorithm:
- defaultValue: ALGORITHM_UNSPECIFIED
- description: 'The search algorithm specified for the study. One of
-
- ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.'
- isOptional: true - parameterType: STRING - study_spec_measurement_selection_type: - defaultValue: BEST_MEASUREMENT - description: ' Which measurement to use if/when the - - service automatically selects the final measurement from previously - - reported intermediate measurements. One of "BEST_MEASUREMENT" or - - "LAST_MEASUREMENT".' - isOptional: true - parameterType: STRING - study_spec_metric_goal: - description: 'Optimization goal of the metric, possible values: - - "MAXIMIZE", "MINIMIZE".' - parameterType: STRING - study_spec_metric_id: - description: 'Metric to optimize. For options, please look under - - ''eval_metrics'' at - - https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.' - parameterType: STRING - study_spec_parameters_override: - description: 'List of dictionaries representing parameters - - to optimize. The dictionary key is the parameter_id, which is passed to - - training job as a command line argument, and the dictionary value is the - - parameter specification of the metric.' - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: 'List of auto transform features in the - - comma-separated string format.' - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - training_accelerator_count: - defaultValue: 0.0 - description: Accelerator count. - isOptional: true - parameterType: NUMBER_INTEGER - training_accelerator_type: - defaultValue: '' - description: Accelerator type. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - training_machine_type: - defaultValue: c2-standard-16 - description: Machine type. - isOptional: true - parameterType: STRING - training_total_replica_count: - defaultValue: 1.0 - description: Number of workers. - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. 
- isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py deleted file mode 100644 index e03036c353..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML XGBoost Trainer component spec.""" - -from typing import Optional - -from kfp import dsl - - -@dsl.container_component -def xgboost_trainer( - project: str, - location: str, - worker_pool_specs: list, - gcp_resources: dsl.OutputPath(str), - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Trains an XGBoost model using Vertex CustomJob API. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - worker_pool_specs: The worker pool specs. - encryption_spec_key_name: The KMS key name. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "xgboost-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - '"}, "job_spec": {"worker_pool_specs": ', - worker_pool_specs, - '}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml deleted file mode 100644 index 0fc86f8c67..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +++ /dev/null @@ -1,4396 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-xgboost-trainer -# Description: The XGBoost training pipeline. 
-# Inputs: -# base_score: float [Default: 0.5] -# bigquery_staging_full_dataset_id: str [Default: ''] -# booster: str [Default: 'gbtree'] -# colsample_bylevel: float [Default: 1.0] -# colsample_bynode: float [Default: 1.0] -# colsample_bytree: float [Default: 1.0] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# dataset_level_custom_transformation_definitions: list -# dataset_level_transformations: list -# disable_default_eval_metric: int [Default: 0.0] -# early_stopping_rounds: int [Default: -1.0] -# encryption_spec_key_name: str [Default: ''] -# eta: float [Default: 0.3] -# eval_metric: str [Default: ''] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# feature_selection_algorithm: str [Default: 'AMI'] -# feature_selector: str [Default: 'cyclic'] -# gamma: float [Default: 0.0] -# grow_policy: str [Default: 'depthwise'] -# huber_slope: float [Default: 1.0] -# interaction_constraints: str [Default: ''] -# location: str -# max_bin: int [Default: 256.0] -# max_cat_to_onehot: int [Default: -1.0] -# max_delta_step: float [Default: 0.0] -# max_depth: int [Default: 6.0] -# max_leaves: int [Default: 0.0] -# max_selected_features: int [Default: -1.0] -# min_child_weight: float [Default: 1.0] -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# monotone_constraints: str [Default: ''] -# normalize_type: str [Default: 'tree'] -# num_boost_round: int [Default: 10.0] -# num_parallel_tree: int [Default: 1.0] -# objective: str -# one_drop: int [Default: 0.0] -# predefined_split_key: str [Default: ''] -# process_type: str [Default: 'default'] -# project: str -# rate_drop: float [Default: 0.0] -# refresh_leaf: int [Default: 1.0] -# reg_alpha: float [Default: 0.0] -# reg_lambda: float [Default: 1.0] -# root_dir: str -# run_evaluation: bool [Default: True] -# run_feature_selection: bool [Default: False] -# sample_type: str [Default: 'uniform'] -# sampling_method: str [Default: 'uniform'] -# scale_pos_weight: float [Default: 1.0] -# seed: int [Default: 0.0] -# seed_per_iteration: bool [Default: False] -# skip_drop: float [Default: 0.0] -# stratified_split_key: str [Default: ''] -# subsample: float [Default: 1.0] -# target_column: str -# test_fraction: float [Default: -1.0] -# tf_auto_transform_features: dict -# tf_custom_transformation_definitions: list -# tf_transformations_path: str [Default: ''] -# top_k: int [Default: 0.0] -# training_accelerator_count: int [Default: 0.0] -# training_accelerator_type: str [Default: ''] -# training_fraction: float [Default: -1.0] -# training_machine_type: str [Default: 'c2-standard-16'] -# training_total_replica_count: int [Default: 1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# tree_method: str [Default: 'auto'] -# tweedie_variance_power: float [Default: 1.5] -# updater: str [Default: ''] -# 
validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# Outputs: -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation - tasks: - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model - parameters: - bigquery_source_input_uri: - componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: 
pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - inputDefinitions: - artifacts: - pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - parameterType: STRING - pipelinechannel--get-prediction-type-for-xgboost-Output: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - dependentTasks: - - xgboost-trainer - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bool-identity - - feature-transform-engine - - generate-xgboost-trainer-worker-pool-specs - - get-prediction-type-for-xgboost - inputs: - artifacts: - pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: 
generate-xgboost-trainer-worker-pool-specs - parameters: - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--get-prediction-type-for-xgboost-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - dependentTasks: - - get-prediction-type-for-xgboost - inputs: - parameters: - bigquery_staging_full_dataset_id: - componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: 
pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - location: - componentInputParameter: pipelinechannel--location - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - model_type: - runtimeValue: - constant: boosted_trees - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: feature-transform-engine - generate-xgboost-trainer-worker-pool-specs: - cachingOptions: - enableCache: true - componentRef: - name: comp-generate-xgboost-trainer-worker-pool-specs - dependentTasks: - - feature-transform-engine - - split-materialized-data - - training-configurator-and-validator - inputs: - artifacts: - instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: training-configurator-and-validator - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - training_schema_uri: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: feature-transform-engine - parameters: - accelerator_count: - componentInputParameter: pipelinechannel--training_accelerator_count - accelerator_type: - componentInputParameter: pipelinechannel--training_accelerator_type - base_score: - componentInputParameter: pipelinechannel--base_score - booster: - componentInputParameter: pipelinechannel--booster - colsample_bylevel: - componentInputParameter: pipelinechannel--colsample_bylevel - colsample_bynode: - componentInputParameter: pipelinechannel--colsample_bynode - colsample_bytree: - componentInputParameter: pipelinechannel--colsample_bytree - disable_default_eval_metric: - componentInputParameter: 
pipelinechannel--disable_default_eval_metric - early_stopping_rounds: - componentInputParameter: pipelinechannel--early_stopping_rounds - eta: - componentInputParameter: pipelinechannel--eta - eval_metric: - componentInputParameter: pipelinechannel--eval_metric - feature_selector: - componentInputParameter: pipelinechannel--feature_selector - gamma: - componentInputParameter: pipelinechannel--gamma - grow_policy: - componentInputParameter: pipelinechannel--grow_policy - huber_slope: - componentInputParameter: pipelinechannel--huber_slope - interaction_constraints: - componentInputParameter: pipelinechannel--interaction_constraints - machine_type: - componentInputParameter: pipelinechannel--training_machine_type - max_bin: - componentInputParameter: pipelinechannel--max_bin - max_cat_to_onehot: - componentInputParameter: pipelinechannel--max_cat_to_onehot - max_delta_step: - componentInputParameter: pipelinechannel--max_delta_step - max_depth: - componentInputParameter: pipelinechannel--max_depth - max_leaves: - componentInputParameter: pipelinechannel--max_leaves - min_child_weight: - componentInputParameter: pipelinechannel--min_child_weight - monotone_constraints: - componentInputParameter: pipelinechannel--monotone_constraints - normalize_type: - componentInputParameter: pipelinechannel--normalize_type - num_boost_round: - componentInputParameter: pipelinechannel--num_boost_round - num_parallel_tree: - componentInputParameter: pipelinechannel--num_parallel_tree - objective: - componentInputParameter: pipelinechannel--objective - one_drop: - componentInputParameter: pipelinechannel--one_drop - process_type: - componentInputParameter: pipelinechannel--process_type - rate_drop: - componentInputParameter: pipelinechannel--rate_drop - refresh_leaf: - componentInputParameter: pipelinechannel--refresh_leaf - reg_alpha: - componentInputParameter: pipelinechannel--reg_alpha - reg_lambda: - componentInputParameter: pipelinechannel--reg_lambda - sample_type: - componentInputParameter: pipelinechannel--sample_type - sampling_method: - componentInputParameter: pipelinechannel--sampling_method - scale_pos_weight: - componentInputParameter: pipelinechannel--scale_pos_weight - seed: - componentInputParameter: pipelinechannel--seed - seed_per_iteration: - componentInputParameter: pipelinechannel--seed_per_iteration - skip_drop: - componentInputParameter: pipelinechannel--skip_drop - subsample: - componentInputParameter: pipelinechannel--subsample - target_column: - componentInputParameter: pipelinechannel--target_column - top_k: - componentInputParameter: pipelinechannel--top_k - total_replica_count: - componentInputParameter: pipelinechannel--training_total_replica_count - tree_method: - componentInputParameter: pipelinechannel--tree_method - tweedie_variance_power: - componentInputParameter: pipelinechannel--tweedie_variance_power - updater: - componentInputParameter: pipelinechannel--updater - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: generate-xgboost-trainer-worker-pool-specs - get-prediction-type-for-xgboost: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-prediction-type-for-xgboost - inputs: - parameters: - objective: - componentInputParameter: pipelinechannel--objective - taskInfo: - name: get-prediction-type-for-xgboost - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - generate-xgboost-trainer-worker-pool-specs - - xgboost-trainer - inputs: - artifacts: - 
unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: generate-xgboost-trainer-worker-pool-specs - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - split-materialized-data: - cachingOptions: - enableCache: true - componentRef: - name: comp-split-materialized-data - dependentTasks: - - feature-transform-engine - inputs: - artifacts: - materialized_data: - taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine - taskInfo: - name: split-materialized-data - training-configurator-and-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-training-configurator-and-validator - dependentTasks: - - feature-transform-engine - - get-prediction-type-for-xgboost - inputs: - artifacts: - dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - parameters: - prediction_type: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-prediction-type-for-xgboost - run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - target_column: - componentInputParameter: pipelinechannel--target_column - weight_column: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: training-configurator-and-validator - xgboost-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-xgboost-trainer - dependentTasks: - - generate-xgboost-trainer-worker-pool-specs - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - worker_pool_specs: - taskOutputParameter: - outputParameterKey: worker_pool_specs - producerTask: generate-xgboost-trainer-worker-pool-specs - taskInfo: - name: xgboost-trainer - inputDefinitions: - parameters: - pipelinechannel--base_score: - parameterType: NUMBER_DOUBLE - pipelinechannel--bigquery_staging_full_dataset_id: - parameterType: STRING - pipelinechannel--booster: - parameterType: STRING - pipelinechannel--colsample_bylevel: - parameterType: NUMBER_DOUBLE - pipelinechannel--colsample_bynode: - parameterType: NUMBER_DOUBLE - pipelinechannel--colsample_bytree: - parameterType: NUMBER_DOUBLE - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST - pipelinechannel--disable_default_eval_metric: - 
parameterType: NUMBER_INTEGER - pipelinechannel--early_stopping_rounds: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--eta: - parameterType: NUMBER_DOUBLE - pipelinechannel--eval_metric: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--feature_selection_algorithm: - parameterType: STRING - pipelinechannel--feature_selector: - parameterType: STRING - pipelinechannel--gamma: - parameterType: NUMBER_DOUBLE - pipelinechannel--grow_policy: - parameterType: STRING - pipelinechannel--huber_slope: - parameterType: NUMBER_DOUBLE - pipelinechannel--interaction_constraints: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_bin: - parameterType: NUMBER_INTEGER - pipelinechannel--max_cat_to_onehot: - parameterType: NUMBER_INTEGER - pipelinechannel--max_delta_step: - parameterType: NUMBER_DOUBLE - pipelinechannel--max_depth: - parameterType: NUMBER_INTEGER - pipelinechannel--max_leaves: - parameterType: NUMBER_INTEGER - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--min_child_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--monotone_constraints: - parameterType: STRING - pipelinechannel--normalize_type: - parameterType: STRING - pipelinechannel--num_boost_round: - parameterType: NUMBER_INTEGER - pipelinechannel--num_parallel_tree: - parameterType: NUMBER_INTEGER - pipelinechannel--objective: - parameterType: STRING - pipelinechannel--one_drop: - parameterType: NUMBER_INTEGER - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--process_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--rate_drop: - parameterType: NUMBER_DOUBLE - pipelinechannel--refresh_leaf: - parameterType: NUMBER_INTEGER - pipelinechannel--reg_alpha: - parameterType: NUMBER_DOUBLE - pipelinechannel--reg_lambda: - parameterType: NUMBER_DOUBLE - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--sample_type: - parameterType: STRING - pipelinechannel--sampling_method: - parameterType: STRING - pipelinechannel--scale_pos_weight: - parameterType: NUMBER_DOUBLE - pipelinechannel--seed: - parameterType: NUMBER_INTEGER - pipelinechannel--seed_per_iteration: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--skip_drop: - parameterType: NUMBER_DOUBLE - pipelinechannel--stratified_split_key: - parameterType: STRING - 
pipelinechannel--subsample: - parameterType: NUMBER_DOUBLE - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--top_k: - parameterType: NUMBER_INTEGER - pipelinechannel--training_accelerator_count: - parameterType: NUMBER_INTEGER - pipelinechannel--training_accelerator_type: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_machine_type: - parameterType: STRING - pipelinechannel--training_total_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--tree_method: - parameterType: STRING - pipelinechannel--tweedie_variance_power: - parameterType: NUMBER_DOUBLE - pipelinechannel--updater: - parameterType: STRING - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' 
- isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. 
Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. 
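The dataset_level_custom_transformation_definitions and dataset_level_transformations examples above survive only as escaped code-blocks inside the compiled spec. Restated as plain Python for readability, using the same illustrative values those descriptions give (the ConcatCols custom transform, the built-in Join, and the bq://test-project table):

# Custom dataset-level transform definition, as in the description above.
dataset_level_custom_transformation_definitions = [
    {
        'transformation': 'ConcatCols',
        'module_path': '/path/to/custom_transform_fn_dlt.py',
        'function_name': 'concat_cols',
    },
]

dataset_level_transformations = [
    # Built-in Join: left-table join keys are kept, right-table keys dropped.
    {
        'transformation': 'Join',
        'right_table_uri': 'bq://test-project.dataset_test.table',
        'join_keys': [['join_key_col', 'join_key_col']],
    },
    # The custom transform used together with the built-in one.
    {
        'transformation': 'ConcatCols',
        'cols': ['feature_1', 'feature_2'],
        'output_col': 'feature_1_2',
    },
]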
- isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. 
- parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
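Likewise, the tf_auto_transform_features and tf_custom_transformation_definitions examples embedded above read more easily as plain Python. The feature names, bucket path, and function names are the illustrative ones from the descriptions, not real assets:

# Auto/type-resolved features, as in the example in the description.
tf_auto_transform_features = {
    'auto': ['feature1'],
    'categorical': ['feature2', 'feature3'],
}

# Bring-your-own TF transform functions, as in the example in the description.
tf_custom_transformation_definitions = [
    {
        'transformation': 'PlusOne',
        'module_path': 'gs://bucket/custom_transform_fn.py',
        'function_name': 'plus_one_transform',
    },
    {
        'transformation': 'MultiplyTwo',
        'module_path': 'gs://bucket/custom_transform_fn.py',
        'function_name': 'multiply_two_transform',
    },
]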
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
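tf_transformations_path points at a JSON file of explicit row-level transformations. A minimal sketch follows, assuming a local file that would then be uploaded to Cloud Storage; the column names and output path are hypothetical, and the entries reuse the ZScale, Vocabulary, and Clip shapes documented above:

import json

# Illustrative config reusing the transformation shapes documented above;
# column names and the output path are made up.
transform_config = [
    {'transformation': 'ZScale', 'input_columns': ['feature_1']},
    {'transformation': 'ZScale', 'input_columns': ['feature_2']},
    {'transformation': 'Vocabulary', 'input_columns': ['feature_3']},
    {'transformation': 'Clip', 'input_columns': ['col1'],
     'output_columns': ['col1_clipped'], 'min_value': 1.0, 'max_value': 10.0},
]

with open('transform_config.json', 'w') as f:
    json.dump(transform_config, f, indent=2)
# Upload transform_config.json to Cloud Storage and pass its gs:// URI
# as tf_transformations_path.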
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-generate-xgboost-trainer-worker-pool-specs: - executorLabel: exec-generate-xgboost-trainer-worker-pool-specs - inputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path to JSON file for baseline values. - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Required. The path to the materialized validation - - split.' - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Required. The path to the materialized train - - split.' - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Required. The path to the training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Required. The path to transform output. - parameters: - accelerator_count: - defaultValue: 0.0 - description: Accelerator count. - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: Accelerator type. - isOptional: true - parameterType: STRING - base_score: - defaultValue: 0.5 - description: The initial prediction score of all instances, global bias. - isOptional: true - parameterType: NUMBER_DOUBLE - booster: - defaultValue: gbtree - description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree - and - - dart use tree based model while gblinear uses linear function.' - isOptional: true - parameterType: STRING - colsample_bylevel: - defaultValue: 1.0 - description: Subsample ratio of columns for each split, in each level. - isOptional: true - parameterType: NUMBER_DOUBLE - colsample_bynode: - defaultValue: 1.0 - description: Subsample ratio of columns for each node (split). - isOptional: true - parameterType: NUMBER_DOUBLE - colsample_bytree: - defaultValue: 1.0 - description: Subsample ratio of columns when constructing each tree. - isOptional: true - parameterType: NUMBER_DOUBLE - disable_default_eval_metric: - defaultValue: 0.0 - description: 'Flag to disable default metric. Set to >0 to - - disable. Default to 0.' - isOptional: true - parameterType: NUMBER_INTEGER - early_stopping_rounds: - defaultValue: -1.0 - description: 'Activates early stopping. Validation error needs to - - decrease at least every early_stopping_rounds round(s) to continue - - training.' - isOptional: true - parameterType: NUMBER_INTEGER - eta: - defaultValue: 0.3 - description: Learning rate. - isOptional: true - parameterType: NUMBER_DOUBLE - eval_metric: - defaultValue: '' - description: 'Evaluation metrics for validation data represented as a - - comma-separated string.' 
- isOptional: true - parameterType: STRING - feature_selector: - defaultValue: cyclic - description: '[linear booster only] Feature selection and ordering - - method.' - isOptional: true - parameterType: STRING - gamma: - defaultValue: 0.0 - description: 'Minimum loss reduction required to make a further partition - on a leaf - - node of the tree.' - isOptional: true - parameterType: NUMBER_DOUBLE - grow_policy: - defaultValue: depthwise - description: 'Controls a way new nodes are added to the tree. Only supported - - if tree_method is hist. Choices:["depthwise", "lossguide"]' - isOptional: true - parameterType: STRING - huber_slope: - defaultValue: 1.0 - description: 'A parameter used for Pseudo-Huber loss to define the delta - - term.' - isOptional: true - parameterType: NUMBER_DOUBLE - interaction_constraints: - defaultValue: '' - description: 'Constraints for interaction representing permitted - - interactions.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: c2-standard-16 - description: Machine type. - isOptional: true - parameterType: STRING - max_bin: - defaultValue: 256.0 - description: Maximum number of discrete bins to bucket continuous features. - isOptional: true - parameterType: NUMBER_INTEGER - max_cat_to_onehot: - defaultValue: -1.0 - description: 'A threshold for deciding whether XGBoost should use - - one-hot encoding based split for categorical data.' - isOptional: true - parameterType: NUMBER_INTEGER - max_delta_step: - defaultValue: 0.0 - description: 'Maximum delta step we allow each tree''s weight estimation - to - - be.' - isOptional: true - parameterType: NUMBER_DOUBLE - max_depth: - defaultValue: 6.0 - description: Maximum depth of a tree. - isOptional: true - parameterType: NUMBER_INTEGER - max_leaves: - defaultValue: 0.0 - description: Maximum number of nodes to be added. - isOptional: true - parameterType: NUMBER_INTEGER - min_child_weight: - defaultValue: 1.0 - description: Minimum sum of instance weight(hessian) needed in a child. - isOptional: true - parameterType: NUMBER_DOUBLE - monotone_constraints: - defaultValue: '' - description: Constraint of variable monotonicity. - isOptional: true - parameterType: STRING - normalize_type: - defaultValue: tree - description: '[dart booster only] Type of normalization algorithm, - - Choices:["tree", "forest"]' - isOptional: true - parameterType: STRING - num_boost_round: - defaultValue: 10.0 - description: Number of boosting iterations. - isOptional: true - parameterType: NUMBER_INTEGER - num_parallel_tree: - defaultValue: 1.0 - description: 'Number of parallel trees constructed during each - - iteration. This option is used to support boosted random forest.' - isOptional: true - parameterType: NUMBER_INTEGER - objective: - description: Required. Specifies the learning task and the learning objective. - parameterType: STRING - one_drop: - defaultValue: 0.0 - description: '[dart booster only] When this flag is enabled, at least one - tree - - is always dropped during the dropout (allows Binomial-plus-one or - - epsilon-dropout from the original DART paper).' - isOptional: true - parameterType: NUMBER_INTEGER - process_type: - defaultValue: default - description: 'A type of boosting process to run. Choices:["default", - - "update"]' - isOptional: true - parameterType: STRING - rate_drop: - defaultValue: 0.0 - description: '[dart booster only] Dropout rate.''' - isOptional: true - parameterType: NUMBER_DOUBLE - refresh_leaf: - defaultValue: 1.0 - description: 'Refresh updater plugin. 
Update tree leaf and nodes''s stats - if - - True. When it is False, only node stats are updated.' - isOptional: true - parameterType: NUMBER_INTEGER - reg_alpha: - defaultValue: 0.0 - description: L1 regularization term on weights. - isOptional: true - parameterType: NUMBER_DOUBLE - reg_lambda: - defaultValue: 1.0 - description: L2 regularization term on weights. - isOptional: true - parameterType: NUMBER_DOUBLE - sample_type: - defaultValue: uniform - description: '[dart booster only] Type of sampling algorithm. - - Choices:["uniform", "weighted"]' - isOptional: true - parameterType: STRING - sampling_method: - defaultValue: uniform - description: The method to use to sample the training instances. - isOptional: true - parameterType: STRING - scale_pos_weight: - defaultValue: 1.0 - description: Control the balance of positive and negative weights. - isOptional: true - parameterType: NUMBER_DOUBLE - seed: - defaultValue: 0.0 - description: Random seed. - isOptional: true - parameterType: NUMBER_INTEGER - seed_per_iteration: - defaultValue: false - description: Seed PRNG determnisticly via iterator number. - isOptional: true - parameterType: BOOLEAN - skip_drop: - defaultValue: 0.0 - description: '[dart booster only] Probability of skipping the dropout procedure - - during a boosting iteration.' - isOptional: true - parameterType: NUMBER_DOUBLE - subsample: - defaultValue: 1.0 - description: Subsample ratio of the training instance. - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - description: Required. Target column name. - parameterType: STRING - top_k: - defaultValue: 0.0 - description: 'The number of top features to select in greedy and thrifty - feature - - selector. The value of 0 means using all the features.' - isOptional: true - parameterType: NUMBER_INTEGER - total_replica_count: - description: Number of workers. - parameterType: NUMBER_INTEGER - tree_method: - defaultValue: auto - description: 'The tree construction algorithm used in XGBoost. Choices: - - ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' - isOptional: true - parameterType: STRING - tweedie_variance_power: - defaultValue: 1.5 - description: 'Parameter that controls the variance of the Tweedie - - distribution.' - isOptional: true - parameterType: NUMBER_DOUBLE - updater: - defaultValue: '' - description: 'A comma separated string defining the sequence of tree updaters - to - - run.' - isOptional: true - parameterType: STRING - weight_column: - defaultValue: '' - description: Weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - job_dir: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - unmanaged_container_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - worker_pool_specs: - parameterType: LIST - comp-get-prediction-type-for-xgboost: - executorLabel: exec-get-prediction-type-for-xgboost - inputDefinitions: - parameters: - objective: - description: The XGBoost training objective - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. 
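A sketch of the arguments a caller might assemble for the generate-xgboost-trainer-worker-pool-specs step. The values shown are either the defaults listed above or clearly marked stand-ins; reg:squarederror is a standard XGBoost objective used purely as an example, and the target column name is hypothetical:

# Hedged sketch only; keys mirror the component's input parameters above.
xgboost_trainer_args = dict(
    objective='reg:squarederror',   # required; also drives get-prediction-type-for-xgboost
    target_column='target',         # required; hypothetical column name
    total_replica_count=1,          # required; number of workers
    machine_type='c2-standard-16',  # component default
    eta=0.3,                        # learning rate, component default
    max_depth=6,                    # component default
    num_boost_round=10,             # component default
    booster='gbtree',               # component default
    tree_method='auto',             # component default
    early_stopping_rounds=-1,       # component default
)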
Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. 
- - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. 
The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
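Pulling the model-batch-predict inputs above together, here is a hedged sketch of an argument set a caller might pass. The project, bucket, and machine type are placeholders, the formats and replica counts mirror the documented defaults, and this is not presented as the exact operator signature:

# Hedged sketch only; keys mirror the component's input parameters above.
batch_predict_args = dict(
    project='example-project',                    # placeholder project
    location='us-central1',                       # component default
    job_display_name='xgboost-batch-predict',     # required display name
    instances_format='jsonl',                     # component default
    predictions_format='jsonl',                   # component default
    gcs_source_uris=['gs://example-bucket/instances/*.jsonl'],
    gcs_destination_output_uri_prefix='gs://example-bucket/predictions',
    machine_type='n1-standard-4',                 # placeholder; enables dedicated resources
    starting_replica_count=1,                     # placeholder; 0 lets Vertex AI decide
    max_replica_count=10,                         # documented default when machine_type is set
)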
- parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. 
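The unmanaged_container_model description above embeds an importer example as an escaped code-block; it is restated here for readability, keeping the artifact URI, image URI, and import path exactly as given in that description:

# Restated from the example embedded in the unmanaged_container_model description.
from kfp.dsl import importer
from google_cloud_pipeline_components.google_cloud_pipeline_components.types import artifact_types

importer_spec = importer(
    artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',
    artifact_class=artifact_types.UnmanagedContainerModel,
    metadata={
        'containerSpec': {
            'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
        }
    })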
Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-split-materialized-data: - executorLabel: exec-split-materialized-data - inputDefinitions: - artifacts: - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'Materialized dataset output by the Feature - - Transform Engine.' 
- outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized eval split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized test split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: Path patern to materialized train split. - comp-training-configurator-and-validator: - executorLabel: exec-training-configurator-and-validator - inputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Dataset stats generated by - - feature transform engine.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Schema of input data to the tf_model at - - serving time.' - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - available_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - available at forecast time.' - isOptional: true - parameterType: LIST - context_window: - defaultValue: -1.0 - description: The length of the context window. - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is - - enabled, the model will fit a distribution that captures the uncertainty - - of a prediction. At inference time, the predictive distribution is used - - to make a point prediction that minimizes the optimization objective. - - For example, the mean of a predictive distribution is the point - - prediction that minimizes RMSE loss. If quantiles are specified, then - - the quantiles of the distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - forecast_horizon: - defaultValue: -1.0 - description: The length of the forecast horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_model_type: - defaultValue: '' - description: The model types, e.g. l2l, seq2seq, tft. - isOptional: true - parameterType: STRING - forecasting_transformations: - defaultValue: {} - description: 'Dict mapping auto and/or type-resolutions to - - feature columns. The supported types are auto, categorical, numeric, - - text, and timestamp.' - isOptional: true - parameterType: STRUCT - group_columns: - description: 'A list of time series attribute column - - names that define the time series hierarchy.' - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over both the horizon and time series in the same - - hierarchy group.' - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over time series in the same group.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. 
If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." - isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of "classification", - - "regression", "time_series".' - isOptional: true - parameterType: STRING - quantiles: - defaultValue: [] - description: All quantiles that the model need to predict. - isOptional: true - parameterType: LIST - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: 'Whether we are running evaluation in the training - - pipeline.' - isOptional: true - parameterType: BOOLEAN - split_example_counts: - description: 'JSON string of data split example counts for - - train, validate, and test splits.' - parameterType: STRING - stage_1_deadline_hours: - description: 'Stage 1 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - stage_2_deadline_hours: - description: 'Stage 2 training budget in - - hours.' - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - description: 'The weight of the loss for - - predictions aggregated over the horizon for a single time series.' - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - defaultValue: '' - description: 'The column that indicates the time. Used by forecasting - - only.' - isOptional: true - parameterType: STRING - time_series_attribute_columns: - defaultValue: [] - description: 'The column names of the time series - - attributes.' - isOptional: true - parameterType: LIST - time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by - - forecasting only.' - isOptional: true - parameterType: STRING - unavailable_at_forecast_columns: - defaultValue: [] - description: 'The names of the columns that are - - not available at forecast time.' - isOptional: true - parameterType: LIST - weight_column: - defaultValue: '' - description: Weight column of input data. 
- isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - comp-xgboost-trainer: - executorLabel: exec-xgboost-trainer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - worker_pool_specs: - description: The worker pool specs. - parameterType: LIST - outputDefinitions: - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the custom training - - job.' - parameterType: STRING -deploymentSpec: - executors: - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 30.0 - exec-generate-xgboost-trainer-worker-pool-specs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _generate_xgboost_trainer_worker_pool_specs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _generate_xgboost_trainer_worker_pool_specs(\n total_replica_count:\ - \ int,\n target_column: str,\n objective: str,\n materialized_train_split:\ - \ dsl.InputPath('MaterializedSplit'),\n materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n\ - \ transform_output: dsl.InputPath('TransformOutput'),\n training_schema_uri:\ - \ dsl.InputPath('DatasetSchema'),\n instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ - \ job_dir: dsl.OutputPath('JobDir'),\n unmanaged_container_model:\ - \ dsl.Output[dsl.Artifact],\n machine_type: str = 'c2-standard-16',\n\ - \ accelerator_type: str = '',\n accelerator_count: int = 0,\n weight_column:\ - \ str = '',\n eval_metric: str = '',\n num_boost_round: int = 10,\n\ - \ early_stopping_rounds: int = -1,\n base_score: float = 0.5,\n \ - \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ - \ bool = False,\n booster: str = 'gbtree',\n eta: float = 0.3,\n \ - \ gamma: float = 0.0,\n max_depth: int = 6,\n min_child_weight:\ - \ float = 1.0,\n max_delta_step: float = 0.0,\n subsample: float =\ - \ 1.0,\n colsample_bytree: float = 1.0,\n colsample_bylevel: float\ - \ = 1.0,\n colsample_bynode: float = 1.0,\n reg_lambda: float = 1.0,\n\ - \ reg_alpha: float = 0.0,\n tree_method: str = 'auto',\n scale_pos_weight:\ - \ float = 1.0,\n updater: str = '',\n refresh_leaf: int = 1,\n \ - \ process_type: str = 'default',\n grow_policy: str = 'depthwise',\n\ - \ sampling_method: str = 'uniform',\n monotone_constraints: str =\ - \ '',\n interaction_constraints: str = '',\n sample_type: str = 'uniform',\n\ - \ normalize_type: str = 'tree',\n rate_drop: float = 0.0,\n one_drop:\ - \ int = 0,\n skip_drop: float = 0.0,\n num_parallel_tree: int = 1,\n\ - \ feature_selector: str = 'cyclic',\n top_k: int = 0,\n max_cat_to_onehot:\ - \ int = -1,\n max_leaves: int = 0,\n max_bin: int = 256,\n tweedie_variance_power:\ - \ float = 1.5,\n huber_slope: float = 1.0,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('worker_pool_specs', list), # pylint:disable=g-bare-generic\n\ - \ ],\n):\n \"\"\"Generates worker pool specs for XGBoost training.\n\ - \n For single machine XGBoost training, returns one worker pool spec for\ - \ master.\n For distributed XGBoost training, returns two worker pool specs,\ - \ the first one\n for master and the second one for the remaining workers.\n\ - \n Args:\n total_replica_count: Number of workers.\n target_column:\ - \ Required. Target column name.\n objective: Required. Specifies the\ - \ learning task and the learning objective.\n materialized_train_split:\ - \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ - \ Required. The path to the materialized validation\n split.\n transform_output:\ - \ Required. 
The path to transform output.\n training_schema_uri: Required.\ - \ The path to the training schema.\n instance_baseline: Path to JSON\ - \ file for baseline values.\n job_dir: Job dir path.\n unmanaged_container_model:\ - \ The unmanaged model.\n machine_type: Machine type.\n accelerator_type:\ - \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ - \ Weight column name.\n eval_metric: Evaluation metrics for validation\ - \ data represented as a\n comma-separated string.\n num_boost_round:\ - \ Number of boosting iterations.\n early_stopping_rounds: Activates early\ - \ stopping. Validation error needs to\n decrease at least every early_stopping_rounds\ - \ round(s) to continue\n training.\n base_score: The initial prediction\ - \ score of all instances, global bias.\n disable_default_eval_metric:\ - \ Flag to disable default metric. Set to >0 to\n disable. Default to\ - \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG determnisticly\ - \ via iterator number.\n booster: Which booster to use, can be gbtree,\ - \ gblinear or dart. gbtree and\n dart use tree based model while gblinear\ - \ uses linear function.\n eta: Learning rate.\n gamma: Minimum loss\ - \ reduction required to make a further partition on a leaf\n node of\ - \ the tree.\n max_depth: Maximum depth of a tree.\n min_child_weight:\ - \ Minimum sum of instance weight(hessian) needed in a child.\n max_delta_step:\ - \ Maximum delta step we allow each tree's weight estimation to\n be.\n\ - \ subsample: Subsample ratio of the training instance.\n colsample_bytree:\ - \ Subsample ratio of columns when constructing each tree.\n colsample_bylevel:\ - \ Subsample ratio of columns for each split, in each level.\n colsample_bynode:\ - \ Subsample ratio of columns for each node (split).\n reg_lambda: L2\ - \ regularization term on weights.\n reg_alpha: L1 regularization term\ - \ on weights.\n tree_method: The tree construction algorithm used in\ - \ XGBoost. Choices:\n [\"auto\", \"exact\", \"approx\", \"hist\", \"\ - gpu_exact\", \"gpu_hist\"].\n scale_pos_weight: Control the balance of\ - \ positive and negative weights.\n updater: A comma separated string\ - \ defining the sequence of tree updaters to\n run.\n refresh_leaf:\ - \ Refresh updater plugin. Update tree leaf and nodes's stats if\n True.\ - \ When it is False, only node stats are updated.\n process_type: A type\ - \ of boosting process to run. Choices:[\"default\",\n \"update\"]\n\ - \ grow_policy: Controls a way new nodes are added to the tree. Only supported\n\ - \ if tree_method is hist. 
Choices:[\"depthwise\", \"lossguide\"]\n\ - \ sampling_method: The method to use to sample the training instances.\n\ - \ monotone_constraints: Constraint of variable monotonicity.\n interaction_constraints:\ - \ Constraints for interaction representing permitted\n interactions.\n\ - \ sample_type: [dart booster only] Type of sampling algorithm.\n \ - \ Choices:[\"uniform\", \"weighted\"]\n normalize_type: [dart booster\ - \ only] Type of normalization algorithm,\n Choices:[\"tree\", \"forest\"\ - ]\n rate_drop: [dart booster only] Dropout rate.'\n one_drop: [dart\ - \ booster only] When this flag is enabled, at least one tree\n is always\ - \ dropped during the dropout (allows Binomial-plus-one or\n epsilon-dropout\ - \ from the original DART paper).\n skip_drop: [dart booster only] Probability\ - \ of skipping the dropout procedure\n during a boosting iteration.\n\ - \ num_parallel_tree: Number of parallel trees constructed during each\n\ - \ iteration. This option is used to support boosted random forest.\n\ - \ feature_selector: [linear booster only] Feature selection and ordering\n\ - \ method.\n top_k: The number of top features to select in greedy\ - \ and thrifty feature\n selector. The value of 0 means using all the\ - \ features.\n max_cat_to_onehot: A threshold for deciding whether XGBoost\ - \ should use\n one-hot encoding based split for categorical data.\n\ - \ max_leaves: Maximum number of nodes to be added.\n max_bin: Maximum\ - \ number of discrete bins to bucket continuous features.\n tweedie_variance_power:\ - \ Parameter that controls the variance of the Tweedie\n distribution.\n\ - \ huber_slope: A parameter used for Pseudo-Huber loss to define the delta\n\ - \ term.\n\n Raises:\n ValueError: If accelerator_count <= 0 and\ - \ accelerator_type is specified.\n\n Returns:\n Outputs containing the\ - \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ - \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ - \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ - \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325'\n\ - \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ - \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ - \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ - \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ - \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ - \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ - \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\ - \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\ - \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\ - \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ - \ f'--eval_metric={eval_metric}',\n f'--num_boost_round={num_boost_round}',\n\ - \ f'--base_score={base_score}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ - \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ - \ f'--booster={booster}',\n f'--eta={eta}',\n\ - \ f'--gamma={gamma}',\n f'--max_depth={max_depth}',\n\ - \ f'--min_child_weight={min_child_weight}',\n \ - \ f'--max_delta_step={max_delta_step}',\n f'--subsample={subsample}',\n\ - \ f'--colsample_bytree={colsample_bytree}',\n \ - \ f'--colsample_bylevel={colsample_bylevel}',\n f'--colsample_bynode={colsample_bynode}',\n\ - \ 
f'--lambda={reg_lambda}',\n f'--alpha={reg_alpha}',\n\ - \ f'--tree_method={tree_method}',\n f'--scale_pos_weight={scale_pos_weight}',\n\ - \ f'--refresh_leaf={refresh_leaf}',\n f'--process_type={process_type}',\n\ - \ f'--grow_policy={grow_policy}',\n f'--sampling_method={sampling_method}',\n\ - \ f'--sample_type={sample_type}',\n f'--normalize_type={normalize_type}',\n\ - \ f'--rate_drop={rate_drop}',\n f'--one_drop={one_drop}',\n\ - \ f'--skip_drop={skip_drop}',\n f'--num_parallel_tree={num_parallel_tree}',\n\ - \ f'--feature_selector={feature_selector}',\n \ - \ f'--top_k={top_k}',\n f'--max_leaves={max_leaves}',\n \ - \ f'--max_bin={max_bin}',\n f'--tweedie_variance_power={tweedie_variance_power}',\n\ - \ f'--huber_slope={huber_slope}',\n f'--prediction_docker_uri={prediction_docker_uri}',\n\ - \ '--executor_input={{$.json_escape[1]}}',\n ],\n\ - \ },\n }\n\n # Add optional arguments if set\n if weight_column:\n\ - \ master_worker_pool_spec['container_spec']['args'].append(\n \ - \ f'--weight_column={weight_column}'\n )\n if early_stopping_rounds\ - \ >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ - \ f'--early_stopping_rounds={early_stopping_rounds}'\n )\n if\ - \ updater:\n master_worker_pool_spec['container_spec']['args'].append(\n\ - \ f'--updater={updater}'\n )\n if monotone_constraints:\n \ - \ master_worker_pool_spec['container_spec']['args'].append(\n f'--monotone_constraints={monotone_constraints}'\n\ - \ )\n if interaction_constraints:\n master_worker_pool_spec['container_spec']['args'].append(\n\ - \ f'--interaction_constraints={interaction_constraints}'\n )\n\ - \ if max_cat_to_onehot >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ - \ f'--max_cat_to_onehot={max_cat_to_onehot}'\n )\n\n # Add accelerator_type\ - \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\ - \ <= 0:\n raise ValueError(\n 'Accelerator count must be greator\ - \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\ - \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\ - \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\ - \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\ - \ for distributed training.\n if total_replica_count > 1:\n additional_replica\ - \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\ - \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\ - \n # Build unmanaged_container_model\n model_dir = os.path.join(formatted_job_dir,\ - \ 'model')\n unmanaged_container_model.metadata['containerSpec'] = {\n\ - \ 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ - \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ - \ = {\n 'instanceSchemaUri': os.path.join(model_dir, 'instance.yaml'),\n\ - \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\ - \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\ - \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n" - image: python:3.7 - exec-get-prediction-type-for-xgboost: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _get_prediction_type_for_xgboost - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ - \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ - \ objective: The XGBoost training objective\n\n Returns:\n A string.\ - \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ - \ or objective.startswith('multi'):\n return 'classification'\n elif\ - \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ - \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ - \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ - \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ - \ ' multi:softprob].'\n )\n\n" - image: python:3.7 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ 
','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-split-materialized-data: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _split_materialized_data - command: - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ - \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ - \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ - \ \"\"\"Splits materialized_data into materialized_data test, train, and\ - \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ - \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ - \ materialized_train_split: Path patern to materialized_train_split.\n\ - \ materialized_eval_split: Path patern to materialized_eval_split.\n\ - \ materialized_test_split: Path patern to materialized_test_split.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ - \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ - \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ - \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ - \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['avro_data_source'][\n \ - \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ - \ file_patterns = materialized_data_json['parquet_data_source'][\n \ - \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ - \ data source: {materialized_data_json}')\n\n # we map indices to file\ - \ patterns based on the ordering of insertion order\n # in our transform_data\ - \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ - \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ - \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ - \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - exec-training-configurator-and-validator: - container: - args: - - 
training_configurator_and_validator - - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' - - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' - - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' - - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' - - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' - - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' - - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' - - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' - - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", - "{{$.inputs.parameters[''quantiles'']}}"]}}}' - - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' - - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' - - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' - - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": - ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": - ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-xgboost-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"xgboost-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", - "}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 -pipelineInfo: - description: The XGBoost training pipeline. - name: automl-tabular-xgboost-trainer -root: - dag: - outputs: - artifacts: - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--base_score: - componentInputParameter: base_score - pipelinechannel--bigquery_staging_full_dataset_id: - componentInputParameter: bigquery_staging_full_dataset_id - pipelinechannel--booster: - componentInputParameter: booster - pipelinechannel--colsample_bylevel: - componentInputParameter: colsample_bylevel - pipelinechannel--colsample_bynode: - componentInputParameter: colsample_bynode - pipelinechannel--colsample_bytree: - componentInputParameter: colsample_bytree - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--dataset_level_custom_transformation_definitions: - componentInputParameter: dataset_level_custom_transformation_definitions - pipelinechannel--dataset_level_transformations: - componentInputParameter: dataset_level_transformations - pipelinechannel--disable_default_eval_metric: - componentInputParameter: disable_default_eval_metric - pipelinechannel--early_stopping_rounds: - componentInputParameter: early_stopping_rounds - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--eta: - componentInputParameter: eta - pipelinechannel--eval_metric: - componentInputParameter: eval_metric - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - 
componentInputParameter: evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--feature_selection_algorithm: - componentInputParameter: feature_selection_algorithm - pipelinechannel--feature_selector: - componentInputParameter: feature_selector - pipelinechannel--gamma: - componentInputParameter: gamma - pipelinechannel--grow_policy: - componentInputParameter: grow_policy - pipelinechannel--huber_slope: - componentInputParameter: huber_slope - pipelinechannel--interaction_constraints: - componentInputParameter: interaction_constraints - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--max_bin: - componentInputParameter: max_bin - pipelinechannel--max_cat_to_onehot: - componentInputParameter: max_cat_to_onehot - pipelinechannel--max_delta_step: - componentInputParameter: max_delta_step - pipelinechannel--max_depth: - componentInputParameter: max_depth - pipelinechannel--max_leaves: - componentInputParameter: max_leaves - pipelinechannel--max_selected_features: - componentInputParameter: max_selected_features - pipelinechannel--min_child_weight: - componentInputParameter: min_child_weight - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--monotone_constraints: - componentInputParameter: monotone_constraints - pipelinechannel--normalize_type: - componentInputParameter: normalize_type - pipelinechannel--num_boost_round: - componentInputParameter: num_boost_round - pipelinechannel--num_parallel_tree: - componentInputParameter: num_parallel_tree - pipelinechannel--objective: - componentInputParameter: objective - pipelinechannel--one_drop: - componentInputParameter: one_drop - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--process_type: - componentInputParameter: process_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--rate_drop: - componentInputParameter: rate_drop - pipelinechannel--refresh_leaf: - componentInputParameter: refresh_leaf - pipelinechannel--reg_alpha: - componentInputParameter: reg_alpha - pipelinechannel--reg_lambda: - componentInputParameter: reg_lambda - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--run_feature_selection: - componentInputParameter: run_feature_selection - pipelinechannel--sample_type: - componentInputParameter: sample_type - pipelinechannel--sampling_method: - componentInputParameter: sampling_method - pipelinechannel--scale_pos_weight: - componentInputParameter: scale_pos_weight - pipelinechannel--seed: - componentInputParameter: seed - pipelinechannel--seed_per_iteration: - componentInputParameter: seed_per_iteration - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - 
outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--skip_drop: - componentInputParameter: skip_drop - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--subsample: - componentInputParameter: subsample - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--tf_auto_transform_features: - componentInputParameter: tf_auto_transform_features - pipelinechannel--tf_custom_transformation_definitions: - componentInputParameter: tf_custom_transformation_definitions - pipelinechannel--tf_transformations_path: - componentInputParameter: tf_transformations_path - pipelinechannel--top_k: - componentInputParameter: top_k - pipelinechannel--training_accelerator_count: - componentInputParameter: training_accelerator_count - pipelinechannel--training_accelerator_type: - componentInputParameter: training_accelerator_type - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--training_machine_type: - componentInputParameter: training_machine_type - pipelinechannel--training_total_replica_count: - componentInputParameter: training_total_replica_count - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--tree_method: - componentInputParameter: tree_method - pipelinechannel--tweedie_variance_power: - componentInputParameter: tweedie_variance_power - pipelinechannel--updater: - componentInputParameter: updater - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact. - parameters: - base_score: - defaultValue: 0.5 - description: The initial prediction score of all instances, global bias. - isOptional: true - parameterType: NUMBER_DOUBLE - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'The BigQuery staging full dataset id for - - storing intermediate tables.' - isOptional: true - parameterType: STRING - booster: - defaultValue: gbtree - description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree - and - - dart use tree based model while gblinear uses linear function.' 
- isOptional: true - parameterType: STRING - colsample_bylevel: - defaultValue: 1.0 - description: Subsample ratio of columns for each split, in each level. - isOptional: true - parameterType: NUMBER_DOUBLE - colsample_bynode: - defaultValue: 1.0 - description: Subsample ratio of columns for each node (split). - isOptional: true - parameterType: NUMBER_DOUBLE - colsample_bytree: - defaultValue: 1.0 - description: Subsample ratio of columns when constructing each tree. - isOptional: true - parameterType: NUMBER_DOUBLE - data_source_bigquery_table_path: - defaultValue: '' - description: The BigQuery data source. - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: The CSV data source. - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - description: 'Dataset-level custom - - transformation definitions in string format.' - isOptional: true - parameterType: LIST - dataset_level_transformations: - description: 'Dataset-level transformation configuration in - - string format.' - isOptional: true - parameterType: LIST - disable_default_eval_metric: - defaultValue: 0.0 - description: 'Flag to disable default metric. Set to >0 to - - disable. Default to 0.' - isOptional: true - parameterType: NUMBER_INTEGER - early_stopping_rounds: - defaultValue: -1.0 - description: 'Activates early stopping. Validation error needs to - - decrease at least every early_stopping_rounds round(s) to continue - - training.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - eta: - defaultValue: 0.3 - description: Learning rate. - isOptional: true - parameterType: NUMBER_DOUBLE - eval_metric: - defaultValue: '' - description: 'Evaluation metrics for validation data represented as a - - comma-separated string.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' 
- isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - feature_selection_algorithm: - defaultValue: AMI - description: Feature selection algorithm. - isOptional: true - parameterType: STRING - feature_selector: - defaultValue: cyclic - description: '[linear booster only] Feature selection and ordering - - method.' - isOptional: true - parameterType: STRING - gamma: - defaultValue: 0.0 - description: 'Minimum loss reduction required to make a further partition - on a leaf - - node of the tree.' - isOptional: true - parameterType: NUMBER_DOUBLE - grow_policy: - defaultValue: depthwise - description: 'Controls a way new nodes are added to the tree. Only supported - - if tree_method is hist. Choices:["depthwise", "lossguide"]' - isOptional: true - parameterType: STRING - huber_slope: - defaultValue: 1.0 - description: 'A parameter used for Pseudo-Huber loss to define the delta - - term.' - isOptional: true - parameterType: NUMBER_DOUBLE - interaction_constraints: - defaultValue: '' - description: 'Constraints for interaction representing permitted - - interactions.' - isOptional: true - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - max_bin: - defaultValue: 256.0 - description: Maximum number of discrete bins to bucket continuous features. - isOptional: true - parameterType: NUMBER_INTEGER - max_cat_to_onehot: - defaultValue: -1.0 - description: 'A threshold for deciding whether XGBoost should use - - one-hot encoding based split for categorical data.' - isOptional: true - parameterType: NUMBER_INTEGER - max_delta_step: - defaultValue: 0.0 - description: 'Maximum delta step we allow each tree''s weight estimation to - - be.' - isOptional: true - parameterType: NUMBER_DOUBLE - max_depth: - defaultValue: 6.0 - description: Maximum depth of a tree. - isOptional: true - parameterType: NUMBER_INTEGER - max_leaves: - defaultValue: 0.0 - description: Maximum number of nodes to be added. - isOptional: true - parameterType: NUMBER_INTEGER - max_selected_features: - defaultValue: -1.0 - description: Maximum number of features to select. - isOptional: true - parameterType: NUMBER_INTEGER - min_child_weight: - defaultValue: 1.0 - description: Minimum sum of instance weight(hessian) needed in a child. - isOptional: true - parameterType: NUMBER_DOUBLE - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model. - isOptional: true - parameterType: STRING - monotone_constraints: - defaultValue: '' - description: Constraint of variable monotonicity. 
- isOptional: true - parameterType: STRING - normalize_type: - defaultValue: tree - description: '[dart booster only] Type of normalization algorithm, - - Choices:["tree", "forest"]' - isOptional: true - parameterType: STRING - num_boost_round: - defaultValue: 10.0 - description: Number of boosting iterations. - isOptional: true - parameterType: NUMBER_INTEGER - num_parallel_tree: - defaultValue: 1.0 - description: 'Number of parallel trees constructed during each - - iteration. This option is used to support boosted random forest.' - isOptional: true - parameterType: NUMBER_INTEGER - objective: - description: 'Specifies the learning task and the learning objective. Must - be - - one of [reg:squarederror, reg:squaredlogerror, - - reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, - - binary:logistic, multi:softprob].' - parameterType: STRING - one_drop: - defaultValue: 0.0 - description: '[dart booster only] When this flag is enabled, at least one - tree - - is always dropped during the dropout (allows Binomial-plus-one or - - epsilon-dropout from the original DART paper).' - isOptional: true - parameterType: NUMBER_INTEGER - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - process_type: - defaultValue: default - description: 'A type of boosting process to run. Choices:["default", - - "update"]' - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - rate_drop: - defaultValue: 0.0 - description: '[dart booster only] Dropout rate.''' - isOptional: true - parameterType: NUMBER_DOUBLE - refresh_leaf: - defaultValue: 1.0 - description: 'Refresh updater plugin. Update tree leaf and nodes''s stats - if - - True. When it is False, only node stats are updated.' - isOptional: true - parameterType: NUMBER_INTEGER - reg_alpha: - defaultValue: 0.0 - description: L1 regularization term on weights. - isOptional: true - parameterType: NUMBER_DOUBLE - reg_lambda: - defaultValue: 1.0 - description: L2 regularization term on weights. - isOptional: true - parameterType: NUMBER_DOUBLE - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_evaluation: - defaultValue: true - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: Whether to enable feature selection. - isOptional: true - parameterType: BOOLEAN - sample_type: - defaultValue: uniform - description: '[dart booster only] Type of sampling algorithm. - - Choices:["uniform", "weighted"]' - isOptional: true - parameterType: STRING - sampling_method: - defaultValue: uniform - description: The method to use to sample the training instances. - isOptional: true - parameterType: STRING - scale_pos_weight: - defaultValue: 1.0 - description: Control the balance of positive and negative weights. - isOptional: true - parameterType: NUMBER_DOUBLE - seed: - defaultValue: 0.0 - description: Random seed. - isOptional: true - parameterType: NUMBER_INTEGER - seed_per_iteration: - defaultValue: false - description: Seed PRNG determnisticly via iterator number. - isOptional: true - parameterType: BOOLEAN - skip_drop: - defaultValue: 0.0 - description: '[dart booster only] Probability of skipping the dropout procedure - - during a boosting iteration.' 
- isOptional: true - parameterType: NUMBER_DOUBLE - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - subsample: - defaultValue: 1.0 - description: Subsample ratio of the training instance. - isOptional: true - parameterType: NUMBER_DOUBLE - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: Test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - description: 'List of auto transform features in the - - comma-separated string format.' - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - description: 'TF custom transformation definitions - - in string format.' - isOptional: true - parameterType: LIST - tf_transformations_path: - defaultValue: '' - description: Path to TF transformation configuration. - isOptional: true - parameterType: STRING - top_k: - defaultValue: 0.0 - description: 'The number of top features to select in greedy and thrifty feature - - selector. The value of 0 means using all the features.' - isOptional: true - parameterType: NUMBER_INTEGER - training_accelerator_count: - defaultValue: 0.0 - description: Accelerator count. - isOptional: true - parameterType: NUMBER_INTEGER - training_accelerator_type: - defaultValue: '' - description: Accelerator type. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - training_machine_type: - defaultValue: c2-standard-16 - description: Machine type. - isOptional: true - parameterType: STRING - training_total_replica_count: - defaultValue: 1.0 - description: Number of workers. - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - tree_method: - defaultValue: auto - description: 'The tree construction algorithm used in XGBoost. Choices: - - ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' - isOptional: true - parameterType: STRING - tweedie_variance_power: - defaultValue: 1.5 - description: 'Parameter that controls the variance of the Tweedie - - distribution.' - isOptional: true - parameterType: NUMBER_DOUBLE - updater: - defaultValue: '' - description: 'A comma separated string defining the sequence of tree updaters - to - - run.' - isOptional: true - parameterType: STRING - validation_fraction: - defaultValue: -1.0 - description: Validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. 
-      isOptional: true
-      parameterType: STRING
-  outputDefinitions:
-    artifacts:
-      model-evaluation-evaluation_metrics:
-        artifactType:
-          schemaTitle: system.Metrics
-          schemaVersion: 0.0.1
-schemaVersion: 2.1.0
-sdkVersion: kfp-2.0.0-beta.17
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py
deleted file mode 100644
index 6dbcd85caf..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""GA AutoML forecasting components."""
-
-from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp
-
-__all__ = [
-    'ProphetTrainerOp',
-]
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml
deleted file mode 100644
index 14c7dd13b2..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml
+++ /dev/null
@@ -1,1159 +0,0 @@
-# PIPELINE DEFINITION
-# Name: automl-tabular-bqml-arima-prediction
-# Description: Forecasts using a BQML ARIMA_PLUS model.
-# Inputs: -# bigquery_destination_uri: str [Default: ''] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# encryption_spec_key_name: str [Default: ''] -# generate_explanation: bool [Default: False] -# location: str -# model_name: str -# project: str -components: - comp-bigquery-create-dataset: - executorLabel: exec-bigquery-create-dataset - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-create-dataset-2: - executorLabel: exec-bigquery-create-dataset-2 - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-delete-dataset-with-prefix: - executorLabel: exec-bigquery-delete-dataset-with-prefix - inputDefinitions: - parameters: - dataset_prefix: - parameterType: STRING - delete_contents: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - project: - parameterType: STRING - comp-bigquery-query-job: - executorLabel: exec-bigquery-query-job - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' 
- isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-build-job-configuration-query: - executorLabel: exec-build-job-configuration-query - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-exit-handler-1: - dag: - tasks: - bigquery-create-dataset: - cachingOptions: {} - componentRef: - name: comp-bigquery-create-dataset - dependentTasks: - - get-table-location - - validate-inputs - inputs: - parameters: - dataset: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-tmp-dataset - bigquery-create-dataset-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-create-dataset-2 - dependentTasks: - - get-table-location - - maybe-replace-with-default - - validate-inputs - inputs: - parameters: - dataset: - taskOutputParameter: - outputParameterKey: Output - producerTask: maybe-replace-with-default - exists_ok: - runtimeValue: - constant: 1.0 - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-prediction-dataset - bigquery-query-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job - dependentTasks: - - build-job-configuration-query - - get-first-valid - - get-model-metadata - - get-table-location - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--get-first-valid-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-first-valid - pipelinechannel--get-model-metadata-forecast_horizon: - taskOutputParameter: - outputParameterKey: forecast_horizon - producerTask: get-model-metadata - 
pipelinechannel--get-model-metadata-target_column: - taskOutputParameter: - outputParameterKey: target_column - producerTask: get-model-metadata - pipelinechannel--get-model-metadata-time_column: - taskOutputParameter: - outputParameterKey: time_column - producerTask: get-model-metadata - pipelinechannel--get-model-metadata-time_series_identifier_column: - taskOutputParameter: - outputParameterKey: time_series_identifier_column - producerTask: get-model-metadata - pipelinechannel--model_name: - componentInputParameter: pipelinechannel--model_name - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n SELECT\n target.*,\n STRUCT(prediction.time_series_adjusted_data\ - \ AS value)\n AS predicted_{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}},\n\ - \ prediction.* EXCEPT (\n {{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}},\n\ - \ time_series_timestamp,\n time_series_adjusted_data\n\ - \ ),\n FROM\n ML.EXPLAIN_FORECAST(\n \ - \ MODEL `{{$.inputs.parameters['pipelinechannel--model_name']}}`,\n\ - \ STRUCT({{$.inputs.parameters['pipelinechannel--get-model-metadata-forecast_horizon']}}\ - \ AS horizon)) AS prediction\n RIGHT JOIN `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\ - \ AS target\n ON\n CAST(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ - \ AS STRING)\n = CAST(prediction.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ - \ AS STRING)\n AND TIMESTAMP(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_column']}})\ - \ = prediction.time_series_timestamp\n WHERE target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}}\ - \ IS NULL\n " - taskInfo: - name: predictions-table - build-job-configuration-query: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query - dependentTasks: - - bigquery-create-dataset-2 - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset-2 - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' - table_id: - runtimeValue: - constant: predictions_{{$.pipeline_job_uuid}} - taskInfo: - name: build-job-configuration-query - get-first-valid: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-first-valid - dependentTasks: - - load-table-from-uri - inputs: - parameters: - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - pipelinechannel--load-table-from-uri-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: load-table-from-uri - values: - runtimeValue: - constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", - "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' - taskInfo: - name: get-first-valid - get-model-metadata: - cachingOptions: - enableCache: true - componentRef: - name: 
comp-get-model-metadata - dependentTasks: - - get-table-location - - validate-inputs - inputs: - parameters: - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - model: - componentInputParameter: pipelinechannel--model_name - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-model-metadata - get-table-location: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-table-location - inputs: - parameters: - default_location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - table: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - taskInfo: - name: get-table-location - load-table-from-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-load-table-from-uri - dependentTasks: - - bigquery-create-dataset - - get-table-location - inputs: - parameters: - destination: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - project: - componentInputParameter: pipelinechannel--project - source_format: - runtimeValue: - constant: CSV - source_uris: - componentInputParameter: pipelinechannel--data_source_csv_filenames - taskInfo: - name: load-table-from-uri - maybe-replace-with-default: - cachingOptions: - enableCache: true - componentRef: - name: comp-maybe-replace-with-default - inputs: - parameters: - default: - runtimeValue: - constant: prediction_{{$.pipeline_job_uuid}} - value: - componentInputParameter: pipelinechannel--bigquery_destination_uri - taskInfo: - name: maybe-replace-with-default - validate-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-validate-inputs - inputs: - parameters: - bigquery_destination_uri: - componentInputParameter: pipelinechannel--bigquery_destination_uri - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - source_model_uri: - componentInputParameter: pipelinechannel--model_name - taskInfo: - name: validate-inputs - inputDefinitions: - parameters: - pipelinechannel--bigquery_destination_uri: - parameterType: STRING - pipelinechannel--data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--data_source_csv_filenames: - parameterType: STRING - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - comp-get-first-valid: - executorLabel: exec-get-first-valid - inputDefinitions: - parameters: - values: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-model-metadata: - executorLabel: exec-get-model-metadata - inputDefinitions: - parameters: - location: - parameterType: STRING - model: - 
parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - forecast_horizon: - parameterType: NUMBER_INTEGER - target_column: - parameterType: STRING - time_column: - parameterType: STRING - time_series_identifier_column: - parameterType: STRING - comp-get-table-location: - executorLabel: exec-get-table-location - inputDefinitions: - parameters: - default_location: - defaultValue: '' - description: Location to return if no table was given. - isOptional: true - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - table: - description: The BigQuery table to get a location for. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-load-table-from-uri: - executorLabel: exec-load-table-from-uri - inputDefinitions: - parameters: - destination: - description: Table into which data is to be loaded. - parameterType: STRING - location: - description: The GCP region. - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - source_format: - defaultValue: CSV - description: 'The file format for the files being imported. Only CSV is - - supported.' - isOptional: true - parameterType: STRING - source_uris: - description: 'URIs of data files to be loaded; in format - - gs:///.' - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-maybe-replace-with-default: - executorLabel: exec-maybe-replace-with-default - inputDefinitions: - parameters: - default: - defaultValue: '' - isOptional: true - parameterType: STRING - value: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-validate-inputs: - executorLabel: exec-validate-inputs - inputDefinitions: - parameters: - bigquery_destination_uri: - isOptional: true - parameterType: STRING - data_granularity_unit: - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - isOptional: true - parameterType: STRING - data_source_csv_filenames: - isOptional: true - parameterType: STRING - optimization_objective: - isOptional: true - parameterType: STRING - predefined_split_key: - isOptional: true - parameterType: STRING - source_model_uri: - isOptional: true - parameterType: STRING - target_column: - isOptional: true - parameterType: STRING - test_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - isOptional: true - parameterType: STRING - time_series_identifier_column: - isOptional: true - parameterType: STRING - timestamp_split_key: - isOptional: true - parameterType: STRING - training_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - isOptional: true - parameterType: STRING - window_max_count: - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - isOptional: true - parameterType: NUMBER_INTEGER -deploymentSpec: - executors: - exec-bigquery-create-dataset: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-create-dataset-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-delete-dataset-with-prefix: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_delete_dataset_with_prefix - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ - \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ - \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ - \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ - \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ - \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ - \n" - image: python:3.7-slim - exec-bigquery-query-job: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-build-job-configuration-query: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-get-first-valid: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_first_valid - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ - \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n for value in json.loads(values):\n if value:\n return value\n\ - \ raise ValueError('No valid values.')\n\n" - image: python:3.7-slim - exec-get-model-metadata: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_model_metadata - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_model_metadata(\n project: str,\n location: str,\n\ - \ model: str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('time_column',\ - \ str),\n ('time_series_identifier_column', str),\n ('target_column',\ - \ str),\n ('forecast_horizon', int),\n ],\n):\n \"\"\"Retrieves\ - \ training options for a BQML model.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n options\ - \ = client.get_model(model).training_runs[0].training_options\n return\ - \ collections.namedtuple(\n 'Outputs', [\n 'time_column',\n\ - \ 'time_series_identifier_column',\n 'target_column',\n\ - \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ - \ options.time_series_id_column,\n options.time_series_data_column,\n\ - \ options.horizon,\n )\n\n" - image: python:3.7-slim - exec-get-table-location: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_table_location - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ - \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ - \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ - \ table: The BigQuery table to get a location for.\n default_location:\ - \ Location to return if no table was given.\n\n Returns:\n A GCP region\ - \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ - \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ - \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ - \ return client.get_table(table).location\n\n" - image: python:3.7-slim - exec-load-table-from-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - load_table_from_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ - \ source_uris: str,\n destination: str,\n source_format: str =\ - \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ - \ project: The GCP project.\n location: The GCP region.\n source_uris:\ - \ URIs of data files to be loaded; in format\n gs:///.\n\ - \ destination: Table into which data is to be loaded.\n source_format:\ - \ The file format for the files being imported. 
Only CSV is\n supported.\n\ - \n Returns:\n The destination table containing imported data.\n \"\"\ - \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ - \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ - \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ - \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ - \ destination=destination,\n project=project,\n location=location,\n\ - \ job_config=job_config).result()\n return destination\n\n" - image: python:3.7-slim - exec-maybe-replace-with-default: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - maybe_replace_with_default - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ - \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ - \n return default if not value else value\n\n" - image: python:3.7-slim - exec-validate-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - validate_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ - \ time_series_identifier_column: Optional[str] = None,\n target_column:\ - \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ - \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ - \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ - \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ - \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ - \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ - \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ - \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ - \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ - \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ - \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ - \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ - \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ - \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ - \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ - \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ - \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ - \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ - \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ - \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ - \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ - \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ - \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ - \n # Validate data source.\n data_source_count = sum([bool(source) for\ - \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ - \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ - \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ - \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ - \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ - \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ - \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ - \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ - \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ - \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ - \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ - \ = [None if fraction == -1 else fraction\n for fraction\ - \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ - \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ - \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ - \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ - \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ - \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ - \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ - \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ - \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ - \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ - \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ - \ and not all(fraction_splits):\n raise ValueError('All fractions must\ - \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ - \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ - \ == -1:\n window_max_count = None\n window_configs = [window_column,\ - \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ - \ for config in window_configs])\n if window_config_count > 1:\n raise\ - \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ - \ if window_column and not column_pattern.fullmatch(window_column):\n \ - \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ - \ and (window_stride_length < 1 or\n window_stride_length\ - \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ - \ '\n f'{window_stride_length}.')\n if window_max_count\ - \ and (window_max_count < 1000 or\n window_max_count\ - \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ - \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ - \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ - \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ - \ not in valid_optimization_objectives:\n raise ValueError(\n \ - \ 'Optimization objective should be one of the following: '\n \ - \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ - \n # Validate data granularity unit.\n valid_data_granularity_units =\ - \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ - \ if data_granularity_unit not in valid_data_granularity_units:\n \ - \ raise ValueError(\n 'Granularity unit should be one of the\ - \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ - \n" - image: python:3.7-slim -pipelineInfo: - description: Forecasts using a BQML ARIMA_PLUS model. 
- name: automl-tabular-bqml-arima-prediction -root: - dag: - tasks: - bigquery-delete-dataset-with-prefix: - cachingOptions: {} - componentRef: - name: comp-bigquery-delete-dataset-with-prefix - dependentTasks: - - exit-handler-1 - inputs: - parameters: - dataset_prefix: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - delete_contents: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: project - taskInfo: - name: delete-tmp-dataset - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--bigquery_destination_uri: - componentInputParameter: bigquery_destination_uri - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - pipelinechannel--data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_name: - componentInputParameter: model_name - pipelinechannel--project: - componentInputParameter: project - taskInfo: - name: exit-handler-1 - inputDefinitions: - parameters: - bigquery_destination_uri: - defaultValue: '' - description: 'URI of the desired destination dataset. If not - - specified, a resource will be created under a new dataset in the project.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - generate_explanation: - defaultValue: false - description: 'Generate explanation along with the batch prediction - - results. This will cause the batch prediction output to include - - explanations.' - isOptional: true - parameterType: BOOLEAN - location: - description: The GCP region for Vertex AI. - parameterType: STRING - model_name: - description: ARIMA_PLUS BQML model URI. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml deleted file mode 100644 index 1d23bd2993..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +++ /dev/null @@ -1,5085 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular-bqml-arima-train -# Description: Trains a BQML ARIMA_PLUS model. 
-# Inputs: -# bigquery_destination_uri: str [Default: ''] -# data_granularity_unit: str -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# encryption_spec_key_name: str [Default: ''] -# forecast_horizon: int -# location: str -# max_order: int [Default: 5.0] -# override_destination: bool [Default: False] -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: True] -# target_column: str -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_identifier_column: str -# timestamp_split_key: str [Default: ''] -# training_fraction: float [Default: -1.0] -# validation_fraction: float [Default: -1.0] -# window_column: str [Default: ''] -# window_max_count: int [Default: -1.0] -# window_stride_length: int [Default: -1.0] -# Outputs: -# create-metrics-artifact-evaluation_metrics: system.Metrics -components: - comp-bigquery-create-dataset: - executorLabel: exec-bigquery-create-dataset - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-create-dataset-2: - executorLabel: exec-bigquery-create-dataset-2 - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-create-model-job: - executorLabel: exec-bigquery-create-model-job - inputDefinitions: - parameters: - job_configuration_query: - defaultValue: {} - description: 'A json formatted string describing the rest of the job configuration. - - For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: "The labels associated with this job. You can\nuse these to\ - \ organize and group your jobs. Label keys and values can\nbe no longer\ - \ than 63 characters, can only containlowercase letters,\nnumeric characters,\ - \ underscores and dashes. International characters\nare allowed. Label\ - \ values are optional. Label keys must start with a\nletter and each label\ - \ in the list must have a different key.\n Example: { \"name\": \"wrench\"\ - , \"mass\": \"1.3kg\", \"count\": \"3\" }." - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location of the job to create the BigQuery model. If not set, - default to - - `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run BigQuery model creation job. - parameterType: STRING - query: - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'Query parameters for standard SQL queries. 
- - If query_parameters are both specified in here and in - - job_configuration_query, the value in here will override the other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.BQMLModel - schemaVersion: 0.0.1 - description: Describes the model which is created. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bigquery-delete-dataset-with-prefix: - executorLabel: exec-bigquery-delete-dataset-with-prefix - inputDefinitions: - parameters: - dataset_prefix: - parameterType: STRING - delete_contents: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - project: - parameterType: STRING - comp-bigquery-list-rows: - executorLabel: exec-bigquery-list-rows - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: A google.BQTable artifact. - parameters: - location: - description: The GCP region. - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-bigquery-list-rows-2: - executorLabel: exec-bigquery-list-rows-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: A google.BQTable artifact. - parameters: - location: - description: The GCP region. - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-bigquery-query-job: - executorLabel: exec-bigquery-query-job - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. 
For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bigquery-query-job-2: - executorLabel: exec-bigquery-query-job-2 - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' 
- isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bigquery-query-job-3: - executorLabel: exec-bigquery-query-job-3 - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. 
- - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bigquery-query-job-4: - executorLabel: exec-bigquery-query-job-4 - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-bigquery-query-job-5: - executorLabel: exec-bigquery-query-job-5 - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-build-job-configuration-query: - executorLabel: exec-build-job-configuration-query - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-2: - executorLabel: exec-build-job-configuration-query-2 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-3: - executorLabel: exec-build-job-configuration-query-3 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-4: - executorLabel: exec-build-job-configuration-query-4 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-5: - executorLabel: exec-build-job-configuration-query-5 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-6: - executorLabel: exec-build-job-configuration-query-6 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT 
- comp-build-serialized-query-parameters: - executorLabel: exec-build-serialized-query-parameters - inputDefinitions: - parameters: - data_granularity_unit: - description: 'The data granularity unit. Accepted values are: - - minute, hour, day, week, month, year.' - isOptional: true - parameterType: STRING - forecast_horizon: - description: 'The number of time periods into the future for which - - forecasts will be created. Future periods start after the latest timestamp - - for each time series.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon_off_by_one: - defaultValue: false - description: 'If True, subtract 1 from the forecast horizon - - in the query parameters.' - isOptional: true - parameterType: BOOLEAN - max_order: - description: 'Integer between 1 and 5 representing the size of the parameter - - search space for ARIMA_PLUS. 5 would result in the highest accuracy model, - - but also the longest training runtime.' - isOptional: true - parameterType: NUMBER_INTEGER - splits: - description: Dataset splits to be used to train the model. - isOptional: true - parameterType: LIST - window: - description: 'Dict containing information about the forecast window the - model - - should have. If no window is provided, the window will start after the - - latest period in the available data.' - isOptional: true - parameterType: STRUCT - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-build-serialized-query-parameters-2: - executorLabel: exec-build-serialized-query-parameters-2 - inputDefinitions: - parameters: - data_granularity_unit: - description: 'The data granularity unit. Accepted values are: - - minute, hour, day, week, month, year.' - isOptional: true - parameterType: STRING - forecast_horizon: - description: 'The number of time periods into the future for which - - forecasts will be created. Future periods start after the latest timestamp - - for each time series.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon_off_by_one: - defaultValue: false - description: 'If True, subtract 1 from the forecast horizon - - in the query parameters.' - isOptional: true - parameterType: BOOLEAN - max_order: - description: 'Integer between 1 and 5 representing the size of the parameter - - search space for ARIMA_PLUS. 5 would result in the highest accuracy model, - - but also the longest training runtime.' - isOptional: true - parameterType: NUMBER_INTEGER - splits: - description: Dataset splits to be used to train the model. - isOptional: true - parameterType: LIST - window: - description: 'Dict containing information about the forecast window the - model - - should have. If no window is provided, the window will start after the - - latest period in the available data.' - isOptional: true - parameterType: STRUCT - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-build-serialized-query-parameters-3: - executorLabel: exec-build-serialized-query-parameters-3 - inputDefinitions: - parameters: - data_granularity_unit: - description: 'The data granularity unit. Accepted values are: - - minute, hour, day, week, month, year.' - isOptional: true - parameterType: STRING - forecast_horizon: - description: 'The number of time periods into the future for which - - forecasts will be created. Future periods start after the latest timestamp - - for each time series.' 
- isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon_off_by_one: - defaultValue: false - description: 'If True, subtract 1 from the forecast horizon - - in the query parameters.' - isOptional: true - parameterType: BOOLEAN - max_order: - description: 'Integer between 1 and 5 representing the size of the parameter - - search space for ARIMA_PLUS. 5 would result in the highest accuracy model, - - but also the longest training runtime.' - isOptional: true - parameterType: NUMBER_INTEGER - splits: - description: Dataset splits to be used to train the model. - isOptional: true - parameterType: LIST - window: - description: 'Dict containing information about the forecast window the - model - - should have. If no window is provided, the window will start after the - - latest period in the available data.' - isOptional: true - parameterType: STRUCT - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-cond: - executorLabel: exec-cond - inputDefinitions: - parameters: - false_str: - parameterType: STRING - predicate: - parameterType: BOOLEAN - true_str: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-condition-2: - dag: - outputs: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: create-metrics-artifact - tasks: - bigquery-list-rows: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-list-rows - dependentTasks: - - bigquery-query-job - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job - parameters: - location: - componentInputParameter: pipelinechannel--get-table-location-Output - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: bigquery-list-rows - bigquery-list-rows-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-list-rows-2 - dependentTasks: - - bigquery-query-job-4 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job-4 - parameters: - location: - componentInputParameter: pipelinechannel--get-table-location-Output - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: bigquery-list-rows-2 - bigquery-query-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job - dependentTasks: - - build-job-configuration-query - - build-serialized-query-parameters - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - pipelinechannel--get-fte-suffix-Output: - componentInputParameter: pipelinechannel--get-fte-suffix-Output - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: 
pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n WITH\n time_series_windows AS (\n \ - \ SELECT\n FIRST_VALUE({{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ OVER (horizon) AS start_time,\n COUNT(*) OVER (horizon)\ - \ AS count,\n FIRST_VALUE(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ - \ OVER (horizon) AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ - \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ - \ WHERE UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ - \ IN UNNEST(@splits)\n WINDOW horizon AS (\n \ - \ PARTITION BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ - \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}}\n\ - \ ROWS BETWEEN 0 PRECEDING AND @forecast_horizon FOLLOWING)\n\ - \ )\n SELECT\n start_time,\n TIMESTAMP(DATETIME_ADD(\n\ - \ DATETIME(start_time),\n INTERVAL @forecast_horizon\ - \ {{$.inputs.parameters['pipelinechannel--data_granularity_unit']}}\n\ - \ )) AS end_time,\n SUM(count) AS count,\n \ - \ ROW_NUMBER() OVER () AS window_number,\n FROM time_series_windows\n\ - \ WHERE window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\n\ - \ GROUP BY start_time\n " - query_parameters: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-serialized-query-parameters - taskInfo: - name: create-eval-windows-table - bigquery-query-job-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job-2 - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.metrics`\ - \ (\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ - \ TIMESTAMP,\n MAE FLOAT64,\n MSE\ - \ FLOAT64,\n MAPE FLOAT64,\n prediction_count\ - \ INT64\n )\n " - taskInfo: - name: create-tmp-metrics-table - bigquery-query-job-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job-3 - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id - 
pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.evaluated_examples`\ - \ (\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ - \ STRING,\n {{$.inputs.parameters['pipelinechannel--time_column']}}\ - \ TIMESTAMP,\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ - \ TIMESTAMP,\n {{$.inputs.parameters['pipelinechannel--target_column']}}\ - \ FLOAT64,\n predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\ - \ STRUCT\n )\n " - taskInfo: - name: create-evaluated-examples-table - bigquery-query-job-4: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job-4 - dependentTasks: - - build-job-configuration-query-5 - - for-loop-3 - - table-to-uri - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-5 - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--table-to-uri-uri: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n SELECT\n SUM(MAE * prediction_count) /\ - \ SUM(prediction_count) AS MAE,\n SQRT(SUM(MSE * prediction_count)\ - \ / SUM(prediction_count)) AS RMSE,\n SUM(MAPE * prediction_count)\ - \ / SUM(prediction_count) AS MAPE,\n FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}}`\n\ - \ " - taskInfo: - name: create-backtest-table - bigquery-query-job-5: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job-5 - dependentTasks: - - build-job-configuration-query-6 - - for-loop-3 - - table-to-uri-2 - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-6 - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--table-to-uri-2-uri: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: SELECT * FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}` - taskInfo: - name: export-evaluated-examples-table - build-job-configuration-query: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: 
pipelinechannel--bigquery-create-dataset-project_id - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' - table_id: - runtimeValue: - constant: windows - taskInfo: - name: build-job-configuration-query - build-job-configuration-query-5: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-5 - dependentTasks: - - cond - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id - pipelinechannel--cond-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: cond - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' - table_id: - runtimeValue: - constant: final_metrics - write_disposition: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' - taskInfo: - name: build-job-configuration-query-5 - build-job-configuration-query-6: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-6 - dependentTasks: - - cond - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--cond-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: cond - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' - table_id: - runtimeValue: - constant: evaluated_examples - write_disposition: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' - taskInfo: - name: build-job-configuration-query-6 - build-serialized-query-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-serialized-query-parameters - inputs: - parameters: - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecast_horizon_off_by_one: - runtimeValue: - constant: 1.0 - splits: - runtimeValue: - constant: - - TEST - taskInfo: - name: build-serialized-query-parameters - cond: - cachingOptions: - enableCache: true - componentRef: - name: comp-cond - inputs: - parameters: - false_str: - runtimeValue: - constant: WRITE_EMPTY - predicate: - componentInputParameter: pipelinechannel--override_destination - true_str: - runtimeValue: - constant: WRITE_TRUNCATE - taskInfo: - name: cond - create-metrics-artifact: - cachingOptions: - enableCache: true - componentRef: - name: comp-create-metrics-artifact - dependentTasks: - - bigquery-list-rows-2 - inputs: - parameters: - metrics_rows: - taskOutputParameter: - outputParameterKey: Output - producerTask: bigquery-list-rows-2 - taskInfo: - name: create-metrics-artifact - for-loop-3: - componentRef: - name: comp-for-loop-3 - dependentTasks: - - bigquery-list-rows - - table-to-uri - - table-to-uri-2 - inputs: - parameters: - 
pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id - pipelinechannel--bigquery-list-rows-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bigquery-list-rows - pipelinechannel--data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - pipelinechannel--forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - pipelinechannel--get-fte-suffix-Output: - componentInputParameter: pipelinechannel--get-fte-suffix-Output - pipelinechannel--get-table-location-Output: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--max_order: - componentInputParameter: pipelinechannel--max_order - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--table-to-uri-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: table-to-uri-2 - pipelinechannel--table-to-uri-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: table-to-uri-2 - pipelinechannel--table-to-uri-2-table_id: - taskOutputParameter: - outputParameterKey: table_id - producerTask: table-to-uri-2 - pipelinechannel--table-to-uri-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: table-to-uri - pipelinechannel--table-to-uri-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: table-to-uri - pipelinechannel--table-to-uri-table_id: - taskOutputParameter: - outputParameterKey: table_id - producerTask: table-to-uri - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - iteratorPolicy: - parallelismLimit: 50 - parameterIterator: - itemInput: pipelinechannel--bigquery-list-rows-Output-loop-item - items: - inputParameter: pipelinechannel--bigquery-list-rows-Output - taskInfo: - name: for-loop-3 - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - bigquery-query-job-2 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job-2 - taskInfo: - name: table-to-uri - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - bigquery-query-job-3 - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job-3 - taskInfo: - name: table-to-uri-2 - inputDefinitions: - parameters: - pipelinechannel--bigquery-create-dataset-2-dataset_id: - parameterType: STRING - pipelinechannel--bigquery-create-dataset-2-project_id: - parameterType: STRING - pipelinechannel--bigquery-create-dataset-dataset_id: - parameterType: 
STRING - pipelinechannel--bigquery-create-dataset-project_id: - parameterType: STRING - pipelinechannel--data_granularity_unit: - parameterType: STRING - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--get-fte-suffix-Output: - parameterType: STRING - pipelinechannel--get-table-location-Output: - parameterType: STRING - pipelinechannel--max_order: - parameterType: NUMBER_INTEGER - pipelinechannel--override_destination: - parameterType: BOOLEAN - pipelinechannel--project: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_identifier_column: - parameterType: STRING - outputDefinitions: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-create-metrics-artifact: - executorLabel: exec-create-metrics-artifact - inputDefinitions: - parameters: - metrics_rows: - parameterType: LIST - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: create-metrics-artifact-evaluation_metrics - producerSubtask: condition-2 - tasks: - bigquery-create-dataset: - cachingOptions: {} - componentRef: - name: comp-bigquery-create-dataset - dependentTasks: - - get-table-location - - validate-inputs - inputs: - parameters: - dataset: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-tmp-dataset - bigquery-create-dataset-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-create-dataset-2 - dependentTasks: - - get-table-location - - maybe-replace-with-default - - validate-inputs - inputs: - parameters: - dataset: - taskOutputParameter: - outputParameterKey: Output - producerTask: maybe-replace-with-default - exists_ok: - runtimeValue: - constant: 1.0 - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-export-dataset - bigquery-create-model-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-create-model-job - dependentTasks: - - bigquery-create-dataset-2 - - build-serialized-query-parameters-3 - - get-fte-suffix - - get-table-location - inputs: - parameters: - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--bigquery-create-dataset-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--get-fte-suffix-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-fte-suffix - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - 
pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n CREATE MODEL `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.model_{{$.pipeline_job_uuid}}`\n\ - \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ - \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ - \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ - \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ - \ horizon = @forecast_horizon,\n auto_arima\ - \ = True,\n auto_arima_max_order = @max_order,\n \ - \ data_frequency = @data_granularity_unit,\n holiday_region\ - \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ - \ adjust_step_changes = True,\n decompose_time_series\ - \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ - \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ - \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ < @start_time\n " - query_parameters: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-serialized-query-parameters-3 - taskInfo: - name: create-serving-model - build-serialized-query-parameters-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-serialized-query-parameters-3 - inputs: - parameters: - data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - max_order: - componentInputParameter: pipelinechannel--max_order - splits: - runtimeValue: - constant: - - TRAIN - - VALIDATE - - TEST - taskInfo: - name: build-serialized-query-parameters-3 - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - bigquery-create-dataset - - bigquery-create-dataset-2 - - get-fte-suffix - - get-table-location - inputs: - parameters: - pipelinechannel--bigquery-create-dataset-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - pipelinechannel--data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--forecast_horizon: - componentInputParameter: 
pipelinechannel--forecast_horizon - pipelinechannel--get-fte-suffix-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-fte-suffix - pipelinechannel--get-table-location-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--max_order: - componentInputParameter: pipelinechannel--max_order - pipelinechannel--override_destination: - componentInputParameter: pipelinechannel--override_destination - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - feature-transform-engine: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - dependentTasks: - - bigquery-create-dataset-2 - inputs: - parameters: - autodetect_csv_schema: - runtimeValue: - constant: 1.0 - bigquery_staging_full_dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - forecasting_apply_windowing: - runtimeValue: - constant: 0.0 - forecasting_context_window: - runtimeValue: - constant: 0.0 - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--bigquery-create-dataset-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset-2 - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - runtimeValue: - constant: {} - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - 
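
Editor's note: the run-evaluation task above is gated by a triggerPolicy whose condition checks pipelinechannel--run_evaluation == true. In the KFP SDK that gating is typically authored with a condition context manager; a minimal, hypothetical sketch (not the GCPC source):

# Minimal sketch (hypothetical component): a boolean pipeline input compiled
# into a `triggerPolicy.condition` like the one gating run-evaluation above.
from kfp import dsl


@dsl.component
def evaluate() -> str:
    return 'evaluated'


@dsl.pipeline(name='condition-sketch')
def pipeline(run_evaluation: bool = True):
    # Compiles to: triggerPolicy.condition:
    #   inputs.parameter_values['pipelinechannel--run_evaluation'] == true
    with dsl.Condition(run_evaluation == True, name='run-evaluation'):
        evaluate()
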
componentInputParameter: pipelinechannel--validation_fraction - taskInfo: - name: feature-transform-engine - get-fte-suffix: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-fte-suffix - dependentTasks: - - bigquery-create-dataset-2 - - feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' - fte_table: - runtimeValue: - constant: fte_time_series_output - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--bigquery-create-dataset-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset-2 - pipelinechannel--bigquery-create-dataset-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset-2 - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-fte-suffix - get-table-location: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-table-location - inputs: - parameters: - default_location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - table: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - taskInfo: - name: get-table-location - maybe-replace-with-default: - cachingOptions: - enableCache: true - componentRef: - name: comp-maybe-replace-with-default - inputs: - parameters: - default: - runtimeValue: - constant: export_{{$.pipeline_job_uuid}} - value: - componentInputParameter: pipelinechannel--bigquery_destination_uri - taskInfo: - name: maybe-replace-with-default - validate-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-validate-inputs - inputs: - parameters: - bigquery_destination_uri: - componentInputParameter: pipelinechannel--bigquery_destination_uri - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - window_column: - componentInputParameter: pipelinechannel--window_column - window_max_count: - componentInputParameter: pipelinechannel--window_max_count - window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - taskInfo: - name: validate-inputs - inputDefinitions: - parameters: - pipelinechannel--bigquery_destination_uri: - parameterType: STRING - pipelinechannel--data_granularity_unit: - parameterType: STRING - pipelinechannel--data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--data_source_csv_filenames: - parameterType: STRING - 
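
Editor's note: several of the helpers above (maybe-replace-with-default, get-table-location, get-value) are small single-output components. Judging only by its name and parameters, maybe-replace-with-default presumably returns the supplied default when the incoming value is empty; the hypothetical sketch below illustrates that single-output shape and may differ from the real GCPC implementation.

# Hypothetical sketch of a helper in the style of `maybe-replace-with-default`
# above: one string output, falling back to `default` when `value` is empty.
# The real implementation may differ.
from kfp import dsl


@dsl.component
def maybe_replace_with_default(value: str, default: str = '') -> str:
    """Returns `default` if `value` is empty, otherwise `value`."""
    return default if not value else value
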
pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_order: - parameterType: NUMBER_INTEGER - pipelinechannel--override_destination: - parameterType: BOOLEAN - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--window_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_stride_length: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. 
More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. 
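
Editor's note: the dataset_level_custom_transformation_definitions and dataset_level_transformations descriptions above embed their own code-block examples. Reassembled from those escaped strings, the corresponding Python values look roughly like this (the URIs and column names are the documentation's placeholders, not real resources):

# Reassembled from the embedded documentation examples above.
dataset_level_custom_transformation_definitions = [
    {
        'transformation': 'ConcatCols',
        'module_path': '/path/to/custom_transform_fn_dlt.py',
        'function_name': 'concat_cols',
    },
]

dataset_level_transformations = [
    {
        'transformation': 'Join',
        'right_table_uri': 'bq://test-project.dataset_test.table',
        'join_keys': [['join_key_col', 'join_key_col']],
    },
    {
        'transformation': 'ConcatCols',
        'cols': ['feature_1', 'feature_2'],
        'output_col': 'feature_1_2',
    },
]
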
Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. 
- isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. 
- parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
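
Editor's note: similarly, the tf_auto_transform_features and tf_custom_transformation_definitions examples embedded in the descriptions above translate to Python values of roughly this shape (paths and feature names are again the documentation's placeholders):

# Reassembled from the embedded documentation examples above.
tf_auto_transform_features = {
    'auto': ['feature1'],
    'categorical': ['feature2', 'feature3'],
}

tf_custom_transformation_definitions = [
    {
        'transformation': 'PlusOne',
        'module_path': 'gs://bucket/custom_transform_fn.py',
        'function_name': 'plus_one_transform',
    },
    {
        'transformation': 'MultiplyTwo',
        'module_path': 'gs://bucket/custom_transform_fn.py',
        'function_name': 'multiply_two_transform',
    },
]

# Using a custom transform together with a built-in transformation:
tf_transformations = [
    {'transformation': 'CastToFloat',
     'input_columns': ['feature_1'], 'output_columns': ['feature_1']},
    {'transformation': 'PlusOne',
     'input_columns': ['feature_1'],
     'output_columns': ['feature_1_plused_one']},
]
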
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
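
Editor's note: collecting the per-transformation examples scattered through the tf_transformations_path description above, a configuration file referenced by that parameter would look roughly like the following (column names and values are the documentation's own placeholders; written out via json.dump only for illustration):

# Roughly the kind of JSON configuration a `tf_transformations_path` file
# would contain, assembled from the documentation examples above.
import json

transformations = [
    {'transformation': 'ZScale', 'input_columns': ['feature_1']},
    {'transformation': 'Datetime', 'input_columns': ['feature_1'],
     'time_format': '%Y-%m-%d'},
    {'transformation': 'Clip', 'input_columns': ['col1'],
     'output_columns': ['col1_clipped'], 'min_value': 1., 'max_value': 10.},
    {'transformation': 'MaxAbsScale', 'input_columns': ['col1'],
     'output_columns': ['col1_max_abs_scaled']},
]

with open('transformations.json', 'w') as f:
    json.dump(transformations, f, indent=2)
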
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-for-loop-3: - dag: - tasks: - build-job-configuration-query-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-2 - dependentTasks: - - get-window-query-priority - inputs: - parameters: - pipelinechannel--get-window-query-priority-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-window-query-priority - priority: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' - taskInfo: - name: build-job-configuration-query-2 - build-job-configuration-query-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-3 - dependentTasks: - - get-window-query-priority - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-dataset_id'']}}' - pipelinechannel--get-window-query-priority-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-window-query-priority - pipelinechannel--table-to-uri-dataset_id: - componentInputParameter: pipelinechannel--table-to-uri-dataset_id - pipelinechannel--table-to-uri-project_id: - componentInputParameter: pipelinechannel--table-to-uri-project_id - pipelinechannel--table-to-uri-table_id: - componentInputParameter: pipelinechannel--table-to-uri-table_id - priority: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-project_id'']}}' - table_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-table_id'']}}' - write_disposition: - runtimeValue: - constant: WRITE_APPEND - taskInfo: - name: build-job-configuration-query-3 - build-job-configuration-query-4: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-4 - dependentTasks: - - get-window-query-priority - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' - pipelinechannel--get-window-query-priority-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-window-query-priority - pipelinechannel--table-to-uri-2-dataset_id: - componentInputParameter: pipelinechannel--table-to-uri-2-dataset_id - pipelinechannel--table-to-uri-2-project_id: - componentInputParameter: pipelinechannel--table-to-uri-2-project_id - pipelinechannel--table-to-uri-2-table_id: - componentInputParameter: pipelinechannel--table-to-uri-2-table_id - priority: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' - project_id: - 
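
Editor's note: the build-job-configuration-query-* tasks above assemble the destination table, write disposition and query priority into a single STRUCT that is later passed to query-with-retry as job_configuration_query. In BigQuery REST terms that plausibly corresponds to a JobConfigurationQuery payload along the lines below; the exact keys the component emits are not visible in this spec, so treat the shape as an assumption.

# Assumed shape only: a BigQuery JobConfigurationQuery-style dict carrying the
# destination table, WRITE_APPEND disposition and query priority that the
# build-job-configuration-query-* tasks above parameterize.
job_configuration_query = {
    'destinationTable': {
        'projectId': 'my-project',
        'datasetId': 'my_dataset',
        'tableId': 'evaluation_metrics',
    },
    'writeDisposition': 'WRITE_APPEND',
    'priority': 'BATCH',  # or 'INTERACTIVE', per get-window-query-priority
}
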
runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' - table_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' - write_disposition: - runtimeValue: - constant: WRITE_APPEND - taskInfo: - name: build-job-configuration-query-4 - build-serialized-query-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-serialized-query-parameters-2 - inputs: - parameters: - data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - max_order: - componentInputParameter: pipelinechannel--max_order - splits: - runtimeValue: - constant: - - TRAIN - - VALIDATE - - TEST - window: - componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item - taskInfo: - name: build-serialized-query-parameters-2 - get-value: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-value - inputs: - parameters: - d: - componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item - key: - runtimeValue: - constant: window_number - taskInfo: - name: get_window_number - get-window-query-priority: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-window-query-priority - inputs: - parameters: - max_interactive: - runtimeValue: - constant: 50.0 - window: - componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item - taskInfo: - name: get-window-query-priority - query-with-retry: - cachingOptions: - enableCache: true - componentRef: - name: comp-query-with-retry - dependentTasks: - - build-job-configuration-query-2 - - build-serialized-query-parameters-2 - - get-value - inputs: - parameters: - destination_uri: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.model_{{$.inputs.parameters[''pipelinechannel--get-value-Output'']}}' - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-2 - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--bigquery-create-dataset-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id - pipelinechannel--bigquery-create-dataset-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id - pipelinechannel--get-fte-suffix-Output: - componentInputParameter: pipelinechannel--get-fte-suffix-Output - pipelinechannel--get-value-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-value - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n CREATE MODEL 
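
Editor's note: comp-for-loop-3 is the compiled form of a parallel loop over the rows returned by bigquery-list-rows, with each loop item (a STRUCT) fed into the per-window tasks such as get-value and query-with-retry. A minimal, hypothetical KFP sketch of that fan-out pattern (not the GCPC source):

# Minimal sketch (hypothetical components) of the fan-out that compiles to a
# `comp-for-loop-*` DAG with a `...-loop-item` STRUCT parameter, as above.
from typing import Dict, List

from kfp import dsl


@dsl.component
def list_windows() -> List[Dict]:
    return [{'window_number': 1}, {'window_number': 2}]


@dsl.component
def train_window(window: Dict) -> str:
    return f"model_{window['window_number']}"


@dsl.pipeline(name='parallelfor-sketch')
def pipeline():
    windows = list_windows()
    with dsl.ParallelFor(items=windows.output) as window:
        train_window(window=window)
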
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.model_{{$.inputs.parameters['pipelinechannel--get-value-Output']}}`\n\ - \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ - \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ - \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ - \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ - \ horizon = @forecast_horizon,\n auto_arima\ - \ = True,\n auto_arima_max_order = @max_order,\n \ - \ data_frequency = @data_granularity_unit,\n holiday_region\ - \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ - \ adjust_step_changes = True,\n decompose_time_series\ - \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ - \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ - \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ < @start_time\n " - query_parameters: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-serialized-query-parameters-2 - taskInfo: - name: create-eval-model - query-with-retry-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-query-with-retry-2 - dependentTasks: - - build-job-configuration-query-3 - - build-serialized-query-parameters-2 - - query-with-retry - inputs: - parameters: - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-3 - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - pipelinechannel--get-fte-suffix-Output: - componentInputParameter: pipelinechannel--get-fte-suffix-Output - pipelinechannel--query-with-retry-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: query-with-retry - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n SELECT\n @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ AVG(mean_absolute_error) AS MAE,\n AVG(mean_squared_error)\ - \ AS MSE,\n AVG(mean_absolute_percentage_error) AS MAPE,\n\ - \ @prediction_count AS prediction_count,\n FROM ML.EVALUATE(\n\ - \ MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ - \ TABLE 
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`,\n\ - \ STRUCT(True AS perform_aggregation, {{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ - \ as horizon))\n " - query_parameters: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-serialized-query-parameters-2 - taskInfo: - name: append-evaluation-metrics - query-with-retry-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-query-with-retry-3 - dependentTasks: - - build-job-configuration-query-4 - - build-serialized-query-parameters-2 - - query-with-retry - inputs: - parameters: - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-4 - location: - componentInputParameter: pipelinechannel--get-table-location-Output - pipelinechannel--bigquery-create-dataset-2-dataset_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id - pipelinechannel--bigquery-create-dataset-2-project_id: - componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id - pipelinechannel--forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - pipelinechannel--get-fte-suffix-Output: - componentInputParameter: pipelinechannel--get-fte-suffix-Output - pipelinechannel--query-with-retry-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: query-with-retry - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n SELECT\n CAST(actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ - \ AS STRING)\n AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ CAST(actual.{{$.inputs.parameters['pipelinechannel--target_column']}}\ - \ AS FLOAT64) AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ STRUCT(pred.forecast_value AS value) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ FROM\n ML.FORECAST(\n MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ - \ STRUCT({{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ - \ AS horizon)) pred\n JOIN `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\ - \ actual\n ON\n pred.forecast_timestamp = TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\n\ - \ AND pred.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ - \ = actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ - \ " - query_parameters: - taskOutputParameter: - 
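
Editor's note: after placeholder substitution, the append-evaluation-metrics query above reduces to a plain ML.EVALUATE aggregation over the backtest model; with hypothetical model and table names filled in it reads roughly as follows (@start_time and @prediction_count remain query parameters, as in the spec):

# Rendered form of the append-evaluation-metrics query above, with
# hypothetical model/table names substituted for the pipeline placeholders.
evaluation_sql = """
SELECT
  @start_time AS predicted_on_ts,
  AVG(mean_absolute_error) AS MAE,
  AVG(mean_squared_error) AS MSE,
  AVG(mean_absolute_percentage_error) AS MAPE,
  @prediction_count AS prediction_count,
FROM ML.EVALUATE(
  MODEL `my-project.tmp_dataset.model_1`,
  TABLE `my-project.export_dataset.fte_time_series_output_suffix`,
  STRUCT(True AS perform_aggregation, 30 AS horizon))
"""
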
outputParameterKey: Output - producerTask: build-serialized-query-parameters-2 - taskInfo: - name: append-evaluated-examples - inputDefinitions: - parameters: - pipelinechannel--bigquery-create-dataset-2-dataset_id: - parameterType: STRING - pipelinechannel--bigquery-create-dataset-2-project_id: - parameterType: STRING - pipelinechannel--bigquery-create-dataset-dataset_id: - parameterType: STRING - pipelinechannel--bigquery-create-dataset-project_id: - parameterType: STRING - pipelinechannel--bigquery-list-rows-Output: - parameterType: LIST - pipelinechannel--bigquery-list-rows-Output-loop-item: - parameterType: STRUCT - pipelinechannel--data_granularity_unit: - parameterType: STRING - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--get-fte-suffix-Output: - parameterType: STRING - pipelinechannel--get-table-location-Output: - parameterType: STRING - pipelinechannel--max_order: - parameterType: NUMBER_INTEGER - pipelinechannel--project: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--table-to-uri-2-dataset_id: - parameterType: STRING - pipelinechannel--table-to-uri-2-project_id: - parameterType: STRING - pipelinechannel--table-to-uri-2-table_id: - parameterType: STRING - pipelinechannel--table-to-uri-dataset_id: - parameterType: STRING - pipelinechannel--table-to-uri-project_id: - parameterType: STRING - pipelinechannel--table-to-uri-table_id: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_identifier_column: - parameterType: STRING - comp-get-fte-suffix: - executorLabel: exec-get-fte-suffix - inputDefinitions: - parameters: - bigquery_staging_full_dataset_id: - parameterType: STRING - fte_table: - parameterType: STRING - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-table-location: - executorLabel: exec-get-table-location - inputDefinitions: - parameters: - default_location: - defaultValue: '' - description: Location to return if no table was given. - isOptional: true - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - table: - description: The BigQuery table to get a location for. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-value: - executorLabel: exec-get-value - inputDefinitions: - parameters: - d: - parameterType: STRUCT - key: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-window-query-priority: - executorLabel: exec-get-window-query-priority - inputDefinitions: - parameters: - max_interactive: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - window: - parameterType: STRUCT - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-maybe-replace-with-default: - executorLabel: exec-maybe-replace-with-default - inputDefinitions: - parameters: - default: - defaultValue: '' - isOptional: true - parameterType: STRING - value: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-query-with-retry: - executorLabel: exec-query-with-retry - inputDefinitions: - parameters: - destination_uri: - defaultValue: '' - description: Optional BigQuery URI to output if the query succeeds. 
- isOptional: true - parameterType: STRING - job_configuration_query: - description: Additional query job configurations. - isOptional: true - parameterType: STRUCT - location: - description: The GCP region. - parameterType: STRING - max_retry_count: - defaultValue: 5.0 - description: Maximum number of times to retry the query. - isOptional: true - parameterType: NUMBER_INTEGER - project: - description: The GCP project. - parameterType: STRING - query: - description: The query to run. - parameterType: STRING - query_parameters: - description: A list of query parameters. - isOptional: true - parameterType: LIST - retry_wait_seconds: - defaultValue: 10.0 - description: 'Approximate initial number of seconds to wait before - - making another query attempt with exponential backoff.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-query-with-retry-2: - executorLabel: exec-query-with-retry-2 - inputDefinitions: - parameters: - destination_uri: - defaultValue: '' - description: Optional BigQuery URI to output if the query succeeds. - isOptional: true - parameterType: STRING - job_configuration_query: - description: Additional query job configurations. - isOptional: true - parameterType: STRUCT - location: - description: The GCP region. - parameterType: STRING - max_retry_count: - defaultValue: 5.0 - description: Maximum number of times to retry the query. - isOptional: true - parameterType: NUMBER_INTEGER - project: - description: The GCP project. - parameterType: STRING - query: - description: The query to run. - parameterType: STRING - query_parameters: - description: A list of query parameters. - isOptional: true - parameterType: LIST - retry_wait_seconds: - defaultValue: 10.0 - description: 'Approximate initial number of seconds to wait before - - making another query attempt with exponential backoff.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-query-with-retry-3: - executorLabel: exec-query-with-retry-3 - inputDefinitions: - parameters: - destination_uri: - defaultValue: '' - description: Optional BigQuery URI to output if the query succeeds. - isOptional: true - parameterType: STRING - job_configuration_query: - description: Additional query job configurations. - isOptional: true - parameterType: STRUCT - location: - description: The GCP region. - parameterType: STRING - max_retry_count: - defaultValue: 5.0 - description: Maximum number of times to retry the query. - isOptional: true - parameterType: NUMBER_INTEGER - project: - description: The GCP project. - parameterType: STRING - query: - description: The query to run. - parameterType: STRING - query_parameters: - description: A list of query parameters. - isOptional: true - parameterType: LIST - retry_wait_seconds: - defaultValue: 10.0 - description: 'Approximate initial number of seconds to wait before - - making another query attempt with exponential backoff.' 
- isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-validate-inputs: - executorLabel: exec-validate-inputs - inputDefinitions: - parameters: - bigquery_destination_uri: - isOptional: true - parameterType: STRING - data_granularity_unit: - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - isOptional: true - parameterType: STRING - data_source_csv_filenames: - isOptional: true - parameterType: STRING - optimization_objective: - isOptional: true - parameterType: STRING - predefined_split_key: - isOptional: true - parameterType: STRING - source_model_uri: - isOptional: true - parameterType: STRING - target_column: - isOptional: true - parameterType: STRING - test_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - isOptional: true - parameterType: STRING - time_series_identifier_column: - isOptional: true - parameterType: STRING - timestamp_split_key: - isOptional: true - parameterType: STRING - training_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - isOptional: true - parameterType: STRING - window_max_count: - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - isOptional: true - parameterType: NUMBER_INTEGER -deploymentSpec: - executors: - exec-bigquery-create-dataset: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-create-dataset-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-create-model-job: - container: - args: - - --type - - BigqueryCreateModelJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", - "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.create_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-delete-dataset-with-prefix: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_delete_dataset_with_prefix - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ - \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ - \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ - \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ - \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ - \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ - \n" - image: python:3.7-slim - exec-bigquery-list-rows: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_list_rows - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ - \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ - \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ - \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ - \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ - \ Rows are keyed by column, and\n all values are stored as strings.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n metadata\ - \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ - \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ - \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ - \ return result\n\n" - image: python:3.7-slim - exec-bigquery-list-rows-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_list_rows - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ - \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ - \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ - \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ - \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ - \ Rows are keyed by column, and\n all values are stored as strings.\n\ - \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n metadata\ - \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ - \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ - \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ - \ return result\n\n" - image: python:3.7-slim - exec-bigquery-query-job: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-query-job-2: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - 
--executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-query-job-3: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-query-job-4: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-query-job-5: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-build-job-configuration-query: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-4: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-5: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-6: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-serialized-query-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_serialized_query_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ - \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ - \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ - \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ - ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ - \ JSON objects for BQML queries.\n\n All query parameters will be stored\ - \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ - \n Args:\n forecast_horizon: The number of time periods into the future\ - \ for which\n forecasts will be created. Future periods start after\ - \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ - \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ - \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ - \ minute, hour, day, week, month, year.\n splits: Dataset splits\ - \ to be used to train the model.\n window: Dict containing information\ - \ about the forecast window the model\n should have. If no window is\ - \ provided, the window will start after the\n latest period in the\ - \ available data.\n max_order: Integer between 1 and 5 representing the\ - \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ - \ in the highest accuracy model,\n but also the longest training runtime.\n\ - \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ - \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ - \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ - \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ - \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ - \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ - \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ - \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ - \ 'parameterType': {\n 'type': 'STRING'\n },\n\ - \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ - \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ - \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ - \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ - \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ - \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ - \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': str(forecast_horizon)\n },\n })\n if splits\ - \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ - \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ - \ {\n 'type': 'STRING'\n },\n },\n \ - \ 'parameterValue': {\n 'arrayValues': [{\n \ - \ 'value': split\n } for split in splits],\n },\n \ - \ })\n\n if window is not None:\n query_parameters.append({\n \ - \ 'name': 'prediction_count',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ - \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ - \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ - \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ - \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim - exec-build-serialized-query-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_serialized_query_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ - \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ - \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ - \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ - ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ - \ JSON objects for BQML queries.\n\n All query parameters will be stored\ - \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ - \n Args:\n forecast_horizon: The number of time periods into the future\ - \ for which\n forecasts will be created. Future periods start after\ - \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ - \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ - \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ - \ minute, hour, day, week, month, year.\n splits: Dataset splits\ - \ to be used to train the model.\n window: Dict containing information\ - \ about the forecast window the model\n should have. 
If no window is\ - \ provided, the window will start after the\n latest period in the\ - \ available data.\n max_order: Integer between 1 and 5 representing the\ - \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ - \ in the highest accuracy model,\n but also the longest training runtime.\n\ - \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ - \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ - \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ - \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ - \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ - \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ - \ time unit. '\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ - \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ - \ 'parameterType': {\n 'type': 'STRING'\n },\n\ - \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ - \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ - \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ - \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ - \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ - \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ - \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': str(forecast_horizon)\n },\n })\n if splits\ - \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ - \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ - \ {\n 'type': 'STRING'\n },\n },\n \ - \ 'parameterValue': {\n 'arrayValues': [{\n \ - \ 'value': split\n } for split in splits],\n },\n \ - \ })\n\n if window is not None:\n query_parameters.append({\n \ - \ 'name': 'prediction_count',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ - \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ - \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ - \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ - \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim - exec-build-serialized-query-parameters-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_serialized_query_parameters - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ - \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ - \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ - \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ - ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ - \ JSON objects for BQML queries.\n\n All query parameters will be stored\ - \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ - \n Args:\n forecast_horizon: The number of time periods into the future\ - \ for which\n forecasts will be created. Future periods start after\ - \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ - \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ - \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ - \ minute, hour, day, week, month, year.\n splits: Dataset splits\ - \ to be used to train the model.\n window: Dict containing information\ - \ about the forecast window the model\n should have. If no window is\ - \ provided, the window will start after the\n latest period in the\ - \ available data.\n max_order: Integer between 1 and 5 representing the\ - \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ - \ in the highest accuracy model,\n but also the longest training runtime.\n\ - \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ - \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ - \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ - \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ - \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ - \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ - \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ - \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ - \ 'parameterType': {\n 'type': 'STRING'\n },\n\ - \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ - \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ - \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ - \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ - \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ - \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ - \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': str(forecast_horizon)\n },\n })\n if splits\ - \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ - \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ - \ {\n 'type': 'STRING'\n },\n },\n \ - \ 'parameterValue': {\n 'arrayValues': [{\n \ - \ 'value': split\n } for split in splits],\n },\n \ - \ })\n\n if window is not None:\n query_parameters.append({\n \ - \ 'name': 'prediction_count',\n 'parameterType': {\n \ - \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ - \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ - \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ - \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ - \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ - \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim - exec-cond: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - cond - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ - \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ - \ return true_str if predicate else false_str\n\n" - image: python:3.7-slim - exec-create-metrics-artifact: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - create_metrics_artifact - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef create_metrics_artifact(\n metrics_rows: List[Dict[str, str]],\n\ - \ evaluation_metrics: dsl.Output[dsl.Metrics],\n) -> None:\n \"\"\"\ - Converts the rows of a metrics table into an Artifact.\"\"\"\n # Use the\ - \ Vertex Eval component's Metrics metadata naming from\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/metadata/schema/google/artifact_schema.py?cl=467006447&l=344\n\ - \ metric_name_map = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE':\ - \ 'rootMeanSquaredError',\n 'MAPE': 'meanAbsolutePercentageError',\n\ - \ }\n metrics = {metric_name_map[k]: v for k, v in dict(metrics_rows[0]).items()}\n\ - \ evaluation_metrics.metadata = metrics\n\n" - image: python:3.7-slim - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", 
"{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": 
["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - "{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-get-fte-suffix: - container: - args: - - 
--executor_input - - '{{$}}' - - --function_to_execute - - get_fte_suffix - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ - \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ - \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n for\ - \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ - \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ - \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ - \n" - image: python:3.7-slim - exec-get-table-location: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_table_location - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ - \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ - \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ - \ table: The BigQuery table to get a location for.\n default_location:\ - \ Location to return if no table was given.\n\n Returns:\n A GCP region\ - \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ - \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ - \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ - \ return client.get_table(table).location\n\n" - image: python:3.7-slim - exec-get-value: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_value - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ - \n" - image: python:3.7-slim - exec-get-window-query-priority: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_window_query_priority - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_window_query_priority(\n window: Dict[str, str],\n \ - \ max_interactive: int = 100,\n) -> str:\n \"\"\"Returns a query priority\ - \ depending on the window number.\"\"\"\n if int(window['window_number'])\ - \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ - \n" - image: python:3.7-slim - exec-maybe-replace-with-default: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - maybe_replace_with_default - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ - \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ - \n return default if not value else value\n\n" - image: python:3.7-slim - exec-query-with-retry: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - query_with_retry - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ - \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ - \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ - \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ - \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ - \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ - \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ - \ GCP region.\n query: The query to run.\n query_parameters: A list\ - \ of query parameters.\n job_configuration_query: Additional query job\ - \ configurations.\n max_retry_count: Maximum number of times to retry\ - \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ - \ to wait before\n making another query attempt with exponential backoff.\n\ - \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ - \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import logging\n import random\n import time\n\n from google.api_core\ - \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n query_parameters = query_parameters or []\n job_configuration_query\ - \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ - \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ - \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ - \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ - \ client.query(query, job_config=job_config).result()\n break\n\ - \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ - \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ - \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ - \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ - \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ - \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ - \n" - image: python:3.7-slim - exec-query-with-retry-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - query_with_retry - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ - \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ - \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ - \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ - \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ - \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ - \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ - \ GCP region.\n query: The query to run.\n query_parameters: A list\ - \ of query parameters.\n job_configuration_query: Additional query job\ - \ configurations.\n max_retry_count: Maximum number of times to retry\ - \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ - \ to wait before\n making another query attempt with exponential backoff.\n\ - \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ - \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import logging\n import random\n import time\n\n from google.api_core\ - \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n query_parameters = query_parameters or []\n job_configuration_query\ - \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ - \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ - \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ - \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ - \ client.query(query, job_config=job_config).result()\n break\n\ - \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ - \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ - \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ - \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ - \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ - \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ - \n" - image: python:3.7-slim - exec-query-with-retry-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - query_with_retry - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ - \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ - \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ - \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ - \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ - \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ - \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ - \ GCP region.\n query: The query to run.\n query_parameters: A list\ - \ of query parameters.\n job_configuration_query: Additional query job\ - \ configurations.\n max_retry_count: Maximum number of times to retry\ - \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ - \ to wait before\n making another query attempt with exponential backoff.\n\ - \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ - \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import logging\n import random\n import time\n\n from google.api_core\ - \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n query_parameters = query_parameters or []\n job_configuration_query\ - \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ - \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ - \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ - \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ - \ client.query(query, job_config=job_config).result()\n break\n\ - \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ - \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ - \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ - \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ - \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ - \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ - \n" - image: python:3.7-slim - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-validate-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - validate_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ - \ time_series_identifier_column: Optional[str] = None,\n target_column:\ - \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ - \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ - \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ - \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ - \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ - \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ - \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ - \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ - \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ - \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ - \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ - \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ - \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ - \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ - \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ - \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ - \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ - \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ - \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ - \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ - \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ - \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ - \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ - \n # Validate data source.\n data_source_count = sum([bool(source) for\ - \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ - \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ - \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ - \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ - \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ - \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ - \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ - \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ - \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ - \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ - \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ - \ = [None if fraction == -1 else fraction\n for fraction\ - \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ - \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ - \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ - \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ - \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ - \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ - \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ - \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ - \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ - \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ - \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ - \ and not all(fraction_splits):\n raise ValueError('All fractions must\ - \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ - \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ - \ == -1:\n window_max_count = None\n window_configs = [window_column,\ - \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ - \ for config in window_configs])\n if window_config_count > 1:\n raise\ - \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ - \ if window_column and not column_pattern.fullmatch(window_column):\n \ - \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ - \ and (window_stride_length < 1 or\n window_stride_length\ - \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ - \ '\n f'{window_stride_length}.')\n if window_max_count\ - \ and (window_max_count < 1000 or\n window_max_count\ - \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ - \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ - \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ - \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ - \ not in valid_optimization_objectives:\n raise ValueError(\n \ - \ 'Optimization objective should be one of the following: '\n \ - \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ - \n # Validate data granularity unit.\n valid_data_granularity_units =\ - \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ - \ if data_granularity_unit not in valid_data_granularity_units:\n \ - \ raise ValueError(\n 'Granularity unit should be one of the\ - \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ - \n" - image: python:3.7-slim -pipelineInfo: - description: Trains a BQML ARIMA_PLUS model. 
- name: automl-tabular-bqml-arima-train -root: - dag: - outputs: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: create-metrics-artifact-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - bigquery-delete-dataset-with-prefix: - cachingOptions: {} - componentRef: - name: comp-bigquery-delete-dataset-with-prefix - dependentTasks: - - exit-handler-1 - inputs: - parameters: - dataset_prefix: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - delete_contents: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: project - taskInfo: - name: delete-tmp-dataset - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--bigquery_destination_uri: - componentInputParameter: bigquery_destination_uri - pipelinechannel--data_granularity_unit: - componentInputParameter: data_granularity_unit - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - pipelinechannel--data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--max_order: - componentInputParameter: max_order - pipelinechannel--override_destination: - componentInputParameter: override_destination - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--window_column: - componentInputParameter: window_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - inputDefinitions: - parameters: - bigquery_destination_uri: - defaultValue: '' - description: 'URI of the desired destination dataset. If not - - specified, resources will be created under a new dataset in the project. - - Unlike in Vertex Forecasting, all resources will be given hardcoded names - - under this dataset, and the model artifact will also be exported here.' - isOptional: true - parameterType: STRING - data_granularity_unit: - description: 'The data granularity unit. Accepted values are: - - minute, hour, day, week, month, year.' 
- parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - forecast_horizon: - description: 'The number of time periods into the future for which - - forecasts will be created. Future periods start after the latest timestamp - - for each time series.' - parameterType: NUMBER_INTEGER - location: - description: The GCP region for Vertex AI. - parameterType: STRING - max_order: - defaultValue: 5.0 - description: 'Integer between 1 and 5 representing the size of the parameter - - search space for ARIMA_PLUS. 5 would result in the highest accuracy model, - - but also the longest training runtime.' - isOptional: true - parameterType: NUMBER_INTEGER - override_destination: - defaultValue: false - description: 'Whether to overwrite the metrics and evaluated - - examples tables if they already exist. If this is False and the tables - - exist, this pipeline will fail.' - isOptional: true - parameterType: BOOLEAN - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_evaluation: - defaultValue: true - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - target_column: - description: Name of the column that the model is to predict values for. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: float = The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: 'Name of the column that identifies time order in the time - - series.' - parameterType: STRING - time_series_identifier_column: - description: 'Name of the column that identifies the time - - series.' - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - defaultValue: '' - description: 'Name of the column that should be used to filter input rows. - - The column should contain either booleans or string booleans; if the value - - of the row is True, generate a sliding window from that row.' - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: -1.0 - description: 'Number of rows that should be used to generate input - - examples. If the total row count is larger than this number, the input - - data will be randomly sampled to hit the count.' - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - defaultValue: -1.0 - description: 'Step length used to generate input examples. Every - - window_stride_length rows will be used to generate a sliding window.' 
- isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - create-metrics-artifact-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml deleted file mode 100644 index 6cdb273900..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +++ /dev/null @@ -1,2150 +0,0 @@ -# PIPELINE DEFINITION -# Name: prophet-predict -# Description: Creates a batch prediction using a Prophet model. -# Inputs: -# bigquery_destination_uri: str [Default: ''] -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# encryption_spec_key_name: str [Default: ''] -# location: str -# machine_type: str [Default: 'n1-standard-2'] -# max_num_workers: int [Default: 10.0] -# model_name: str -# project: str -# target_column: str -# time_column: str -# time_series_identifier_column: str -components: - comp-bigquery-create-dataset: - executorLabel: exec-bigquery-create-dataset - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-delete-dataset-with-prefix: - executorLabel: exec-bigquery-delete-dataset-with-prefix - inputDefinitions: - parameters: - dataset_prefix: - parameterType: STRING - delete_contents: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - project: - parameterType: STRING - comp-bigquery-query-job: - executorLabel: exec-bigquery-query-job - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. 
For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bigquery-query-job-2: - executorLabel: exec-bigquery-query-job-2 - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' 
- isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-build-job-configuration-query: - executorLabel: exec-build-job-configuration-query - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-build-job-configuration-query-2: - executorLabel: exec-build-job-configuration-query-2 - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-exit-handler-1: - dag: - tasks: - bigquery-create-dataset: - cachingOptions: {} - componentRef: - name: comp-bigquery-create-dataset - dependentTasks: - - get-table-location - - validate-inputs - inputs: - parameters: - dataset: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-tmp-dataset - bigquery-query-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job - dependentTasks: - - build-job-configuration-query - - get-first-valid - - get-table-location - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--get-first-valid-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-first-valid - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: 
pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n WITH\n base_data AS (\n SELECT\ - \ * FROM `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\n\ - \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ - \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ \n \n \n FROM base_data\n GROUP\ - \ BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ - \ " - taskInfo: - name: remove-feature-columns - bigquery-query-job-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job-2 - dependentTasks: - - build-job-configuration-query-2 - - get-table-location-2 - - table-to-uri-2 - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query-2 - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location-2 - pipelinechannel--table-to-uri-2-uri: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n WITH\n predictions AS (\n SELECT\n\ - \ {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ JSON_QUERY_ARRAY(prediction, '$.{{$.inputs.parameters['pipelinechannel--time_column']}}')\ - \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ JSON_EXTRACT(\n prediction,\n \ - \ '$.predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}'\n\ - \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ JSON_QUERY_ARRAY(\n prediction,\n \ - \ '$.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}'\n\ - \ ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}`\n\ - \ )\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ - \"',\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\n\ - \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ - \"',\n {{$.inputs.parameters['pipelinechannel--time_column']}}[SAFE_OFFSET(index)]\n\ - \ ) AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ STRUCT(\n CAST(predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}[SAFE_OFFSET(index)]\ - \ AS FLOAT64)\n AS value\n ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\n\ - \ FROM predictions\n CROSS JOIN\n UNNEST(GENERATE_ARRAY(0,\ - \ ARRAY_LENGTH({{$.inputs.parameters['pipelinechannel--time_column']}})\ 
- \ - 1)) AS index\n " - taskInfo: - name: create-predictions-table - build-job-configuration-query: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query - dependentTasks: - - bigquery-create-dataset - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' - table_id: - runtimeValue: - constant: data - write_disposition: - runtimeValue: - constant: WRITE_EMPTY - taskInfo: - name: build-job-configuration-query - build-job-configuration-query-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query-2 - dependentTasks: - - table-to-uri-2 - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' - pipelinechannel--table-to-uri-2-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: table-to-uri-2 - pipelinechannel--table-to-uri-2-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: table-to-uri-2 - pipelinechannel--table-to-uri-2-table_id: - taskOutputParameter: - outputParameterKey: table_id - producerTask: table-to-uri-2 - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' - table_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' - write_disposition: - runtimeValue: - constant: WRITE_TRUNCATE - taskInfo: - name: build-job-configuration-query-2 - get-first-valid: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-first-valid - dependentTasks: - - load-table-from-uri - inputs: - parameters: - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - pipelinechannel--load-table-from-uri-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: load-table-from-uri - values: - runtimeValue: - constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", - "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' - taskInfo: - name: get-first-valid - get-table-location: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-table-location - inputs: - parameters: - default_location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - table: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - taskInfo: - name: get-table-location - get-table-location-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-table-location-2 - dependentTasks: - - table-to-uri-2 - inputs: - parameters: - project: - componentInputParameter: pipelinechannel--project - table: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri-2 - taskInfo: - name: get-table-location-2 - load-table-from-uri: - cachingOptions: - enableCache: true - componentRef: - name: 
comp-load-table-from-uri - dependentTasks: - - bigquery-create-dataset - - get-table-location - inputs: - parameters: - destination: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - project: - componentInputParameter: pipelinechannel--project - source_format: - runtimeValue: - constant: CSV - source_uris: - componentInputParameter: pipelinechannel--data_source_csv_filenames - taskInfo: - name: load-table-from-uri - make-vertex-model-artifact: - cachingOptions: - enableCache: true - componentRef: - name: comp-make-vertex-model-artifact - inputs: - parameters: - location: - componentInputParameter: pipelinechannel--location - model_resource_name: - componentInputParameter: pipelinechannel--model_name - taskInfo: - name: make-vertex-model-artifact - maybe-replace-with-default: - cachingOptions: - enableCache: true - componentRef: - name: comp-maybe-replace-with-default - inputs: - parameters: - default: - componentInputParameter: pipelinechannel--project - value: - componentInputParameter: pipelinechannel--bigquery_destination_uri - taskInfo: - name: maybe-replace-with-default - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - dependentTasks: - - make-vertex-model-artifact - - maybe-replace-with-default - - table-to-uri - inputs: - artifacts: - model: - taskOutputArtifact: - outputArtifactKey: vertex_model - producerTask: make-vertex-model-artifact - parameters: - bigquery_destination_output_uri: - runtimeValue: - constant: bq://{{$.inputs.parameters['pipelinechannel--maybe-replace-with-default-Output']}} - bigquery_source_input_uri: - runtimeValue: - constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} - instances_format: - runtimeValue: - constant: bigquery - job_display_name: - runtimeValue: - constant: batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--machine_type - max_replica_count: - componentInputParameter: pipelinechannel--max_num_workers - pipelinechannel--maybe-replace-with-default-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: maybe-replace-with-default - pipelinechannel--table-to-uri-uri: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - predictions_format: - runtimeValue: - constant: bigquery - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-batch-predict - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - bigquery-query-job - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job - taskInfo: - name: table-to-uri - table-to-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri-2 - dependentTasks: - - model-batch-predict - inputs: - artifacts: - table: 
- taskOutputArtifact: - outputArtifactKey: bigquery_output_table - producerTask: model-batch-predict - taskInfo: - name: table-to-uri-2 - validate-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-validate-inputs - inputs: - parameters: - bigquery_destination_uri: - componentInputParameter: pipelinechannel--bigquery_destination_uri - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - taskInfo: - name: validate-inputs - inputDefinitions: - parameters: - pipelinechannel--bigquery_destination_uri: - parameterType: STRING - pipelinechannel--data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--data_source_csv_filenames: - parameterType: STRING - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--machine_type: - parameterType: STRING - pipelinechannel--max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--model_name: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_identifier_column: - parameterType: STRING - comp-get-first-valid: - executorLabel: exec-get-first-valid - inputDefinitions: - parameters: - values: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-table-location: - executorLabel: exec-get-table-location - inputDefinitions: - parameters: - default_location: - defaultValue: '' - description: Location to return if no table was given. - isOptional: true - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - table: - description: The BigQuery table to get a location for. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-table-location-2: - executorLabel: exec-get-table-location-2 - inputDefinitions: - parameters: - default_location: - defaultValue: '' - description: Location to return if no table was given. - isOptional: true - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - table: - description: The BigQuery table to get a location for. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-load-table-from-uri: - executorLabel: exec-load-table-from-uri - inputDefinitions: - parameters: - destination: - description: Table into which data is to be loaded. - parameterType: STRING - location: - description: The GCP region. - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - source_format: - defaultValue: CSV - description: 'The file format for the files being imported. Only CSV is - - supported.' - isOptional: true - parameterType: STRING - source_uris: - description: 'URIs of data files to be loaded; in format - - gs:///.' 
- parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-make-vertex-model-artifact: - executorLabel: exec-make-vertex-model-artifact - inputDefinitions: - parameters: - location: - parameterType: STRING - model_resource_name: - parameterType: STRING - outputDefinitions: - artifacts: - vertex_model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-maybe-replace-with-default: - executorLabel: exec-maybe-replace-with-default - inputDefinitions: - parameters: - default: - defaultValue: '' - isOptional: true - parameterType: STRING - value: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
- isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. 
If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. 
The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. 
If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
- parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-table-to-uri-2: - executorLabel: exec-table-to-uri-2 - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-validate-inputs: - executorLabel: exec-validate-inputs - inputDefinitions: - parameters: - bigquery_destination_uri: - isOptional: true - parameterType: STRING - data_granularity_unit: - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - isOptional: true - parameterType: STRING - data_source_csv_filenames: - isOptional: true - parameterType: STRING - optimization_objective: - isOptional: true - parameterType: STRING - predefined_split_key: - isOptional: true - parameterType: STRING - source_model_uri: - isOptional: true - parameterType: STRING - target_column: - isOptional: true - parameterType: STRING - test_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - isOptional: true - parameterType: STRING - time_series_identifier_column: - isOptional: true - parameterType: STRING - timestamp_split_key: - isOptional: true - parameterType: STRING - training_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - isOptional: true - parameterType: STRING - window_max_count: - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - isOptional: true - parameterType: NUMBER_INTEGER -deploymentSpec: - executors: - exec-bigquery-create-dataset: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-delete-dataset-with-prefix: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_delete_dataset_with_prefix - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ - \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ - \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ - \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ - \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ - \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ - \n" - image: python:3.7-slim - exec-bigquery-query-job: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-bigquery-query-job-2: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-build-job-configuration-query: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-build-job-configuration-query-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-get-first-valid: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_first_valid - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ - \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n for value in json.loads(values):\n if value:\n return value\n\ - \ raise ValueError('No valid values.')\n\n" - image: python:3.7-slim - exec-get-table-location: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_table_location - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ - \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ - \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ - \ table: The BigQuery table to get a location for.\n default_location:\ - \ Location to return if no table was given.\n\n Returns:\n A GCP region\ - \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ - \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ - \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ - \ return client.get_table(table).location\n\n" - image: python:3.7-slim - exec-get-table-location-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_table_location - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ - \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ - \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ - \ table: The BigQuery table to get a location for.\n default_location:\ - \ Location to return if no table was given.\n\n Returns:\n A GCP region\ - \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ - \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ - \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ - \ return client.get_table(table).location\n\n" - image: python:3.7-slim - exec-load-table-from-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - load_table_from_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ - \ source_uris: str,\n destination: str,\n source_format: str =\ - \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ - \ project: The GCP project.\n location: The GCP region.\n source_uris:\ - \ URIs of data files to be loaded; in format\n gs:///.\n\ - \ destination: Table into which data is to be loaded.\n source_format:\ - \ The file format for the files being imported. Only CSV is\n supported.\n\ - \n Returns:\n The destination table containing imported data.\n \"\"\ - \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ - \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ - \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ - \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ - \ destination=destination,\n project=project,\n location=location,\n\ - \ job_config=job_config).result()\n return destination\n\n" - image: python:3.7-slim - exec-make-vertex-model-artifact: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - make_vertex_model_artifact - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef make_vertex_model_artifact(\n location: str,\n model_resource_name:\ - \ str,\n vertex_model: dsl.Output[dsl.Artifact],\n) -> None:\n \"\"\"\ - Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ - \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ - \ f'/v1/{model_resource_name}')\n\n" - image: python:3.7-slim - exec-maybe-replace-with-default: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - maybe_replace_with_default - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ - \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ - \n return default if not value else value\n\n" - image: python:3.7-slim - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - 
'{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-table-to-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-validate-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - validate_inputs - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ - \ time_series_identifier_column: Optional[str] = None,\n target_column:\ - \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ - \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ - \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ - \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ - \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ - \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ - \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ - \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ - \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ - \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ - \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ - \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ - \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ - \ table_uri_pattern = re.compile(\n 
f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ - \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ - \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ - \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ - \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ - \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ - \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ - \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ - \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ - \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ - \n # Validate data source.\n data_source_count = sum([bool(source) for\ - \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ - \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ - \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ - \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ - \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ - \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ - \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ - \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ - \ raise ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ - \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ - \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ - \ = [None if fraction == -1 else fraction\n for fraction\ - \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ - \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ - \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ - \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ - \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ - \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ - \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ - \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ - \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ - \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ - \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ - \ and not all(fraction_splits):\n raise ValueError('All fractions must\ - \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ - \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ - \ == -1:\n window_max_count = None\n window_configs = [window_column,\ - \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ - \ for config in window_configs])\n if window_config_count > 1:\n raise\ - \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ - \ if window_column and not column_pattern.fullmatch(window_column):\n \ - \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ - \ and (window_stride_length < 1 or\n window_stride_length\ - \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. 
Got:\ - \ '\n f'{window_stride_length}.')\n if window_max_count\ - \ and (window_max_count < 1000 or\n window_max_count\ - \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ - \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ - \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ - \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ - \ not in valid_optimization_objectives:\n raise ValueError(\n \ - \ 'Optimization objective should be one of the following: '\n \ - \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ - \n # Validate data granularity unit.\n valid_data_granularity_units =\ - \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ - \ if data_granularity_unit not in valid_data_granularity_units:\n \ - \ raise ValueError(\n 'Granularity unit should be one of the\ - \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ - \n" - image: python:3.7-slim -pipelineInfo: - description: Creates a batch prediction using a Prophet model. - name: prophet-predict -root: - dag: - tasks: - bigquery-delete-dataset-with-prefix: - cachingOptions: {} - componentRef: - name: comp-bigquery-delete-dataset-with-prefix - dependentTasks: - - exit-handler-1 - inputs: - parameters: - dataset_prefix: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - delete_contents: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: project - taskInfo: - name: delete-tmp-dataset - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--bigquery_destination_uri: - componentInputParameter: bigquery_destination_uri - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - pipelinechannel--data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--machine_type: - componentInputParameter: machine_type - pipelinechannel--max_num_workers: - componentInputParameter: max_num_workers - pipelinechannel--model_name: - componentInputParameter: model_name - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - taskInfo: - name: exit-handler-1 - inputDefinitions: - parameters: - bigquery_destination_uri: - defaultValue: '' - description: 'URI of the desired destination dataset. If not - - specified, resources will be created under a new dataset in the project. - - Unlike in Vertex Forecasting, all resources will be given hardcoded names - - under this dataset, and the model artifact will also be exported here.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' 
- isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - location: - description: The GCP region for Vertex AI. - parameterType: STRING - machine_type: - defaultValue: n1-standard-2 - description: The machine type used for batch prediction. - isOptional: true - parameterType: STRING - max_num_workers: - defaultValue: 10.0 - description: The max number of workers used for batch prediction. - isOptional: true - parameterType: NUMBER_INTEGER - model_name: - description: 'The name of the Model resource, in a form of - - projects/{project}/locations/{location}/models/{model}.' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - target_column: - description: Name of the column that the model is to predict values for. - parameterType: STRING - time_column: - description: 'Name of the column that identifies time order in the time - - series.' - parameterType: STRING - time_series_identifier_column: - description: 'Name of the column that identifies the time - - series.' - parameterType: STRING -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py deleted file mode 100644 index 7c3bb6111b..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Prophet trainer component spec.""" - -from typing import Optional -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Output - - -# pylint: disable=g-doc-args,unused-argument -@dsl.container_component -def prophet_trainer( - project: str, - location: str, - root_dir: str, - target_column: str, - time_column: str, - time_series_identifier_column: str, - forecast_horizon: int, - window_column: str, - data_granularity_unit: str, - predefined_split_column: str, - source_bigquery_uri: str, - gcp_resources: dsl.OutputPath(str), - unmanaged_container_model: Output[UnmanagedContainerModel], - evaluated_examples_directory: Output[Artifact], - optimization_objective: Optional[str] = 'rmse', - max_num_trials: Optional[int] = 6, - encryption_spec_key_name: Optional[str] = '', - dataflow_max_num_workers: Optional[int] = 10, - dataflow_machine_type: Optional[str] = 'n1-standard-1', - dataflow_disk_size_gb: Optional[int] = 40, - dataflow_service_account: Optional[str] = '', - dataflow_subnetwork: Optional[str] = '', - dataflow_use_public_ips: Optional[bool] = True, -): - # fmt: off - """Trains and tunes one Prophet model per time series using Dataflow. 
- - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region for Vertex AI. - root_dir: The Cloud Storage location to store the output. - time_column: Name of the column that identifies time order in the - time series. - time_series_identifier_column: Name of the column that identifies - the time series. - target_column: Name of the column that the model is to predict - values for. - forecast_horizon: The number of time periods into the future for - which forecasts will be created. Future periods start after the latest - timestamp for each time series. - optimization_objective: Optimization objective for tuning. Supported - metrics come from Prophet's performance_metrics function. These are mse, - rmse, mae, mape, mdape, smape, and coverage. - data_granularity_unit: String representing the units of time for the - time column. - predefined_split_column: The predefined_split column name. A string - that represents a list of comma separated CSV filenames. - source_bigquery_uri: The BigQuery table path of format - bq (str)://bq_project.bq_dataset.bq_table - window_column: Name of the column that should be used to filter - input rows. The column should contain either booleans or string - booleans; if the value of the row is True, generate a sliding window - from that row. - max_num_trials: Maximum number of tuning trials to perform - per time series. There are up to 100 possible combinations to explore - for each time series. Recommended values to try are 3, 6, and 24. - encryption_spec_key_name: Customer-managed encryption key. - dataflow_machine_type: The dataflow machine type used for - training. - dataflow_max_num_workers: The max number of Dataflow - workers used for training. - dataflow_disk_size_gb: Dataflow worker's disk size in GB - during training. - dataflow_service_account: Custom service account to run - dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - - Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. - unmanaged_container_model: The UnmanagedContainerModel artifact. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - '{"display_name": ' - + f'"prophet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}", ', - '"encryption_spec": {"kms_key_name":"', - encryption_spec_key_name, - '"}, ', - '"job_spec": {"worker_pool_specs": [{"replica_count":"1", ', - '"machine_spec": {"machine_type": "n1-standard-4"}, ', - ( - '"container_spec":' - ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", ' - ), - '"args": ["prophet_trainer", "', - f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "', - ( - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", "' - ), - ( - '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325", "' - ), - '--artifacts_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/model/", "', - '--evaluated_examples_dir=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/eval/", "', - '--region=', - location, - '", "', - '--source_bigquery_uri=', - source_bigquery_uri, - '", "', - '--target_column=', - target_column, - '", "', - '--time_column=', - time_column, - '", "', - '--time_series_identifier_column=', - time_series_identifier_column, - '", "', - '--forecast_horizon=', - forecast_horizon, - '", "', - '--window_column=', - window_column, - '", "', - '--optimization_objective=', - optimization_objective, - '", "', - '--data_granularity_unit=', - data_granularity_unit, - '", "', - '--predefined_split_column=', - predefined_split_column, - '", "', - '--max_num_trials=', - max_num_trials, - '", "', - '--dataflow_project=', - project, - '", "', - '--dataflow_max_num_workers=', - dataflow_max_num_workers, - '", "', - '--dataflow_machine_type=', - dataflow_machine_type, - '", "', - '--dataflow_disk_size_gb=', - dataflow_disk_size_gb, - '", "', - '--dataflow_service_account=', - dataflow_service_account, - '", "', - '--dataflow_subnetwork=', - dataflow_subnetwork, - '", "', - '--dataflow_use_public_ips=', - dataflow_use_public_ips, - '", "', - '--gcp_resources_path=', - gcp_resources, - '", "', - '--executor_input={{$.json_escape[1]}}"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml deleted file mode 100644 index 2fadb6830e..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +++ /dev/null @@ -1,2958 +0,0 @@ -# PIPELINE DEFINITION -# Name: prophet-train -# Description: Trains one Prophet model per time series. 
-# Inputs: -# data_granularity_unit: str -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# encryption_spec_key_name: str [Default: ''] -# evaluation_dataflow_disk_size_gb: int [Default: 40.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-1'] -# evaluation_dataflow_max_num_workers: int [Default: 10.0] -# forecast_horizon: int -# location: str -# max_num_trials: int [Default: 6.0] -# optimization_objective: str -# predefined_split_key: str [Default: ''] -# project: str -# root_dir: str -# run_evaluation: bool [Default: True] -# target_column: str -# test_fraction: float [Default: -1.0] -# time_column: str -# time_series_identifier_column: str -# timestamp_split_key: str [Default: ''] -# trainer_dataflow_disk_size_gb: int [Default: 40.0] -# trainer_dataflow_machine_type: str [Default: 'n1-standard-1'] -# trainer_dataflow_max_num_workers: int [Default: 10.0] -# training_fraction: float [Default: -1.0] -# validation_fraction: float [Default: -1.0] -# window_column: str [Default: ''] -# window_max_count: int [Default: -1.0] -# window_stride_length: int [Default: -1.0] -components: - comp-bigquery-create-dataset: - executorLabel: exec-bigquery-create-dataset - inputDefinitions: - parameters: - dataset: - parameterType: STRING - exists_ok: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - comp-bigquery-delete-dataset-with-prefix: - executorLabel: exec-bigquery-delete-dataset-with-prefix - inputDefinitions: - parameters: - dataset_prefix: - parameterType: STRING - delete_contents: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - project: - parameterType: STRING - comp-bigquery-query-job: - executorLabel: exec-bigquery-query-job - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: 'Describes the Cloud - - KMS encryption key that will be used to protect destination - - BigQuery table. The BigQuery Service Account associated with your - - project requires access to this encryption key. If - - encryption_spec_key_name are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - job_configuration_query: - defaultValue: {} - description: 'A json formatted string - - describing the rest of the job configuration. For more details, see - - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels associated with this job. You can - - use these to organize and group your jobs. Label keys and values can - - be no longer than 63 characters, can only containlowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. Label values are optional. Label keys must start with a - - letter and each label in the list must have a different key. - - Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BigQuery job. 
If not - - set, default to `US` multi-region. For more details, see - - https://cloud.google.com/bigquery/docs/locations#specifying_your_location' - isOptional: true - parameterType: STRING - project: - description: Project to run the BigQuery query job. - parameterType: STRING - query: - defaultValue: '' - description: 'SQL query text to execute. Only standard SQL is - - supported. If query are both specified in here and in - - job_configuration_query, the value in here will override the other - - one.' - isOptional: true - parameterType: STRING - query_parameters: - defaultValue: [] - description: 'jobs.query parameters for - - standard SQL queries. If query_parameters are both specified in here - - and in job_configuration_query, the value in here will override the - - other one.' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - destination_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Describes the table where the query results should be stored. - - This property must be set for large results that exceed the maximum - - response size. - - For queries that produce anonymous (cached) results, this field will - - be populated by BigQuery.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the BigQuery job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-build-job-configuration-query: - executorLabel: exec-build-job-configuration-query - inputDefinitions: - parameters: - dataset_id: - defaultValue: '' - isOptional: true - parameterType: STRING - priority: - defaultValue: INTERACTIVE - isOptional: true - parameterType: STRING - project_id: - defaultValue: '' - isOptional: true - parameterType: STRING - table_id: - defaultValue: '' - isOptional: true - parameterType: STRING - write_disposition: - defaultValue: '' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRUCT - comp-condition-2: - dag: - tasks: - model-evaluation-regression: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-regression - inputs: - artifacts: - predictions_gcs_source: - componentInputArtifact: pipelinechannel--prophet-trainer-evaluated_examples_directory - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_gcs_source: - runtimeValue: - constant: [] - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - prediction_score_column: - runtimeValue: - constant: prediction.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}} - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: 
pipelinechannel--project - target_field_name: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: model-evaluation-regression - inputDefinitions: - artifacts: - pipelinechannel--prophet-trainer-evaluated_examples_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--target_column: - parameterType: STRING - comp-exit-handler-1: - dag: - tasks: - bigquery-create-dataset: - cachingOptions: {} - componentRef: - name: comp-bigquery-create-dataset - dependentTasks: - - get-table-location - - validate-inputs - inputs: - parameters: - dataset: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: create-tmp-dataset - bigquery-query-job: - cachingOptions: - enableCache: true - componentRef: - name: comp-bigquery-query-job - dependentTasks: - - bigquery-create-dataset - - build-job-configuration-query - - get-fte-suffix - - get-table-location - inputs: - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - job_configuration_query: - taskOutputParameter: - outputParameterKey: Output - producerTask: build-job-configuration-query - location: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-table-location - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - pipelinechannel--get-fte-suffix-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-fte-suffix - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--time_column: - componentInputParameter: pipelinechannel--time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - project: - componentInputParameter: pipelinechannel--project - query: - runtimeValue: - constant: "\n WITH\n base_data AS (\n SELECT\ - \ * FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ - \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ - \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ - \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ 
ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ - \ ARRAY_AGG({{$.inputs.parameters['pipelinechannel--target_column']}}\ - \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ - \ ARRAY_AGG(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ - \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ - \ ARRAY_AGG(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ - \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ - \ AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ - \ FROM base_data\n GROUP BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ - \ " - taskInfo: - name: aggregate-by-time-series-id - build-job-configuration-query: - cachingOptions: - enableCache: true - componentRef: - name: comp-build-job-configuration-query - dependentTasks: - - bigquery-create-dataset - inputs: - parameters: - dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - project_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' - table_id: - runtimeValue: - constant: data - write_disposition: - runtimeValue: - constant: WRITE_EMPTY - taskInfo: - name: build-job-configuration-query - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - prophet-trainer - inputs: - artifacts: - pipelinechannel--prophet-trainer-evaluated_examples_directory: - taskOutputArtifact: - outputArtifactKey: evaluated_examples_directory - producerTask: prophet-trainer - parameters: - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: run-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--run_evaluation'] - == true - feature-transform-engine: 
- cachingOptions: - enableCache: true - componentRef: - name: comp-feature-transform-engine - dependentTasks: - - bigquery-create-dataset - inputs: - parameters: - autodetect_csv_schema: - runtimeValue: - constant: 1.0 - bigquery_staging_full_dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - forecasting_apply_windowing: - runtimeValue: - constant: 0.0 - forecasting_context_window: - runtimeValue: - constant: 0.0 - forecasting_forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - forecasting_predefined_window_column: - componentInputParameter: pipelinechannel--window_column - forecasting_time_column: - componentInputParameter: pipelinechannel--time_column - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - forecasting_window_max_count: - componentInputParameter: pipelinechannel--window_max_count - forecasting_window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - runtimeValue: - constant: time_series - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - runtimeValue: - constant: {} - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - taskInfo: - name: feature-transform-engine - get-fte-suffix: - cachingOptions: - enableCache: true - componentRef: - name: comp-get-fte-suffix - dependentTasks: - - bigquery-create-dataset - - feature-transform-engine - inputs: - parameters: - bigquery_staging_full_dataset_id: - runtimeValue: - constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' - fte_table: - runtimeValue: - constant: fte_time_series_output - location: - componentInputParameter: pipelinechannel--location - pipelinechannel--bigquery-create-dataset-dataset_id: - taskOutputParameter: - outputParameterKey: dataset_id - producerTask: bigquery-create-dataset - pipelinechannel--bigquery-create-dataset-project_id: - taskOutputParameter: - outputParameterKey: project_id - producerTask: bigquery-create-dataset - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: get-fte-suffix - get-table-location: - cachingOptions: - enableCache: true - 
componentRef: - name: comp-get-table-location - inputs: - parameters: - default_location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - table: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - taskInfo: - name: get-table-location - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - prophet-trainer - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: prophet-trainer - parameters: - description: - runtimeValue: - constant: Prophet model. - display_name: - runtimeValue: - constant: prophet_{{$.pipeline_job_uuid}} - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - prophet-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-prophet-trainer - dependentTasks: - - get-fte-suffix - - table-to-uri - inputs: - parameters: - data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--trainer_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--trainer_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--trainer_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - forecast_horizon: - componentInputParameter: pipelinechannel--forecast_horizon - location: - componentInputParameter: pipelinechannel--location - max_num_trials: - componentInputParameter: pipelinechannel--max_num_trials - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - pipelinechannel--get-fte-suffix-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: get-fte-suffix - pipelinechannel--table-to-uri-uri: - taskOutputParameter: - outputParameterKey: uri - producerTask: table-to-uri - predefined_split_column: - runtimeValue: - constant: split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - source_bigquery_uri: - runtimeValue: - constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} - target_column: - componentInputParameter: pipelinechannel--target_column - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - window_column: - runtimeValue: - constant: window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} - taskInfo: - name: prophet-trainer - table-to-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-table-to-uri - dependentTasks: - - bigquery-query-job - inputs: - artifacts: - table: - taskOutputArtifact: - outputArtifactKey: destination_table - producerTask: bigquery-query-job - taskInfo: - name: table-to-uri - validate-inputs: - cachingOptions: - 
enableCache: true - componentRef: - name: comp-validate-inputs - inputs: - parameters: - data_granularity_unit: - componentInputParameter: pipelinechannel--data_granularity_unit - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--data_source_csv_filenames - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - target_column: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - time_column: - componentInputParameter: pipelinechannel--time_column - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - window_column: - componentInputParameter: pipelinechannel--window_column - window_max_count: - componentInputParameter: pipelinechannel--window_max_count - window_stride_length: - componentInputParameter: pipelinechannel--window_stride_length - taskInfo: - name: validate-inputs - inputDefinitions: - parameters: - pipelinechannel--data_granularity_unit: - parameterType: STRING - pipelinechannel--data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--data_source_csv_filenames: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--forecast_horizon: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_num_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--time_column: - parameterType: STRING - pipelinechannel--time_series_identifier_column: - parameterType: STRING - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--trainer_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--trainer_dataflow_machine_type: - parameterType: STRING - pipelinechannel--trainer_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--window_column: - parameterType: STRING - pipelinechannel--window_max_count: - parameterType: NUMBER_INTEGER - pipelinechannel--window_stride_length: 
- parameterType: NUMBER_INTEGER - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine - inputDefinitions: - parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' - isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: - defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" - isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. 
Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." - isOptional: true - parameterType: LIST - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." - isOptional: true - parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. - isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - available at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. - - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. 
- - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' - isOptional: true - parameterType: LIST - forecasting_predefined_window_column: - defaultValue: '' - description: Forecasting predefined window column. - isOptional: true - parameterType: STRING - forecasting_time_column: - defaultValue: '' - description: Forecasting time column. - isOptional: true - parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' - isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting - - time series identifier column.' - isOptional: true - parameterType: STRING - forecasting_unavailable_at_forecast_columns: - defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' - isOptional: true - parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. - isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. - isOptional: true - parameterType: NUMBER_INTEGER - group_columns: - isOptional: true - parameterType: LIST - group_temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' - isOptional: true - parameterType: STRING - location: - description: Location for the created GCP services. - parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' - isOptional: true - parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to - - select. If specified, the transform config will be purged by only using - - the selected features that ranked top in the feature ranking, which has - - the ranking value for all supported features. If the number of input - - features is smaller than max_selected_features specified, we will still - - run the feature selection process and generate the feature ranking, no - - features will be excluded. The value will be set to 1000 by default if - - run_feature_selection is enabled.' - isOptional: true - parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features - - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or - - tide. Defaults to the empty value, `None`.' - isOptional: true - parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image - - columns. Defaults to an empty list.' 
- isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text - - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of - - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied - - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection - - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - description: Stratified split key. - isOptional: true - parameterType: STRING - target_column: - defaultValue: '' - description: Target column of input data. - isOptional: true - parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" - isOptional: true - parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform - - row-level TF transformations. Can be one of: "dataflow" (by default) or - - "bigquery". Using "bigquery" as the execution engine is experimental and - - is for allowlisted customers only. In addition, executing on "bigquery" - - only supports auto transformations (i.e., specified by - - tf_auto_transform_features) and will raise an error when - - tf_custom_transformation_definitions or tf_transformations_path is set.' - isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. 
Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. 
code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. If\n not specified, all\ - \ words in the vocabulary will be\n included. 
If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." - isOptional: true - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: Timestamp split key. - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: Weight column of input data. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the - - dataset will be included. For "AMI" algorithm, array features won''t be - - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' 
- parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the - - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to - - pass to the batch prediction component during distillation.' - parameterType: STRING - gcp_resources: - description: 'GCP resources created by this component. For more details, - - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-get-fte-suffix: - executorLabel: exec-get-fte-suffix - inputDefinitions: - parameters: - bigquery_staging_full_dataset_id: - parameterType: STRING - fte_table: - parameterType: STRING - location: - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-get-table-location: - executorLabel: exec-get-table-location - inputDefinitions: - parameters: - default_location: - defaultValue: '' - description: Location to return if no table was given. - isOptional: true - parameterType: STRING - project: - description: The GCP project. - parameterType: STRING - table: - description: The BigQuery table to get a location for. - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-model-evaluation-regression: - executorLabel: exec-model-evaluation-regression - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The managed Vertex Model used for - - predictions job, if using Vertex batch prediction. Must share the same - - location as the provided input argument `location`.' - isOptional: true - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*". For explanation results, the files - - should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. 
For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - ground_truth_bigquery_source: - defaultValue: '' - description: 'Required for custom tabular. - - The BigQuery table uri representing where the ground truth is located. - - Used to provide ground truth for each prediction instance when they are - - not part of the batch prediction jobs prediction instance.' - isOptional: true - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - description: 'Required for custom tabular and non - - tabular data. The file format for the ground truth files. `jsonl`, - - `csv`, and `bigquery` are the allowed formats. If not set, defaulted to - - `jsonl`.' - isOptional: true - parameterType: STRING - ground_truth_gcs_source: - defaultValue: [] - description: 'Required for custom - - tabular and non tabular data. The GCS uris representing where the ground - - truth is located. Used to provide ground truth for each prediction - - instance when they are not part of the batch prediction jobs prediction - - instance.' - isOptional: true - parameterType: LIST - location: - defaultValue: us-central1 - description: 'Location for running the evaluation. If not set, - - defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - description: 'The column name of the field - - containing batch prediction scores. Formatted to be able to find nested - - columns, delimited by `.`. If not set, defaulted to `prediction.scores` - - for classification.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run evaluation container. - parameterType: STRING - target_field_name: - description: 'The full name path of the features target field - - in the predictions file. Formatted to be able to find nested columns, - - delimited by `.`. Alternatively referred to as the ground truth (or - - ground_truth_column) field.' 
- parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics representing the classification - - evaluation metrics in GCS.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - parent_model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" - isOptional: true - parameters: - description: - defaultValue: '' - description: The description of the model. - isOptional: true - parameterType: STRING - display_name: - description: 'The display name of the Model. The name - - can be up to 128 characters long and can be consist of any UTF-8 - - characters.' - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key spec for a Model. If set, this Model and all sub-resources of this - - Model will be secured by this key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - description: 'Metadata describing the Model''s - - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your model. Label keys and values can be no longer than 64 - - characters (Unicode codepoints), can only contain lowercase letters, - - numeric characters, underscores and dashes. International characters - - are allowed. See https://goo.gl/xmQnxf for more information and - - examples of labels.' 
- isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Optional location to upload this model to. If - - not set, default to us-central1.' - isOptional: true - parameterType: STRING - project: - description: Project to upload this model to. - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: Artifact tracking the created model. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-prophet-trainer: - executorLabel: exec-prophet-trainer - inputDefinitions: - parameters: - data_granularity_unit: - description: 'String representing the units of time for the - - time column.' - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB - - during training.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-1 - description: 'The dataflow machine type used for - - training.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of Dataflow - - workers used for training.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used.' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - forecast_horizon: - description: 'The number of time periods into the future for - - which forecasts will be created. Future periods start after the latest - - timestamp for each time series.' - parameterType: NUMBER_INTEGER - location: - description: The GCP region for Vertex AI. - parameterType: STRING - max_num_trials: - defaultValue: 6.0 - description: 'Maximum number of tuning trials to perform - - per time series. There are up to 100 possible combinations to explore - - for each time series. Recommended values to try are 3, 6, and 24.' - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - defaultValue: rmse - description: 'Optimization objective for tuning. Supported - - metrics come from Prophet''s performance_metrics function. These are mse, - - rmse, mae, mape, mdape, smape, and coverage.' - isOptional: true - parameterType: STRING - predefined_split_column: - description: 'The predefined_split column name. A string - - that represents a list of comma separated CSV filenames.' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
- parameterType: STRING - source_bigquery_uri: - description: 'The BigQuery table path of format - - bq (str)://bq_project.bq_dataset.bq_table' - parameterType: STRING - target_column: - description: 'Name of the column that the model is to predict - - values for.' - parameterType: STRING - time_column: - description: 'Name of the column that identifies time order in the - - time series.' - parameterType: STRING - time_series_identifier_column: - description: 'Name of the column that identifies - - the time series.' - parameterType: STRING - window_column: - description: 'Name of the column that should be used to filter - - input rows. The column should contain either booleans or string - - booleans; if the value of the row is True, generate a sliding window - - from that row.' - parameterType: STRING - outputDefinitions: - artifacts: - evaluated_examples_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: The UnmanagedContainerModel artifact. - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the custom training - - job.' - parameterType: STRING - comp-table-to-uri: - executorLabel: exec-table-to-uri - inputDefinitions: - artifacts: - table: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - use_bq_prefix: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - outputDefinitions: - parameters: - dataset_id: - parameterType: STRING - project_id: - parameterType: STRING - table_id: - parameterType: STRING - uri: - parameterType: STRING - comp-validate-inputs: - executorLabel: exec-validate-inputs - inputDefinitions: - parameters: - bigquery_destination_uri: - isOptional: true - parameterType: STRING - data_granularity_unit: - isOptional: true - parameterType: STRING - data_source_bigquery_table_path: - isOptional: true - parameterType: STRING - data_source_csv_filenames: - isOptional: true - parameterType: STRING - optimization_objective: - isOptional: true - parameterType: STRING - predefined_split_key: - isOptional: true - parameterType: STRING - source_model_uri: - isOptional: true - parameterType: STRING - target_column: - isOptional: true - parameterType: STRING - test_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - isOptional: true - parameterType: STRING - time_series_identifier_column: - isOptional: true - parameterType: STRING - timestamp_split_key: - isOptional: true - parameterType: STRING - training_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - isOptional: true - parameterType: STRING - window_max_count: - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - isOptional: true - parameterType: NUMBER_INTEGER -deploymentSpec: - executors: - exec-bigquery-create-dataset: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_create_dataset - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ - \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ - \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ - \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import collections\n\n from google.cloud import bigquery\n # pylint:\ - \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n ref\ - \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ - \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ - \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim - exec-bigquery-delete-dataset-with-prefix: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - bigquery_delete_dataset_with_prefix - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ - \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ - \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ - \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ - \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ - \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ - \n" - image: python:3.7-slim - exec-bigquery-query-job: - container: - args: - - --type - - BigqueryQueryJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --payload - - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", - ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' - - --job_configuration_query_override - - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", - "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", - ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-build-job-configuration-query: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - build_job_configuration_query - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ - \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ - \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ - \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ - \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ - \ config['destinationTable'] = {\n 'projectId': project_id,\n\ - \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ - \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ - \ return config\n\n" - image: python:3.7-slim - exec-feature-transform-engine: - container: - args: - - feature_transform_engine - - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' - - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' - - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' - - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' - - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' - - '{"Concat": ["--forecasting_window_stride_length=", 
"{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' - - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' - - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' - - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' - - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' - - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' - - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' - - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' - - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": - ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' - - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' - - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' - - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' - - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' - - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' - - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' - - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' - - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' - - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", - "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' - - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' - - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' - - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' - - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' - - '{"Concat": 
["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' - - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' - - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' - - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' - - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' - - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' - - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' - - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' - - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' - - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' - - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' - - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' - - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' - - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' - - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", - 
"{{$.inputs.parameters[''group_columns'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", - "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": - ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": - ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 - exec-get-fte-suffix: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_fte_suffix - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ - \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ - \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n client = bigquery.Client(project=project, location=location)\n for\ - \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ - \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ - \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ - \n" - image: python:3.7-slim - exec-get-table-location: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - get_table_location - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ - \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ - \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ - \ table: The BigQuery table to get a location for.\n default_location:\ - \ Location to return if no table was given.\n\n Returns:\n A GCP region\ - \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ - \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ - \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ - \ return client.get_table(table).location\n\n" - image: python:3.7-slim - exec-model-evaluation-regression: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - regression - - --target_field_name - - '{"Concat": ["instance.", "{{$.inputs.parameters[''target_field_name'']}}"]}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --ground_truth_gcs_source - - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' - - --ground_truth_bigquery_source - - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --dataflow_job_prefix - - evaluation-regression-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - 
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-prophet-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"prophet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", - ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": - {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325\", - ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", - \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325\", - \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325\", - \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", - \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", - "\", \"", "--source_bigquery_uri=", "{{$.inputs.parameters[''source_bigquery_uri'']}}", - "\", \"", "--target_column=", "{{$.inputs.parameters[''target_column'']}}", - "\", \"", "--time_column=", 
"{{$.inputs.parameters[''time_column'']}}", - "\", \"", "--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}", - "\", \"", "--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}", - "\", \"", "--window_column=", "{{$.inputs.parameters[''window_column'']}}", - "\", \"", "--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}", - "\", \"", "--data_granularity_unit=", "{{$.inputs.parameters[''data_granularity_unit'']}}", - "\", \"", "--predefined_split_column=", "{{$.inputs.parameters[''predefined_split_column'']}}", - "\", \"", "--max_num_trials=", "{{$.inputs.parameters[''max_num_trials'']}}", - "\", \"", "--dataflow_project=", "{{$.inputs.parameters[''project'']}}", - "\", \"", "--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"", "--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"", "--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"", "--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"", "--dataflow_subnetwork=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"", "--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"", "--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"", "--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-table-to-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - table_to_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ - \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ - \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ - \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ - \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ - \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ - \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ - \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim - exec-validate-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - validate_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ - \ time_series_identifier_column: Optional[str] = None,\n target_column:\ - \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ - \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ - \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ - \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ - \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ - \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ - \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ - \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ - \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ - \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ - \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ - \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ - \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ - \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ - \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ - \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ - \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ - \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ - \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ - \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ - \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ - \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ - \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ - \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ - \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ - \n # Validate data source.\n data_source_count = sum([bool(source) for\ - \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ - \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ - \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ - \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ - \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ - \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ - \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ - \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ - \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ - \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ - \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ - \ = [None if fraction == -1 else fraction\n for fraction\ - \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ - \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ - \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ - \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ - \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ - \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ - \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ - \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ - \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ - \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ - \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ - \ and not all(fraction_splits):\n raise ValueError('All fractions must\ - \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ - \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ - \ == -1:\n window_max_count = None\n window_configs = [window_column,\ - \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ - \ for config in window_configs])\n if window_config_count > 1:\n raise\ - \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ - \ if window_column and not column_pattern.fullmatch(window_column):\n \ - \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ - \ and (window_stride_length < 1 or\n window_stride_length\ - \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ - \ '\n f'{window_stride_length}.')\n if window_max_count\ - \ and (window_max_count < 1000 or\n window_max_count\ - \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ - \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ - \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ - \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ - \ not in valid_optimization_objectives:\n raise ValueError(\n \ - \ 'Optimization objective should be one of the following: '\n \ - \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ - \n # Validate data granularity unit.\n valid_data_granularity_units =\ - \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ - \ if data_granularity_unit not in valid_data_granularity_units:\n \ - \ raise ValueError(\n 'Granularity unit should be one of the\ - \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ - \n" - image: python:3.7-slim -pipelineInfo: - description: Trains one Prophet model per time series. 
- name: prophet-train -root: - dag: - tasks: - bigquery-delete-dataset-with-prefix: - cachingOptions: {} - componentRef: - name: comp-bigquery-delete-dataset-with-prefix - dependentTasks: - - exit-handler-1 - inputs: - parameters: - dataset_prefix: - runtimeValue: - constant: tmp_{{$.pipeline_job_uuid}} - delete_contents: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: project - taskInfo: - name: delete-tmp-dataset - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--data_granularity_unit: - componentInputParameter: data_granularity_unit - pipelinechannel--data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - pipelinechannel--data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--forecast_horizon: - componentInputParameter: forecast_horizon - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--max_num_trials: - componentInputParameter: max_num_trials - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: test_fraction - pipelinechannel--time_column: - componentInputParameter: time_column - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--trainer_dataflow_disk_size_gb: - componentInputParameter: trainer_dataflow_disk_size_gb - pipelinechannel--trainer_dataflow_machine_type: - componentInputParameter: trainer_dataflow_machine_type - pipelinechannel--trainer_dataflow_max_num_workers: - componentInputParameter: trainer_dataflow_max_num_workers - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--window_column: - componentInputParameter: window_column - pipelinechannel--window_max_count: - componentInputParameter: window_max_count - pipelinechannel--window_stride_length: - componentInputParameter: window_stride_length - taskInfo: - name: exit-handler-1 - inputDefinitions: - parameters: - data_granularity_unit: - description: 'String representing the units of time for the time - - column.' 
- parameterType: STRING - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used.' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluation_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB during - - evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-1 - description: 'The dataflow machine type used for - - evaluation.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of Dataflow workers used - - for evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - forecast_horizon: - description: 'The number of time periods into the future for which - - forecasts will be created. Future periods start after the latest timestamp - - for each time series.' - parameterType: NUMBER_INTEGER - location: - description: The GCP region for Vertex AI. - parameterType: STRING - max_num_trials: - defaultValue: 6.0 - description: 'Maximum number of tuning trials to perform per time series. - - There are up to 100 possible combinations to explore for each time series. - - Recommended values to try are 3, 6, and 24.' - isOptional: true - parameterType: NUMBER_INTEGER - optimization_objective: - description: Optimization objective for the model. - parameterType: STRING - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_evaluation: - defaultValue: true - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - target_column: - description: Name of the column that the model is to predict values for. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - time_column: - description: 'Name of the column that identifies time order in the time - - series.' - parameterType: STRING - time_series_identifier_column: - description: 'Name of the column that identifies the time - - series.' - parameterType: STRING - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - trainer_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB during - - training.' 
- isOptional: true - parameterType: NUMBER_INTEGER - trainer_dataflow_machine_type: - defaultValue: n1-standard-1 - description: The dataflow machine type used for training. - isOptional: true - parameterType: STRING - trainer_dataflow_max_num_workers: - defaultValue: 10.0 - description: 'The max number of Dataflow workers used - - for training.' - isOptional: true - parameterType: NUMBER_INTEGER - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - window_column: - defaultValue: '' - description: 'Name of the column that should be used to filter input rows. - - The column should contain either booleans or string booleans; if the value - - of the row is True, generate a sliding window from that row.' - isOptional: true - parameterType: STRING - window_max_count: - defaultValue: -1.0 - description: 'Number of rows that should be used to generate input - - examples. If the total row count is larger than this number, the input - - data will be randomly sampled to hit the count.' - isOptional: true - parameterType: NUMBER_INTEGER - window_stride_length: - defaultValue: -1.0 - description: 'Step length used to generate input examples. Every - - window_stride_length rows will be used to generate a sliding window.' - isOptional: true - parameterType: NUMBER_INTEGER -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py deleted file mode 100644 index b69d5430a5..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py +++ /dev/null @@ -1,341 +0,0 @@ -"""Util functions for Vertex Forecasting pipelines.""" - -import os -import pathlib -from typing import Any, Dict, Tuple - -_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() - - -def get_bqml_arima_train_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - time_column: str, - time_series_identifier_column: str, - target_column: str, - forecast_horizon: int, - data_granularity_unit: str, - predefined_split_key: str = '', - timestamp_split_key: str = '', - training_fraction: float = -1.0, - validation_fraction: float = -1.0, - test_fraction: float = -1.0, - data_source_csv_filenames: str = '', - data_source_bigquery_table_path: str = '', - window_column: str = '', - window_stride_length: int = -1, - window_max_count: int = -1, - bigquery_destination_uri: str = '', - override_destination: bool = False, - max_order: int = 5, - run_evaluation: bool = True, -) -> Tuple[str, Dict[str, Any]]: - """Get the BQML ARIMA_PLUS training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region for Vertex AI. - root_dir: The Cloud Storage location to store the output. - time_column: Name of the column that identifies time order in the time - series. - time_series_identifier_column: Name of the column that identifies the time - series. - target_column: Name of the column that the model is to predict values for. - forecast_horizon: The number of time periods into the future for which - forecasts will be created. Future periods start after the latest timestamp - for each time series. - data_granularity_unit: The data granularity unit. 
Accepted values are: - minute, hour, day, week, month, year. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - window_column: Name of the column that should be used to filter input rows. - The column should contain either booleans or string booleans; if the value - of the row is True, generate a sliding window from that row. - window_stride_length: Step length used to generate input examples. Every - window_stride_length rows will be used to generate a sliding window. - window_max_count: Number of rows that should be used to generate input - examples. If the total row count is larger than this number, the input - data will be randomly sampled to hit the count. - bigquery_destination_uri: URI of the desired destination dataset. If not - specified, resources will be created under a new dataset in the project. - Unlike in Vertex Forecasting, all resources will be given hardcoded names - under this dataset, and the model artifact will also be exported here. - override_destination: Whether to overwrite the metrics and evaluated - examples tables if they already exist. If this is False and the tables - exist, this pipeline will fail. - max_order: Integer between 1 and 5 representing the size of the parameter - search space for ARIMA_PLUS. 5 would result in the highest accuracy model, - but also the longest training runtime. - run_evaluation: Whether to run evaluation steps during training. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'time_column': time_column, - 'time_series_identifier_column': time_series_identifier_column, - 'target_column': target_column, - 'forecast_horizon': forecast_horizon, - 'data_granularity_unit': data_granularity_unit, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'window_column': window_column, - 'window_stride_length': window_stride_length, - 'window_max_count': window_max_count, - 'bigquery_destination_uri': bigquery_destination_uri, - 'override_destination': override_destination, - 'max_order': max_order, - 'run_evaluation': run_evaluation, - } - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, 'bqml_arima_train_pipeline.yaml' - ) - return pipeline_definition_path, parameter_values - - -def get_bqml_arima_predict_pipeline_and_parameters( - project: str, - location: str, - model_name: str, - data_source_csv_filenames: str = '', - data_source_bigquery_table_path: str = '', - bigquery_destination_uri: str = '', - generate_explanation: bool = False, -) -> Tuple[str, Dict[str, Any]]: - """Get the BQML ARIMA_PLUS prediction pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region for Vertex AI. - model_name: ARIMA_PLUS BQML model URI. 
- data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - bigquery_destination_uri: URI of the desired destination dataset. If not - specified, a resource will be created under a new dataset in the project. - generate_explanation: Generate explanation along with the batch prediction - results. This will cause the batch prediction output to include - explanations. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = { - 'project': project, - 'location': location, - 'model_name': model_name, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_destination_uri': bigquery_destination_uri, - 'generate_explanation': generate_explanation, - } - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, 'bqml_arima_predict_pipeline.yaml' - ) - return pipeline_definition_path, parameter_values - - -def get_prophet_train_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - time_column: str, - time_series_identifier_column: str, - target_column: str, - forecast_horizon: int, - optimization_objective: str, - data_granularity_unit: str, - predefined_split_key: str = '', - timestamp_split_key: str = '', - training_fraction: float = -1.0, - validation_fraction: float = -1.0, - test_fraction: float = -1.0, - data_source_csv_filenames: str = '', - data_source_bigquery_table_path: str = '', - window_column: str = '', - window_stride_length: int = -1, - window_max_count: int = -1, - max_num_trials: int = 6, - trainer_dataflow_machine_type: str = 'n1-standard-1', - trainer_dataflow_max_num_workers: int = 10, - trainer_dataflow_disk_size_gb: int = 40, - evaluation_dataflow_machine_type: str = 'n1-standard-1', - evaluation_dataflow_max_num_workers: int = 10, - evaluation_dataflow_disk_size_gb: int = 40, - dataflow_service_account: str = '', - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - run_evaluation: bool = True, -) -> Tuple[str, Dict[str, Any]]: - """Returns Prophet train pipeline and formatted parameters. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region for Vertex AI. - root_dir: The Cloud Storage location to store the output. - time_column: Name of the column that identifies time order in the time - series. - time_series_identifier_column: Name of the column that identifies the time - series. - target_column: Name of the column that the model is to predict values for. - forecast_horizon: The number of time periods into the future for which - forecasts will be created. Future periods start after the latest timestamp - for each time series. - optimization_objective: Optimization objective for the model. - data_granularity_unit: String representing the units of time for the time - column. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - window_column: Name of the column that should be used to filter input rows. 
- The column should contain either booleans or string booleans; if the value - of the row is True, generate a sliding window from that row. - window_stride_length: Step length used to generate input examples. Every - window_stride_length rows will be used to generate a sliding window. - window_max_count: Number of rows that should be used to generate input - examples. If the total row count is larger than this number, the input - data will be randomly sampled to hit the count. - max_num_trials: Maximum number of tuning trials to perform per time series. - trainer_dataflow_machine_type: The dataflow machine type used for training. - trainer_dataflow_max_num_workers: The max number of Dataflow workers used - for training. - trainer_dataflow_disk_size_gb: Dataflow worker's disk size in GB during - training. - evaluation_dataflow_machine_type: The dataflow machine type used for - evaluation. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers used - for evaluation. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB during - evaluation. - dataflow_service_account: Custom service account to run dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - run_evaluation: Whether to run evaluation steps during training. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'time_column': time_column, - 'time_series_identifier_column': time_series_identifier_column, - 'target_column': target_column, - 'forecast_horizon': forecast_horizon, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'window_column': window_column, - 'window_stride_length': window_stride_length, - 'window_max_count': window_max_count, - 'max_num_trials': max_num_trials, - 'optimization_objective': optimization_objective, - 'data_granularity_unit': data_granularity_unit, - 'trainer_dataflow_machine_type': trainer_dataflow_machine_type, - 'trainer_dataflow_max_num_workers': trainer_dataflow_max_num_workers, - 'trainer_dataflow_disk_size_gb': trainer_dataflow_disk_size_gb, - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'dataflow_service_account': dataflow_service_account, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'run_evaluation': run_evaluation, - } - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, 'prophet_trainer_pipeline.yaml' - ) - return pipeline_definition_path, parameter_values - - -def get_prophet_prediction_pipeline_and_parameters( - project: str, - location: str, - model_name: str, - time_column: str, - time_series_identifier_column: str, - target_column: str, - data_source_csv_filenames: str = '', - data_source_bigquery_table_path: str = '', - bigquery_destination_uri: str = '', - machine_type: str = 'n1-standard-2', - max_num_workers: int = 10, -) -> 
Tuple[str, Dict[str, Any]]: - """Returns Prophet prediction pipeline and formatted parameters. - - Unlike the prediction server for Vertex Forecasting, the Prophet prediction - server returns predictions batched by time series id. This pipeline shows how - these predictions can be disaggregated to get results similar to what Vertex - Forecasting provides. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region for Vertex AI. - model_name: The name of the Model resource, in a form of - projects/{project}/locations/{location}/models/{model}. - time_column: Name of the column that identifies time order in the time - series. - time_series_identifier_column: Name of the column that identifies the time - series. - target_column: Name of the column that the model is to predict values for. - data_source_csv_filenames: A string that represents a list of comma - separated CSV filenames. - data_source_bigquery_table_path: The BigQuery table path of format - bq://bq_project.bq_dataset.bq_table - bigquery_destination_uri: URI of the desired destination dataset. If not - specified, resources will be created under a new dataset in the project. - machine_type: The machine type used for batch prediction. - max_num_workers: The max number of workers used for batch prediction. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = { - 'project': project, - 'location': location, - 'model_name': model_name, - 'time_column': time_column, - 'time_series_identifier_column': time_series_identifier_column, - 'target_column': target_column, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'bigquery_destination_uri': bigquery_destination_uri, - 'machine_type': machine_type, - 'max_num_workers': max_num_workers, - } - pipeline_definition_path = os.path.join( - _GCPC_FORECASTING_PATH, 'prophet_predict_pipeline.yaml' - ) - return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py deleted file mode 100644 index 2522350d36..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
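The prediction helpers in the utils module removed above follow the same pattern; a hedged sketch for get_prophet_prediction_pipeline_and_parameters, with placeholder model and table names and the same assumed PipelineJob submission as in the previous sketch.

from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.automl.forecasting import utils

# Placeholders throughout; the keyword arguments mirror the deleted helper's
# signature shown above.
template_path, parameter_values = utils.get_prophet_prediction_pipeline_and_parameters(
    project='my-project',
    location='us-central1',
    model_name='projects/my-project/locations/us-central1/models/1234567890',
    time_column='ts',
    time_series_identifier_column='series_id',
    target_column='sales',
    data_source_bigquery_table_path='bq://my-project.my_dataset.predict_input',
)

aiplatform.init(project='my-project', location='us-central1')
aiplatform.PipelineJob(
    display_name='prophet-predict',
    template_path=template_path,
    parameter_values=parameter_values,
).submit()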
- -"""GA AutoML tabular components.""" - -from google_cloud_pipeline_components.v1.automl.tabular.cv_trainer import automl_tabular_cv_trainer as CvTrainerOp -from google_cloud_pipeline_components.v1.automl.tabular.ensemble import automl_tabular_ensemble as EnsembleOp -from google_cloud_pipeline_components.v1.automl.tabular.finalizer import automl_tabular_finalizer as FinalizerOp -from google_cloud_pipeline_components.v1.automl.tabular.infra_validator import automl_tabular_infra_validator as InfraValidatorOp -from google_cloud_pipeline_components.v1.automl.tabular.split_materialized_data import split_materialized_data as SplitMaterializedDataOp -from google_cloud_pipeline_components.v1.automl.tabular.stage_1_tuner import automl_tabular_stage_1_tuner as Stage1TunerOp -from google_cloud_pipeline_components.v1.automl.tabular.stats_and_example_gen import tabular_stats_and_example_gen as StatsAndExampleGenOp -from google_cloud_pipeline_components.v1.automl.tabular.training_configurator_and_validator import training_configurator_and_validator as TrainingConfiguratorAndValidatorOp -from google_cloud_pipeline_components.v1.automl.tabular.transform import automl_tabular_transform as TransformOp - -__all__ = [ - 'CvTrainerOp', - 'InfraValidatorOp', - 'Stage1TunerOp', - 'EnsembleOp', - 'StatsAndExampleGenOp', - 'TransformOp', - 'FinalizerOp', - 'SplitMaterializedDataOp', - 'TrainingConfiguratorAndValidatorOp', -] diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml deleted file mode 100644 index 3c4fbb6d46..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +++ /dev/null @@ -1,11149 +0,0 @@ -# PIPELINE DEFINITION -# Name: automl-tabular -# Description: The AutoML Tabular pipeline v1. 
-# Inputs: -# additional_experiments: dict -# cv_trainer_worker_pool_specs_override: list -# data_source_bigquery_table_path: str [Default: ''] -# data_source_csv_filenames: str [Default: ''] -# dataflow_service_account: str [Default: ''] -# dataflow_subnetwork: str [Default: ''] -# dataflow_use_public_ips: bool [Default: True] -# disable_early_stopping: bool [Default: False] -# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] -# distill_batch_predict_max_replica_count: int [Default: 25.0] -# distill_batch_predict_starting_replica_count: int [Default: 25.0] -# enable_probabilistic_inference: bool [Default: False] -# encryption_spec_key_name: str [Default: ''] -# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_explain_max_replica_count: int [Default: 10.0] -# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] -# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] -# evaluation_batch_predict_max_replica_count: int [Default: 20.0] -# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] -# evaluation_dataflow_disk_size_gb: int [Default: 50.0] -# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] -# evaluation_dataflow_max_num_workers: int [Default: 100.0] -# evaluation_dataflow_starting_num_workers: int [Default: 10.0] -# export_additional_model_without_custom_ops: bool [Default: False] -# fast_testing: bool [Default: False] -# location: str -# model_description: str [Default: ''] -# model_display_name: str [Default: ''] -# optimization_objective: str -# optimization_objective_precision_value: float [Default: -1.0] -# optimization_objective_recall_value: float [Default: -1.0] -# predefined_split_key: str [Default: ''] -# prediction_type: str -# project: str -# quantiles: list -# root_dir: str -# run_distillation: bool [Default: False] -# run_evaluation: bool [Default: False] -# stage_1_num_parallel_trials: int [Default: 35.0] -# stage_1_tuner_worker_pool_specs_override: list -# stage_1_tuning_result_artifact_uri: str [Default: ''] -# stage_2_num_parallel_trials: int [Default: 35.0] -# stage_2_num_selected_trials: int [Default: 5.0] -# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] -# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] -# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] -# stratified_split_key: str [Default: ''] -# study_spec_parameters_override: list -# target_column: str -# test_fraction: float [Default: -1.0] -# timestamp_split_key: str [Default: ''] -# train_budget_milli_node_hours: float -# training_fraction: float [Default: -1.0] -# transform_dataflow_disk_size_gb: int [Default: 40.0] -# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] -# transform_dataflow_max_num_workers: int [Default: 25.0] -# transformations: str -# validation_fraction: float [Default: -1.0] -# vertex_dataset: system.Artifact -# weight_column: str [Default: ''] -# Outputs: -# feature-attribution-2-feature_attributions: system.Metrics -# feature-attribution-3-feature_attributions: system.Metrics -# feature-attribution-feature_attributions: system.Metrics -# model-evaluation-2-evaluation_metrics: system.Metrics -# model-evaluation-3-evaluation_metrics: system.Metrics -# model-evaluation-evaluation_metrics: system.Metrics -components: - comp-automl-tabular-cv-trainer: - executorLabel: exec-automl-tabular-cv-trainer - inputDefinitions: - artifacts: - materialized_cv_splits: - artifactType: - schemaTitle: 
system.Artifact - schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-cv-trainer-2: - executorLabel: exec-automl-tabular-cv-trainer-2 - inputDefinitions: - artifacts: - materialized_cv_splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized cross-validation splits. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: AutoML Tabular tuning result. - parameters: - deadline_hours: - description: Number of hours the cross-validation trainer should run. - parameterType: NUMBER_DOUBLE - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble: - executorLabel: exec-automl-tabular-ensemble - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. 
- parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble-2: - executorLabel: exec-automl-tabular-ensemble-2 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. 
- model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. - parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-ensemble-3: - executorLabel: exec-automl-tabular-ensemble-3 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The instance baseline - - used to calculate explanations.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - tuning_result_input: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'AutoML Tabular tuning - - result.' - warmup_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: 'The warm up data. Ensemble component will save the - - warm up data together with the model artifact, used to warm up the model - - when prediction server starts.' - isOptional: true - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - export_additional_model_without_custom_ops: - defaultValue: false - description: 'True if export - - an additional model without custom TF operators to the - - `model_without_custom_ops` output.' - isOptional: true - parameterType: BOOLEAN - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - model: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model. - model_architecture: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The architecture of the output model. - model_without_custom_ops: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The output model without custom TF operators, this output will - be empty unless `export_additional_model_without_custom_ops` is set. - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - explanation_metadata: - description: The explanation parameters used by Vertex online and batch - explanations. 
- parameterType: STRUCT - explanation_parameters: - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-finalizer: - executorLabel: exec-automl-tabular-finalizer - inputDefinitions: - parameters: - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-infra-validator: - executorLabel: exec-automl-tabular-infra-validator - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-infra-validator-2: - executorLabel: exec-automl-tabular-infra-validator-2 - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-infra-validator-3: - executorLabel: exec-automl-tabular-infra-validator-3 - inputDefinitions: - artifacts: - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'google.UnmanagedContainerModel for model - - to be validated.' - comp-automl-tabular-stage-1-tuner: - executorLabel: exec-automl-tabular-stage-1-tuner - inputDefinitions: - artifacts: - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - disable_early_stopping: - defaultValue: false - description: 'True if disable early stopping. Default - - value is false.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. 
- parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "model_type","categorical_value_spec": {"values": - - ["nn"]}}]' - isOptional: true - parameterType: LIST - tune_feature_selection_rate: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-stage-1-tuner-2: - executorLabel: exec-automl-tabular-stage-1-tuner-2 - inputDefinitions: - artifacts: - feature_ranking: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized eval split. - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'The materialized train - - split.' - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - deadline_hours: - description: 'Number of hours the cross-validation trainer - - should run.' - parameterType: NUMBER_DOUBLE - disable_early_stopping: - defaultValue: false - description: 'True if disable early stopping. Default - - value is false.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. 
- parameterType: STRING - num_parallel_trials: - description: Number of parallel training trials. - parameterType: NUMBER_INTEGER - num_selected_features: - defaultValue: 0.0 - description: 'Number of selected features. The number of - - features to learn in the NN models.' - isOptional: true - parameterType: NUMBER_INTEGER - num_selected_trials: - description: 'Number of selected trials. The number of weak - - learners in the final model is 5 * num_selected_trials.' - parameterType: NUMBER_INTEGER - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - reduce_search_space_mode: - defaultValue: regular - description: 'The reduce search space mode. Possible - - values: "regular" (default), "minimal", "full".' - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - single_run_max_secs: - description: Max number of seconds each training trial runs. - parameterType: NUMBER_INTEGER - study_spec_parameters_override: - defaultValue: [] - description: 'JSON study spec. E.g., - - [{"parameter_id": "model_type","categorical_value_spec": {"values": - - ["nn"]}}]' - isOptional: true - parameterType: LIST - tune_feature_selection_rate: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - worker_pool_specs_override_json: - defaultValue: [] - description: 'JSON worker pool specs. E.g., - - [{"machine_spec": {"machine_type": - - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - - "n1-standard-16"}}]' - isOptional: true - parameterType: LIST - outputDefinitions: - artifacts: - tuning_result_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The trained model and architectures. - parameters: - execution_metrics: - description: Core metrics in dictionary of component execution. - parameterType: STRUCT - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-transform: - executorLabel: exec-automl-tabular-transform - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The eval split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The test split. - train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The train split. - parameters: - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' 
- isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized test split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-automl-tabular-transform-2: - executorLabel: exec-automl-tabular-transform-2 - inputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The eval split. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The test split. - train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The train split. - parameters: - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. 
If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: Location for running the Cross-validation trainer. - parameterType: STRING - project: - description: Project to run Cross-validation trainer. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - outputDefinitions: - artifacts: - materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized test split. - materialized_test_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The materialized train split. - training_schema_uri: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The training schema. - transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The transform output artifact. - parameters: - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-bool-identity: - executorLabel: exec-bool-identity - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-2: - executorLabel: exec-bool-identity-2 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-bool-identity-3: - executorLabel: exec-bool-identity-3 - inputDefinitions: - parameters: - value: - description: Boolean value to return - parameterType: BOOLEAN - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-calculate-training-parameters: - executorLabel: exec-calculate-training-parameters - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. 
- parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-calculate-training-parameters-2: - executorLabel: exec-calculate-training-parameters-2 - inputDefinitions: - parameters: - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - is_skip_architecture_search: - defaultValue: false - description: 'If component is being called in the - - skip_architecture_search pipeline.' - isOptional: true - parameterType: BOOLEAN - run_distillation: - description: Whether to run distill in the training pipeline. - parameterType: BOOLEAN - stage_1_num_parallel_trials: - description: Number of parallel trails for stage 1. - parameterType: NUMBER_INTEGER - stage_2_num_parallel_trials: - description: Number of parallel trails for stage 2. - parameterType: NUMBER_INTEGER - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' 
- parameterType: NUMBER_DOUBLE - outputDefinitions: - parameters: - distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - reduce_search_space_mode: - parameterType: STRING - stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_1_num_selected_trials: - parameterType: NUMBER_INTEGER - stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - stage_2_deadline_hours: - parameterType: NUMBER_DOUBLE - stage_2_single_run_max_secs: - parameterType: NUMBER_INTEGER - comp-condition-2: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-3 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-3 - tasks: - automl-tabular-cv-trainer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer - dependentTasks: - - calculate-training-parameters - - importer - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: artifact - producerTask: importer - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer - automl-tabular-ensemble: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble - dependentTasks: - - automl-tabular-cv-trainer - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - 
componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble - automl-tabular-infra-validator: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - taskInfo: - name: automl-tabular-infra-validator - bool-identity: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity - calculate-training-parameters: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 1.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters - condition-3: - componentRef: - name: comp-condition-3 - dependentTasks: - - automl-tabular-ensemble - - bool-identity - - model-upload - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - pipelinechannel--model-upload-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - pipelinechannel--bool-identity-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - 
pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] - == 'true' - importer: - cachingOptions: - enableCache: true - componentRef: - name: comp-importer - inputs: - parameters: - uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: importer - model-upload: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload - dependentTasks: - - automl-tabular-ensemble - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-transform-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-3: - dag: - outputs: - artifacts: - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: 
evaluation_metrics - producerSubtask: model-evaluation - tasks: - feature-attribution: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution - dependentTasks: - - model-batch-explanation - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution - model-batch-explanation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation - model-batch-predict: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: 
pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict - model-evaluation: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation - dependentTasks: - - model-batch-predict - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation - model-evaluation-import: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import - dependentTasks: - - feature-attribution - - model-evaluation - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation - model: - componentInputArtifact: pipelinechannel--model-upload-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-4: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-5 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-7 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-5 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-7 - tasks: - automl-tabular-cv-trainer-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-cv-trainer-2 - dependentTasks: - - automl-tabular-stage-1-tuner - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_cv_splits: - componentInputArtifact: pipelinechannel--merge-materialized-splits-splits - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - 
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-stage-1-tuner - parameters: - deadline_hours: - taskOutputParameter: - outputParameterKey: stage_2_deadline_hours - producerTask: calculate-training-parameters-2 - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_2_single_run_max_secs - producerTask: calculate-training-parameters-2 - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - taskInfo: - name: automl-tabular-cv-trainer-2 - automl-tabular-ensemble-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble-2 - dependentTasks: - - automl-tabular-cv-trainer-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-cv-trainer-2 - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble-2 - automl-tabular-infra-validator-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator-2 - dependentTasks: - - automl-tabular-ensemble-2 - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - taskInfo: - name: automl-tabular-infra-validator-2 - automl-tabular-stage-1-tuner: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-stage-1-tuner - dependentTasks: - - calculate-training-parameters-2 - inputs: - artifacts: - materialized_eval_split: - componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split - materialized_train_split: - componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output - parameters: - deadline_hours: - 
taskOutputParameter: - outputParameterKey: stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - taskOutputParameter: - outputParameterKey: stage_1_num_selected_trials - producerTask: calculate-training-parameters-2 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - taskOutputParameter: - outputParameterKey: reduce_search_space_mode - producerTask: calculate-training-parameters-2 - root_dir: - componentInputParameter: pipelinechannel--root_dir - single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-tabular-stage-1-tuner - bool-identity-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-2 - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_evaluation - taskInfo: - name: bool-identity-2 - bool-identity-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-bool-identity-3 - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--run_distillation - taskInfo: - name: bool-identity-3 - calculate-training-parameters-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-calculate-training-parameters-2 - inputs: - parameters: - fast_testing: - componentInputParameter: pipelinechannel--fast_testing - is_skip_architecture_search: - runtimeValue: - constant: 0.0 - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: calculate-training-parameters-2 - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - automl-tabular-ensemble-2 - - bool-identity-2 - - bool-identity-3 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-2 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-2 - pipelinechannel--bool-identity-2-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-2 - pipelinechannel--bool-identity-3-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-3 - pipelinechannel--dataflow_service_account: - 
componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: no-distill - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] - == 'false' - condition-7: - componentRef: - name: comp-condition-7 - dependentTasks: - - automl-tabular-ensemble-2 - - bool-identity-2 - - bool-identity-3 - - calculate-training-parameters-2 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-2 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - pipelinechannel--tabular-stats-and-example-gen-eval_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - pipelinechannel--tabular-stats-and-example-gen-metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - pipelinechannel--tabular-stats-and-example-gen-test_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split - pipelinechannel--tabular-stats-and-example-gen-train_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split - parameters: - pipelinechannel--bool-identity-2-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-2 - pipelinechannel--bool-identity-3-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: bool-identity-3 - pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: - taskOutputParameter: - outputParameterKey: distill_stage_1_deadline_hours - producerTask: calculate-training-parameters-2 - pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: - taskOutputParameter: - outputParameterKey: reduce_search_space_mode - producerTask: calculate-training-parameters-2 - pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: - taskOutputParameter: - outputParameterKey: stage_1_single_run_max_secs - producerTask: calculate-training-parameters-2 - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - taskInfo: - name: is-distill - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] - == 'true' - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-transform-materialized_eval_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-transform-materialized_train_split: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-transform-transform_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--merge-materialized-splits-splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - 
pipelinechannel--tabular-stats-and-example-gen-test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - outputDefinitions: - 
artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-5: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-6 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-6 - tasks: - condition-6: - componentRef: - name: comp-condition-6 - dependentTasks: - - model-upload-2 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - pipelinechannel--model-upload-2-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-2 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - pipelinechannel--bool-identity-2-Output: - componentInputParameter: pipelinechannel--bool-identity-2-Output - pipelinechannel--bool-identity-3-Output: - componentInputParameter: pipelinechannel--bool-identity-3-Output - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers 
- pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] - == 'true' - model-upload-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - description: - componentInputParameter: pipelinechannel--model_description - display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - 
pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-6: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation-2 - tasks: - feature-attribution-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-2 - dependentTasks: - - model-batch-explanation-2 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-2 - model-batch-explanation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-2 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: 
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-2 - model-batch-predict-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-2 - model-evaluation-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-2 - dependentTasks: - - model-batch-predict-2 - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-2 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: 
pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation-2 - model-evaluation-import-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-2 - dependentTasks: - - feature-attribution-2 - - model-evaluation-2 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-2 - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-2 - model: - componentInputArtifact: pipelinechannel--model-upload-2-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-2-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - 
pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-7: - dag: - outputs: - artifacts: - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-8 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-8 - tasks: - automl-tabular-ensemble-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-ensemble-3 - dependentTasks: - - automl-tabular-stage-1-tuner-2 - - automl-tabular-transform-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - instance_baseline: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform-2 - tuning_result_input: - taskOutputArtifact: - outputArtifactKey: tuning_result_output - producerTask: automl-tabular-stage-1-tuner-2 - warmup_data: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-ensemble-3 - automl-tabular-infra-validator-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-infra-validator-3 - dependentTasks: - - automl-tabular-ensemble-3 - inputs: - artifacts: - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - taskInfo: - name: automl-tabular-infra-validator-3 - automl-tabular-stage-1-tuner-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-stage-1-tuner-2 - dependentTasks: - - automl-tabular-transform-2 - inputs: - artifacts: - materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform-2 - materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform-2 - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform-2 - parameters: - deadline_hours: - componentInputParameter: 
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours - disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - num_selected_trials: - runtimeValue: - constant: 1.0 - project: - componentInputParameter: pipelinechannel--project - reduce_search_space_mode: - componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_distillation: - runtimeValue: - constant: 1.0 - single_run_max_secs: - componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs - worker_pool_specs_override_json: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - taskInfo: - name: automl-tabular-stage-1-tuner-2 - automl-tabular-transform-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-transform-2 - dependentTasks: - - write-bp-result-path - - write-bp-result-path-2 - inputs: - artifacts: - dataset_schema: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema - eval_split: - taskOutputArtifact: - outputArtifactKey: result - producerTask: write-bp-result-path-2 - metadata: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata - test_split: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split - train_split: - taskOutputArtifact: - outputArtifactKey: result - producerTask: write-bp-result-path - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-transform-2 - condition-8: - componentRef: - name: comp-condition-8 - dependentTasks: - - automl-tabular-ensemble-3 - - model-upload-3 - inputs: - artifacts: - pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-3 - pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - pipelinechannel--model-upload-3-model: - taskOutputArtifact: - outputArtifactKey: model - producerTask: model-upload-3 - parameters: - pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-3 - pipelinechannel--bool-identity-2-Output: - 
componentInputParameter: pipelinechannel--bool-identity-2-Output - pipelinechannel--bool-identity-3-Output: - componentInputParameter: pipelinechannel--bool-identity-3-Output - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--string-not-empty-Output: - componentInputParameter: pipelinechannel--string-not-empty-Output - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - taskInfo: - name: is-evaluation - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] - == 'true' - model-batch-predict-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-3 - dependentTasks: - - read-input-uri - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - 
componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - taskOutputParameter: - outputParameterKey: Output - producerTask: read-input-uri - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-train-split - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: tf-record - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-3 - model-batch-predict-4: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-4 - dependentTasks: - - read-input-uri-2 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - taskOutputParameter: - outputParameterKey: Output - producerTask: read-input-uri-2 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-eval-split - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: tf-record - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-4 - model-upload-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-upload-3 - dependentTasks: - - automl-tabular-ensemble-3 - - automl-tabular-infra-validator-3 - inputs: - artifacts: - explanation_metadata_artifact: - taskOutputArtifact: - outputArtifactKey: explanation_metadata_artifact - producerTask: automl-tabular-ensemble-3 - unmanaged_container_model: - taskOutputArtifact: - outputArtifactKey: unmanaged_container_model - producerTask: automl-tabular-ensemble-3 - parameters: - display_name: - runtimeValue: - constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - taskOutputParameter: - outputParameterKey: explanation_parameters - producerTask: automl-tabular-ensemble-3 - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: model-upload-3 - read-input-uri: - cachingOptions: - enableCache: true - componentRef: - name: comp-read-input-uri - inputs: - artifacts: - split_uri: - componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split - taskInfo: - name: read-input-uri - read-input-uri-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-read-input-uri-2 - inputs: - artifacts: - split_uri: - 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split - taskInfo: - name: read-input-uri-2 - write-bp-result-path: - cachingOptions: - enableCache: true - componentRef: - name: comp-write-bp-result-path - dependentTasks: - - model-batch-predict-3 - inputs: - artifacts: - bp_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-3 - taskInfo: - name: write-bp-result-path - write-bp-result-path-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-write-bp-result-path-2 - dependentTasks: - - model-batch-predict-4 - inputs: - artifacts: - bp_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-4 - taskInfo: - name: write-bp-result-path-2 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - pipelinechannel--tabular-stats-and-example-gen-train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - parameters: - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: - parameterType: STRING - pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: - parameterType: NUMBER_INTEGER - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - 
parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-condition-8: - dag: - outputs: - artifacts: - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature_attributions - producerSubtask: feature-attribution-3 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: evaluation_metrics - producerSubtask: model-evaluation-3 - tasks: - feature-attribution-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-feature-attribution-3 - dependentTasks: - - model-batch-explanation-3 - inputs: - artifacts: - predictions_gcs_source: - taskOutputArtifact: - outputArtifactKey: gcs_output_directory - producerTask: model-batch-explanation-3 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - taskInfo: - name: feature-attribution-3 - model-batch-explanation-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-explanation-3 - inputs: - artifacts: - explanation_metadata_artifact: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact - unmanaged_container_model: - componentInputArtifact: 
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - explanation_parameters: - componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json - generate_explanation: - runtimeValue: - constant: 1.0 - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - taskInfo: - name: model-batch-explanation-3 - model-batch-predict-5: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-batch-predict-5 - inputs: - artifacts: - unmanaged_container_model: - componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model - parameters: - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - gcs_destination_output_uri_prefix: - componentInputParameter: pipelinechannel--root_dir - gcs_source_uris: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - instances_format: - runtimeValue: - constant: tf-record - job_display_name: - runtimeValue: - constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - location: - componentInputParameter: pipelinechannel--location - machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - predictions_format: - runtimeValue: - constant: jsonl - project: - componentInputParameter: pipelinechannel--project - starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - taskInfo: - name: model-batch-predict-5 - model-evaluation-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-3 - dependentTasks: - - model-batch-predict-5 - inputs: - artifacts: - batch_prediction_job: - taskOutputArtifact: - outputArtifactKey: batchpredictionjob - producerTask: model-batch-predict-5 - parameters: - dataflow_disk_size: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - dataflow_max_workers_num: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataflow_workers_num: - componentInputParameter: 
pipelinechannel--evaluation_dataflow_starting_num_workers - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - ground_truth_column: - componentInputParameter: pipelinechannel--target_column - ground_truth_format: - runtimeValue: - constant: jsonl - location: - componentInputParameter: pipelinechannel--location - prediction_label_column: - runtimeValue: - constant: '' - prediction_score_column: - runtimeValue: - constant: '' - predictions_format: - runtimeValue: - constant: jsonl - problem_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: model-evaluation-3 - model-evaluation-import-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-model-evaluation-import-3 - dependentTasks: - - feature-attribution-3 - - model-evaluation-3 - inputs: - artifacts: - feature_attributions: - taskOutputArtifact: - outputArtifactKey: feature_attributions - producerTask: feature-attribution-3 - metrics: - taskOutputArtifact: - outputArtifactKey: evaluation_metrics - producerTask: model-evaluation-3 - model: - componentInputArtifact: pipelinechannel--model-upload-3-model - parameters: - dataset_paths: - componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json - dataset_type: - runtimeValue: - constant: tf-record - display_name: - runtimeValue: - constant: AutoML Tabular - problem_type: - componentInputParameter: pipelinechannel--prediction_type - taskInfo: - name: model-evaluation-import-3 - inputDefinitions: - artifacts: - pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - pipelinechannel--model-upload-3-model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: - parameterType: STRUCT - pipelinechannel--bool-identity-2-Output: - parameterType: STRING - pipelinechannel--bool-identity-3-Output: - parameterType: STRING - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--location: - parameterType: 
STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--string-not-empty-Output: - parameterType: STRING - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - parameterType: LIST - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-exit-handler-1: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 - tasks: - automl-tabular-transform: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-transform - dependentTasks: - - tabular-stats-and-example-gen - inputs: - artifacts: - dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - test_split: - taskOutputArtifact: - outputArtifactKey: test_split - producerTask: tabular-stats-and-example-gen - train_split: - taskOutputArtifact: - outputArtifactKey: train_split - producerTask: tabular-stats-and-example-gen - parameters: - dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - project: - componentInputParameter: pipelinechannel--project - root_dir: - componentInputParameter: pipelinechannel--root_dir - taskInfo: - name: automl-tabular-transform - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - 
automl-tabular-transform - - merge-materialized-splits - - string-not-empty - - tabular-stats-and-example-gen - inputs: - artifacts: - pipelinechannel--automl-tabular-transform-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: 
pipelinechannel--model_description - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - taskOutputParameter: - outputParameterKey: downsampled_test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - taskOutputParameter: - outputParameterKey: test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - automl-tabular-transform - - merge-materialized-splits - - string-not-empty - - tabular-stats-and-example-gen - inputs: - artifacts: - pipelinechannel--automl-tabular-transform-materialized_eval_split: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform - pipelinechannel--automl-tabular-transform-materialized_train_split: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform - pipelinechannel--automl-tabular-transform-transform_output: - taskOutputArtifact: - outputArtifactKey: transform_output - producerTask: automl-tabular-transform - pipelinechannel--merge-materialized-splits-splits: - taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--tabular-stats-and-example-gen-dataset_schema: - taskOutputArtifact: - outputArtifactKey: dataset_schema - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-eval_split: - taskOutputArtifact: - outputArtifactKey: eval_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-instance_baseline: - taskOutputArtifact: - outputArtifactKey: instance_baseline - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-metadata: - taskOutputArtifact: - outputArtifactKey: metadata - producerTask: tabular-stats-and-example-gen - 
pipelinechannel--tabular-stats-and-example-gen-test_split: - taskOutputArtifact: - outputArtifactKey: test_split - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-train_split: - taskOutputArtifact: - outputArtifactKey: train_split - producerTask: tabular-stats-and-example-gen - parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: pipelinechannel--distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count - pipelinechannel--encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--location: - componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: - componentInputParameter: pipelinechannel--project - pipelinechannel--root_dir: - 
componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override - pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: - taskOutputParameter: - outputParameterKey: downsampled_test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--tabular-stats-and-example-gen-test_split_json: - taskOutputParameter: - outputParameterKey: test_split_json - producerTask: tabular-stats-and-example-gen - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: pipelinechannel--transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers - taskInfo: - name: stage_1_tuning_result_artifact_uri_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - merge-materialized-splits: - cachingOptions: - enableCache: true - componentRef: - name: comp-merge-materialized-splits - dependentTasks: - - automl-tabular-transform - inputs: - artifacts: - split_0: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: automl-tabular-transform - split_1: - taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: automl-tabular-transform - taskInfo: - name: merge-materialized-splits - string-not-empty: - cachingOptions: - enableCache: true - componentRef: - name: comp-string-not-empty - inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - taskInfo: - name: string-not-empty - tabular-stats-and-example-gen: - cachingOptions: - enableCache: true - componentRef: - name: comp-tabular-stats-and-example-gen - inputs: - parameters: - additional_experiments_json: - componentInputParameter: pipelinechannel--additional_experiments - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames - dataflow_disk_size_gb: - 
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb - dataflow_machine_type: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type - dataflow_max_num_workers: - componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers - dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - encryption_spec_key_name: - componentInputParameter: pipelinechannel--encryption_spec_key_name - location: - componentInputParameter: pipelinechannel--location - optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key - prediction_type: - componentInputParameter: pipelinechannel--prediction_type - project: - componentInputParameter: pipelinechannel--project - quantiles: - componentInputParameter: pipelinechannel--quantiles - root_dir: - componentInputParameter: pipelinechannel--root_dir - run_distillation: - componentInputParameter: pipelinechannel--run_distillation - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key - target_column_name: - componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - timestamp_split_key: - componentInputParameter: pipelinechannel--timestamp_split_key - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - transformations: - runtimeValue: - constant: '[]' - transformations_path: - componentInputParameter: pipelinechannel--transformations - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction - weight_column_name: - componentInputParameter: pipelinechannel--weight_column - taskInfo: - name: tabular-stats-and-example-gen - inputDefinitions: - parameters: - pipelinechannel--additional_experiments: - parameterType: STRUCT - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--dataflow_service_account: - parameterType: STRING - pipelinechannel--dataflow_subnetwork: - parameterType: STRING - pipelinechannel--dataflow_use_public_ips: - parameterType: BOOLEAN - pipelinechannel--disable_early_stopping: - parameterType: BOOLEAN - pipelinechannel--distill_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--distill_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--distill_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN - pipelinechannel--encryption_spec_key_name: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_explain_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_explain_starting_replica_count: - parameterType: 
NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_machine_type: - parameterType: STRING - pipelinechannel--evaluation_batch_predict_max_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_batch_predict_starting_replica_count: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_machine_type: - parameterType: STRING - pipelinechannel--evaluation_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--evaluation_dataflow_starting_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--export_additional_model_without_custom_ops: - parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--location: - parameterType: STRING - pipelinechannel--model_description: - parameterType: STRING - pipelinechannel--optimization_objective: - parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--predefined_split_key: - parameterType: STRING - pipelinechannel--prediction_type: - parameterType: STRING - pipelinechannel--project: - parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST - pipelinechannel--root_dir: - parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING - pipelinechannel--stage_1_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--stats_and_example_gen_dataflow_machine_type: - parameterType: STRING - pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--stratified_split_key: - parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST - pipelinechannel--target_column: - parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--timestamp_split_key: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--transform_dataflow_disk_size_gb: - parameterType: NUMBER_INTEGER - pipelinechannel--transform_dataflow_machine_type: - parameterType: STRING - pipelinechannel--transform_dataflow_max_num_workers: - parameterType: NUMBER_INTEGER - pipelinechannel--transformations: - parameterType: STRING - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--weight_column: - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 
0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. 
If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' 
- isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-3: - executorLabel: exec-feature-attribution-3 - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. 
For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' - isOptional: true - parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true - parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING - outputDefinitions: - artifacts: - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-merge-materialized-splits: - executorLabel: exec-merge-materialized-splits - inputDefinitions: - artifacts: - split_0: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The first materialized split. - split_1: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The second materialized split. 
- outputDefinitions: - artifacts: - splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-explanation-3: - executorLabel: exec-model-batch-explanation-3 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - instances_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - job_display_name: - parameterType: STRING - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - isOptional: true - 
parameterType: STRUCT - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - project: - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. 
- - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. 
This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-3: - executorLabel: exec-model-batch-predict-3 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. 
In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. 
If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' 
- isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. 
Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' 
- gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-4: - executorLabel: exec-model-batch-predict-4 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). 
These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-batch-predict-5: - executorLabel: exec-model-batch-predict-5 - inputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'The Model used to get predictions via this job. Must share - the same - - ancestor Location. Starting this job has no impact on any existing - - deployments of the Model and their resources. Either this or - - unmanaged_container_model must be specified.' 
- isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - description: 'The unmanaged container model used to get predictions via - this job. - - This should be used for models that are not uploaded to Vertex. Either - - this or model must be specified.' - isOptional: true - parameters: - accelerator_count: - defaultValue: 0.0 - description: 'The number of accelerators to attach - - to the `machine_type`. Only used if `machine_type` is set. For more - - details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: NUMBER_INTEGER - accelerator_type: - defaultValue: '' - description: 'The type of accelerator(s) that may be - - attached to the machine as per `accelerator_count`. Only used if - - `machine_type` is set. For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - bigquery_destination_output_uri: - defaultValue: '' - description: 'The BigQuery project location where the output is to be written - to. In - - the given project a new dataset is created with name - - ``prediction__`` where is made - - BigQuery-dataset-name compatible (for example, most special characters - - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - - "based on ISO-8601" format. In the dataset two tables will be created, - - ``predictions``, and ``errors``. If the Model has both ``instance`` - - and ``prediction`` schemata defined then the tables have columns as - - follows: The ``predictions`` table contains instances for which the - - prediction succeeded, it has columns as per a concatenation of the - - Model''s instance and prediction schemata. The ``errors`` table - - contains rows for which the prediction has failed, it has instance - - columns, as per the instance schema, followed by a single "errors" - - column, which as values has ```google.rpc.Status`` `__ - - represented as a STRUCT, and containing only ``code`` and - - ``message``. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - bigquery_source_input_uri: - defaultValue: '' - description: 'BigQuery URI to a table, up to 2000 characters long. For example: - - `projectId.bqDatasetId.bqTableId` For more details about this input - - config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' - isOptional: true - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - description: 'Customer-managed encryption - - key options for a BatchPredictionJob. If this is set, then all - - resources created by the BatchPredictionJob will be encrypted with the - - provided encryption key. Has the form: - - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. - - The key needs to be in the same region as where the compute resource - - is created.' - isOptional: true - parameterType: STRING - excluded_fields: - defaultValue: [] - description: 'Fields that will be excluded in the prediction instance that - is - - sent to the Model. - - Excluded will be attached to the batch prediction output if - - [key_field][] is not specified. - - When excluded_fields is populated, [included_fields][] must be empty. 
- - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord. - - may be specified via the Model''s `parameters_schema_uri`.' - isOptional: true - parameterType: LIST - explanation_metadata: - defaultValue: {} - description: 'Explanation metadata - - configuration for this BatchPredictionJob. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_metadata`. All fields of - - `explanation_metadata` are optional in the request. If a field of the - - `explanation_metadata` object is not populated, the corresponding - - field of the `Model.explanation_metadata` object is inherited. For - - more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - description: 'Parameters to configure - - explaining for Model''s predictions. Can be specified only if - - `generate_explanation` is set to `True`. This value overrides the - - value of `Model.explanation_parameters`. All fields of - - `explanation_parameters` are optional in the request. If a field of - - the `explanation_parameters` object is not populated, the - - corresponding field of the `Model.explanation_parameters` object is - - inherited. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' - isOptional: true - parameterType: STRUCT - gcs_destination_output_uri_prefix: - defaultValue: '' - description: 'The Google Cloud - - Storage location of the directory where the output is to be written - - to. In the given directory a new directory is created. Its name is - - ``prediction--``, where timestamp - - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - - ``predictions_0001.``, ``predictions_0002.``, - - ..., ``predictions_N.`` are created where ```` - - depends on chosen ``predictions_format``, and N may equal 0001 and - - depends on the total number of successfully predicted instances. If - - the Model has both ``instance`` and ``prediction`` schemata defined - - then each such file contains predictions as per the - - ``predictions_format``. If prediction for any instance failed - - (partially or completely), then an additional - - ``errors_0001.``, ``errors_0002.``,..., - - ``errors_N.`` files are created (N depends on total number - - of failed predictions). These files contain the failed instances, as - - per their schema, followed by an additional ``error`` field which as - - value has ``google.rpc.Status`` containing only ``code`` and - - ``message`` fields. For more details about this output config, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' - isOptional: true - parameterType: STRING - gcs_source_uris: - defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." - isOptional: true - parameterType: LIST - generate_explanation: - defaultValue: false - description: 'Generate explanation along with - - the batch prediction results. 
This will cause the batch prediction - - output to include explanations based on the `prediction_format`: - - - `bigquery`: output includes a column named `explanation`. The value is - - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - - `jsonl`: The JSON objects on each line include an additional entry - - keyed `explanation`. The value of the entry is a JSON object that - - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - - Generating explanations for CSV format is not supported. If this - - field is set to true, either the Model.explanation_spec or - - explanation_metadata and explanation_parameters must be populated.' - isOptional: true - parameterType: BOOLEAN - included_fields: - defaultValue: [] - description: 'Fields that will be included in the prediction instance that - is - - sent to the Model. - - If [instance_type][] is `array`, the order of field names in - - included_fields also determines the order of the values in the array. - - When included_fields is populated, [excluded_fields][] must be empty. - - The input must be JSONL with objects at each line, CSV, BigQuery - - or TfRecord.' - isOptional: true - parameterType: LIST - instance_type: - defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ - \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" - isOptional: true - parameterType: STRING - instances_format: - defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
- isOptional: true - parameterType: STRING - job_display_name: - description: The user-defined name of this BatchPredictionJob. - parameterType: STRING - key_field: - defaultValue: '' - description: "The name of the field that is considered as a key.\nThe values\ - \ identified by the key field is not included in the\ntransformed instances\ - \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." - isOptional: true - parameterType: STRING - labels: - defaultValue: {} - description: 'The labels with user-defined metadata to - - organize your BatchPredictionJobs. Label keys and values can be no - - longer than 64 characters (Unicode codepoints), can only contain - - lowercase letters, numeric characters, underscores and dashes. - - International characters are allowed. See https://goo.gl/xmQnxf for - - more information and examples of labels.' - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' - isOptional: true - parameterType: STRING - machine_type: - defaultValue: '' - description: 'The type of machine for running batch - - prediction on dedicated resources. If the Model supports - - DEDICATED_RESOURCES this config may be provided (and the job will use - - these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, - - this config must be provided. For more details about the - - BatchDedicatedResources, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - - For more details about the machine spec, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' - isOptional: true - parameterType: STRING - manual_batch_tuning_parameters_batch_size: - defaultValue: 0.0 - description: 'The number of - - the records (e.g. instances) of the operation given in each batch to a - - machine replica. Machine type, and size of a single record should be - - considered when setting this parameter, higher value speeds up the - - batch operation''s execution, but too high value will result in a whole - - batch not fitting in a machine''s memory, and the whole operation will - - fail. The default value is 4.' - isOptional: true - parameterType: NUMBER_INTEGER - max_replica_count: - defaultValue: 0.0 - description: 'The maximum number of machine replicas the batch operation - may be scaled - - to. Only used if `machine_type` is set. Default is 10.' - isOptional: true - parameterType: NUMBER_INTEGER - model_parameters: - defaultValue: {} - description: The parameters that govern the predictions. The schema of the - parameters - isOptional: true - parameterType: STRUCT - predictions_format: - defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." - isOptional: true - parameterType: STRING - project: - description: Project to create the BatchPredictionJob. - parameterType: STRING - starting_replica_count: - defaultValue: 0.0 - description: 'The number of machine replicas - - used at the start of the batch operation. If not set, Vertex AI - - decides starting number, not greater than `max_replica_count`. Only - - used if `machine_type` is set.' - isOptional: true - parameterType: NUMBER_INTEGER - outputDefinitions: - artifacts: - batchpredictionjob: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - - instead.**] Artifact - - representation of the created batch prediction job.' - bigquery_output_table: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - bigquery_output_table is specified.' - gcs_output_directory: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'Artifact tracking the batch prediction job output. This is - only - - available if - - gcs_destination_output_uri_prefix is specified.' - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the batch prediction - job. - - For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-model-evaluation: - executorLabel: exec-model-evaluation - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - 
schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-2: - executorLabel: exec-model-evaluation-2 - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-3: - executorLabel: exec-model-evaluation-3 - inputDefinitions: - artifacts: - batch_prediction_job: - artifactType: - schemaTitle: google.VertexBatchPredictionJob - schemaVersion: 0.0.1 - parameters: - dataflow_disk_size: - defaultValue: 50.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 100.0 - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 10.0 - isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - example_weight_column: - defaultValue: '' - isOptional: true - parameterType: STRING - ground_truth_column: - parameterType: STRING - ground_truth_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - prediction_id_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_label_column: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_score_column: - defaultValue: '' - isOptional: true - 
parameterType: STRING - predictions_format: - defaultValue: jsonl - isOptional: true - parameterType: STRING - problem_type: - parameterType: STRING - project: - parameterType: STRING - root_dir: - parameterType: STRING - outputDefinitions: - artifacts: - evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import: - executorLabel: exec-model-evaluation-import - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-2: - executorLabel: exec-model-evaluation-import-2 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' 
- isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. - isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-evaluation-import-3: - executorLabel: exec-model-evaluation-import-3 - inputDefinitions: - artifacts: - classification_metrics: - artifactType: - schemaTitle: google.ClassificationMetrics - schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the - - classification evaluation component.' - isOptional: true - explanation: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'Path for model explanation metrics generated from an evaluation - - component.' - isOptional: true - feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: 'The feature attributions metrics artifact generated - - from the feature attribution component.' - isOptional: true - forecasting_metrics: - artifactType: - schemaTitle: google.ForecastingMetrics - schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the - - forecasting evaluation component.' - isOptional: true - metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - description: Path of metrics generated from an evaluation component. 
- isOptional: true - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - description: 'Vertex model resource that will be the parent resource of - the - - uploaded evaluation.' - question_answering_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - regression_metrics: - artifactType: - schemaTitle: google.RegressionMetrics - schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression - - evaluation component.' - isOptional: true - summarization_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - text_generation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - isOptional: true - parameters: - dataset_path: - defaultValue: '' - isOptional: true - parameterType: STRING - dataset_paths: - defaultValue: [] - isOptional: true - parameterType: LIST - dataset_type: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - defaultValue: '' - description: The display name for the uploaded model evaluation resource. - isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. `classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' - isOptional: true - parameterType: STRING - outputDefinitions: - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload: - executorLabel: exec-model-upload - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-2: - executorLabel: exec-model-upload-2 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - 
parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-model-upload-3: - executorLabel: exec-model-upload-3 - inputDefinitions: - artifacts: - explanation_metadata_artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - isOptional: true - unmanaged_container_model: - artifactType: - schemaTitle: google.UnmanagedContainerModel - schemaVersion: 0.0.1 - isOptional: true - parameters: - description: - defaultValue: '' - isOptional: true - parameterType: STRING - display_name: - parameterType: STRING - encryption_spec_key_name: - defaultValue: '' - isOptional: true - parameterType: STRING - explanation_metadata: - defaultValue: {} - isOptional: true - parameterType: STRUCT - explanation_parameters: - defaultValue: {} - isOptional: true - parameterType: STRUCT - labels: - defaultValue: {} - isOptional: true - parameterType: STRUCT - location: - defaultValue: us-central1 - isOptional: true - parameterType: STRING - project: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: google.VertexModel - schemaVersion: 0.0.1 - parameters: - gcp_resources: - parameterType: STRING - comp-read-input-uri: - executorLabel: exec-read-input-uri - inputDefinitions: - artifacts: - split_uri: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: Tbe path to the file that contains Dataset data. - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-read-input-uri-2: - executorLabel: exec-read-input-uri-2 - inputDefinitions: - artifacts: - split_uri: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: Tbe path to the file that contains Dataset data. - outputDefinitions: - parameters: - Output: - parameterType: LIST - comp-set-optional-inputs: - executorLabel: exec-set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset when data source is Vertex dataset. - parameters: - data_source_bigquery_table_path: - description: The BigQuery table when data source is BQ. - parameterType: STRING - data_source_csv_filenames: - description: The CSV GCS path when data source is CSV. - parameterType: STRING - location: - description: The GCP region that runs the pipeline components. - parameterType: STRING - model_display_name: - description: The uploaded model's display name. - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - outputDefinitions: - parameters: - data_source_bigquery_table_path: - parameterType: STRING - data_source_csv_filenames: - parameterType: STRING - model_display_name: - parameterType: STRING - comp-string-not-empty: - executorLabel: exec-string-not-empty - inputDefinitions: - parameters: - value: - description: String value to be checked. 
- parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-tabular-stats-and-example-gen: - executorLabel: exec-tabular-stats-and-example-gen - inputDefinitions: - parameters: - additional_experiments: - defaultValue: '' - isOptional: true - parameterType: STRING - additional_experiments_json: - defaultValue: {} - isOptional: true - parameterType: STRUCT - data_source_bigquery_table_path: - defaultValue: '' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - isOptional: true - parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' - isOptional: true - parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Custom service account to run - - dataflow jobs.' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More - - details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - enable_probabilistic_inference: - defaultValue: false - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: Customer-managed encryption key. - isOptional: true - parameterType: STRING - location: - description: 'Location for running dataset statistics and example - - generation.' - parameterType: STRING - optimization_objective: - defaultValue: '' - description: "Objective function the model is optimizing\ntowards. The training\ - \ process creates a model that maximizes/minimizes\nthe value of the objective\ - \ function over the validation set. The\nsupported optimization objectives\ - \ depend on the prediction type. If the\nfield is not set, a default objective\ - \ function is used.\n classification: \"maximize-au-roc\" (default) -\ - \ Maximize the\n area under the receiver operating characteristic (ROC)\ - \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ - \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ - \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ - \ - Maximize recall for a\n specified precision value.\n classification\ - \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ - \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ - \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ - \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." 
- isOptional: true - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-recall-at-precision". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when - - optimization_objective is "maximize-precision-at-recall". Must be - - between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - predefined_split_key: - defaultValue: '' - isOptional: true - parameterType: STRING - prediction_type: - description: 'The prediction type. Supported values: - - "classification", "regression".' - parameterType: STRING - project: - description: 'Project to run dataset statistics and example - - generation.' - parameterType: STRING - quantiles: - defaultValue: [] - isOptional: true - parameterType: LIST - request_type: - defaultValue: COLUMN_STATS_ONLY - isOptional: true - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'True if in distillation mode. The default value - - is false.' - isOptional: true - parameterType: BOOLEAN - stratified_split_key: - defaultValue: '' - isOptional: true - parameterType: STRING - target_column_name: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - isOptional: true - parameterType: NUMBER_DOUBLE - timestamp_split_key: - defaultValue: '' - isOptional: true - parameterType: STRING - training_fraction: - defaultValue: -1.0 - isOptional: true - parameterType: NUMBER_DOUBLE - transformations: - description: 'Quote escaped JSON string for transformations. Each - - transformation will apply transform function to given input column. And - - the result will be used for training. When creating transformation for - - BigQuery Struct column, the column should be flattened using "." as the - - delimiter.' - parameterType: STRING - transformations_path: - defaultValue: '' - description: 'Path to a GCS file containing JSON - - string for transformations.' - isOptional: true - parameterType: STRING - validation_fraction: - defaultValue: -1.0 - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column_name: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - dataset_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The schema of the dataset. - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - eval_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The eval split. - instance_baseline: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The instance baseline used to calculate explanations. - metadata: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The tabular example gen metadata. - test_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The test split. - train_split: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The train split. - parameters: - downsampled_test_split_json: - description: The downsampled test split JSON object. 
- parameterType: LIST - gcp_resources: - description: 'GCP resources created by this component. For more details, - see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - test_split_json: - description: The test split JSON object. - parameterType: LIST - comp-write-bp-result-path: - executorLabel: exec-write-bp-result-path - inputDefinitions: - artifacts: - bp_job: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The batch prediction job artifact. - outputDefinitions: - artifacts: - result: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - comp-write-bp-result-path-2: - executorLabel: exec-write-bp-result-path-2 - inputDefinitions: - artifacts: - bp_job: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The batch prediction job artifact. - outputDefinitions: - artifacts: - result: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 -deploymentSpec: - executors: - exec-automl-tabular-cv-trainer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - 
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-cv-trainer-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", - "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", - "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", - "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", - "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", - \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": 
\"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-ensemble-3: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", - \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", - "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", - "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", - 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", - "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-finalizer: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-infra-validator: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-infra-validator-2: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-infra-validator-3: - container: - args: - - --executor_input - - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 - resources: - cpuLimit: 8.0 - memoryLimit: 52.0 - exec-automl-tabular-stage-1-tuner: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", - "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", - "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", - "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", - "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", - "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", - \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-stage-1-tuner-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", - "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", - "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", - "\", \"--tune_feature_selection_rate=", 
"{{$.inputs.parameters[''tune_feature_selection_rate'']}}", - "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", - "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", - \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", - "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", - "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", - "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", - "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", - "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", - "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", - "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", - "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", - "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", - "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", - "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", - \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-transform: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", - "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", - \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", - "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", - "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", - "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", - 
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", - "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-automl-tabular-transform-2: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", - "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", - \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", - \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", - "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", - "{{$.inputs.artifacts[''train_split''].uri}}", "\", 
\"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", - "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", - \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", - "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", - "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", - "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", - "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-bool-identity: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-bool-identity-3: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _bool_identity - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ - \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ - \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-calculate-training-parameters: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-calculate-training-parameters-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _calculate_training_parameters - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ - \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ - \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ - \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ - \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ - \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ - \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ - \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ - \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ - \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ - \ The train budget of creating this model,\n expressed in milli node\ - \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ - \ Number of parallel trails for stage 2.\n run_distillation: Whether\ - \ to run distill in the training pipeline.\n is_skip_architecture_search:\ - \ If component is being called in the\n skip_architecture_search pipeline.\n\ - \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ - \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ - \ stage_1_num_selected_trials: Number of selected trails for stage\ - \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ - \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ - \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ - \ Maximum number seconds to for a single stage\n 2\n training\ - \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ - \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ - \ The reduce search space mode. Possible values:\n minimal, regular,\ - \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ - \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ - \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ - \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ - \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ - \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ - \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ - \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ - \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ - \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ - \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ - \ # All of magic number \"1.3\" above is because the trial doesn't\n\ - \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ - \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ - \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ - \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ - \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ - \ case. Phase 2\n # can't finish in time after the deadline is cut,\ - \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ - \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ - \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ - \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ - \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ - \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ - \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ - \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ - \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ - \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ - \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ - \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ - \ of magic number \"1.3\" above is because the trial doesn't always\n \ - \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ - \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ - \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ - \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ - \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ - \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ - \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ - \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ - \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ - \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ - \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ - \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ - \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ - \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ - \ reduce_search_space_mode,\n )\n\n" - image: python:3.7 - exec-feature-attribution: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - 
/main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-2: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-feature-attribution-3: - container: - args: - - --task - - explanation - - --setup_file - - /setup.py - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --root_dir - - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", - "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' - - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", - {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", - ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' - - --dataflow_job_prefix - - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' - - --gcs_output_path - - '{{$.outputs.artifacts[''feature_attributions''].path}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 - exec-importer: - importer: - artifactUri: - runtimeParameter: uri - typeSchema: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - exec-merge-materialized-splits: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _merge_materialized_splits - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ - \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ - ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ - \ first materialized split.\n split_1: The second materialized split.\n\ - \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ - \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ - \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ - \ f.write(','.join([split_0_content, split_1_content]))\n\n" - image: python:3.7 - exec-model-batch-explanation: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", 
"{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - 
- '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-3: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", 
"{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", 
"{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-3: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", 
"\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-4: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", 
\"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-5: - container: - args: - - --type - - BatchPredictionJob - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": - \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, - " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", - "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", - "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", - "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" - ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", - \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, - {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": - ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": - ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": - {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", - "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", - "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", - "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": - \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": - \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": - ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": - ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": - ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": - {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", - "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", - ", 
\"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": - {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-evaluation: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-2: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - 
'{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - --kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-3: - container: - args: - - --setup_file - - /setup.py - - --json_mode - - 'true' - - --project_id - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --problem_type - - '{{$.inputs.parameters[''problem_type'']}}' - - --batch_prediction_format - - '{{$.inputs.parameters[''predictions_format'']}}' - - --batch_prediction_gcs_source - - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' - - --ground_truth_format - - '{{$.inputs.parameters[''ground_truth_format'']}}' - - --key_prefix_in_prediction_dataset - - instance - - --root_dir - - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - --classification_type - - multiclass - - --ground_truth_column - - instance.{{$.inputs.parameters['ground_truth_column']}} - - --prediction_score_column - - '{{$.inputs.parameters[''prediction_score_column'']}}' - - --prediction_label_column - - '{{$.inputs.parameters[''prediction_label_column'']}}' - - --prediction_id_column - - '' - - --example_weight_column - - '' - - --generate_feature_attribution - - 'false' - - --dataflow_job_prefix - - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} - - --dataflow_service_account - - '{{$.inputs.parameters[''dataflow_service_account'']}}' - - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' - - --dataflow_machine_type - - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - - --dataflow_workers_num - - '{{$.inputs.parameters[''dataflow_workers_num'']}}' - - --dataflow_max_workers_num - - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' - - --dataflow_subnetwork - - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' - - --dataflow_use_public_ips - - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - - 
--kms_key_name - - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --output_metrics_gcs_path - - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python - - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.4 - exec-model-evaluation-import: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-2: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": 
"regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - '{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-evaluation-import-3: - container: - args: - - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", - "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' - - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", - "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", - "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", - "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", - "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", - "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", - "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", - "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' - - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", - "{{$.inputs.parameters[''problem_type'']}}"]}}' - - --display_name - - '{{$.inputs.parameters[''display_name'']}}' - - --dataset_path - - '{{$.inputs.parameters[''dataset_path'']}}' - - --dataset_paths - - '{{$.inputs.parameters[''dataset_paths'']}}' - - --dataset_type - - 
'{{$.inputs.parameters[''dataset_type'']}}' - - --pipeline_job_id - - '{{$.pipeline_job_uuid}}' - - --pipeline_job_resource_name - - '{{$.pipeline_job_resource_name}}' - - --model_name - - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 - exec-model-upload: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-2: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-upload-3: - container: - args: - - --type - - UploadModel - - --payload - - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", - "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", - "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", - ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - 
--gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --executor_input - - '{{$}}' - command: - - python3 - - -u - - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-read-input-uri: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _read_input_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ - ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ - \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ - \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ - \ list of string that represents the batch prediction input files.\n \"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ - \ return data_source['tf_record_data_source']['file_patterns']\n\n" - image: python:3.7 - exec-read-input-uri-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _read_input_uri - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ - ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ - \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ - \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ - \ list of string that represents the batch prediction input files.\n \"\ - \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ - \ return data_source['tf_record_data_source']['file_patterns']\n\n" - image: python:3.7 - exec-set-optional-inputs: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _set_optional_inputs - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ - \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ - \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ - \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \ import collections\n from google.cloud import aiplatform\n from google.cloud\ - \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ - \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ - \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ - \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ - \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ - \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim - exec-string-not-empty: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _string_not_empty - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ - \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ - \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ - \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ - \ \"\"\"\n return 'true' if value else 'false'\n\n" - image: python:3.7 - exec-tabular-stats-and-example-gen: - container: - args: - - --type - - CustomJob - - --project - - '{{$.inputs.parameters[''project'']}}' - - --location - - '{{$.inputs.parameters[''location'']}}' - - --gcp_resources - - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - - --payload - - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", - \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": - \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": - \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": - \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": - \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": - ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": - ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": - ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", - \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", - "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", - "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", - "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", - "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", - "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", - "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", - "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", - "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", - "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", - "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", - \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", - "\", 
\"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", - "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", - "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", - \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", - \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", - "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", - "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", - \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", - \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", - \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", - \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", - "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", - "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", - "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", - "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", - "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", - "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", - "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", - \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", - \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", - "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", - "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", - "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", - "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", - "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", - \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", - "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", - \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' - command: - - python3 - - -u - - -m - - google_cloud_pipeline_components.container.v1.custom_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 - exec-write-bp-result-path: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _write_bp_result_path - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ - \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ - \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ - \ job artifact.\n result: Tbe path to the file that contains Dataset\ - \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ - \ 'tf_record_data_source': {\n 'file_patterns': [\n \ - \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ - \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ - \n" - image: python:3.7 - exec-write-bp-result-path-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - _write_bp_result_path - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ - \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ - \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ - \ job artifact.\n result: Tbe path to the file that contains Dataset\ - \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ - \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ - \ 'tf_record_data_source': {\n 'file_patterns': [\n \ - \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ - \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ - \n" - image: python:3.7 -pipelineInfo: - description: The AutoML Tabular pipeline v1. 
- name: automl-tabular -root: - dag: - outputs: - artifacts: - feature-attribution-2-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-3-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-3-feature_attributions - producerSubtask: exit-handler-1 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: exit-handler-1 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: exit-handler-1 - model-evaluation-3-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-3-evaluation_metrics - producerSubtask: exit-handler-1 - model-evaluation-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: exit-handler-1 - tasks: - automl-tabular-finalizer: - cachingOptions: - enableCache: true - componentRef: - name: comp-automl-tabular-finalizer - dependentTasks: - - exit-handler-1 - inputs: - parameters: - location: - componentInputParameter: location - project: - componentInputParameter: project - root_dir: - componentInputParameter: root_dir - taskInfo: - name: automl-tabular-finalizer - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - dependentTasks: - - set-optional-inputs - inputs: - parameters: - pipelinechannel--additional_experiments: - componentInputParameter: additional_experiments - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: disable_early_stopping - pipelinechannel--distill_batch_predict_machine_type: - componentInputParameter: distill_batch_predict_machine_type - pipelinechannel--distill_batch_predict_max_replica_count: - componentInputParameter: distill_batch_predict_max_replica_count - pipelinechannel--distill_batch_predict_starting_replica_count: - componentInputParameter: distill_batch_predict_starting_replica_count - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: - componentInputParameter: encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: 
evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: fast_testing - pipelinechannel--location: - componentInputParameter: location - pipelinechannel--model_description: - componentInputParameter: model_description - pipelinechannel--optimization_objective: - componentInputParameter: optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: optimization_objective_recall_value - pipelinechannel--predefined_split_key: - componentInputParameter: predefined_split_key - pipelinechannel--prediction_type: - componentInputParameter: prediction_type - pipelinechannel--project: - componentInputParameter: project - pipelinechannel--quantiles: - componentInputParameter: quantiles - pipelinechannel--root_dir: - componentInputParameter: root_dir - pipelinechannel--run_distillation: - componentInputParameter: run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: run_evaluation - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - taskOutputParameter: - outputParameterKey: data_source_bigquery_table_path - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - taskOutputParameter: - outputParameterKey: data_source_csv_filenames - producerTask: set-optional-inputs - pipelinechannel--set-optional-inputs-model_display_name: - taskOutputParameter: - outputParameterKey: model_display_name - producerTask: set-optional-inputs - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: stage_2_num_selected_trials - pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: - componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb - pipelinechannel--stats_and_example_gen_dataflow_machine_type: - componentInputParameter: stats_and_example_gen_dataflow_machine_type - pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: - componentInputParameter: stats_and_example_gen_dataflow_max_num_workers - pipelinechannel--stratified_split_key: - componentInputParameter: stratified_split_key - pipelinechannel--study_spec_parameters_override: - componentInputParameter: study_spec_parameters_override - pipelinechannel--target_column: - componentInputParameter: target_column - pipelinechannel--test_fraction: - componentInputParameter: 
test_fraction - pipelinechannel--timestamp_split_key: - componentInputParameter: timestamp_split_key - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: train_budget_milli_node_hours - pipelinechannel--training_fraction: - componentInputParameter: training_fraction - pipelinechannel--transform_dataflow_disk_size_gb: - componentInputParameter: transform_dataflow_disk_size_gb - pipelinechannel--transform_dataflow_machine_type: - componentInputParameter: transform_dataflow_machine_type - pipelinechannel--transform_dataflow_max_num_workers: - componentInputParameter: transform_dataflow_max_num_workers - pipelinechannel--transformations: - componentInputParameter: transformations - pipelinechannel--validation_fraction: - componentInputParameter: validation_fraction - pipelinechannel--weight_column: - componentInputParameter: weight_column - taskInfo: - name: exit-handler-1 - set-optional-inputs: - cachingOptions: - enableCache: true - componentRef: - name: comp-set-optional-inputs - inputs: - artifacts: - vertex_dataset: - componentInputArtifact: vertex_dataset - parameters: - data_source_bigquery_table_path: - componentInputParameter: data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: data_source_csv_filenames - location: - componentInputParameter: location - model_display_name: - componentInputParameter: model_display_name - project: - componentInputParameter: project - taskInfo: - name: set-optional-inputs - inputDefinitions: - artifacts: - vertex_dataset: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The Vertex dataset artifact, - parameters: - additional_experiments: - description: Use this field to config private preview features. - isOptional: true - parameterType: STRUCT - cv_trainer_worker_pool_specs_override: - description: 'The dictionary for overriding stage - - cv trainer worker pool spec.' - isOptional: true - parameterType: LIST - data_source_bigquery_table_path: - defaultValue: '' - description: 'The BigQuery table path of format - - bq://bq_project.bq_dataset.bq_table' - isOptional: true - parameterType: STRING - data_source_csv_filenames: - defaultValue: '' - description: 'A string that represents a list of comma - - separated CSV filenames.' - isOptional: true - parameterType: STRING - dataflow_service_account: - defaultValue: '' - description: Custom service account to run dataflow jobs. - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork name, when empty - - the default subnetwork will be used. Example: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow workers use public IP - - addresses.' - isOptional: true - parameterType: BOOLEAN - disable_early_stopping: - defaultValue: false - description: If disable easly stopping. - isOptional: true - parameterType: BOOLEAN - distill_batch_predict_machine_type: - defaultValue: n1-standard-16 - description: 'The prediction server machine type for - - batch predict component in the model distillation.' - isOptional: true - parameterType: STRING - distill_batch_predict_max_replica_count: - defaultValue: 25.0 - description: 'The max number of prediction server - - for batch predict component in the model distillation.' 
- isOptional: true - parameterType: NUMBER_INTEGER - distill_batch_predict_starting_replica_count: - defaultValue: 25.0 - description: 'The initial number of - - prediction server for batch predict component in the model distillation.' - isOptional: true - parameterType: NUMBER_INTEGER - enable_probabilistic_inference: - defaultValue: false - description: 'If probabilistic inference is enabled, the - - model will fit a distribution that captures the uncertainty of a - - prediction. At inference time, the predictive distribution is used to make - - a point prediction that minimizes the optimization objective. For example, - - the mean of a predictive distribution is the point prediction that - - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - - distribution are also returned.' - isOptional: true - parameterType: BOOLEAN - encryption_spec_key_name: - defaultValue: '' - description: The KMS key name. - isOptional: true - parameterType: STRING - evaluation_batch_explain_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch explain components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_explain_max_replica_count: - defaultValue: 10.0 - description: 'The max number of prediction - - server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_explain_starting_replica_count: - defaultValue: 10.0 - description: 'The initial number of - - prediction server for batch explain components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_machine_type: - defaultValue: n1-highmem-8 - description: 'The prediction server machine type - - for batch predict components during evaluation.' - isOptional: true - parameterType: STRING - evaluation_batch_predict_max_replica_count: - defaultValue: 20.0 - description: 'The max number of prediction - - server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_batch_predict_starting_replica_count: - defaultValue: 20.0 - description: 'The initial number of - - prediction server for batch predict components during evaluation.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_disk_size_gb: - defaultValue: 50.0 - description: 'Dataflow worker''s disk size in GB for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The dataflow machine type for evaluation - - components.' - isOptional: true - parameterType: STRING - evaluation_dataflow_max_num_workers: - defaultValue: 100.0 - description: 'The max number of Dataflow workers for - - evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - evaluation_dataflow_starting_num_workers: - defaultValue: 10.0 - description: 'The initial number of Dataflow - - workers for evaluation components.' - isOptional: true - parameterType: NUMBER_INTEGER - export_additional_model_without_custom_ops: - defaultValue: false - description: 'Whether to export additional - - model without custom TensorFlow operators.' - isOptional: true - parameterType: BOOLEAN - fast_testing: - defaultValue: false - description: Internal flag used for presubmit tests. - isOptional: true - parameterType: BOOLEAN - location: - description: The GCP region that runs the pipeline components. 
- parameterType: STRING - model_description: - defaultValue: '' - description: The description name of the uploaded Vertex model, - isOptional: true - parameterType: STRING - model_display_name: - defaultValue: '' - description: The display name of the uploaded Vertex model, - isOptional: true - parameterType: STRING - optimization_objective: - description: 'For binary classification, "maximize-au-roc", - - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", - or - - "maximize-recall-at-precision". For multi class classification, - - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - - "minimize-rmsle".' - parameterType: STRING - optimization_objective_precision_value: - defaultValue: -1.0 - description: 'Required when optimization_objective - - is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - optimization_objective_recall_value: - defaultValue: -1.0 - description: 'Required when optimization_objective is - - ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' - isOptional: true - parameterType: NUMBER_DOUBLE - predefined_split_key: - defaultValue: '' - description: The predefined_split column name. - isOptional: true - parameterType: STRING - prediction_type: - description: 'The type of prediction the model is to produce. - - "classification" or "regression".' - parameterType: STRING - project: - description: The GCP project that runs the pipeline components. - parameterType: STRING - quantiles: - description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles - - are allowed of values between 0 and 1, exclusive. Represents the quantiles - - to use for that objective. Quantiles must be unique.' - isOptional: true - parameterType: LIST - root_dir: - description: The root GCS directory for the pipeline components. - parameterType: STRING - run_distillation: - defaultValue: false - description: 'Whether the distillation should be applied to the - - training.' - isOptional: true - parameterType: BOOLEAN - run_evaluation: - defaultValue: false - description: Whether to run evaluation steps during training. - isOptional: true - parameterType: BOOLEAN - stage_1_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 1. - isOptional: true - parameterType: NUMBER_INTEGER - stage_1_tuner_worker_pool_specs_override: - description: 'The dictionary for overriding - - stage 1 tuner worker pool spec.' - isOptional: true - parameterType: LIST - stage_1_tuning_result_artifact_uri: - defaultValue: '' - description: 'The stage 1 tuning result artifact GCS - - URI.' - isOptional: true - parameterType: STRING - stage_2_num_parallel_trials: - defaultValue: 35.0 - description: Number of parallel trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stage_2_num_selected_trials: - defaultValue: 5.0 - description: Number of selected trails for stage 2. - isOptional: true - parameterType: NUMBER_INTEGER - stats_and_example_gen_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in - - GB for stats_and_example_gen component.' - isOptional: true - parameterType: NUMBER_INTEGER - stats_and_example_gen_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for - - stats_and_example_gen component.' 
- isOptional: true - parameterType: STRING - stats_and_example_gen_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow - - workers for stats_and_example_gen component.' - isOptional: true - parameterType: NUMBER_INTEGER - stratified_split_key: - defaultValue: '' - description: The stratified_split column name. - isOptional: true - parameterType: STRING - study_spec_parameters_override: - description: The list for overriding study spec. - isOptional: true - parameterType: LIST - target_column: - description: The target column name. - parameterType: STRING - test_fraction: - defaultValue: -1.0 - description: float = The test fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - timestamp_split_key: - defaultValue: '' - description: The timestamp_split column name. - isOptional: true - parameterType: STRING - train_budget_milli_node_hours: - description: 'The train budget of creating this model, - - expressed in milli node hours i.e. 1,000 value in this field means 1 node - - hour.' - parameterType: NUMBER_DOUBLE - training_fraction: - defaultValue: -1.0 - description: The training fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - transform_dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'Dataflow worker''s disk size in GB for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transform_dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The dataflow machine type for transform - - component.' - isOptional: true - parameterType: STRING - transform_dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The max number of Dataflow workers for - - transform component.' - isOptional: true - parameterType: NUMBER_INTEGER - transformations: - description: 'The path to a GCS file containing the transformations to - - apply.' - parameterType: STRING - validation_fraction: - defaultValue: -1.0 - description: The validation fraction. - isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: - defaultValue: '' - description: The weight column name. - isOptional: true - parameterType: STRING - outputDefinitions: - artifacts: - feature-attribution-2-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-3-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-3-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py deleted file mode 100644 index 716d6f1ba4..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Tabular Cross Validation Trainer component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def automl_tabular_cv_trainer( - project: str, - location: str, - root_dir: str, - deadline_hours: float, - num_parallel_trials: int, - single_run_max_secs: int, - num_selected_trials: int, - transform_output: Input[Artifact], - metadata: Input[Artifact], - materialized_cv_splits: Input[Artifact], - tuning_result_input: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - tuning_result_output: Output[Artifact], - execution_metrics: dsl.OutputPath(dict), - worker_pool_specs_override_json: Optional[list] = [], - num_selected_features: Optional[int] = 0, - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Tunes AutoML Tabular models and selects top trials using cross-validation. - - Args: - project: Project to run Cross-validation trainer. - location: Location for running the Cross-validation trainer. - root_dir: The Cloud Storage location to store the output. - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - deadline_hours: Number of hours the cross-validation trainer should run. - num_parallel_trials: Number of parallel training trials. - single_run_max_secs: Max number of seconds each training trial runs. - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - num_selected_features: Number of selected features. The number of - features to learn in the NN models. - transform_output: The transform output artifact. - metadata: The tabular example gen metadata. - materialized_cv_splits: The materialized cross-validation splits. - tuning_result_input: AutoML Tabular tuning result. - encryption_spec_key_name: Customer-managed encryption key. - - Returns: - tuning_result_output: The trained model and architectures. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - execution_metrics: Core metrics in dictionary of component execution. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-cv-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["l2l_cv_tuner", "--transform_output_path=', - transform_output.uri, - '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - ( - f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "--training_base_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--num_parallel_trial=' - ), - num_parallel_trials, - '", "--single_run_max_secs=', - single_run_max_secs, - '", "--deadline_hours=', - deadline_hours, - ( - '", "--valid_trials_completed_threshold=0.7",' - ' "--num_selected_trials=' - ), - num_selected_trials, - '", "--num_selected_features=', - num_selected_features, - '", "--lro_job_info=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' - ' "--error_file_path=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--metadata_path=' - ), - metadata.uri, - '", "--materialized_cv_splits=', - materialized_cv_splits.uri, - '", "--tuning_result_input_path=', - tuning_result_input.uri, - '", "--tuning_result_output_path=', - tuning_result_output.uri, - '", "--kms_key_name=', - encryption_spec_key_name, - '", "--gcp_resources_path=', - gcp_resources, - '", "--execution_metrics_path=', - execution_metrics, - ( - '", "--use_custom_job=true", "--use_json=true",' - ' "--log_level=ERROR",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json deleted file mode 100644 index 5133d9cf2e..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json +++ /dev/null @@ -1,7974 +0,0 @@ -{ - "pipelineSpec": { - "components": { - "comp-automl-tabular-cv-trainer": { - "executorLabel": "exec-automl-tabular-cv-trainer", - "inputDefinitions": { - "artifacts": { - "materialized_cv_splits": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "tuning_result_input": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "deadline_hours": { - "type": "DOUBLE" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - 
"location": { - "type": "STRING" - }, - "num_parallel_trials": { - "type": "INT" - }, - "num_selected_trials": { - "type": "INT" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - }, - "single_run_max_secs": { - "type": "INT" - }, - "worker_pool_specs_override": { - "type": "STRING" - }, - "worker_pool_specs_override_json": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "tuning_result_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-ensemble": { - "executorLabel": "exec-automl-tabular-ensemble", - "inputDefinitions": { - "artifacts": { - "dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "instance_baseline": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "tuning_result_input": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "warmup_data": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "encryption_spec_key_name": { - "type": "STRING" - }, - "export_additional_model_without_custom_ops": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model_architecture": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model_without_custom_ops": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-ensemble-2": { - "executorLabel": "exec-automl-tabular-ensemble-2", - "inputDefinitions": { - "artifacts": { - "dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "instance_baseline": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "tuning_result_input": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "warmup_data": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "encryption_spec_key_name": { - "type": "STRING" - }, - "export_additional_model_without_custom_ops": { - "type": "STRING" - }, - "location": { - 
"type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model_architecture": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "model_without_custom_ops": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-finalizer": { - "executorLabel": "exec-automl-tabular-finalizer", - "inputDefinitions": { - "parameters": { - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-infra-validator": { - "executorLabel": "exec-automl-tabular-infra-validator", - "inputDefinitions": { - "artifacts": { - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-automl-tabular-infra-validator-2": { - "executorLabel": "exec-automl-tabular-infra-validator-2", - "inputDefinitions": { - "artifacts": { - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-automl-tabular-stage-1-tuner": { - "executorLabel": "exec-automl-tabular-stage-1-tuner", - "inputDefinitions": { - "artifacts": { - "materialized_eval_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_train_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "deadline_hours": { - "type": "DOUBLE" - }, - "disable_early_stopping": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "num_parallel_trials": { - "type": "INT" - }, - "num_selected_trials": { - "type": "INT" - }, - "project": { - "type": "STRING" - }, - "reduce_search_space_mode": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - }, - "run_distillation": { - "type": "STRING" - }, - "single_run_max_secs": { - "type": "INT" - }, - "study_spec_override": { - "type": "STRING" - }, - "study_spec_parameters_override": { - "type": "STRING" - }, - "study_spec_parameters_override_json": { - "type": "STRING" - }, - "tune_feature_selection_rate": { - "type": "STRING" - }, - "worker_pool_specs_override": { - "type": "STRING" - }, - "worker_pool_specs_override_json": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "tuning_result_output": { - 
"artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-stage-1-tuner-2": { - "executorLabel": "exec-automl-tabular-stage-1-tuner-2", - "inputDefinitions": { - "artifacts": { - "materialized_eval_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_train_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "deadline_hours": { - "type": "DOUBLE" - }, - "disable_early_stopping": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "num_parallel_trials": { - "type": "INT" - }, - "num_selected_trials": { - "type": "INT" - }, - "project": { - "type": "STRING" - }, - "reduce_search_space_mode": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - }, - "run_distillation": { - "type": "STRING" - }, - "single_run_max_secs": { - "type": "INT" - }, - "study_spec_override": { - "type": "STRING" - }, - "study_spec_parameters_override": { - "type": "STRING" - }, - "study_spec_parameters_override_json": { - "type": "STRING" - }, - "tune_feature_selection_rate": { - "type": "STRING" - }, - "worker_pool_specs_override": { - "type": "STRING" - }, - "worker_pool_specs_override_json": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "tuning_result_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-transform": { - "executorLabel": "exec-automl-tabular-transform", - "inputDefinitions": { - "artifacts": { - "dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "eval_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "test_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "train_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "dataflow_disk_size_gb": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_num_workers": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "materialized_eval_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_test_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_train_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - 
"training_schema_uri": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-automl-tabular-transform-2": { - "executorLabel": "exec-automl-tabular-transform-2", - "inputDefinitions": { - "artifacts": { - "dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "eval_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "test_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "train_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "dataflow_disk_size_gb": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_num_workers": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "materialized_eval_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_test_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "materialized_train_split": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "training_schema_uri": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "transform_output": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-bool-identity": { - "executorLabel": "exec-bool-identity", - "inputDefinitions": { - "parameters": { - "value": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "parameters": { - "Output": { - "type": "STRING" - } - } - } - }, - "comp-bool-identity-2": { - "executorLabel": "exec-bool-identity-2", - "inputDefinitions": { - "parameters": { - "value": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "parameters": { - "Output": { - "type": "STRING" - } - } - } - }, - "comp-condition-is-distill-4": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-3-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-3-evaluation_metrics", - "producerSubtask": "condition-is-evaluation-5" - } - ] - }, - "model-evaluation-4-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-4-evaluation_metrics", - "producerSubtask": "condition-is-evaluation-5" - } - ] - } - } - }, - "tasks": { - "automl-tabular-ensemble-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-ensemble-2" - }, - "dependentTasks": [ - "automl-tabular-stage-1-tuner-2", - "automl-tabular-transform-2" - ], - "inputs": { - "artifacts": { - "dataset_schema": { - 
"componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" - }, - "instance_baseline": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-instance_baseline" - }, - "metadata": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" - }, - "transform_output": { - "taskOutputArtifact": { - "outputArtifactKey": "transform_output", - "producerTask": "automl-tabular-transform-2" - } - }, - "tuning_result_input": { - "taskOutputArtifact": { - "outputArtifactKey": "tuning_result_output", - "producerTask": "automl-tabular-stage-1-tuner-2" - } - }, - "warmup_data": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" - } - }, - "parameters": { - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "export_additional_model_without_custom_ops": { - "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "automl-tabular-ensemble-2" - } - }, - "automl-tabular-infra-validator-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-infra-validator-2" - }, - "dependentTasks": [ - "automl-tabular-ensemble-2" - ], - "inputs": { - "artifacts": { - "unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble-2" - } - } - } - }, - "taskInfo": { - "name": "automl-tabular-infra-validator-2" - } - }, - "automl-tabular-stage-1-tuner-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-stage-1-tuner-2" - }, - "dependentTasks": [ - "automl-tabular-transform-2" - ], - "inputs": { - "artifacts": { - "materialized_eval_split": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_eval_split", - "producerTask": "automl-tabular-transform-2" - } - }, - "materialized_train_split": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_train_split", - "producerTask": "automl-tabular-transform-2" - } - }, - "metadata": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" - }, - "transform_output": { - "taskOutputArtifact": { - "outputArtifactKey": "transform_output", - "producerTask": "automl-tabular-transform-2" - } - } - }, - "parameters": { - "deadline_hours": { - "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" - }, - "disable_early_stopping": { - "componentInputParameter": "pipelineparam--disable_early_stopping" - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "num_parallel_trials": { - "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" - }, - "num_selected_trials": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "reduce_search_space_mode": { - "componentInputParameter": "pipelineparam--reduce_search_space_mode" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "run_distillation": { - 
"runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "single_run_max_secs": { - "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" - }, - "study_spec_override": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "study_spec_parameters_override": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - }, - "study_spec_parameters_override_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "tune_feature_selection_rate": { - "runtimeValue": { - "constantValue": { - "stringValue": "false" - } - } - }, - "worker_pool_specs_override": { - "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" - }, - "worker_pool_specs_override_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - } - } - }, - "taskInfo": { - "name": "automl-tabular-stage-1-tuner-2" - } - }, - "automl-tabular-transform-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-transform-2" - }, - "dependentTasks": [ - "write-bp-result-path", - "write-bp-result-path-2" - ], - "inputs": { - "artifacts": { - "dataset_schema": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" - }, - "eval_split": { - "taskOutputArtifact": { - "outputArtifactKey": "result", - "producerTask": "write-bp-result-path-2" - } - }, - "metadata": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" - }, - "test_split": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-test_split" - }, - "train_split": { - "taskOutputArtifact": { - "outputArtifactKey": "result", - "producerTask": "write-bp-result-path" - } - } - }, - "parameters": { - "dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" - }, - "dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_subnetwork": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_use_public_ips": { - "runtimeValue": { - "constantValue": { - "stringValue": "true" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "automl-tabular-transform-2" - } - }, - "condition-is-evaluation-5": { - "componentRef": { - "name": "comp-condition-is-evaluation-5" - }, - "dependentTasks": [ - "automl-tabular-ensemble-2", - "model-upload-3" - ], - "inputs": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { - "taskOutputArtifact": { - "outputArtifactKey": "explanation_metadata_artifact", - "producerTask": "automl-tabular-ensemble-2" - } - }, - "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble-2" - } - }, - "pipelineparam--model-upload-3-model": { - 
"taskOutputArtifact": { - "outputArtifactKey": "model", - "producerTask": "model-upload-3" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { - "taskOutputParameter": { - "outputParameterKey": "explanation_parameters", - "producerTask": "automl-tabular-ensemble-2" - } - }, - "pipelineparam--bool-identity-2-Output": { - "componentInputParameter": "pipelineparam--bool-identity-2-Output" - }, - "pipelineparam--bool-identity-Output": { - "componentInputParameter": "pipelineparam--bool-identity-Output" - }, - "pipelineparam--dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "pipelineparam--dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "pipelineparam--dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "pipelineparam--encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "pipelineparam--location": { - "componentInputParameter": "pipelineparam--location" - }, - "pipelineparam--prediction_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "pipelineparam--project": { - "componentInputParameter": "pipelineparam--project" - }, - "pipelineparam--root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" - }, - "pipelineparam--target_column_name": { - "componentInputParameter": "pipelineparam--target_column_name" - } - } - }, - "taskInfo": { - "name": "condition-is-evaluation-5" - }, - "triggerPolicy": { - "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" - } - }, - "model-batch-predict-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-predict-2" - }, - "dependentTasks": [ - "model-upload-2", - "read-input-uri" - ], - "inputs": { - "artifacts": { - "model": { - "taskOutputArtifact": { - "outputArtifactKey": "model", - "producerTask": "model-upload-2" - } - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - 
"bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "read-input-uri" - } - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "instances_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-predict-train-split" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-predict-2" - } - }, - "model-batch-predict-3": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-predict-3" - }, - "dependentTasks": [ - "model-upload-2", - "read-input-uri-2" - ], - "inputs": { - "artifacts": { - "model": { - "taskOutputArtifact": { - "outputArtifactKey": "model", - "producerTask": "model-upload-2" - } - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "read-input-uri-2" - } - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "instances_format": { - 
"runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-predict-eval-split" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-predict-3" - } - }, - "model-upload-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-upload-2" - }, - "dependentTasks": [ - "set-model-can-skip-validation" - ], - "inputs": { - "artifacts": { - "explanation_metadata_artifact": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" - }, - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - } - }, - "parameters": { - "description": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - } - } - }, - "taskInfo": { - "name": "model-upload-2" - } - }, - "model-upload-3": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-upload-3" - }, - "dependentTasks": [ - "automl-tabular-ensemble-2", - "automl-tabular-infra-validator-2" - ], - "inputs": { - "artifacts": { - "explanation_metadata_artifact": { - "taskOutputArtifact": { - "outputArtifactKey": "explanation_metadata_artifact", - "producerTask": "automl-tabular-ensemble-2" - } - }, - "unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble-2" - } - } - }, - "parameters": { - "description": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": 
"pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "taskOutputParameter": { - "outputParameterKey": "explanation_parameters", - "producerTask": "automl-tabular-ensemble-2" - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - } - } - }, - "taskInfo": { - "name": "model-upload-3" - } - }, - "read-input-uri": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-read-input-uri" - }, - "inputs": { - "artifacts": { - "split_uri": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-train_split" - } - } - }, - "taskInfo": { - "name": "read-input-uri" - } - }, - "read-input-uri-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-read-input-uri-2" - }, - "inputs": { - "artifacts": { - "split_uri": { - "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" - } - } - }, - "taskInfo": { - "name": "read-input-uri-2" - } - }, - "set-model-can-skip-validation": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-set-model-can-skip-validation" - }, - "inputs": { - "artifacts": { - "model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - } - } - }, - "taskInfo": { - "name": "set-model-can-skip-validation" - } - }, - "write-bp-result-path": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-write-bp-result-path" - }, - "dependentTasks": [ - "model-batch-predict-2" - ], - "inputs": { - "artifacts": { - "bp_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - "producerTask": "model-batch-predict-2" - } - } - } - }, - "taskInfo": { - "name": "write-bp-result-path" - } - }, - "write-bp-result-path-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-write-bp-result-path-2" - }, - "dependentTasks": [ - "model-batch-predict-3" - ], - "inputs": { - "artifacts": { - "bp_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - "producerTask": "model-batch-predict-3" - } - } - } - }, - "taskInfo": { - "name": "write-bp-result-path-2" - } - } - } - }, - "inputDefinitions": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--tabular-stats-and-example-gen-eval_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--tabular-stats-and-example-gen-metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - 
"pipelineparam--tabular-stats-and-example-gen-test_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--tabular-stats-and-example-gen-train_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "type": "STRING" - }, - "pipelineparam--bool-identity-2-Output": { - "type": "STRING" - }, - "pipelineparam--bool-identity-Output": { - "type": "STRING" - }, - "pipelineparam--dataflow_service_account": { - "type": "STRING" - }, - "pipelineparam--dataflow_subnetwork": { - "type": "STRING" - }, - "pipelineparam--dataflow_use_public_ips": { - "type": "STRING" - }, - "pipelineparam--disable_early_stopping": { - "type": "STRING" - }, - "pipelineparam--distill_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--distill_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--distill_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--distill_stage_1_deadline_hours": { - "type": "DOUBLE" - }, - "pipelineparam--encryption_spec_key_name": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--export_additional_model_without_custom_ops": { - "type": "STRING" - }, - "pipelineparam--location": { - "type": "STRING" - }, - "pipelineparam--prediction_type": { - "type": "STRING" - }, - "pipelineparam--project": { - "type": "STRING" - }, - "pipelineparam--reduce_search_space_mode": { - "type": "STRING" - }, - "pipelineparam--root_dir": { - "type": "STRING" - }, - "pipelineparam--stage_1_num_parallel_trials": { - "type": "INT" - }, - "pipelineparam--stage_1_single_run_max_secs": { - "type": "INT" - }, - "pipelineparam--stage_1_tuner_worker_pool_specs_override": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "type": "STRING" - }, - "pipelineparam--target_column_name": { - "type": "STRING" - }, - "pipelineparam--transform_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--transform_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--transform_dataflow_max_num_workers": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-3-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-4-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-condition-is-evaluation-3": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "evaluation_metrics", - "producerSubtask": "model-evaluation-2" - } - ] - }, - "model-evaluation-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "evaluation_metrics", - 
"producerSubtask": "model-evaluation" - } - ] - } - } - }, - "tasks": { - "model-batch-explanation": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-explanation" - }, - "inputs": { - "artifacts": { - "explanation_metadata_artifact": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" - }, - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "instances_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-explanation" - } - }, - "model-batch-predict": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-predict" - }, - "inputs": { - "artifacts": { - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - 
"bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "instances_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-predict" - } - }, - "model-evaluation": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation" - }, - "dependentTasks": [ - "model-batch-predict" - ], - "inputs": { - "artifacts": { - "batch_prediction_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - "producerTask": "model-batch-predict" - } - } - }, - "parameters": { - "class_names": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "classification_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_disk_size": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "dataflow_max_workers_num": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "dataflow_workers_num": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "example_weight_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, 
- "generate_feature_attribution": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "ground_truth_column": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "ground_truth_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "ground_truth_gcs_source": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "key_columns": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "positive_classes": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "prediction_id_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_label_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_score_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "model-evaluation" - } - }, - "model-evaluation-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation-2" - }, - "dependentTasks": [ - "model-batch-explanation" - ], - "inputs": { - "artifacts": { - "batch_prediction_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - "producerTask": "model-batch-explanation" - } - } - }, - "parameters": { - "class_names": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "classification_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_disk_size": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "dataflow_max_workers_num": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "dataflow_workers_num": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "example_weight_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "generate_feature_attribution": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "ground_truth_column": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "ground_truth_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "ground_truth_gcs_source": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "key_columns": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - 
"positive_classes": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "prediction_id_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_label_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_score_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "model-evaluation-2" - } - }, - "model-evaluation-import": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation-import" - }, - "dependentTasks": [ - "model-evaluation", - "model-evaluation-2" - ], - "inputs": { - "artifacts": { - "explanation": { - "taskOutputArtifact": { - "outputArtifactKey": "evaluation_metrics", - "producerTask": "model-evaluation-2" - } - }, - "metrics": { - "taskOutputArtifact": { - "outputArtifactKey": "evaluation_metrics", - "producerTask": "model-evaluation" - } - }, - "model": { - "componentInputArtifact": "pipelineparam--model-upload-model" - } - }, - "parameters": { - "dataset_path": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataset_paths": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - }, - "dataset_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - } - } - }, - "taskInfo": { - "name": "model-evaluation-import" - } - } - } - }, - "inputDefinitions": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--model-upload-model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "type": "STRING" - }, - "pipelineparam--bool-identity-2-Output": { - "type": "STRING" - }, - "pipelineparam--bool-identity-Output": { - "type": "STRING" - }, - "pipelineparam--dataflow_service_account": { - "type": "STRING" - }, - "pipelineparam--dataflow_subnetwork": { - "type": "STRING" - }, - "pipelineparam--dataflow_use_public_ips": { - "type": "STRING" - }, - "pipelineparam--encryption_spec_key_name": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - 
"pipelineparam--location": { - "type": "STRING" - }, - "pipelineparam--prediction_type": { - "type": "STRING" - }, - "pipelineparam--project": { - "type": "STRING" - }, - "pipelineparam--root_dir": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "type": "STRING" - }, - "pipelineparam--target_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-condition-is-evaluation-5": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-3-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "evaluation_metrics", - "producerSubtask": "model-evaluation-3" - } - ] - }, - "model-evaluation-4-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "evaluation_metrics", - "producerSubtask": "model-evaluation-4" - } - ] - } - } - }, - "tasks": { - "model-batch-explanation-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-explanation-2" - }, - "inputs": { - "artifacts": { - "explanation_metadata_artifact": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact" - }, - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "componentInputParameter": "pipelineparam--automl-tabular-ensemble-2-explanation_parameters" - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "instances_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - 
"componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-explanation-2" - } - }, - "model-batch-predict-4": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-batch-predict-4" - }, - "inputs": { - "artifacts": { - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" - } - }, - "parameters": { - "accelerator_count": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "accelerator_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_destination_output_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "bigquery_source_input_uri": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "gcs_destination_output_uri_prefix": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "gcs_source_uris": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" - }, - "generate_explanation": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "instances_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "tf-record" - } - } - }, - "job_display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "manual_batch_tuning_parameters_batch_size": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "model_parameters": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - } - } - }, - "taskInfo": { - "name": "model-batch-predict-4" - } - }, - "model-evaluation-3": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation-3" - }, - "dependentTasks": [ - "model-batch-predict-4" - ], - "inputs": { - "artifacts": { - "batch_prediction_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - 
"producerTask": "model-batch-predict-4" - } - } - }, - "parameters": { - "class_names": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "classification_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_disk_size": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "dataflow_max_workers_num": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "dataflow_workers_num": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "example_weight_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "generate_feature_attribution": { - "runtimeValue": { - "constantValue": { - "intValue": "0" - } - } - }, - "ground_truth_column": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "ground_truth_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "ground_truth_gcs_source": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "key_columns": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "positive_classes": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "prediction_id_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_label_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_score_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "model-evaluation-3" - } - }, - "model-evaluation-4": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation-4" - }, - "dependentTasks": [ - "model-batch-explanation-2" - ], - "inputs": { - "artifacts": { - "batch_prediction_job": { - "taskOutputArtifact": { - "outputArtifactKey": "batchpredictionjob", - "producerTask": "model-batch-explanation-2" - } - } - }, - "parameters": { - "class_names": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "classification_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_disk_size": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "dataflow_max_workers_num": { - "componentInputParameter": 
"pipelineparam--evaluation_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "dataflow_workers_num": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "example_weight_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "generate_feature_attribution": { - "runtimeValue": { - "constantValue": { - "intValue": "1" - } - } - }, - "ground_truth_column": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "ground_truth_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "ground_truth_gcs_source": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "key_columns": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "positive_classes": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "prediction_id_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_label_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_score_column": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "predictions_format": { - "runtimeValue": { - "constantValue": { - "stringValue": "jsonl" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "model-evaluation-4" - } - }, - "model-evaluation-import-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-evaluation-import-2" - }, - "dependentTasks": [ - "model-evaluation-3", - "model-evaluation-4" - ], - "inputs": { - "artifacts": { - "explanation": { - "taskOutputArtifact": { - "outputArtifactKey": "evaluation_metrics", - "producerTask": "model-evaluation-4" - } - }, - "metrics": { - "taskOutputArtifact": { - "outputArtifactKey": "evaluation_metrics", - "producerTask": "model-evaluation-3" - } - }, - "model": { - "componentInputArtifact": "pipelineparam--model-upload-3-model" - } - }, - "parameters": { - "dataset_path": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataset_paths": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - }, - "dataset_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "problem_type": { - "componentInputParameter": "pipelineparam--prediction_type" - } - } - }, - "taskInfo": { - "name": "model-evaluation-import-2" - } - } - } - }, - "inputDefinitions": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { - 
"artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--model-upload-3-model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { - "type": "STRING" - }, - "pipelineparam--bool-identity-2-Output": { - "type": "STRING" - }, - "pipelineparam--bool-identity-Output": { - "type": "STRING" - }, - "pipelineparam--dataflow_service_account": { - "type": "STRING" - }, - "pipelineparam--dataflow_subnetwork": { - "type": "STRING" - }, - "pipelineparam--dataflow_use_public_ips": { - "type": "STRING" - }, - "pipelineparam--encryption_spec_key_name": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--location": { - "type": "STRING" - }, - "pipelineparam--prediction_type": { - "type": "STRING" - }, - "pipelineparam--project": { - "type": "STRING" - }, - "pipelineparam--root_dir": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "type": "STRING" - }, - "pipelineparam--target_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-3-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-4-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-condition-no-distill-2": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-2-evaluation_metrics", - "producerSubtask": "condition-is-evaluation-3" - } - ] - }, - "model-evaluation-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-evaluation_metrics", - "producerSubtask": "condition-is-evaluation-3" - } - ] - } - } - }, - "tasks": { - "condition-is-evaluation-3": { - "componentRef": { - "name": "comp-condition-is-evaluation-3" - }, - "dependentTasks": [ - "model-upload" - ], - "inputs": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - }, - "pipelineparam--model-upload-model": { - "taskOutputArtifact": { - "outputArtifactKey": "model", - "producerTask": "model-upload" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" - }, - "pipelineparam--bool-identity-2-Output": { - "componentInputParameter": 
"pipelineparam--bool-identity-2-Output" - }, - "pipelineparam--bool-identity-Output": { - "componentInputParameter": "pipelineparam--bool-identity-Output" - }, - "pipelineparam--dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "pipelineparam--dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "pipelineparam--dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "pipelineparam--encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "pipelineparam--location": { - "componentInputParameter": "pipelineparam--location" - }, - "pipelineparam--prediction_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "pipelineparam--project": { - "componentInputParameter": "pipelineparam--project" - }, - "pipelineparam--root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" - }, - "pipelineparam--target_column_name": { - "componentInputParameter": "pipelineparam--target_column_name" - } - } - }, - "taskInfo": { - "name": "condition-is-evaluation-3" - }, - "triggerPolicy": { - "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" - } - }, - "model-upload": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-model-upload" - }, - "inputs": { - "artifacts": { - "explanation_metadata_artifact": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" - }, - "unmanaged_container_model": { - "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" - } - }, - "parameters": { - "description": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "display_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - } - } - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "explanation_metadata": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "explanation_parameters": { - 
"componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" - }, - "labels": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - } - } - }, - "taskInfo": { - "name": "model-upload" - } - } - } - }, - "inputDefinitions": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "type": "STRING" - }, - "pipelineparam--bool-identity-2-Output": { - "type": "STRING" - }, - "pipelineparam--bool-identity-Output": { - "type": "STRING" - }, - "pipelineparam--dataflow_service_account": { - "type": "STRING" - }, - "pipelineparam--dataflow_subnetwork": { - "type": "STRING" - }, - "pipelineparam--dataflow_use_public_ips": { - "type": "STRING" - }, - "pipelineparam--encryption_spec_key_name": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--location": { - "type": "STRING" - }, - "pipelineparam--prediction_type": { - "type": "STRING" - }, - "pipelineparam--project": { - "type": "STRING" - }, - "pipelineparam--root_dir": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "type": "STRING" - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "type": "STRING" - }, - "pipelineparam--target_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-exit-handler-1": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-2-evaluation_metrics", - "producerSubtask": "condition-no-distill-2" - } - ] - }, - "model-evaluation-3-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-3-evaluation_metrics", - "producerSubtask": "condition-is-distill-4" - } - ] - }, - "model-evaluation-4-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-4-evaluation_metrics", - "producerSubtask": "condition-is-distill-4" - } - ] - }, - "model-evaluation-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-evaluation_metrics", - "producerSubtask": "condition-no-distill-2" - } - ] - } - } - }, - "tasks": { - "automl-tabular-cv-trainer": { - 
"cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-cv-trainer" - }, - "dependentTasks": [ - "automl-tabular-stage-1-tuner", - "automl-tabular-transform", - "merge-materialized-splits", - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "materialized_cv_splits": { - "taskOutputArtifact": { - "outputArtifactKey": "splits", - "producerTask": "merge-materialized-splits" - } - }, - "metadata": { - "taskOutputArtifact": { - "outputArtifactKey": "metadata", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "transform_output": { - "taskOutputArtifact": { - "outputArtifactKey": "transform_output", - "producerTask": "automl-tabular-transform" - } - }, - "tuning_result_input": { - "taskOutputArtifact": { - "outputArtifactKey": "tuning_result_output", - "producerTask": "automl-tabular-stage-1-tuner" - } - } - }, - "parameters": { - "deadline_hours": { - "componentInputParameter": "pipelineparam--stage_2_deadline_hours" - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "num_parallel_trials": { - "componentInputParameter": "pipelineparam--stage_2_num_parallel_trials" - }, - "num_selected_trials": { - "componentInputParameter": "pipelineparam--stage_2_num_selected_trials" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "single_run_max_secs": { - "componentInputParameter": "pipelineparam--stage_2_single_run_max_secs" - }, - "worker_pool_specs_override": { - "componentInputParameter": "pipelineparam--cv_trainer_worker_pool_specs_override" - }, - "worker_pool_specs_override_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - } - } - }, - "taskInfo": { - "name": "automl-tabular-cv-trainer" - } - }, - "automl-tabular-ensemble": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-ensemble" - }, - "dependentTasks": [ - "automl-tabular-cv-trainer", - "automl-tabular-transform", - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "dataset_schema": { - "taskOutputArtifact": { - "outputArtifactKey": "dataset_schema", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "instance_baseline": { - "taskOutputArtifact": { - "outputArtifactKey": "instance_baseline", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "metadata": { - "taskOutputArtifact": { - "outputArtifactKey": "metadata", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "transform_output": { - "taskOutputArtifact": { - "outputArtifactKey": "transform_output", - "producerTask": "automl-tabular-transform" - } - }, - "tuning_result_input": { - "taskOutputArtifact": { - "outputArtifactKey": "tuning_result_output", - "producerTask": "automl-tabular-cv-trainer" - } - }, - "warmup_data": { - "taskOutputArtifact": { - "outputArtifactKey": "eval_split", - "producerTask": "tabular-stats-and-example-gen" - } - } - }, - "parameters": { - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "export_additional_model_without_custom_ops": { - "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": 
"pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "automl-tabular-ensemble" - } - }, - "automl-tabular-infra-validator": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-infra-validator" - }, - "dependentTasks": [ - "automl-tabular-ensemble" - ], - "inputs": { - "artifacts": { - "unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble" - } - } - } - }, - "taskInfo": { - "name": "automl-tabular-infra-validator" - } - }, - "automl-tabular-stage-1-tuner": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-stage-1-tuner" - }, - "dependentTasks": [ - "automl-tabular-transform", - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "materialized_eval_split": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_eval_split", - "producerTask": "automl-tabular-transform" - } - }, - "materialized_train_split": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_train_split", - "producerTask": "automl-tabular-transform" - } - }, - "metadata": { - "taskOutputArtifact": { - "outputArtifactKey": "metadata", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "transform_output": { - "taskOutputArtifact": { - "outputArtifactKey": "transform_output", - "producerTask": "automl-tabular-transform" - } - } - }, - "parameters": { - "deadline_hours": { - "componentInputParameter": "pipelineparam--stage_1_deadline_hours" - }, - "disable_early_stopping": { - "componentInputParameter": "pipelineparam--disable_early_stopping" - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "num_parallel_trials": { - "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" - }, - "num_selected_trials": { - "componentInputParameter": "pipelineparam--stage_1_num_selected_trials" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "reduce_search_space_mode": { - "componentInputParameter": "pipelineparam--reduce_search_space_mode" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "run_distillation": { - "runtimeValue": { - "constantValue": { - "stringValue": "false" - } - } - }, - "single_run_max_secs": { - "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" - }, - "study_spec_override": { - "componentInputParameter": "pipelineparam--study_spec_override" - }, - "study_spec_parameters_override": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - }, - "study_spec_parameters_override_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "tune_feature_selection_rate": { - "runtimeValue": { - "constantValue": { - "stringValue": "false" - } - } - }, - "worker_pool_specs_override": { - "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" - }, - "worker_pool_specs_override_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "[]" - } - } - } - } - }, - "taskInfo": { - "name": "automl-tabular-stage-1-tuner" - } - }, - "automl-tabular-transform": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-automl-tabular-transform" - }, - 
"dependentTasks": [ - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "dataset_schema": { - "taskOutputArtifact": { - "outputArtifactKey": "dataset_schema", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "eval_split": { - "taskOutputArtifact": { - "outputArtifactKey": "eval_split", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "metadata": { - "taskOutputArtifact": { - "outputArtifactKey": "metadata", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "test_split": { - "taskOutputArtifact": { - "outputArtifactKey": "test_split", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "train_split": { - "taskOutputArtifact": { - "outputArtifactKey": "train_split", - "producerTask": "tabular-stats-and-example-gen" - } - } - }, - "parameters": { - "dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" - }, - "dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - } - } - }, - "taskInfo": { - "name": "automl-tabular-transform" - } - }, - "bool-identity": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-bool-identity" - }, - "inputs": { - "parameters": { - "value": { - "componentInputParameter": "pipelineparam--run_evaluation" - } - } - }, - "taskInfo": { - "name": "bool-identity" - } - }, - "bool-identity-2": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-bool-identity-2" - }, - "inputs": { - "parameters": { - "value": { - "componentInputParameter": "pipelineparam--run_distillation" - } - } - }, - "taskInfo": { - "name": "bool-identity-2" - } - }, - "condition-is-distill-4": { - "componentRef": { - "name": "comp-condition-is-distill-4" - }, - "dependentTasks": [ - "automl-tabular-ensemble", - "automl-tabular-infra-validator", - "bool-identity", - "bool-identity-2", - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "taskOutputArtifact": { - "outputArtifactKey": "explanation_metadata_artifact", - "producerTask": "automl-tabular-ensemble" - } - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble" - } - }, - "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { - "taskOutputArtifact": { - "outputArtifactKey": "dataset_schema", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-eval_split": { - "taskOutputArtifact": { - "outputArtifactKey": "eval_split", - "producerTask": "tabular-stats-and-example-gen" - } - 
}, - "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { - "taskOutputArtifact": { - "outputArtifactKey": "instance_baseline", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-metadata": { - "taskOutputArtifact": { - "outputArtifactKey": "metadata", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-test_split": { - "taskOutputArtifact": { - "outputArtifactKey": "test_split", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-train_split": { - "taskOutputArtifact": { - "outputArtifactKey": "train_split", - "producerTask": "tabular-stats-and-example-gen" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "taskOutputParameter": { - "outputParameterKey": "explanation_parameters", - "producerTask": "automl-tabular-ensemble" - } - }, - "pipelineparam--bool-identity-2-Output": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "bool-identity-2" - } - }, - "pipelineparam--bool-identity-Output": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "bool-identity" - } - }, - "pipelineparam--dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "pipelineparam--dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "pipelineparam--dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "pipelineparam--disable_early_stopping": { - "componentInputParameter": "pipelineparam--disable_early_stopping" - }, - "pipelineparam--distill_batch_predict_machine_type": { - "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" - }, - "pipelineparam--distill_batch_predict_max_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" - }, - "pipelineparam--distill_batch_predict_starting_replica_count": { - "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" - }, - "pipelineparam--distill_stage_1_deadline_hours": { - "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" - }, - "pipelineparam--encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "pipelineparam--export_additional_model_without_custom_ops": { - "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" - }, - "pipelineparam--location": 
{ - "componentInputParameter": "pipelineparam--location" - }, - "pipelineparam--prediction_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "pipelineparam--project": { - "componentInputParameter": "pipelineparam--project" - }, - "pipelineparam--reduce_search_space_mode": { - "componentInputParameter": "pipelineparam--reduce_search_space_mode" - }, - "pipelineparam--root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "pipelineparam--stage_1_num_parallel_trials": { - "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" - }, - "pipelineparam--stage_1_single_run_max_secs": { - "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" - }, - "pipelineparam--stage_1_tuner_worker_pool_specs_override": { - "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "taskOutputParameter": { - "outputParameterKey": "downsampled_test_split_json", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "taskOutputParameter": { - "outputParameterKey": "test_split_json", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--target_column_name": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "pipelineparam--transform_dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" - }, - "pipelineparam--transform_dataflow_machine_type": { - "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" - }, - "pipelineparam--transform_dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" - } - } - }, - "taskInfo": { - "name": "condition-is-distill-4" - }, - "triggerPolicy": { - "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'true'" - } - }, - "condition-no-distill-2": { - "componentRef": { - "name": "comp-condition-no-distill-2" - }, - "dependentTasks": [ - "automl-tabular-ensemble", - "automl-tabular-infra-validator", - "bool-identity", - "bool-identity-2", - "tabular-stats-and-example-gen" - ], - "inputs": { - "artifacts": { - "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { - "taskOutputArtifact": { - "outputArtifactKey": "explanation_metadata_artifact", - "producerTask": "automl-tabular-ensemble" - } - }, - "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { - "taskOutputArtifact": { - "outputArtifactKey": "unmanaged_container_model", - "producerTask": "automl-tabular-ensemble" - } - } - }, - "parameters": { - "pipelineparam--automl-tabular-ensemble-explanation_parameters": { - "taskOutputParameter": { - "outputParameterKey": "explanation_parameters", - "producerTask": "automl-tabular-ensemble" - } - }, - "pipelineparam--bool-identity-2-Output": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "bool-identity-2" - } - }, - "pipelineparam--bool-identity-Output": { - "taskOutputParameter": { - "outputParameterKey": "Output", - "producerTask": "bool-identity" - } - }, - "pipelineparam--dataflow_service_account": { - "componentInputParameter": "pipelineparam--dataflow_service_account" - }, - "pipelineparam--dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "pipelineparam--dataflow_use_public_ips": { - 
"componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "pipelineparam--encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" - }, - "pipelineparam--location": { - "componentInputParameter": "pipelineparam--location" - }, - "pipelineparam--prediction_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "pipelineparam--project": { - "componentInputParameter": "pipelineparam--project" - }, - "pipelineparam--root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { - "taskOutputParameter": { - "outputParameterKey": "downsampled_test_split_json", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--tabular-stats-and-example-gen-test_split_json": { - "taskOutputParameter": { - "outputParameterKey": "test_split_json", - "producerTask": "tabular-stats-and-example-gen" - } - }, - "pipelineparam--target_column_name": { - "componentInputParameter": "pipelineparam--target_column_name" - } - } - }, - "taskInfo": { - "name": "condition-no-distill-2" - }, - "triggerPolicy": { - "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'false'" - } - }, - "merge-materialized-splits": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-merge-materialized-splits" - }, - "dependentTasks": [ - "automl-tabular-transform" - ], - "inputs": { - "artifacts": { - "split_0": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_train_split", - "producerTask": "automl-tabular-transform" - } - }, - "split_1": { - "taskOutputArtifact": { - "outputArtifactKey": "materialized_eval_split", - "producerTask": "automl-tabular-transform" - } - } - } - }, - "taskInfo": { - "name": "merge-materialized-splits" - } - }, - "tabular-stats-and-example-gen": { - "cachingOptions": { - "enableCache": true - }, - "componentRef": { - "name": "comp-tabular-stats-and-example-gen" - }, - "inputs": { - "parameters": { - "additional_experiments": { - "componentInputParameter": "pipelineparam--additional_experiments" - }, - "additional_experiments_json": { - "runtimeValue": { - "constantValue": { - "stringValue": "{}" - } - } - }, - "data_source": { - "componentInputParameter": "pipelineparam--data_source" - }, - "data_source_bigquery_table_path": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "data_source_csv_filenames": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_disk_size_gb": { - 
"componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb" - }, - "dataflow_machine_type": { - "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_machine_type" - }, - "dataflow_max_num_workers": { - "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_max_num_workers" - }, - "dataflow_service_account": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "dataflow_subnetwork": { - "componentInputParameter": "pipelineparam--dataflow_subnetwork" - }, - "dataflow_use_public_ips": { - "componentInputParameter": "pipelineparam--dataflow_use_public_ips" - }, - "encryption_spec_key_name": { - "componentInputParameter": "pipelineparam--encryption_spec_key_name" - }, - "location": { - "componentInputParameter": "pipelineparam--location" - }, - "optimization_objective": { - "componentInputParameter": "pipelineparam--optimization_objective" - }, - "optimization_objective_precision_value": { - "componentInputParameter": "pipelineparam--optimization_objective_precision_value" - }, - "optimization_objective_recall_value": { - "componentInputParameter": "pipelineparam--optimization_objective_recall_value" - }, - "predefined_split_key": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "prediction_type": { - "componentInputParameter": "pipelineparam--prediction_type" - }, - "project": { - "componentInputParameter": "pipelineparam--project" - }, - "request_type": { - "runtimeValue": { - "constantValue": { - "stringValue": "COLUMN_STATS_ONLY" - } - } - }, - "root_dir": { - "componentInputParameter": "pipelineparam--root_dir" - }, - "run_distillation": { - "componentInputParameter": "pipelineparam--run_distillation" - }, - "split_spec": { - "componentInputParameter": "pipelineparam--split_spec" - }, - "stratified_split_key": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "target_column_name": { - "componentInputParameter": "pipelineparam--target_column_name" - }, - "test_fraction": { - "runtimeValue": { - "constantValue": { - "stringValue": "-1" - } - } - }, - "timestamp_split_key": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "training_fraction": { - "runtimeValue": { - "constantValue": { - "stringValue": "-1" - } - } - }, - "transformations": { - "componentInputParameter": "pipelineparam--transformations" - }, - "transformations_path": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "validation_fraction": { - "runtimeValue": { - "constantValue": { - "stringValue": "-1" - } - } - }, - "weight_column_name": { - "componentInputParameter": "pipelineparam--weight_column_name" - } - } - }, - "taskInfo": { - "name": "tabular-stats-and-example-gen" - } - } - } - }, - "inputDefinitions": { - "parameters": { - "pipelineparam--additional_experiments": { - "type": "STRING" - }, - "pipelineparam--cv_trainer_worker_pool_specs_override": { - "type": "STRING" - }, - "pipelineparam--data_source": { - "type": "STRING" - }, - "pipelineparam--dataflow_service_account": { - "type": "STRING" - }, - "pipelineparam--dataflow_subnetwork": { - "type": "STRING" - }, - "pipelineparam--dataflow_use_public_ips": { - "type": "STRING" - }, - "pipelineparam--disable_early_stopping": { - "type": "STRING" - }, - "pipelineparam--distill_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--distill_batch_predict_max_replica_count": { - "type": "INT" - }, - 
"pipelineparam--distill_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--distill_stage_1_deadline_hours": { - "type": "DOUBLE" - }, - "pipelineparam--encryption_spec_key_name": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--export_additional_model_without_custom_ops": { - "type": "STRING" - }, - "pipelineparam--location": { - "type": "STRING" - }, - "pipelineparam--optimization_objective": { - "type": "STRING" - }, - "pipelineparam--optimization_objective_precision_value": { - "type": "DOUBLE" - }, - "pipelineparam--optimization_objective_recall_value": { - "type": "DOUBLE" - }, - "pipelineparam--prediction_type": { - "type": "STRING" - }, - "pipelineparam--project": { - "type": "STRING" - }, - "pipelineparam--reduce_search_space_mode": { - "type": "STRING" - }, - "pipelineparam--root_dir": { - "type": "STRING" - }, - "pipelineparam--run_distillation": { - "type": "STRING" - }, - "pipelineparam--run_evaluation": { - "type": "STRING" - }, - "pipelineparam--split_spec": { - "type": "STRING" - }, - "pipelineparam--stage_1_deadline_hours": { - "type": "DOUBLE" - }, - "pipelineparam--stage_1_num_parallel_trials": { - "type": "INT" - }, - "pipelineparam--stage_1_num_selected_trials": { - "type": "INT" - }, - "pipelineparam--stage_1_single_run_max_secs": { - "type": "INT" - }, - "pipelineparam--stage_1_tuner_worker_pool_specs_override": { - "type": "STRING" - }, - "pipelineparam--stage_2_deadline_hours": { - "type": "DOUBLE" - }, - "pipelineparam--stage_2_num_parallel_trials": { - "type": "INT" - }, - "pipelineparam--stage_2_num_selected_trials": { - "type": "INT" - }, - "pipelineparam--stage_2_single_run_max_secs": { - "type": "INT" - }, - "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--stats_and_example_gen_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--study_spec_override": { - "type": "STRING" - }, - "pipelineparam--target_column_name": { - "type": "STRING" - }, - "pipelineparam--transform_dataflow_disk_size_gb": { - "type": "INT" - }, - "pipelineparam--transform_dataflow_machine_type": { - "type": "STRING" - }, - "pipelineparam--transform_dataflow_max_num_workers": { - "type": "INT" - }, - "pipelineparam--transformations": { - "type": "STRING" - }, - "pipelineparam--weight_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-3-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-4-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - 
"comp-merge-materialized-splits": { - "executorLabel": "exec-merge-materialized-splits", - "inputDefinitions": { - "artifacts": { - "split_0": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "split_1": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - } - }, - "outputDefinitions": { - "artifacts": { - "splits": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-model-batch-explanation": { - "executorLabel": "exec-model-batch-explanation", - "inputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - "type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-batch-explanation-2": { - "executorLabel": "exec-model-batch-explanation-2", - "inputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - "type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, 
- "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-batch-predict": { - "executorLabel": "exec-model-batch-predict", - "inputDefinitions": { - "artifacts": { - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - "type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-batch-predict-2": { - "executorLabel": "exec-model-batch-predict-2", - "inputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - 
"type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-batch-predict-3": { - "executorLabel": "exec-model-batch-predict-3", - "inputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - "type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-batch-predict-4": { - "executorLabel": "exec-model-batch-predict-4", - "inputDefinitions": { - "artifacts": { - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "accelerator_count": { - "type": "INT" - }, - "accelerator_type": { - "type": "STRING" - }, - "bigquery_destination_output_uri": { - "type": "STRING" - }, - "bigquery_source_input_uri": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - 
"gcs_destination_output_uri_prefix": { - "type": "STRING" - }, - "gcs_source_uris": { - "type": "STRING" - }, - "generate_explanation": { - "type": "STRING" - }, - "instances_format": { - "type": "STRING" - }, - "job_display_name": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "machine_type": { - "type": "STRING" - }, - "manual_batch_tuning_parameters_batch_size": { - "type": "INT" - }, - "max_replica_count": { - "type": "INT" - }, - "model_parameters": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "starting_replica_count": { - "type": "INT" - } - } - }, - "outputDefinitions": { - "artifacts": { - "batchpredictionjob": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - }, - "bigquery_output_table": { - "artifactType": { - "schemaTitle": "google.BQTable", - "schemaVersion": "0.0.1" - } - }, - "gcs_output_directory": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation": { - "executorLabel": "exec-model-evaluation", - "inputDefinitions": { - "artifacts": { - "batch_prediction_job": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "class_names": { - "type": "STRING" - }, - "classification_type": { - "type": "STRING" - }, - "dataflow_disk_size": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_workers_num": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "dataflow_workers_num": { - "type": "INT" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "example_weight_column": { - "type": "STRING" - }, - "generate_feature_attribution": { - "type": "STRING" - }, - "ground_truth_column": { - "type": "STRING" - }, - "ground_truth_format": { - "type": "STRING" - }, - "ground_truth_gcs_source": { - "type": "STRING" - }, - "key_columns": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "positive_classes": { - "type": "STRING" - }, - "prediction_id_column": { - "type": "STRING" - }, - "prediction_label_column": { - "type": "STRING" - }, - "prediction_score_column": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation-2": { - "executorLabel": "exec-model-evaluation-2", - "inputDefinitions": { - "artifacts": { - "batch_prediction_job": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "class_names": { - "type": "STRING" - }, - "classification_type": { - "type": "STRING" - }, - "dataflow_disk_size": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_workers_num": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - 
"dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "dataflow_workers_num": { - "type": "INT" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "example_weight_column": { - "type": "STRING" - }, - "generate_feature_attribution": { - "type": "STRING" - }, - "ground_truth_column": { - "type": "STRING" - }, - "ground_truth_format": { - "type": "STRING" - }, - "ground_truth_gcs_source": { - "type": "STRING" - }, - "key_columns": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "positive_classes": { - "type": "STRING" - }, - "prediction_id_column": { - "type": "STRING" - }, - "prediction_label_column": { - "type": "STRING" - }, - "prediction_score_column": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation-3": { - "executorLabel": "exec-model-evaluation-3", - "inputDefinitions": { - "artifacts": { - "batch_prediction_job": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "class_names": { - "type": "STRING" - }, - "classification_type": { - "type": "STRING" - }, - "dataflow_disk_size": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_workers_num": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "dataflow_workers_num": { - "type": "INT" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "example_weight_column": { - "type": "STRING" - }, - "generate_feature_attribution": { - "type": "STRING" - }, - "ground_truth_column": { - "type": "STRING" - }, - "ground_truth_format": { - "type": "STRING" - }, - "ground_truth_gcs_source": { - "type": "STRING" - }, - "key_columns": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "positive_classes": { - "type": "STRING" - }, - "prediction_id_column": { - "type": "STRING" - }, - "prediction_label_column": { - "type": "STRING" - }, - "prediction_score_column": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation-4": { - "executorLabel": "exec-model-evaluation-4", - "inputDefinitions": { - "artifacts": { - "batch_prediction_job": { - "artifactType": { - "schemaTitle": "google.VertexBatchPredictionJob", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "class_names": { - "type": "STRING" - }, - "classification_type": { - "type": "STRING" - }, - "dataflow_disk_size": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_workers_num": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - 
"dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "dataflow_workers_num": { - "type": "INT" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "example_weight_column": { - "type": "STRING" - }, - "generate_feature_attribution": { - "type": "STRING" - }, - "ground_truth_column": { - "type": "STRING" - }, - "ground_truth_format": { - "type": "STRING" - }, - "ground_truth_gcs_source": { - "type": "STRING" - }, - "key_columns": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "positive_classes": { - "type": "STRING" - }, - "prediction_id_column": { - "type": "STRING" - }, - "prediction_label_column": { - "type": "STRING" - }, - "prediction_score_column": { - "type": "STRING" - }, - "predictions_format": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation-import": { - "executorLabel": "exec-model-evaluation-import", - "inputDefinitions": { - "artifacts": { - "explanation": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "dataset_path": { - "type": "STRING" - }, - "dataset_paths": { - "type": "STRING" - }, - "dataset_type": { - "type": "STRING" - }, - "display_name": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-evaluation-import-2": { - "executorLabel": "exec-model-evaluation-import-2", - "inputDefinitions": { - "artifacts": { - "explanation": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "dataset_path": { - "type": "STRING" - }, - "dataset_paths": { - "type": "STRING" - }, - "dataset_type": { - "type": "STRING" - }, - "display_name": { - "type": "STRING" - }, - "problem_type": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-upload": { - "executorLabel": "exec-model-upload", - "inputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "description": { - "type": "STRING" - }, - "display_name": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": 
"STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-upload-2": { - "executorLabel": "exec-model-upload-2", - "inputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "description": { - "type": "STRING" - }, - "display_name": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-model-upload-3": { - "executorLabel": "exec-model-upload-3", - "inputDefinitions": { - "artifacts": { - "explanation_metadata_artifact": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "unmanaged_container_model": { - "artifactType": { - "schemaTitle": "google.UnmanagedContainerModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "description": { - "type": "STRING" - }, - "display_name": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "explanation_metadata": { - "type": "STRING" - }, - "explanation_parameters": { - "type": "STRING" - }, - "labels": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "project": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "google.VertexModel", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "gcp_resources": { - "type": "STRING" - } - } - } - }, - "comp-read-input-uri": { - "executorLabel": "exec-read-input-uri", - "inputDefinitions": { - "artifacts": { - "split_uri": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - } - }, - "outputDefinitions": { - "parameters": { - "Output": { - "type": "STRING" - } - } - } - }, - "comp-read-input-uri-2": { - "executorLabel": "exec-read-input-uri-2", - "inputDefinitions": { - "artifacts": { - "split_uri": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - } - }, - "outputDefinitions": { - "parameters": { - "Output": { - "type": "STRING" - } - } - } - }, - "comp-set-model-can-skip-validation": { - "executorLabel": "exec-set-model-can-skip-validation", - "inputDefinitions": { - "artifacts": { - "model": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-tabular-stats-and-example-gen": { - "executorLabel": "exec-tabular-stats-and-example-gen", - "inputDefinitions": { - "parameters": { - "additional_experiments": { - "type": "STRING" - }, - "additional_experiments_json": { - "type": "STRING" - }, - "data_source": { - "type": "STRING" - }, - "data_source_bigquery_table_path": { - "type": "STRING" - }, - "data_source_csv_filenames": { - 
"type": "STRING" - }, - "dataflow_disk_size_gb": { - "type": "INT" - }, - "dataflow_machine_type": { - "type": "STRING" - }, - "dataflow_max_num_workers": { - "type": "INT" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "optimization_objective": { - "type": "STRING" - }, - "optimization_objective_precision_value": { - "type": "DOUBLE" - }, - "optimization_objective_recall_value": { - "type": "DOUBLE" - }, - "predefined_split_key": { - "type": "STRING" - }, - "prediction_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "request_type": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - }, - "run_distillation": { - "type": "STRING" - }, - "split_spec": { - "type": "STRING" - }, - "stratified_split_key": { - "type": "STRING" - }, - "target_column_name": { - "type": "STRING" - }, - "test_fraction": { - "type": "DOUBLE" - }, - "timestamp_split_key": { - "type": "STRING" - }, - "training_fraction": { - "type": "DOUBLE" - }, - "transformations": { - "type": "STRING" - }, - "transformations_path": { - "type": "STRING" - }, - "validation_fraction": { - "type": "DOUBLE" - }, - "weight_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "dataset_schema": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "dataset_stats": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "eval_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "instance_baseline": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "metadata": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - }, - "test_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - }, - "train_split": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - }, - "parameters": { - "downsampled_test_split_json": { - "type": "STRING" - }, - "gcp_resources": { - "type": "STRING" - }, - "test_split_json": { - "type": "STRING" - } - } - } - }, - "comp-write-bp-result-path": { - "executorLabel": "exec-write-bp-result-path", - "inputDefinitions": { - "artifacts": { - "bp_job": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - } - }, - "outputDefinitions": { - "artifacts": { - "result": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "comp-write-bp-result-path-2": { - "executorLabel": "exec-write-bp-result-path-2", - "inputDefinitions": { - "artifacts": { - "bp_job": { - "artifactType": { - "schemaTitle": "system.Artifact", - "schemaVersion": "0.0.1" - } - } - } - }, - "outputDefinitions": { - "artifacts": { - "result": { - "artifactType": { - "schemaTitle": "system.Dataset", - "schemaVersion": "0.0.1" - } - } - } - } - } - }, - "deploymentSpec": { - "executors": { - "exec-automl-tabular-cv-trainer": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - 
"{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_cv_splits={{$.inputs.artifacts['materialized_cv_splits'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_custom_job=true\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-ensemble": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", 
\"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-ensemble-2": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - 
"exec-automl-tabular-finalizer": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-infra-validator": { - "container": { - "args": [ - "--executor_input", - "{{$}}" - ], - "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", - "resources": { - "cpuLimit": 8.0, - "memoryLimit": 52.0 - } - } - }, - "exec-automl-tabular-infra-validator-2": { - "container": { - "args": [ - "--executor_input", - "{{$}}" - ], - "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", - "resources": { - "cpuLimit": 8.0, - "memoryLimit": 52.0 - } - } - }, - "exec-automl-tabular-stage-1-tuner": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", 
\"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-stage-1-tuner-2": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - 
"google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-transform": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-automl-tabular-transform-2": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - 
"{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-bool-identity": { - "container": { - "args": [ - "--value", - "{{$.inputs.parameters['value']}}", - "----output-paths", - "{{$.outputs.parameters['Output'].output_file}}" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef 
_serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" - ], - "image": "python:3.7-slim" - } - }, - "exec-bool-identity-2": { - "container": { - "args": [ - "--value", - "{{$.inputs.parameters['value']}}", - "----output-paths", - "{{$.outputs.parameters['Output'].output_file}}" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef _serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" - ], - "image": "python:3.7-slim" - } - }, - "exec-merge-materialized-splits": { - "container": { - "args": [ - "--split-0", - "{{$.inputs.artifacts['split_0'].path}}", - "--split-1", - "{{$.inputs.artifacts['split_1'].path}}", - "--splits", - "{{$.outputs.artifacts['splits'].path}}" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef _merge_materialized_splits(\n split_0,\n split_1,\n splits,\n):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The first materialized split.\n split_1: The second materialized split.\n splits: The merged materialized split.\n \"\"\"\n with open(split_0, 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r') as f:\n 
split_1_content = f.read()\n with open(splits, 'w') as f:\n f.write(','.join([split_0_content, split_1_content]))\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Merge materialized splits', description='Merge two materialized splits.')\n_parser.add_argument(\"--split-0\", dest=\"split_0\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--split-1\", dest=\"split_1\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--splits\", dest=\"splits\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = _merge_materialized_splits(**_parsed_args)\n" - ], - "image": "python:3.7-slim" - } - }, - "exec-model-batch-explanation": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "launcher" - ], - "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" - } - }, - "exec-model-batch-explanation-2": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", 
\"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "launcher" - ], - "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" - } - }, - "exec-model-batch-predict": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - 
"python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-batch-predict-2": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-batch-predict-3": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": 
{{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-batch-predict-4": { - "container": { - "args": [ - "--type", - "BatchPredictionJob", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-evaluation": { - "container": { - "args": [ - "--setup_file", - "/setup.py", - "--json_mode", - "true", - "--project_id", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--problem_type", - 
"{{$.inputs.parameters['problem_type']}}", - "--batch_prediction_format", - "{{$.inputs.parameters['predictions_format']}}", - "--batch_prediction_gcs_source", - "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", - "--ground_truth_format", - "{{$.inputs.parameters['ground_truth_format']}}", - "--ground_truth_gcs_source", - "{{$.inputs.parameters['ground_truth_gcs_source']}}", - "--key_prefix_in_prediction_dataset", - "instance", - "--key_columns", - "{{$.inputs.parameters['key_columns']}}", - "--root_dir", - "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--classification_type", - "{{$.inputs.parameters['classification_type']}}", - "--class_names", - "{{$.inputs.parameters['class_names']}}", - "--ground_truth_column", - "instance.{{$.inputs.parameters['ground_truth_column']}}", - "--prediction_score_column", - "{{$.inputs.parameters['prediction_score_column']}}", - "--prediction_label_column", - "{{$.inputs.parameters['prediction_label_column']}}", - "--prediction_id_column", - "{{$.inputs.parameters['prediction_id_column']}}", - "--example_weight_column", - "{{$.inputs.parameters['example_weight_column']}}", - "--positive_classes", - "{{$.inputs.parameters['positive_classes']}}", - "--generate_feature_attribution", - "{{$.inputs.parameters['generate_feature_attribution']}}", - "--dataflow_job_prefix", - "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--dataflow_service_account", - "{{$.inputs.parameters['dataflow_service_account']}}", - "--dataflow_disk_size", - "{{$.inputs.parameters['dataflow_disk_size']}}", - "--dataflow_machine_type", - "{{$.inputs.parameters['dataflow_machine_type']}}", - "--dataflow_workers_num", - "{{$.inputs.parameters['dataflow_workers_num']}}", - "--dataflow_max_workers_num", - "{{$.inputs.parameters['dataflow_max_workers_num']}}", - "--dataflow_subnetwork", - "{{$.inputs.parameters['dataflow_subnetwork']}}", - "--dataflow_use_public_ips", - "{{$.inputs.parameters['dataflow_use_public_ips']}}", - "--kms_key_name", - "{{$.inputs.parameters['encryption_spec_key_name']}}", - "--output_metrics_gcs_path", - "{{$.outputs.artifacts['evaluation_metrics'].uri}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python", - "/main.py" - ], - "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" - } - }, - "exec-model-evaluation-2": { - "container": { - "args": [ - "--setup_file", - "/setup.py", - "--json_mode", - "true", - "--project_id", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--problem_type", - "{{$.inputs.parameters['problem_type']}}", - "--batch_prediction_format", - "{{$.inputs.parameters['predictions_format']}}", - "--batch_prediction_gcs_source", - "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", - "--ground_truth_format", - "{{$.inputs.parameters['ground_truth_format']}}", - "--ground_truth_gcs_source", - "{{$.inputs.parameters['ground_truth_gcs_source']}}", - "--key_prefix_in_prediction_dataset", - "instance", - "--key_columns", - "{{$.inputs.parameters['key_columns']}}", - "--root_dir", - "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--classification_type", - "{{$.inputs.parameters['classification_type']}}", - "--class_names", - "{{$.inputs.parameters['class_names']}}", - "--ground_truth_column", - "instance.{{$.inputs.parameters['ground_truth_column']}}", 
- "--prediction_score_column", - "{{$.inputs.parameters['prediction_score_column']}}", - "--prediction_label_column", - "{{$.inputs.parameters['prediction_label_column']}}", - "--prediction_id_column", - "{{$.inputs.parameters['prediction_id_column']}}", - "--example_weight_column", - "{{$.inputs.parameters['example_weight_column']}}", - "--positive_classes", - "{{$.inputs.parameters['positive_classes']}}", - "--generate_feature_attribution", - "{{$.inputs.parameters['generate_feature_attribution']}}", - "--dataflow_job_prefix", - "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--dataflow_service_account", - "{{$.inputs.parameters['dataflow_service_account']}}", - "--dataflow_disk_size", - "{{$.inputs.parameters['dataflow_disk_size']}}", - "--dataflow_machine_type", - "{{$.inputs.parameters['dataflow_machine_type']}}", - "--dataflow_workers_num", - "{{$.inputs.parameters['dataflow_workers_num']}}", - "--dataflow_max_workers_num", - "{{$.inputs.parameters['dataflow_max_workers_num']}}", - "--dataflow_subnetwork", - "{{$.inputs.parameters['dataflow_subnetwork']}}", - "--dataflow_use_public_ips", - "{{$.inputs.parameters['dataflow_use_public_ips']}}", - "--kms_key_name", - "{{$.inputs.parameters['encryption_spec_key_name']}}", - "--output_metrics_gcs_path", - "{{$.outputs.artifacts['evaluation_metrics'].uri}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python", - "/main.py" - ], - "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" - } - }, - "exec-model-evaluation-3": { - "container": { - "args": [ - "--setup_file", - "/setup.py", - "--json_mode", - "true", - "--project_id", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--problem_type", - "{{$.inputs.parameters['problem_type']}}", - "--batch_prediction_format", - "{{$.inputs.parameters['predictions_format']}}", - "--batch_prediction_gcs_source", - "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", - "--ground_truth_format", - "{{$.inputs.parameters['ground_truth_format']}}", - "--ground_truth_gcs_source", - "{{$.inputs.parameters['ground_truth_gcs_source']}}", - "--key_prefix_in_prediction_dataset", - "instance", - "--key_columns", - "{{$.inputs.parameters['key_columns']}}", - "--root_dir", - "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--classification_type", - "{{$.inputs.parameters['classification_type']}}", - "--class_names", - "{{$.inputs.parameters['class_names']}}", - "--ground_truth_column", - "instance.{{$.inputs.parameters['ground_truth_column']}}", - "--prediction_score_column", - "{{$.inputs.parameters['prediction_score_column']}}", - "--prediction_label_column", - "{{$.inputs.parameters['prediction_label_column']}}", - "--prediction_id_column", - "{{$.inputs.parameters['prediction_id_column']}}", - "--example_weight_column", - "{{$.inputs.parameters['example_weight_column']}}", - "--positive_classes", - "{{$.inputs.parameters['positive_classes']}}", - "--generate_feature_attribution", - "{{$.inputs.parameters['generate_feature_attribution']}}", - "--dataflow_job_prefix", - "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--dataflow_service_account", - "{{$.inputs.parameters['dataflow_service_account']}}", - "--dataflow_disk_size", - "{{$.inputs.parameters['dataflow_disk_size']}}", - "--dataflow_machine_type", - "{{$.inputs.parameters['dataflow_machine_type']}}", - 
"--dataflow_workers_num", - "{{$.inputs.parameters['dataflow_workers_num']}}", - "--dataflow_max_workers_num", - "{{$.inputs.parameters['dataflow_max_workers_num']}}", - "--dataflow_subnetwork", - "{{$.inputs.parameters['dataflow_subnetwork']}}", - "--dataflow_use_public_ips", - "{{$.inputs.parameters['dataflow_use_public_ips']}}", - "--kms_key_name", - "{{$.inputs.parameters['encryption_spec_key_name']}}", - "--output_metrics_gcs_path", - "{{$.outputs.artifacts['evaluation_metrics'].uri}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python", - "/main.py" - ], - "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" - } - }, - "exec-model-evaluation-4": { - "container": { - "args": [ - "--setup_file", - "/setup.py", - "--json_mode", - "true", - "--project_id", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--problem_type", - "{{$.inputs.parameters['problem_type']}}", - "--batch_prediction_format", - "{{$.inputs.parameters['predictions_format']}}", - "--batch_prediction_gcs_source", - "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", - "--ground_truth_format", - "{{$.inputs.parameters['ground_truth_format']}}", - "--ground_truth_gcs_source", - "{{$.inputs.parameters['ground_truth_gcs_source']}}", - "--key_prefix_in_prediction_dataset", - "instance", - "--key_columns", - "{{$.inputs.parameters['key_columns']}}", - "--root_dir", - "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--classification_type", - "{{$.inputs.parameters['classification_type']}}", - "--class_names", - "{{$.inputs.parameters['class_names']}}", - "--ground_truth_column", - "instance.{{$.inputs.parameters['ground_truth_column']}}", - "--prediction_score_column", - "{{$.inputs.parameters['prediction_score_column']}}", - "--prediction_label_column", - "{{$.inputs.parameters['prediction_label_column']}}", - "--prediction_id_column", - "{{$.inputs.parameters['prediction_id_column']}}", - "--example_weight_column", - "{{$.inputs.parameters['example_weight_column']}}", - "--positive_classes", - "{{$.inputs.parameters['positive_classes']}}", - "--generate_feature_attribution", - "{{$.inputs.parameters['generate_feature_attribution']}}", - "--dataflow_job_prefix", - "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", - "--dataflow_service_account", - "{{$.inputs.parameters['dataflow_service_account']}}", - "--dataflow_disk_size", - "{{$.inputs.parameters['dataflow_disk_size']}}", - "--dataflow_machine_type", - "{{$.inputs.parameters['dataflow_machine_type']}}", - "--dataflow_workers_num", - "{{$.inputs.parameters['dataflow_workers_num']}}", - "--dataflow_max_workers_num", - "{{$.inputs.parameters['dataflow_max_workers_num']}}", - "--dataflow_subnetwork", - "{{$.inputs.parameters['dataflow_subnetwork']}}", - "--dataflow_use_public_ips", - "{{$.inputs.parameters['dataflow_use_public_ips']}}", - "--kms_key_name", - "{{$.inputs.parameters['encryption_spec_key_name']}}", - "--output_metrics_gcs_path", - "{{$.outputs.artifacts['evaluation_metrics'].uri}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python", - "/main.py" - ], - "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" - } - }, - "exec-model-evaluation-import": { - "container": { - "args": [ - "--metrics", - "{{$.inputs.artifacts['metrics'].uri}}", - 
"--metrics_explanation", - "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", - "--explanation", - "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", - "--problem_type", - "{{$.inputs.parameters['problem_type']}}", - "--display_name", - "{{$.inputs.parameters['display_name']}}", - "--dataset_path", - "{{$.inputs.parameters['dataset_path']}}", - "--dataset_paths", - "{{$.inputs.parameters['dataset_paths']}}", - "--dataset_type", - "{{$.inputs.parameters['dataset_type']}}", - "--pipeline_job_id", - "{{$.pipeline_job_uuid}}", - "--pipeline_job_resource_name", - "{{$.pipeline_job_resource_name}}", - "--model_name", - "{{$.inputs.artifacts['model'].metadata['resourceName']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-evaluation-import-2": { - "container": { - "args": [ - "--metrics", - "{{$.inputs.artifacts['metrics'].uri}}", - "--metrics_explanation", - "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", - "--explanation", - "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", - "--problem_type", - "{{$.inputs.parameters['problem_type']}}", - "--display_name", - "{{$.inputs.parameters['display_name']}}", - "--dataset_path", - "{{$.inputs.parameters['dataset_path']}}", - "--dataset_paths", - "{{$.inputs.parameters['dataset_paths']}}", - "--dataset_type", - "{{$.inputs.parameters['dataset_type']}}", - "--pipeline_job_id", - "{{$.pipeline_job_uuid}}", - "--pipeline_job_resource_name", - "{{$.pipeline_job_resource_name}}", - "--model_name", - "{{$.inputs.artifacts['model'].metadata['resourceName']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-model-upload": { - "container": { - "args": [ - "--type", - "UploadModel", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "launcher" - ], - "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" - } - }, - "exec-model-upload-2": { - "container": { - "args": [ - "--type", - "UploadModel", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": 
{{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "launcher" - ], - "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" - } - }, - "exec-model-upload-3": { - "container": { - "args": [ - "--type", - "UploadModel", - "--payload", - "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--executor_input", - "{{$}}" - ], - "command": [ - "python3", - "-u", - "-m", - "launcher" - ], - "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" - } - }, - "exec-read-input-uri": { - "container": { - "args": [ - "--split-uri", - "{{$.inputs.artifacts['split_uri'].path}}", - "----output-paths", - "{{$.outputs.parameters['Output'].output_file}}" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport 
os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" - ], - "image": "python:3.7-slim" - } - }, - "exec-read-input-uri-2": { - "container": { - "args": [ - "--split-uri", - "{{$.inputs.artifacts['split_uri'].path}}", - "----output-paths", - "{{$.outputs.parameters['Output'].output_file}}" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", - "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" - ], - "image": "python:3.7-slim" - } - }, - "exec-set-model-can-skip-validation": { - "container": { - "args": [ - "--executor_input", - "{{$}}", - "--function_to_execute", - "_set_model_can_skip_validation" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", - "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _set_model_can_skip_validation(model: Input[Artifact]):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n model: The model artifact.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import os\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n # create an empty CAN_SKIP_VALIDATION file\n with tf.io.gfile.GFile(os.path.join(model.uri, 'CAN_SKIP_VALIDATION'),\n 'w') as f:\n f.write('')\n\n" - ], - "image": 
"us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" - } - }, - "exec-tabular-stats-and-example-gen": { - "container": { - "args": [ - "--type", - "CustomJob", - "--project", - "{{$.inputs.parameters['project']}}", - "--location", - "{{$.inputs.parameters['location']}}", - "--gcp_resources", - "{{$.outputs.parameters['gcp_resources'].output_file}}", - "--payload", - "{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"stats_generator\",\"--train_spec={\\\"prediction_type\\\": \\\"{{$.inputs.parameters['prediction_type']}}\\\", \\\"target_column\\\": \\\"{{$.inputs.parameters['target_column_name']}}\\\", \\\"optimization_objective\\\": \\\"{{$.inputs.parameters['optimization_objective']}}\\\", \\\"weight_column_name\\\": \\\"{{$.inputs.parameters['weight_column_name']}}\\\", \\\"transformations\\\": {{$.inputs.parameters['transformations']}}}\", \"--transformations_override_path={{$.inputs.parameters['transformations_path']}}\", \"--split_spec={{$.inputs.parameters['split_spec']}}\", \"--data_source={{$.inputs.parameters['data_source']}}\", \"--data_source_csv_filenames={{$.inputs.parameters['data_source_csv_filenames']}}\", \"--data_source_bigquery_table_path={{$.inputs.parameters['data_source_bigquery_table_path']}}\", \"--predefined_split_key={{$.inputs.parameters['predefined_split_key']}}\", \"--timestamp_split_key={{$.inputs.parameters['timestamp_split_key']}}\", \"--stratified_split_key={{$.inputs.parameters['stratified_split_key']}}\", \"--training_fraction={{$.inputs.parameters['training_fraction']}}\", \"--validation_fraction={{$.inputs.parameters['validation_fraction']}}\", \"--test_fraction={{$.inputs.parameters['test_fraction']}}\", \"--target_column={{$.inputs.parameters['target_column_name']}}\", \"--request_type={{$.inputs.parameters['request_type']}}\", \"--optimization_objective_recall_value={{$.inputs.parameters['optimization_objective_recall_value']}}\", \"--optimization_objective_precision_value={{$.inputs.parameters['optimization_objective_precision_value']}}\", \"--example_gen_gcs_output_prefix={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", \"--dataset_stats_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", \"--stats_result_path={{$.outputs.artifacts['dataset_stats'].uri}}\", \"--dataset_schema_path={{$.outputs.artifacts['dataset_schema'].uri}}\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", 
\"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--additional_experiments={{$.inputs.parameters['additional_experiments']}}\", \"--metadata_path={{$.outputs.artifacts['metadata'].uri}}\", \"--train_split={{$.outputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.outputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.outputs.artifacts['test_split'].uri}}\", \"--test_split_for_batch_prediction_component={{$.outputs.parameters['test_split_json'].output_file}}\", \"--downsampled_test_split_for_batch_prediction_component={{$.outputs.parameters['downsampled_test_split_json'].output_file}}\", \"--instance_baseline_path={{$.outputs.artifacts['instance_baseline'].uri}}\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" - ], - "command": [ - "python3", - "-u", - "-m", - "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" - ], - "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" - } - }, - "exec-write-bp-result-path": { - "container": { - "args": [ - "--executor_input", - "{{$}}", - "--function_to_execute", - "_write_bp_result_path" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", - "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" - ], - "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" - } - }, - "exec-write-bp-result-path-2": { - "container": { - "args": [ - "--executor_input", - "{{$}}", - "--function_to_execute", - "_write_bp_result_path" - ], - "command": [ - "sh", - "-ec", - "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", - "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: 
Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" - ], - "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" - } - } - } - }, - "pipelineInfo": { - "name": "automl-tabular-deprecated" - }, - "root": { - "dag": { - "outputs": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-2-evaluation_metrics", - "producerSubtask": "exit-handler-1" - } - ] - }, - "model-evaluation-3-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-3-evaluation_metrics", - "producerSubtask": "exit-handler-1" - } - ] - }, - "model-evaluation-4-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-4-evaluation_metrics", - "producerSubtask": "exit-handler-1" - } - ] - }, - "model-evaluation-evaluation_metrics": { - "artifactSelectors": [ - { - "outputArtifactKey": "model-evaluation-evaluation_metrics", - "producerSubtask": "exit-handler-1" - } - ] - } - } - }, - "tasks": { - "automl-tabular-finalizer": { - "componentRef": { - "name": "comp-automl-tabular-finalizer" - }, - "dependentTasks": [ - "exit-handler-1" - ], - "inputs": { - "parameters": { - "encryption_spec_key_name": { - "runtimeValue": { - "constantValue": { - "stringValue": "" - } - } - }, - "location": { - "componentInputParameter": "location" - }, - "project": { - "componentInputParameter": "project" - }, - "root_dir": { - "componentInputParameter": "root_dir" - } - } - }, - "taskInfo": { - "name": "automl-tabular-finalizer" - }, - "triggerPolicy": { - "strategy": "ALL_UPSTREAM_TASKS_COMPLETED" - } - }, - "exit-handler-1": { - "componentRef": { - "name": "comp-exit-handler-1" - }, - "inputs": { - "parameters": { - "pipelineparam--additional_experiments": { - "componentInputParameter": "additional_experiments" - }, - "pipelineparam--cv_trainer_worker_pool_specs_override": { - "componentInputParameter": "cv_trainer_worker_pool_specs_override" - }, - "pipelineparam--data_source": { - "componentInputParameter": "data_source" - }, - "pipelineparam--dataflow_service_account": { - "componentInputParameter": "dataflow_service_account" - }, - "pipelineparam--dataflow_subnetwork": { - "componentInputParameter": "dataflow_subnetwork" - }, - "pipelineparam--dataflow_use_public_ips": { - "componentInputParameter": "dataflow_use_public_ips" - }, - "pipelineparam--disable_early_stopping": { - "componentInputParameter": "disable_early_stopping" - }, - "pipelineparam--distill_batch_predict_machine_type": { - "componentInputParameter": "distill_batch_predict_machine_type" - }, - "pipelineparam--distill_batch_predict_max_replica_count": { - "componentInputParameter": "distill_batch_predict_max_replica_count" - }, - "pipelineparam--distill_batch_predict_starting_replica_count": { - "componentInputParameter": "distill_batch_predict_starting_replica_count" - }, - "pipelineparam--distill_stage_1_deadline_hours": { - "componentInputParameter": "distill_stage_1_deadline_hours" - }, - "pipelineparam--encryption_spec_key_name": { - 
"componentInputParameter": "encryption_spec_key_name" - }, - "pipelineparam--evaluation_batch_predict_machine_type": { - "componentInputParameter": "evaluation_batch_predict_machine_type" - }, - "pipelineparam--evaluation_batch_predict_max_replica_count": { - "componentInputParameter": "evaluation_batch_predict_max_replica_count" - }, - "pipelineparam--evaluation_batch_predict_starting_replica_count": { - "componentInputParameter": "evaluation_batch_predict_starting_replica_count" - }, - "pipelineparam--evaluation_dataflow_disk_size_gb": { - "componentInputParameter": "evaluation_dataflow_disk_size_gb" - }, - "pipelineparam--evaluation_dataflow_machine_type": { - "componentInputParameter": "evaluation_dataflow_machine_type" - }, - "pipelineparam--evaluation_dataflow_max_num_workers": { - "componentInputParameter": "evaluation_dataflow_max_num_workers" - }, - "pipelineparam--export_additional_model_without_custom_ops": { - "componentInputParameter": "export_additional_model_without_custom_ops" - }, - "pipelineparam--location": { - "componentInputParameter": "location" - }, - "pipelineparam--optimization_objective": { - "componentInputParameter": "optimization_objective" - }, - "pipelineparam--optimization_objective_precision_value": { - "componentInputParameter": "optimization_objective_precision_value" - }, - "pipelineparam--optimization_objective_recall_value": { - "componentInputParameter": "optimization_objective_recall_value" - }, - "pipelineparam--prediction_type": { - "componentInputParameter": "prediction_type" - }, - "pipelineparam--project": { - "componentInputParameter": "project" - }, - "pipelineparam--reduce_search_space_mode": { - "componentInputParameter": "reduce_search_space_mode" - }, - "pipelineparam--root_dir": { - "componentInputParameter": "root_dir" - }, - "pipelineparam--run_distillation": { - "componentInputParameter": "run_distillation" - }, - "pipelineparam--run_evaluation": { - "componentInputParameter": "run_evaluation" - }, - "pipelineparam--split_spec": { - "componentInputParameter": "split_spec" - }, - "pipelineparam--stage_1_deadline_hours": { - "componentInputParameter": "stage_1_deadline_hours" - }, - "pipelineparam--stage_1_num_parallel_trials": { - "componentInputParameter": "stage_1_num_parallel_trials" - }, - "pipelineparam--stage_1_num_selected_trials": { - "componentInputParameter": "stage_1_num_selected_trials" - }, - "pipelineparam--stage_1_single_run_max_secs": { - "componentInputParameter": "stage_1_single_run_max_secs" - }, - "pipelineparam--stage_1_tuner_worker_pool_specs_override": { - "componentInputParameter": "stage_1_tuner_worker_pool_specs_override" - }, - "pipelineparam--stage_2_deadline_hours": { - "componentInputParameter": "stage_2_deadline_hours" - }, - "pipelineparam--stage_2_num_parallel_trials": { - "componentInputParameter": "stage_2_num_parallel_trials" - }, - "pipelineparam--stage_2_num_selected_trials": { - "componentInputParameter": "stage_2_num_selected_trials" - }, - "pipelineparam--stage_2_single_run_max_secs": { - "componentInputParameter": "stage_2_single_run_max_secs" - }, - "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { - "componentInputParameter": "stats_and_example_gen_dataflow_disk_size_gb" - }, - "pipelineparam--stats_and_example_gen_dataflow_machine_type": { - "componentInputParameter": "stats_and_example_gen_dataflow_machine_type" - }, - "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { - "componentInputParameter": "stats_and_example_gen_dataflow_max_num_workers" - }, - 
"pipelineparam--study_spec_override": { - "componentInputParameter": "study_spec_override" - }, - "pipelineparam--target_column_name": { - "componentInputParameter": "target_column_name" - }, - "pipelineparam--transform_dataflow_disk_size_gb": { - "componentInputParameter": "transform_dataflow_disk_size_gb" - }, - "pipelineparam--transform_dataflow_machine_type": { - "componentInputParameter": "transform_dataflow_machine_type" - }, - "pipelineparam--transform_dataflow_max_num_workers": { - "componentInputParameter": "transform_dataflow_max_num_workers" - }, - "pipelineparam--transformations": { - "componentInputParameter": "transformations" - }, - "pipelineparam--weight_column_name": { - "componentInputParameter": "weight_column_name" - } - } - }, - "taskInfo": { - "name": "exit-handler-1" - } - } - } - }, - "inputDefinitions": { - "parameters": { - "additional_experiments": { - "type": "STRING" - }, - "cv_trainer_worker_pool_specs_override": { - "type": "STRING" - }, - "data_source": { - "type": "STRING" - }, - "dataflow_service_account": { - "type": "STRING" - }, - "dataflow_subnetwork": { - "type": "STRING" - }, - "dataflow_use_public_ips": { - "type": "STRING" - }, - "disable_early_stopping": { - "type": "STRING" - }, - "distill_batch_predict_machine_type": { - "type": "STRING" - }, - "distill_batch_predict_max_replica_count": { - "type": "INT" - }, - "distill_batch_predict_starting_replica_count": { - "type": "INT" - }, - "distill_stage_1_deadline_hours": { - "type": "DOUBLE" - }, - "encryption_spec_key_name": { - "type": "STRING" - }, - "evaluation_batch_predict_machine_type": { - "type": "STRING" - }, - "evaluation_batch_predict_max_replica_count": { - "type": "INT" - }, - "evaluation_batch_predict_starting_replica_count": { - "type": "INT" - }, - "evaluation_dataflow_disk_size_gb": { - "type": "INT" - }, - "evaluation_dataflow_machine_type": { - "type": "STRING" - }, - "evaluation_dataflow_max_num_workers": { - "type": "INT" - }, - "export_additional_model_without_custom_ops": { - "type": "STRING" - }, - "location": { - "type": "STRING" - }, - "optimization_objective": { - "type": "STRING" - }, - "optimization_objective_precision_value": { - "type": "DOUBLE" - }, - "optimization_objective_recall_value": { - "type": "DOUBLE" - }, - "prediction_type": { - "type": "STRING" - }, - "project": { - "type": "STRING" - }, - "reduce_search_space_mode": { - "type": "STRING" - }, - "root_dir": { - "type": "STRING" - }, - "run_distillation": { - "type": "STRING" - }, - "run_evaluation": { - "type": "STRING" - }, - "split_spec": { - "type": "STRING" - }, - "stage_1_deadline_hours": { - "type": "DOUBLE" - }, - "stage_1_num_parallel_trials": { - "type": "INT" - }, - "stage_1_num_selected_trials": { - "type": "INT" - }, - "stage_1_single_run_max_secs": { - "type": "INT" - }, - "stage_1_tuner_worker_pool_specs_override": { - "type": "STRING" - }, - "stage_2_deadline_hours": { - "type": "DOUBLE" - }, - "stage_2_num_parallel_trials": { - "type": "INT" - }, - "stage_2_num_selected_trials": { - "type": "INT" - }, - "stage_2_single_run_max_secs": { - "type": "INT" - }, - "stats_and_example_gen_dataflow_disk_size_gb": { - "type": "INT" - }, - "stats_and_example_gen_dataflow_machine_type": { - "type": "STRING" - }, - "stats_and_example_gen_dataflow_max_num_workers": { - "type": "INT" - }, - "study_spec_override": { - "type": "STRING" - }, - "target_column_name": { - "type": "STRING" - }, - "transform_dataflow_disk_size_gb": { - "type": "INT" - }, - "transform_dataflow_machine_type": { - "type": "STRING" - 
}, - "transform_dataflow_max_num_workers": { - "type": "INT" - }, - "transformations": { - "type": "STRING" - }, - "weight_column_name": { - "type": "STRING" - } - } - }, - "outputDefinitions": { - "artifacts": { - "model-evaluation-2-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-3-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-4-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - }, - "model-evaluation-evaluation_metrics": { - "artifactType": { - "schemaTitle": "system.Metrics", - "schemaVersion": "0.0.1" - } - } - } - } - }, - "schemaVersion": "2.0.0", - "sdkVersion": "kfp-1.8.11" - }, - "runtimeConfig": { - "parameters": { - "additional_experiments": { - "stringValue": "" - }, - "cv_trainer_worker_pool_specs_override": { - "stringValue": "" - }, - "dataflow_service_account": { - "stringValue": "" - }, - "dataflow_subnetwork": { - "stringValue": "" - }, - "dataflow_use_public_ips": { - "stringValue": "True" - }, - "disable_early_stopping": { - "stringValue": "False" - }, - "distill_batch_predict_machine_type": { - "stringValue": "n1-standard-16" - }, - "distill_batch_predict_max_replica_count": { - "intValue": "25" - }, - "distill_batch_predict_starting_replica_count": { - "intValue": "25" - }, - "distill_stage_1_deadline_hours": { - "doubleValue": 1.0 - }, - "encryption_spec_key_name": { - "stringValue": "" - }, - "evaluation_batch_predict_machine_type": { - "stringValue": "n1-standard-16" - }, - "evaluation_batch_predict_max_replica_count": { - "intValue": "25" - }, - "evaluation_batch_predict_starting_replica_count": { - "intValue": "25" - }, - "evaluation_dataflow_disk_size_gb": { - "intValue": "50" - }, - "evaluation_dataflow_machine_type": { - "stringValue": "n1-standard-4" - }, - "evaluation_dataflow_max_num_workers": { - "intValue": "25" - }, - "export_additional_model_without_custom_ops": { - "stringValue": "False" - }, - "optimization_objective_precision_value": { - "doubleValue": -1.0 - }, - "optimization_objective_recall_value": { - "doubleValue": -1.0 - }, - "reduce_search_space_mode": { - "stringValue": "regular" - }, - "run_distillation": { - "stringValue": "False" - }, - "run_evaluation": { - "stringValue": "False" - }, - "stage_1_tuner_worker_pool_specs_override": { - "stringValue": "" - }, - "stats_and_example_gen_dataflow_disk_size_gb": { - "intValue": "40" - }, - "stats_and_example_gen_dataflow_machine_type": { - "stringValue": "n1-standard-16" - }, - "stats_and_example_gen_dataflow_max_num_workers": { - "intValue": "25" - }, - "study_spec_override": { - "stringValue": "" - }, - "transform_dataflow_disk_size_gb": { - "intValue": "40" - }, - "transform_dataflow_machine_type": { - "stringValue": "n1-standard-16" - }, - "transform_dataflow_max_num_workers": { - "intValue": "25" - }, - "weight_column_name": { - "stringValue": "" - } - } - } -} \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py deleted file mode 100644 index 1afdbfa157..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Tabular Ensemble component spec.""" - -from typing import Optional - -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def automl_tabular_ensemble( - project: str, - location: str, - root_dir: str, - transform_output: Input[Artifact], - metadata: Input[Artifact], - dataset_schema: Input[Artifact], - tuning_result_input: Input[Artifact], - instance_baseline: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - model_architecture: Output[Artifact], - model: Output[Artifact], - unmanaged_container_model: Output[UnmanagedContainerModel], - model_without_custom_ops: Output[Artifact], - explanation_metadata: dsl.OutputPath(dict), - explanation_metadata_artifact: Output[Artifact], - explanation_parameters: dsl.OutputPath(dict), - warmup_data: Optional[Input[Dataset]] = None, - encryption_spec_key_name: Optional[str] = '', - export_additional_model_without_custom_ops: Optional[bool] = False, -): - # fmt: off - """Ensembles AutoML Tabular models. - - Args: - project: Project to run Cross-validation trainer. - location: Location for running the Cross-validation trainer. - root_dir: The Cloud Storage location to store the output. - transform_output: The transform output artifact. - metadata: The tabular example gen metadata. - dataset_schema: The schema of the dataset. - tuning_result_input: AutoML Tabular tuning - result. - instance_baseline: The instance baseline - used to calculate explanations. - warmup_data: The warm up data. Ensemble component will save the - warm up data together with the model artifact, used to warm up the model - when prediction server starts. - encryption_spec_key_name: Customer-managed encryption key. - export_additional_model_without_custom_ops: True if export - an additional model without custom TF operators to the - `model_without_custom_ops` output. - - Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - model_architecture: The architecture of the output model. - model: The output model. - model_without_custom_ops: The output model without custom TF operators, this output will be empty unless `export_additional_model_without_custom_ops` is set. - model_uri: The URI of the output model. - instance_schema_uri: The URI of the instance schema. - prediction_schema_uri: The URI of the prediction schema. - explanation_metadata: The explanation metadata used by Vertex online and batch explanations. - explanation_metadata: The explanation parameters used by Vertex online and batch explanations. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["ensemble", "--transform_output_path=', - transform_output.uri, - '", "--model_output_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model",' - ' "--custom_model_output_path=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/custom_model",' - ' "--error_file_path=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--export_custom_model=' - ), - export_additional_model_without_custom_ops, - '", "--metadata_path=', - metadata.uri, - '", "--dataset_schema_path=', - dataset_schema.uri, - '", "--tuning_result_input_path=', - tuning_result_input.uri, - '", "--instance_baseline_path=', - instance_baseline.uri, - '", "--warmup_data=', - warmup_data.uri, - '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - '", "--model_path=', - model.uri, - '", "--custom_model_path=', - model_without_custom_ops.uri, - '", "--explanation_metadata_path=', - explanation_metadata, - ',', - explanation_metadata_artifact.uri, - '", "--explanation_parameters_path=', - explanation_parameters, - '", "--model_architecture_path=', - model_architecture.uri, - ( - '", "--use_json=true",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py deleted file mode 100644 index ea36d7d297..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""AutoML Pipeline Finalizer component spec.""" - -from typing import Optional - -from kfp import dsl - - -@dsl.container_component -def automl_tabular_finalizer( - project: str, - location: str, - root_dir: str, - gcp_resources: dsl.OutputPath(str), - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Finalizes AutoML Tabular pipelines. - - Args: - project: Project to run Cross-validation trainer. - location: Location for running the Cross-validation trainer. - root_dir: The Cloud Storage location to store the output. - encryption_spec_key_name: Customer-managed encryption key. - - Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-finalizer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["cancel_l2l_tuner", "--error_file_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--cleanup_lro_job_infos=' - ), - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro"' + ']}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py deleted file mode 100644 index 8fc6b00ec9..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Infra Validator component spec.""" - -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel -from kfp import dsl -from kfp.dsl import Input - - -@dsl.container_component -def automl_tabular_infra_validator( - unmanaged_container_model: Input[UnmanagedContainerModel], # pylint: disable=unused-argument -): - # fmt: off - """Validates the trained AutoML Tabular model is a valid model. - - Args: - unmanaged_container_model: google.UnmanagedContainerModel for model - to be validated. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', - command=[], - args=['--executor_input', '{{$}}'], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py deleted file mode 100644 index 29091ded20..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Split Materialized Data component spec.""" - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def split_materialized_data( - materialized_data: Input[Dataset], - materialized_train_split: Output[Artifact], - materialized_eval_split: Output[Artifact], - materialized_test_split: Output[Artifact], -): - # fmt: off - """Splits materialized dataset into train, eval, and test data splits. - - The materialized dataset generated by the Feature Transform Engine consists of - all the splits - that were combined into the input transform dataset (i.e., train, eval, and - test splits). - This components splits the output materialized dataset into corresponding - materialized data splits - so that the splits can be used by down-stream training or evaluation - components. - - Args: - materialized_data: Materialized dataset output by the Feature - Transform Engine. - - Returns: - materialized_train_split: Path patern to materialized train split. - materialized_eval_split: Path patern to materialized eval split. - materialized_test_split: Path patern to materialized test split. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - command=[ - 'sh', - '-ec', - ( - 'program_path=$(mktemp -d)\nprintf "%s" "$0" >' - ' "$program_path/ephemeral_component.py"\npython3 -m' - ' kfp.components.executor_main ' - ' --component_module_path ' - ' "$program_path/ephemeral_component.py" ' - ' "$@"\n' - ), - ( - '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom' - ' typing import *\n\ndef _split_materialized_data(\n ' - ' materialized_data: Input[Dataset],\n ' - " materialized_train_split: OutputPath('MaterializedSplit'),\n " - " materialized_eval_split: OutputPath('MaterializedSplit'),\n " - " materialized_test_split: OutputPath('MaterializedSplit')):\n " - ' """Splits materialized_data into materialized_data test,' - ' train, and eval splits.\n\n Necessary adapter between FTE' - ' pipeline and trainer.\n\n Args:\n materialized_data:' - ' materialized_data dataset output by FTE.\n ' - ' materialized_train_split: Path patern to' - ' materialized_train_split.\n materialized_eval_split: Path' - ' patern to materialized_eval_split.\n ' - ' materialized_test_split: Path patern to' - ' materialized_test_split.\n """\n # pylint:' - ' disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n' - ' import json\n import tensorflow as tf\n # pylint:' - ' enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n' - " with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n " - ' artifact_path = f.read()\n\n # needed to import tf because' - ' this is a path in gs://\n with' - " tf.io.gfile.GFile(artifact_path, 'r') as f:\n " - ' materialized_data_json = json.load(f)\n\n if' - " 'tf_record_data_source' in materialized_data_json:\n " - ' file_patterns =' - " materialized_data_json['tf_record_data_source'][\n " - " 'file_patterns']\n elif 'avro_data_source' in" - ' materialized_data_json:\n file_patterns =' - " materialized_data_json['avro_data_source'][\n " - " 'file_patterns']\n elif 'parquet_data_source' in" - ' materialized_data_json:\n file_patterns =' - " materialized_data_json['parquet_data_source'][\n " - " 'file_patterns']\n else:\n raise ValueError(f'Unsupported" - " training data source: {materialized_data_json}')\n\n # we map" - ' indices to file patterns based on the ordering of insertion' - ' order\n # in our transform_data (see above in' - ' _generate_analyze_and_transform_data)\n with' - " tf.io.gfile.GFile(materialized_train_split, 'w') as f:\n " - ' f.write(file_patterns[0])\n\n with' - " tf.io.gfile.GFile(materialized_eval_split, 'w') as f:\n " - ' f.write(file_patterns[1])\n\n with' - " tf.io.gfile.GFile(materialized_test_split, 'w') as f:\n " - ' f.write(file_patterns[2])\n\n' - ), - ], - args=[ - '--executor_input', - '{{$}}', - '--function_to_execute', - '_split_materialized_data', - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py deleted file mode 100644 index 095837620d..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Tabular Stage 1 Tuner component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def automl_tabular_stage_1_tuner( - project: str, - location: str, - root_dir: str, - num_selected_trials: int, - deadline_hours: float, - num_parallel_trials: int, - single_run_max_secs: int, - metadata: Input[Artifact], - transform_output: Input[Artifact], - materialized_train_split: Input[Artifact], - materialized_eval_split: Input[Artifact], - gcp_resources: dsl.OutputPath(str), - tuning_result_output: Output[Artifact], - execution_metrics: dsl.OutputPath(dict), - study_spec_parameters_override: Optional[list] = [], - worker_pool_specs_override_json: Optional[list] = [], - reduce_search_space_mode: Optional[str] = 'regular', - num_selected_features: Optional[int] = 0, - disable_early_stopping: Optional[bool] = False, - feature_ranking: Optional[Input[Artifact]] = None, - tune_feature_selection_rate: Optional[bool] = False, - encryption_spec_key_name: Optional[str] = '', - run_distillation: Optional[bool] = False, -): - # fmt: off - """Searches AutoML Tabular architectures and selects the top trials. - - Args: - project: Project to run Cross-validation trainer. - location: Location for running the Cross-validation trainer. - root_dir: The Cloud Storage location to store the output. - study_spec_parameters_override: JSON study spec. E.g., - [{"parameter_id": "model_type","categorical_value_spec": {"values": - ["nn"]}}] - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - reduce_search_space_mode: The reduce search space mode. Possible - values: "regular" (default), "minimal", "full". - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - num_selected_features: Number of selected features. The number of - features to learn in the NN models. - deadline_hours: Number of hours the cross-validation trainer - should run. - disable_early_stopping: True if disable early stopping. Default - value is false. - num_parallel_trials: Number of parallel training trials. - single_run_max_secs: Max number of seconds each training trial runs. - metadata: The tabular example gen metadata. - transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. - materialized_eval_split: The materialized eval split. - encryption_spec_key_name: Customer-managed encryption key. - run_distillation: True if in distillation mode. The default value - is false. - - Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - tuning_result_output: The trained model and architectures. - execution_metrics: Core metrics in dictionary of component execution. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', - transform_output.uri, - '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "--feature_selection_result_path=', - feature_ranking.uri, - '", "--disable_early_stopping=', - disable_early_stopping, - '", "--tune_feature_selection_rate=', - tune_feature_selection_rate, - '", "--reduce_search_space_mode=', - reduce_search_space_mode, - ( - f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "--training_base_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' - ' "--num_parallel_trial=' - ), - num_parallel_trials, - '", "--single_run_max_secs=', - single_run_max_secs, - '", "--deadline_hours=', - deadline_hours, - '", "--num_selected_trials=', - num_selected_trials, - '", "--num_selected_features=', - num_selected_features, - '", "--lro_job_info=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' - ' "--error_file_path=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--metadata_path=' - ), - metadata.uri, - '", "--materialized_train_split=', - materialized_train_split.uri, - '", "--materialized_eval_split=', - materialized_eval_split.uri, - '", "--is_distill=', - run_distillation, - '", "--tuning_result_output_path=', - tuning_result_output.uri, - '", "--kms_key_name=', - encryption_spec_key_name, - '", "--gcp_resources_path=', - gcp_resources, - '", "--execution_metrics_path=', - execution_metrics, - ( - '", "--use_json=true", "--log_level=ERROR",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py deleted file mode 100644 index 6c7e915dbe..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Stats and Example Generation component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Output - - -@dsl.container_component -def tabular_stats_and_example_gen( - project: str, - location: str, - root_dir: str, - target_column_name: str, - prediction_type: str, - transformations: str, - dataset_schema: Output[Artifact], - dataset_stats: Output[Artifact], - train_split: Output[Dataset], - eval_split: Output[Dataset], - test_split: Output[Dataset], - test_split_json: dsl.OutputPath(list), - downsampled_test_split_json: dsl.OutputPath(list), - instance_baseline: Output[Artifact], - metadata: Output[Artifact], - gcp_resources: dsl.OutputPath(str), - weight_column_name: Optional[str] = '', - optimization_objective: Optional[str] = '', - optimization_objective_recall_value: Optional[float] = -1, - optimization_objective_precision_value: Optional[float] = -1, - transformations_path: Optional[str] = '', - request_type: Optional[str] = 'COLUMN_STATS_ONLY', - dataflow_machine_type: Optional[str] = 'n1-standard-16', - dataflow_max_num_workers: Optional[int] = 25, - dataflow_disk_size_gb: Optional[int] = 40, - dataflow_subnetwork: Optional[str] = '', - dataflow_use_public_ips: Optional[bool] = True, - dataflow_service_account: Optional[str] = '', - encryption_spec_key_name: Optional[str] = '', - run_distillation: Optional[bool] = False, - additional_experiments: Optional[str] = '', - additional_experiments_json: Optional[dict] = {}, - data_source_csv_filenames: Optional[str] = '', - data_source_bigquery_table_path: Optional[str] = '', - predefined_split_key: Optional[str] = '', - timestamp_split_key: Optional[str] = '', - stratified_split_key: Optional[str] = '', - training_fraction: Optional[float] = -1, - validation_fraction: Optional[float] = -1, - test_fraction: Optional[float] = -1, - quantiles: Optional[list] = [], - enable_probabilistic_inference: Optional[bool] = False, -): - # fmt: off - """Generates stats and training instances for tabular data. - - Args: - project: Project to run dataset statistics and example - generation. - location: Location for running dataset statistics and example - generation. - root_dir: The Cloud Storage location to store the output. - target_column_name: The target column name. - weight_column_name: The weight column name. - prediction_type: The prediction type. Supported values: - "classification", "regression". - optimization_objective: Objective function the model is optimizing - towards. The training process creates a model that maximizes/minimizes - the value of the objective function over the validation set. The - supported optimization objectives depend on the prediction type. If the - field is not set, a default objective function is used. - classification: "maximize-au-roc" (default) - Maximize the - area under the receiver operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a - specified precision value. - classification (multi-class): "minimize-log-loss" (default) - Minimize - log loss. - regression: "minimize-rmse" (default) - Minimize root-mean-squared - error (RMSE). 
"minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - optimization_objective_recall_value: Required when - optimization_objective is "maximize-precision-at-recall". Must be - between 0 and 1, inclusive. - optimization_objective_precision_value: Required when - optimization_objective is "maximize-recall-at-precision". Must be - between 0 and 1, inclusive. - transformations: Quote escaped JSON string for transformations. Each - transformation will apply transform function to given input column. And - the result will be used for training. When creating transformation for - BigQuery Struct column, the column should be flattened using "." as the - delimiter. - transformations_path: Path to a GCS file containing JSON - string for transformations. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. - encryption_spec_key_name: Customer-managed encryption key. - run_distillation: True if in distillation mode. The default value - is false. - - Returns: - dataset_schema: The schema of the dataset. - dataset_stats: The stats of the dataset. - train_split: The train split. - eval_split: The eval split. - test_split: The test split. - test_split_json: The test split JSON object. - downsampled_test_split_json: The downsampled test split JSON object. - instance_baseline: The instance baseline used to calculate explanations. - metadata: The tabular example gen metadata. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - '", "args": ["stats_generator",', - '"--train_spec={\\"prediction_type\\": \\"', - prediction_type, - '\\", \\"target_column\\": \\"', - target_column_name, - '\\", \\"optimization_objective\\": \\"', - optimization_objective, - '\\", \\"weight_column_name\\": \\"', - weight_column_name, - '\\", \\"transformations\\": ', - transformations, - ', \\"quantiles\\": ', - quantiles, - ', \\"enable_probabilistic_inference\\": ', - enable_probabilistic_inference, - '}", "--transformations_override_path=', - transformations_path, - '", "--data_source_csv_filenames=', - data_source_csv_filenames, - '", "--data_source_bigquery_table_path=', - data_source_bigquery_table_path, - '", "--predefined_split_key=', - predefined_split_key, - '", "--timestamp_split_key=', - timestamp_split_key, - '", "--stratified_split_key=', - stratified_split_key, - '", "--training_fraction=', - training_fraction, - '", "--validation_fraction=', - validation_fraction, - '", "--test_fraction=', - test_fraction, - '", "--target_column=', - target_column_name, - '", "--request_type=', - request_type, - '", "--optimization_objective_recall_value=', - optimization_objective_recall_value, - '", "--optimization_objective_precision_value=', - optimization_objective_precision_value, - '", "--example_gen_gcs_output_prefix=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/example_gen_output",' - ' "--dataset_stats_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/stats/",' - ' "--stats_result_path=' - ), - dataset_stats.uri, - '", "--dataset_schema_path=', - dataset_schema.uri, - ( - f'", "--job_name=tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' - ), - '", "--dataflow_project=', - project, - '", "--error_file_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--dataflow_staging_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' - ' "--dataflow_tmp_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' - ' "--dataflow_max_num_workers=' - ), - dataflow_max_num_workers, - '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - '", "--dataflow_machine_type=', - dataflow_machine_type, - '", "--dataflow_disk_size_gb=', - dataflow_disk_size_gb, - '", "--dataflow_kms_key=', - encryption_spec_key_name, - '", "--dataflow_subnetwork_fully_qualified=', - dataflow_subnetwork, - '", 
"--dataflow_use_public_ips=', - dataflow_use_public_ips, - '", "--dataflow_service_account=', - dataflow_service_account, - '", "--is_distill=', - run_distillation, - '", "--additional_experiments=', - additional_experiments, - '", "--metadata_path=', - metadata.uri, - '", "--train_split=', - train_split.uri, - '", "--eval_split=', - eval_split.uri, - '", "--test_split=', - test_split.uri, - '", "--test_split_for_batch_prediction_component=', - test_split_json, - ( - '", "--downsampled_test_split_for_batch_prediction_component=' - ), - downsampled_test_split_json, - '", "--instance_baseline_path=', - instance_baseline.uri, - '", "--lro_job_info=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' - ' "--gcp_resources_path=' - ), - gcp_resources, - ( - '", "--parse_json=true",' - ' "--generate_additional_downsample_test_split=true",' - ' "--executor_input={{$.json_escape[1]}}"]}}]}}' - ), - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py deleted file mode 100644 index d4ff9c5473..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""AutoML Training Configurator and Validator component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def training_configurator_and_validator( - dataset_stats: Input[Artifact], - split_example_counts: str, - training_schema: Input[Artifact], - instance_schema: Input[Artifact], - metadata: Output[Artifact], - instance_baseline: Output[Artifact], - target_column: Optional[str] = '', - weight_column: Optional[str] = '', - prediction_type: Optional[str] = '', - optimization_objective: Optional[str] = '', - optimization_objective_recall_value: Optional[float] = -1, - optimization_objective_precision_value: Optional[float] = -1, - run_evaluation: Optional[bool] = False, - run_distill: Optional[bool] = False, - enable_probabilistic_inference: Optional[bool] = False, - time_series_identifier_column: Optional[str] = '', - time_column: Optional[str] = '', - time_series_attribute_columns: Optional[list] = [], - available_at_forecast_columns: Optional[list] = [], - unavailable_at_forecast_columns: Optional[list] = [], - quantiles: Optional[list] = [], - context_window: Optional[int] = -1, - forecast_horizon: Optional[int] = -1, - forecasting_model_type: Optional[str] = '', - forecasting_transformations: Optional[dict] = {}, - stage_1_deadline_hours: Optional[float] = None, - stage_2_deadline_hours: Optional[float] = None, - group_columns: Optional[list] = None, - group_total_weight: float = 0.0, - temporal_total_weight: float = 0.0, - group_temporal_total_weight: float = 0.0, -): - # fmt: off - """Configures training and validates data and user-input configurations. - - Args: - dataset_stats: Dataset stats generated by - feature transform engine. - split_example_counts: JSON string of data split example counts for - train, validate, and test splits. - training_schema_path: Schema of input data to the tf_model - at training time. - instance_schema: Schema of input data to the tf_model at - serving time. - target_column: Target column of input data. - weight_column: Weight column of input data. - prediction_type: Model prediction type. One of "classification", - "regression", "time_series". - optimization_objective: Objective function the model is optimizing - towards. The training process creates a model that maximizes/minimizes - the value of the objective function over the validation set. The - supported optimization objectives depend on the prediction type. If the - field is not set, a default objective function is used. - classification: "maximize-au-roc" (default) - Maximize the - area under the receiver operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a - specified precision value. - classification (multi-class): "minimize-log-loss" (default) - Minimize - log loss. - regression: "minimize-rmse" (default) - Minimize root-mean-squared - error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - optimization_objective_recall_value: Required when - optimization_objective is "maximize-precision-at-recall". Must be - between 0 and 1, inclusive. 
- optimization_objective_precision_value: Required when - optimization_objective is "maximize-recall-at-precision". Must be - between 0 and 1, inclusive. - run_evaluation: Whether we are running evaluation in the training - pipeline. - run_distill: Whether the distillation should be applied to the - training. - enable_probabilistic_inference: If probabilistic inference is - enabled, the model will fit a distribution that captures the uncertainty - of a prediction. At inference time, the predictive distribution is used - to make a point prediction that minimizes the optimization objective. - For example, the mean of a predictive distribution is the point - prediction that minimizes RMSE loss. If quantiles are specified, then - the quantiles of the distribution are also returned. - time_series_identifier_column: Time series idenfier column. Used by - forecasting only. - time_column: The column that indicates the time. Used by forecasting - only. - time_series_attribute_columns: The column names of the time series - attributes. - available_at_forecast_columns: The names of the columns that are - available at forecast time. - unavailable_at_forecast_columns: The names of the columns that are - not available at forecast time. - quantiles: All quantiles that the model need to predict. - context_window: The length of the context window. - forecast_horizon: The length of the forecast horizon. - forecasting_model_type: The model types, e.g. l2l, seq2seq, tft. - forecasting_transformations: Dict mapping auto and/or type-resolutions to - feature columns. The supported types are auto, categorical, numeric, - text, and timestamp. - stage_1_deadline_hours: Stage 1 training budget in - hours. - stage_2_deadline_hours: Stage 2 training budget in - hours. - group_columns: A list of time series attribute column - names that define the time series hierarchy. - group_total_weight: The weight of the loss for - predictions aggregated over time series in the same group. - temporal_total_weight: The weight of the loss for - predictions aggregated over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for - predictions aggregated over both the horizon and time series in the same - hierarchy group. - - Returns: - metadata: The tabular example gen metadata. 
- """ - # fmt: on - - return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', - command=[], - args=[ - 'training_configurator_and_validator', - dsl.ConcatPlaceholder( - items=['--instance_schema_path=', instance_schema.uri] - ), - dsl.ConcatPlaceholder( - items=['--training_schema_path=', training_schema.uri] - ), - dsl.ConcatPlaceholder( - items=['--dataset_stats_path=', dataset_stats.uri] - ), - dsl.ConcatPlaceholder( - items=['--split_example_counts=', split_example_counts] - ), - dsl.ConcatPlaceholder(items=['--target_column=', target_column]), - dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), - dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), - dsl.ConcatPlaceholder( - items=['--optimization_objective=', optimization_objective] - ), - dsl.ConcatPlaceholder( - items=[ - '--optimization_objective_recall_value=', - optimization_objective_recall_value, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--optimization_objective_precision_value=', - optimization_objective_precision_value, - ] - ), - dsl.ConcatPlaceholder(items=['--metadata_path=', metadata.uri]), - dsl.ConcatPlaceholder( - items=['--instance_baseline_path=', instance_baseline.uri] - ), - dsl.ConcatPlaceholder(items=['--run_evaluation=', run_evaluation]), - dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), - dsl.ConcatPlaceholder( - items=[ - '--enable_probabilistic_inference=', - enable_probabilistic_inference, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--time_series_identifier_column=', - time_series_identifier_column, - ] - ), - dsl.ConcatPlaceholder(items=['--time_column=', time_column]), - dsl.ConcatPlaceholder( - items=[ - '--time_series_attribute_columns=', - time_series_attribute_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--available_at_forecast_columns=', - available_at_forecast_columns, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--unavailable_at_forecast_columns=', - unavailable_at_forecast_columns, - ] - ), - dsl.IfPresentPlaceholder( - input_name='quantiles', - then=dsl.ConcatPlaceholder( - items=[ - '--quantiles=', - quantiles, - ] - ), - ), - dsl.ConcatPlaceholder(items=['--context_window=', context_window]), - dsl.ConcatPlaceholder( - items=['--forecast_horizon=', forecast_horizon] - ), - dsl.ConcatPlaceholder( - items=['--forecasting_model_type=', forecasting_model_type] - ), - dsl.ConcatPlaceholder( - items=[ - '--forecasting_transformations=', - forecasting_transformations, - ] - ), - dsl.IfPresentPlaceholder( - input_name='stage_1_deadline_hours', - then=dsl.ConcatPlaceholder( - items=[ - '--stage_1_deadline_hours=', - stage_1_deadline_hours, - ] - ), - ), - dsl.IfPresentPlaceholder( - input_name='stage_2_deadline_hours', - then=dsl.ConcatPlaceholder( - items=[ - '--stage_2_deadline_hours=', - stage_2_deadline_hours, - ] - ), - ), - dsl.IfPresentPlaceholder( - input_name='group_columns', - then=dsl.ConcatPlaceholder( - items=['--group_columns=', group_columns] - ), - ), - dsl.IfPresentPlaceholder( - input_name='group_total_weight', - then=dsl.ConcatPlaceholder( - items=['--group_total_weight=', group_total_weight] - ), - ), - dsl.IfPresentPlaceholder( - input_name='temporal_total_weight', - then=dsl.ConcatPlaceholder( - items=['--temporal_total_weight=', temporal_total_weight] - ), - ), - dsl.IfPresentPlaceholder( - input_name='group_temporal_total_weight', - then=dsl.ConcatPlaceholder( - items=[ - '--group_temporal_total_weight=', - group_temporal_total_weight, - ] - ), - ), - 
], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py deleted file mode 100644 index c9ab7ef401..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""AutoML Transform component spec.""" - -from typing import Optional - -from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Input -from kfp.dsl import Output - - -@dsl.container_component -def automl_tabular_transform( - project: str, - location: str, - root_dir: str, - metadata: Input[Artifact], - dataset_schema: Input[Artifact], - train_split: Input[Dataset], - eval_split: Input[Dataset], - test_split: Input[Dataset], - materialized_train_split: Output[Artifact], - materialized_eval_split: Output[Artifact], - materialized_test_split: Output[Artifact], - training_schema_uri: Output[Artifact], - transform_output: Output[Artifact], - gcp_resources: dsl.OutputPath(str), - dataflow_machine_type: Optional[str] = 'n1-standard-16', - dataflow_max_num_workers: Optional[int] = 25, - dataflow_disk_size_gb: Optional[int] = 40, - dataflow_subnetwork: Optional[str] = '', - dataflow_use_public_ips: Optional[bool] = True, - dataflow_service_account: Optional[str] = '', - encryption_spec_key_name: Optional[str] = '', -): - # fmt: off - """Transforms raw features to engineered features. - - Args: - project: Project to run Cross-validation trainer. - location: Location for running the Cross-validation trainer. - root_dir: The Cloud Storage location to store the output. - metadata: The tabular example gen metadata. - dataset_schema: The schema of the dataset. - train_split: The train split. - eval_split: The eval split. - test_split: The test split. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. - encryption_spec_key_name: Customer-managed encryption key. - - Returns: - materialized_train_split: The materialized train split. - materialized_eval_split: The materialized eval split. - materialized_eval_split: The materialized test split. - training_schema_uri: The training schema. 
- transform_output: The transform output artifact. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - """ - # fmt: on - - return dsl.ContainerSpec( - image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', - command=[ - 'python3', - '-u', - '-m', - 'google_cloud_pipeline_components.container.v1.custom_job.launcher', - ], - args=[ - '--type', - 'CustomJob', - '--project', - project, - '--location', - location, - '--gcp_resources', - gcp_resources, - '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', - ( - '", "args": ["transform", "--is_mp=true",' - ' "--transform_output_artifact_path=' - ), - transform_output.uri, - '", "--transform_output_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform",' - ' "--materialized_splits_output_path=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform_materialized",' - ' "--metadata_path=' - ), - metadata.uri, - '", "--dataset_schema_path=', - dataset_schema.uri, - '", "--train_split=', - train_split.uri, - '", "--eval_split=', - eval_split.uri, - '", "--test_split=', - test_split.uri, - '", "--materialized_train_split=', - materialized_train_split.uri, - '", "--materialized_eval_split=', - materialized_eval_split.uri, - '", "--materialized_test_split=', - materialized_test_split.uri, - '", "--training_schema_path=', - training_schema_uri.uri, - ( - f'", "--job_name=automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' - ), - '", "--dataflow_project=', - project, - '", "--error_file_path=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' - ' "--dataflow_staging_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' - ' "--dataflow_tmp_dir=' - ), - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' - ' "--dataflow_max_num_workers=' - ), - dataflow_max_num_workers, - '", "--dataflow_machine_type=', - dataflow_machine_type, - '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - '", "--dataflow_disk_size_gb=', - dataflow_disk_size_gb, - '", "--dataflow_subnetwork_fully_qualified=', - dataflow_subnetwork, - '", "--dataflow_use_public_ips=', - dataflow_use_public_ips, - '", "--dataflow_kms_key=', - encryption_spec_key_name, - '", "--dataflow_service_account=', - dataflow_service_account, - '", "--lro_job_info=', - root_dir, - ( - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' - ' "--gcp_resources_path=' - ), - gcp_resources, - '"]}}]}}', - ] - ), - ], - ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py deleted file mode 100644 index 
2c19976e47..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py +++ /dev/null @@ -1,1435 +0,0 @@ -"""Util functions for AutoML Tabular pipeline.""" - -import json -import math -import os -import pathlib -from typing import Any, Dict, List, Optional, Tuple -import warnings - -_DEFAULT_NUM_PARALLEL_TRAILS = 35 -_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 -_NUM_FOLDS = 5 -_DISTILL_TOTAL_TRIALS = 100 -_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' -_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 -_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 -_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' -_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 -_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 -_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' -_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 -_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 -_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 - -# Needed because we reference the AutoML Tabular V2 pipeline. -_GCPC_STAGING_PATH = pathlib.Path( - __file__ -).parent.parent.parent.parent.resolve() -_GCPC_PREVIEW_TABULAR_PATH = ( - _GCPC_STAGING_PATH / 'preview' / 'automl' / 'tabular' -) - - -# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag -# to signify FTE usage instead of the presence of num_selected_features. -def _get_default_pipeline_params( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: Optional[int] = None, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[float] = None, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - max_selected_features: Optional[int] = None, - apply_feature_selection_tuning: bool = False, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - 
evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - run_distillation: bool = False, - distill_batch_predict_machine_type: Optional[str] = None, - distill_batch_predict_starting_replica_count: Optional[int] = None, - distill_batch_predict_max_replica_count: Optional[int] = None, - stage_1_tuning_result_artifact_uri: Optional[str] = None, - quantiles: Optional[List[float]] = None, - enable_probabilistic_inference: bool = False, - num_selected_features: Optional[int] = None, - model_display_name: str = '', - model_description: str = '', -) -> Dict[str, Any]: - """Get the AutoML Tabular v1 default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The path to a GCS file containing the transformations to - apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - study_spec_parameters_override: The list for overriding study spec. The list - should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. 
The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - max_selected_features: number of features to select for training, - apply_feature_selection_tuning: tuning feature selection rate if true. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - quantiles: Quantiles to use for probabilistic inference. 
Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. At inference time, the predictive distribution is used to make - a point prediction that minimizes the optimization objective. For example, - the mean of a predictive distribution is the point prediction that - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - distribution are also returned. - num_selected_features: Number of selected features for feature selection, - defaults to None, in which case all features are used. If specified, - enable_probabilistic_inference and run_distillation cannot be enabled. - model_display_name: The display name of the uploaded Vertex model. - model_description: The description for the uploaded model. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - if not study_spec_parameters_override: - study_spec_parameters_override = [] - if not stage_1_tuner_worker_pool_specs_override: - stage_1_tuner_worker_pool_specs_override = [] - if not cv_trainer_worker_pool_specs_override: - cv_trainer_worker_pool_specs_override = [] - if not quantiles: - quantiles = [] - - parameter_values = {} - parameters = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column': target_column, - 'prediction_type': prediction_type, - 'data_source_csv_filenames': data_source_csv_filenames, - 'data_source_bigquery_table_path': data_source_bigquery_table_path, - 'predefined_split_key': predefined_split_key, - 'timestamp_split_key': timestamp_split_key, - 'stratified_split_key': stratified_split_key, - 'training_fraction': training_fraction, - 'validation_fraction': validation_fraction, - 'test_fraction': test_fraction, - 'optimization_objective': optimization_objective, - 'train_budget_milli_node_hours': train_budget_milli_node_hours, - 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, - 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, - 'stage_2_num_selected_trials': stage_2_num_selected_trials, - 'weight_column': weight_column, - 'optimization_objective_recall_value': ( - optimization_objective_recall_value - ), - 'optimization_objective_precision_value': ( - optimization_objective_precision_value - ), - 'study_spec_parameters_override': study_spec_parameters_override, - 'stage_1_tuner_worker_pool_specs_override': ( - stage_1_tuner_worker_pool_specs_override - ), - 'cv_trainer_worker_pool_specs_override': ( - cv_trainer_worker_pool_specs_override - ), - 'export_additional_model_without_custom_ops': ( - export_additional_model_without_custom_ops - ), - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'dataflow_service_account': dataflow_service_account, - 'encryption_spec_key_name': encryption_spec_key_name, - 'max_selected_features': max_selected_features, - 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, - 'quantiles': quantiles, - 'enable_probabilistic_inference': enable_probabilistic_inference, - 'model_display_name': model_display_name, - 'model_description': model_description, - } - parameter_values.update( - {param: value for param, value in parameters.items() if value is not None} - ) - - if run_evaluation: - eval_parameters = { - 'evaluation_batch_predict_machine_type': ( - 
evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_batch_explain_machine_type': ( - evaluation_batch_explain_machine_type - ), - 'evaluation_batch_explain_starting_replica_count': ( - evaluation_batch_explain_starting_replica_count - ), - 'evaluation_batch_explain_max_replica_count': ( - evaluation_batch_explain_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_starting_num_workers': ( - evaluation_dataflow_starting_num_workers - ), - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'run_evaluation': run_evaluation, - } - parameter_values.update( - { - param: value - for param, value in eval_parameters.items() - if value is not None - } - ) - - # V1 pipeline without FTE - if num_selected_features is None: - if not additional_experiments: - additional_experiments = {} - - parameters = { - 'transformations': transformations, - 'stats_and_example_gen_dataflow_machine_type': ( - stats_and_example_gen_dataflow_machine_type - ), - 'stats_and_example_gen_dataflow_max_num_workers': ( - stats_and_example_gen_dataflow_max_num_workers - ), - 'stats_and_example_gen_dataflow_disk_size_gb': ( - stats_and_example_gen_dataflow_disk_size_gb - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': ( - transform_dataflow_max_num_workers - ), - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'additional_experiments': additional_experiments, - } - parameter_values.update( - { - param: value - for param, value in parameters.items() - if value is not None - } - ) - - if apply_feature_selection_tuning: - parameter_values.update({ - 'apply_feature_selection_tuning': apply_feature_selection_tuning, - }) - - if run_distillation: - distillation_parameters = { - 'distill_batch_predict_machine_type': ( - distill_batch_predict_machine_type - ), - 'distill_batch_predict_starting_replica_count': ( - distill_batch_predict_starting_replica_count - ), - 'distill_batch_predict_max_replica_count': ( - distill_batch_predict_max_replica_count - ), - 'run_distillation': run_distillation, - } - parameter_values.update( - { - param: value - for param, value in distillation_parameters.items() - if value is not None - } - ) - - # V2 pipeline (with FTE) - else: - if run_distillation: - raise ValueError( - 'Distillation is currently not supported' - ' when num_selected_features is specified.' 
- ) - - parameters = { - 'num_selected_features': num_selected_features, - 'dataset_level_custom_transformation_definitions': [], - 'dataset_level_transformations': [], - 'tf_auto_transform_features': {}, - 'tf_custom_transformation_definitions': [], - 'legacy_transformations_path': transformations, - 'feature_transform_engine_dataflow_machine_type': ( - transform_dataflow_machine_type - ), - 'feature_transform_engine_dataflow_max_num_workers': ( - transform_dataflow_max_num_workers - ), - 'feature_transform_engine_dataflow_disk_size_gb': ( - transform_dataflow_disk_size_gb - ), - } - parameter_values.update( - { - param: value - for param, value in parameters.items() - if value is not None - } - ) - - return parameter_values - - -def get_automl_tabular_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: Optional[int] = None, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, - run_distillation: bool = False, - distill_batch_predict_machine_type: Optional[str] = None, - distill_batch_predict_starting_replica_count: Optional[int] = None, - distill_batch_predict_max_replica_count: Optional[int] = None, - stage_1_tuning_result_artifact_uri: 
Optional[str] = None, - quantiles: Optional[List[float]] = None, - enable_probabilistic_inference: bool = False, - num_selected_features: Optional[int] = None, - model_display_name: str = '', - model_description: str = '', -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular v1 default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The path to a GCS file containing the transformations to - apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - study_spec_parameters_override: The list for overriding study spec. The list - should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. 
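# _get_default_pipeline_params above only forwards arguments that were
# explicitly set: any parameter left as None is filtered out of
# parameter_values, so the compiled pipeline template's own default for that
# parameter stays in effect. A standalone sketch of that filtering pattern
# (the values below are illustrative placeholders, not real defaults):
overrides = {
    'weight_column': 'wt',
    'transform_dataflow_machine_type': None,   # unset -> template default
    'dataflow_use_public_ips': True,
}
parameter_values = {k: v for k, v in overrides.items() if v is not None}
print(parameter_values)
# {'weight_column': 'wt', 'dataflow_use_public_ips': True}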
- transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles - are allowed of values between 0 and 1, exclusive. Represents the quantiles - to use for that objective. Quantiles must be unique. - enable_probabilistic_inference: If probabilistic inference is enabled, the - model will fit a distribution that captures the uncertainty of a - prediction. At inference time, the predictive distribution is used to make - a point prediction that minimizes the optimization objective. For example, - the mean of a predictive distribution is the point prediction that - minimizes RMSE loss. If quantiles are specified, then the quantiles of the - distribution are also returned. - num_selected_features: Number of selected features for feature selection, - defaults to None, in which case all features are used. - model_display_name: The display name of the uploaded Vertex model. 
- model_description: The description for the uploaded model. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - parameter_values = _get_default_pipeline_params( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - study_spec_parameters_override=study_spec_parameters_override, - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - dataflow_service_account=dataflow_service_account, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, - evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, - evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - run_distillation=run_distillation, - distill_batch_predict_machine_type=distill_batch_predict_machine_type, - distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, - distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - quantiles=quantiles, - 
enable_probabilistic_inference=enable_probabilistic_inference, - num_selected_features=num_selected_features, - model_display_name=model_display_name, - model_description=model_description, - ) - - # V1 pipeline without FTE - if num_selected_features is None: - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), 'automl_tabular_pipeline.yaml' - ) - - # V2 pipeline with FTE - else: - pipeline_definition_path = os.path.join( - _GCPC_PREVIEW_TABULAR_PATH, - 'automl_tabular_v2_pipeline.yaml', - ) - - return pipeline_definition_path, parameter_values - - -def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: - """Convert json input dict to encoded parameter string. - - This function is required due to the limitation on YAML component definition - that YAML definition does not have a keyword for apply quote escape, so the - JSON argument's quote must be manually escaped using this function. - - Args: - input_dict: The input json dictionary. - - Returns: - The encoded string used for parameter. - """ - if not input_dict: - return '' - out = json.dumps(json.dumps(input_dict)) - return out[1:-1] # remove the outside quotes, e.g., "foo" -> foo - - -def get_skip_evaluation_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column_name: str, - prediction_type: str, - optimization_objective: str, - transformations: Dict[str, Any], - split_spec: Dict[str, Any], - data_source: Dict[str, Any], - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, - weight_column_name: str = '', - study_spec_override: Optional[Dict[str, Any]] = None, - optimization_objective_recall_value: float = -1, - optimization_objective_precision_value: float = -1, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', - stats_and_example_gen_dataflow_max_num_workers: int = 25, - stats_and_example_gen_dataflow_disk_size_gb: int = 40, - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', - additional_experiments: Optional[Dict[str, Any]] = None, -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular training pipeline that skips evaluation. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column_name: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The transformations to apply. - split_spec: The split spec. - data_source: The data source. 
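# input_dictionary_to_parameter above JSON-encodes the dict twice and strips
# the outer quotes, so the embedded quotes stay escaped when the value is
# spliced into a YAML component definition as a plain string. A minimal
# standalone restatement of that behaviour, using only the standard library:
import json

def _escape_like_input_dictionary_to_parameter(input_dict):
    if not input_dict:
        return ''
    out = json.dumps(json.dumps(input_dict))
    return out[1:-1]  # drop the outer quotes, e.g. "foo" -> foo

escaped = _escape_like_input_dictionary_to_parameter({'auto': {'column_name': 'f1'}})
print(escaped)  # {\"auto\": {\"column_name\": \"f1\"}}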
- train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - weight_column_name: The weight column name. - study_spec_override: The dictionary for overriding study spec. The - dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - - Returns: - Tuple of pipeline_definition_path and parameter_values. 
- """ - return get_default_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column_name=target_column_name, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - split_spec=split_spec, - data_source=data_source, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - weight_column_name=weight_column_name, - study_spec_override=study_spec_override, - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - run_evaluation=False, - run_distillation=False, - ) - - -def get_default_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column_name: str, - prediction_type: str, - optimization_objective: str, - transformations: Dict[str, Any], - split_spec: Dict[str, Any], - data_source: Dict[str, Any], - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, - weight_column_name: str = '', - study_spec_override: Optional[Dict[str, Any]] = None, - optimization_objective_recall_value: float = -1, - optimization_objective_precision_value: float = -1, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', - stats_and_example_gen_dataflow_max_num_workers: int = 25, - stats_and_example_gen_dataflow_disk_size_gb: int = 40, - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: str = '', - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, - evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, - evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, - evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, - evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, - evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, - run_distillation: bool = False, - distill_batch_predict_machine_type: str = 'n1-standard-16', - distill_batch_predict_starting_replica_count: int = 25, - distill_batch_predict_max_replica_count: int = 25, -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular default training pipeline. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column_name: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The transformations to apply. - split_spec: The split spec. - data_source: The data source. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - weight_column_name: The weight column name. - study_spec_override: The dictionary for overriding study spec. The - dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. 
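# A typical call to get_automl_tabular_pipeline_and_parameters defined above:
# only the arguments that are set end up in parameter_values, and the returned
# path points at the packaged YAML template (the v1 template, or the FTE-based
# v2 template when num_selected_features is given). The project, bucket and
# column names below are placeholders, not values taken from this module.
template_path, parameter_values = get_automl_tabular_pipeline_and_parameters(
    project='my-project',
    location='us-central1',
    root_dir='gs://my-bucket/pipeline_root',
    target_column='label',
    prediction_type='classification',
    optimization_objective='maximize-au-prc',
    transformations='gs://my-bucket/transform_config.json',
    train_budget_milli_node_hours=1000,
    data_source_csv_filenames='gs://my-bucket/train.csv',
    training_fraction=0.8,
    validation_fraction=0.1,
    test_fraction=0.1,
)
# The returned template path and parameter values can then be handed to the
# pipeline submission client of your choice (e.g. a Vertex AI PipelineJob).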
- transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. - evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - run_distillation: Whether to run distill in the training pipeline. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - warnings.warn( - 'This method is deprecated,' - ' please use get_automl_tabular_pipeline_and_parameters instead.' - ) - - if stage_1_num_parallel_trials <= 0: - stage_1_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS - - if stage_2_num_parallel_trials <= 0: - stage_2_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS - - hours = float(train_budget_milli_node_hours) / 1000.0 - multiplier = stage_1_num_parallel_trials * hours / 500.0 - stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0) - phase_2_rounds = int( - math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials + 0.5 - ) - if phase_2_rounds < 1: - phase_2_rounds = 1 - - # All of magic number "1.3" above is because the trial doesn't always finish - # in time_per_trial. 1.3 is an empirical safety margin here. - stage_1_deadline_secs = int( - hours * 3600.0 - 1.3 * stage_1_single_run_max_secs * phase_2_rounds - ) - - if stage_1_deadline_secs < hours * 3600.0 * 0.5: - stage_1_deadline_secs = int(hours * 3600.0 * 0.5) - # Phase 1 deadline is the same as phase 2 deadline in this case. Phase 2 - # can't finish in time after the deadline is cut, so adjust the time per - # trial to meet the deadline. - stage_1_single_run_max_secs = int( - stage_1_deadline_secs / (1.3 * phase_2_rounds) - ) - - reduce_search_space_mode = 'minimal' - if multiplier > 2: - reduce_search_space_mode = 'regular' - if multiplier > 4: - reduce_search_space_mode = 'full' - - # Stage 2 number of trials is stage_1_num_selected_trials * - # _NUM_FOLDS, which should be equal to phase_2_rounds * - # stage_2_num_parallel_trials. 
Use this information to calculate - # stage_1_num_selected_trials: - stage_1_num_selected_trials = int( - phase_2_rounds * stage_2_num_parallel_trials / _NUM_FOLDS - ) - stage_1_deadline_hours = stage_1_deadline_secs / 3600.0 - - stage_2_deadline_hours = hours - stage_1_deadline_hours - stage_2_single_run_max_secs = stage_1_single_run_max_secs - - parameter_values = { - 'project': project, - 'location': location, - 'root_dir': root_dir, - 'target_column_name': target_column_name, - 'prediction_type': prediction_type, - 'optimization_objective': optimization_objective, - 'transformations': input_dictionary_to_parameter(transformations), - 'split_spec': input_dictionary_to_parameter(split_spec), - 'data_source': input_dictionary_to_parameter(data_source), - 'stage_1_deadline_hours': stage_1_deadline_hours, - 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, - 'stage_1_num_selected_trials': stage_1_num_selected_trials, - 'stage_1_single_run_max_secs': stage_1_single_run_max_secs, - 'reduce_search_space_mode': reduce_search_space_mode, - 'stage_2_deadline_hours': stage_2_deadline_hours, - 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, - 'stage_2_num_selected_trials': stage_2_num_selected_trials, - 'stage_2_single_run_max_secs': stage_2_single_run_max_secs, - 'weight_column_name': weight_column_name, - 'optimization_objective_recall_value': ( - optimization_objective_recall_value - ), - 'optimization_objective_precision_value': ( - optimization_objective_precision_value - ), - 'study_spec_override': input_dictionary_to_parameter(study_spec_override), - 'stage_1_tuner_worker_pool_specs_override': input_dictionary_to_parameter( - stage_1_tuner_worker_pool_specs_override - ), - 'cv_trainer_worker_pool_specs_override': input_dictionary_to_parameter( - cv_trainer_worker_pool_specs_override - ), - 'export_additional_model_without_custom_ops': ( - export_additional_model_without_custom_ops - ), - 'stats_and_example_gen_dataflow_machine_type': ( - stats_and_example_gen_dataflow_machine_type - ), - 'stats_and_example_gen_dataflow_max_num_workers': ( - stats_and_example_gen_dataflow_max_num_workers - ), - 'stats_and_example_gen_dataflow_disk_size_gb': ( - stats_and_example_gen_dataflow_disk_size_gb - ), - 'transform_dataflow_machine_type': transform_dataflow_machine_type, - 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, - 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, - 'dataflow_subnetwork': dataflow_subnetwork, - 'dataflow_use_public_ips': dataflow_use_public_ips, - 'encryption_spec_key_name': encryption_spec_key_name, - } - if additional_experiments: - parameter_values.update( - { - 'additional_experiments': input_dictionary_to_parameter( - additional_experiments - ) - } - ) - if run_evaluation: - parameter_values.update({ - 'dataflow_service_account': dataflow_service_account, - 'evaluation_batch_predict_machine_type': ( - evaluation_batch_predict_machine_type - ), - 'evaluation_batch_predict_starting_replica_count': ( - evaluation_batch_predict_starting_replica_count - ), - 'evaluation_batch_predict_max_replica_count': ( - evaluation_batch_predict_max_replica_count - ), - 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, - 'evaluation_dataflow_max_num_workers': ( - evaluation_dataflow_max_num_workers - ), - 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, - 'run_evaluation': run_evaluation, - }) - if run_distillation: - # All of magic number "1.3" above is because the trial doesn't always 
finish - # in time_per_trial. 1.3 is an empirical safety margin here. - distill_stage_1_deadline_hours = ( - math.ceil( - float(_DISTILL_TOTAL_TRIALS) - / parameter_values['stage_1_num_parallel_trials'] - ) - * parameter_values['stage_1_single_run_max_secs'] - * 1.3 - / 3600.0 - ) - - parameter_values.update({ - 'distill_stage_1_deadline_hours': distill_stage_1_deadline_hours, - 'distill_batch_predict_machine_type': ( - distill_batch_predict_machine_type - ), - 'distill_batch_predict_starting_replica_count': ( - distill_batch_predict_starting_replica_count - ), - 'distill_batch_predict_max_replica_count': ( - distill_batch_predict_max_replica_count - ), - 'run_distillation': run_distillation, - }) - pipeline_definition_path = os.path.join( - pathlib.Path(__file__).parent.resolve(), - 'deprecated/default_pipeline.json', - ) - return pipeline_definition_path, parameter_values - - -def get_skip_architecture_search_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column: str, - prediction_type: str, - optimization_objective: str, - transformations: str, - train_budget_milli_node_hours: float, - stage_1_tuning_result_artifact_uri: str, - stage_2_num_parallel_trials: Optional[int] = None, - stage_2_num_selected_trials: Optional[int] = None, - data_source_csv_filenames: Optional[str] = None, - data_source_bigquery_table_path: Optional[str] = None, - predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, - stratified_split_key: Optional[str] = None, - training_fraction: Optional[float] = None, - validation_fraction: Optional[float] = None, - test_fraction: Optional[float] = None, - weight_column: Optional[str] = None, - optimization_objective_recall_value: Optional[float] = None, - optimization_objective_precision_value: Optional[float] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: Optional[str] = None, - stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, - stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, - transform_dataflow_machine_type: Optional[str] = None, - transform_dataflow_max_num_workers: Optional[int] = None, - transform_dataflow_disk_size_gb: Optional[int] = None, - dataflow_subnetwork: Optional[str] = None, - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: Optional[str] = None, - additional_experiments: Optional[Dict[str, Any]] = None, - dataflow_service_account: Optional[str] = None, - run_evaluation: bool = True, - evaluation_batch_predict_machine_type: Optional[str] = None, - evaluation_batch_predict_starting_replica_count: Optional[int] = None, - evaluation_batch_predict_max_replica_count: Optional[int] = None, - evaluation_batch_explain_machine_type: Optional[str] = None, - evaluation_batch_explain_starting_replica_count: Optional[int] = None, - evaluation_batch_explain_max_replica_count: Optional[int] = None, - evaluation_dataflow_machine_type: Optional[str] = None, - evaluation_dataflow_starting_num_workers: Optional[int] = None, - evaluation_dataflow_max_num_workers: Optional[int] = None, - evaluation_dataflow_disk_size_gb: Optional[int] = None, -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular training pipeline that skips architecture search. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. 
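# The stage-1 / stage-2 time-budget split computed above can be reproduced in
# isolation. This sketch restates the formulas above with example inputs; the
# value of 35 parallel trials is purely illustrative, not a documented default.
import math

def split_train_budget(train_budget_milli_node_hours,
                       stage_1_num_parallel_trials,
                       stage_2_num_parallel_trials):
    hours = float(train_budget_milli_node_hours) / 1000.0
    multiplier = stage_1_num_parallel_trials * hours / 500.0
    stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)
    phase_2_rounds = max(
        1, int(math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials + 0.5))
    # 1.3 is the empirical safety margin used above for trials that overrun.
    stage_1_deadline_secs = int(
        hours * 3600.0 - 1.3 * stage_1_single_run_max_secs * phase_2_rounds)
    if stage_1_deadline_secs < hours * 3600.0 * 0.5:
        stage_1_deadline_secs = int(hours * 3600.0 * 0.5)
        stage_1_single_run_max_secs = int(
            stage_1_deadline_secs / (1.3 * phase_2_rounds))
    stage_1_deadline_hours = stage_1_deadline_secs / 3600.0
    return stage_1_deadline_hours, hours - stage_1_deadline_hours

print(split_train_budget(1000, 35, 35))  # roughly 0.77h for stage 1, 0.23h for stage 2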
- root_dir: The root GCS directory for the pipeline components. - target_column: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The transformations to apply. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. - stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS - URI. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - data_source_csv_filenames: The CSV data source. - data_source_bigquery_table_path: The BigQuery data source. - predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. - stratified_split_key: The stratified_split column name. - training_fraction: The training fraction. - validation_fraction: The validation fraction. - test_fraction: float = The test fraction. - weight_column: The weight column name. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - dataflow_service_account: Custom service account to run dataflow jobs. - run_evaluation: Whether to run evaluation in the training pipeline. - evaluation_batch_predict_machine_type: The prediction server machine type - for batch predict components during evaluation. 
- evaluation_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict components during evaluation. - evaluation_batch_predict_max_replica_count: The max number of prediction - server for batch predict components during evaluation. - evaluation_batch_explain_machine_type: The prediction server machine type - for batch explain components during evaluation. - evaluation_batch_explain_starting_replica_count: The initial number of - prediction server for batch explain components during evaluation. - evaluation_batch_explain_max_replica_count: The max number of prediction - server for batch explain components during evaluation. - evaluation_dataflow_machine_type: The dataflow machine type for evaluation - components. - evaluation_dataflow_starting_num_workers: The initial number of Dataflow - workers for evaluation components. - evaluation_dataflow_max_num_workers: The max number of Dataflow workers for - evaluation components. - evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - evaluation components. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - - return get_automl_tabular_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column=target_column, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=None, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - data_source_csv_filenames=data_source_csv_filenames, - data_source_bigquery_table_path=data_source_bigquery_table_path, - predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, - stratified_split_key=stratified_split_key, - training_fraction=training_fraction, - validation_fraction=validation_fraction, - test_fraction=test_fraction, - weight_column=weight_column, - study_spec_parameters_override=[], - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override={}, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - dataflow_service_account=dataflow_service_account, - run_evaluation=run_evaluation, - evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, - evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, - evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, - 
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, - evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, - evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, - evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, - evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, - evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, - evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, - run_distillation=None, - distill_batch_predict_machine_type=None, - distill_batch_predict_starting_replica_count=None, - distill_batch_predict_max_replica_count=None, - stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, - quantiles=[], - enable_probabilistic_inference=False, - ) - - -def get_distill_skip_evaluation_pipeline_and_parameters( - project: str, - location: str, - root_dir: str, - target_column_name: str, - prediction_type: str, - optimization_objective: str, - transformations: Dict[str, Any], - split_spec: Dict[str, Any], - data_source: Dict[str, Any], - train_budget_milli_node_hours: float, - stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, - stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, - weight_column_name: str = '', - study_spec_override: Optional[Dict[str, Any]] = None, - optimization_objective_recall_value: float = -1, - optimization_objective_precision_value: float = -1, - stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, - export_additional_model_without_custom_ops: bool = False, - stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', - stats_and_example_gen_dataflow_max_num_workers: int = 25, - stats_and_example_gen_dataflow_disk_size_gb: int = 40, - transform_dataflow_machine_type: str = 'n1-standard-16', - transform_dataflow_max_num_workers: int = 25, - transform_dataflow_disk_size_gb: int = 40, - dataflow_subnetwork: str = '', - dataflow_use_public_ips: bool = True, - encryption_spec_key_name: str = '', - additional_experiments: Optional[Dict[str, Any]] = None, - distill_batch_predict_machine_type: str = 'n1-standard-16', - distill_batch_predict_starting_replica_count: int = 25, - distill_batch_predict_max_replica_count: int = 25, -) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular training pipeline that distill and skips evaluation. - - Args: - project: The GCP project that runs the pipeline components. - location: The GCP region that runs the pipeline components. - root_dir: The root GCS directory for the pipeline components. - target_column_name: The target column name. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - optimization_objective: For binary classification, "maximize-au-roc", - "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or - "maximize-recall-at-precision". For multi class classification, - "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or - "minimize-rmsle". - transformations: The transformations to apply. - split_spec: The split spec. - data_source: The data source. - train_budget_milli_node_hours: The train budget of creating this model, - expressed in milli node hours i.e. 1,000 value in this field means 1 node - hour. 
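# get_skip_architecture_search_pipeline_and_parameters above reuses a finished
# stage-1 tuning result so the stage-1 architecture search is skipped and only
# the later stages run. A sketch of a typical call; the project, bucket and
# artifact paths are placeholders:
template_path, parameter_values = get_skip_architecture_search_pipeline_and_parameters(
    project='my-project',
    location='us-central1',
    root_dir='gs://my-bucket/pipeline_root',
    target_column='label',
    prediction_type='regression',
    optimization_objective='minimize-rmse',
    transformations='gs://my-bucket/transform_config.json',
    train_budget_milli_node_hours=1000,
    stage_1_tuning_result_artifact_uri='gs://my-bucket/previous_run/stage_1_tuning_result',
)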
- stage_1_num_parallel_trials: Number of parallel trails for stage 1. - stage_2_num_parallel_trials: Number of parallel trails for stage 2. - stage_2_num_selected_trials: Number of selected trials for stage 2. - weight_column_name: The weight column name. - study_spec_override: The dictionary for overriding study spec. The - dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. - optimization_objective_recall_value: Required when optimization_objective is - "maximize-precision-at-recall". Must be between 0 and 1, inclusive. - optimization_objective_precision_value: Required when optimization_objective - is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. - stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. - stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - cv_trainer_worker_pool_specs_override: The dictionary for overriding stage - cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. - export_additional_model_without_custom_ops: Whether to export additional - model without custom TensorFlow operators. - stats_and_example_gen_dataflow_machine_type: The dataflow machine type for - stats_and_example_gen component. - stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow - workers for stats_and_example_gen component. - stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in - GB for stats_and_example_gen component. - transform_dataflow_machine_type: The dataflow machine type for transform - component. - transform_dataflow_max_num_workers: The max number of Dataflow workers for - transform component. - transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for - transform component. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: The KMS key name. - additional_experiments: Use this field to config private preview features. - distill_batch_predict_machine_type: The prediction server machine type for - batch predict component in the model distillation. - distill_batch_predict_starting_replica_count: The initial number of - prediction server for batch predict component in the model distillation. - distill_batch_predict_max_replica_count: The max number of prediction server - for batch predict component in the model distillation. - - Returns: - Tuple of pipeline_definition_path and parameter_values. - """ - warnings.warn( - 'Depreciated. Please use get_automl_tabular_pipeline_and_parameters.' 
- ) - - return get_default_pipeline_and_parameters( - project=project, - location=location, - root_dir=root_dir, - target_column_name=target_column_name, - prediction_type=prediction_type, - optimization_objective=optimization_objective, - transformations=transformations, - split_spec=split_spec, - data_source=data_source, - train_budget_milli_node_hours=train_budget_milli_node_hours, - stage_1_num_parallel_trials=stage_1_num_parallel_trials, - stage_2_num_parallel_trials=stage_2_num_parallel_trials, - stage_2_num_selected_trials=stage_2_num_selected_trials, - weight_column_name=weight_column_name, - study_spec_override=study_spec_override, - optimization_objective_recall_value=optimization_objective_recall_value, - optimization_objective_precision_value=optimization_objective_precision_value, - stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, - cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, - export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, - stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, - stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, - stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, - transform_dataflow_machine_type=transform_dataflow_machine_type, - transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, - transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - additional_experiments=additional_experiments, - distill_batch_predict_machine_type=distill_batch_predict_machine_type, - distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, - distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, - run_evaluation=False, - run_distillation=True, - ) From c8204d0285958f4caa2c8d82b7b4743ae992e26c Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 7 Jul 2023 16:18:52 -0700 Subject: [PATCH 017/253] chore(sdk): partition KFP SDK source code into runtime and non-runtime code (#9710) --- sdk/python/kfp/__init__.py | 1 + sdk/python/kfp/cli/compile_.py | 4 +- sdk/python/kfp/cli/component.py | 6 +-- sdk/python/kfp/client/client.py | 2 +- sdk/python/kfp/compiler/compiler.py | 4 +- sdk/python/kfp/compiler/compiler_test.py | 20 ++----- sdk/python/kfp/compiler/compiler_utils.py | 10 ++-- .../kfp/compiler/compiler_utils_test.py | 2 +- .../kfp/compiler/pipeline_spec_builder.py | 20 +++---- sdk/python/kfp/compiler/read_write_test.py | 6 +-- sdk/python/kfp/components/__init__.py | 14 ++--- ...ml_component.py => load_yaml_utilities.py} | 53 +++--------------- ...nt_test.py => load_yaml_utilities_test.py} | 14 ++--- .../kfp/components/test_data/simple_yaml.yaml | 16 ------ sdk/python/kfp/dsl/__init__.py | 50 ++++++++--------- .../kfp/{components => dsl}/base_component.py | 6 +-- .../base_component_test.py | 12 ++--- .../component_decorator.py | 2 +- .../component_decorator_test.py | 6 +-- .../{components => dsl}/component_factory.py | 32 +++++------ .../component_factory_test.py | 12 ++--- .../kfp/{components => dsl}/constants.py | 0 .../container_component_artifact_channel.py | 13 ++--- ...ntainer_component_artifact_channel_test.py | 4 +- .../container_component_class.py} | 4 +- .../container_component_decorator.py | 6 +-- 
.../container_component_decorator_test.py | 8 +-- .../kfp/{components => dsl}/executor.py | 8 +-- .../kfp/{components => dsl}/executor_main.py | 6 +-- .../kfp/{components => dsl}/executor_test.py | 20 +++---- .../kfp/{components => dsl}/for_loop.py | 2 +- .../kfp/{components => dsl}/for_loop_test.py | 4 +- .../{components => dsl}/graph_component.py | 8 +-- .../{components => dsl}/importer_component.py | 4 +- .../kfp/{components => dsl}/importer_node.py | 16 +++--- .../{components => dsl}/importer_node_test.py | 4 +- .../kfp/{components => dsl}/kfp_config.py | 0 .../{components => dsl}/pipeline_channel.py | 4 +- .../pipeline_channel_test.py | 4 +- .../{components => dsl}/pipeline_context.py | 8 +-- .../kfp/{components => dsl}/pipeline_task.py | 16 +++--- .../{components => dsl}/pipeline_task_test.py | 8 +-- .../kfp/{components => dsl}/placeholders.py | 4 +- .../{components => dsl}/placeholders_test.py | 4 +- .../{components => dsl}/python_component.py | 6 +-- .../kfp/{components => dsl}/structures.py | 22 ++++---- .../{components => dsl}/structures_test.py | 8 +-- .../{components => dsl}/task_final_status.py | 0 .../kfp/{components => dsl}/tasks_group.py | 8 +-- .../{components => dsl}/tasks_group_test.py | 6 +-- .../kfp/{components => dsl}/types/__init__.py | 0 .../types/artifact_types.py | 0 .../types/artifact_types_test.py | 2 +- .../types/custom_artifact_types.py | 6 +-- .../types/custom_artifact_types_test.py | 18 +++---- ...expected_bulk_loaded_confusion_matrix.json | 0 .../test_data/expected_confusion_matrix.json | 0 ...ypes_bulk_load_classification_metrics.json | 0 ...ected_io_types_classification_metrics.json | 0 .../types/type_annotations.py | 6 +-- .../types/type_annotations_test.py | 16 +++--- .../{components => dsl}/types/type_utils.py | 15 +++--- .../types/type_utils_test.py | 14 ++--- sdk/python/kfp/{components => dsl}/utils.py | 0 .../kfp/{components => dsl}/utils_test.py | 4 +- .../kfp/{components => dsl}/v1_components.py | 2 +- .../kfp/{components => dsl}/v1_modelbase.py | 0 .../kfp/{components => dsl}/v1_structures.py | 3 +- sdk/python/kfp/dsl/yaml_component.py | 54 +++++++++++++++++++ .../test_data/components/add_numbers.yaml | 6 +-- .../component_with_metadata_fields.yaml | 6 +-- .../component_with_pip_install.yaml | 6 +-- .../component_with_task_final_status.yaml | 6 +-- .../test_data/components/concat_message.yaml | 6 +-- .../test_data/components/dict_input.yaml | 6 +-- sdk/python/test_data/components/identity.yaml | 6 +-- .../test_data/components/input_artifact.yaml | 6 +-- .../test_data/components/nested_return.yaml | 6 +-- .../test_data/components/output_metrics.yaml | 6 +-- .../test_data/components/preprocess.yaml | 6 +-- .../component_with_optional_inputs.yaml | 6 +-- .../component_with_pip_index_urls.yaml | 6 +-- .../components_with_optional_artifacts.yaml | 10 ++-- ...lightweight_python_functions_pipeline.yaml | 10 ++-- ...tweight_python_functions_with_outputs.yaml | 18 +++---- .../parallelfor_fan_in/artifacts_complex.yaml | 22 ++++---- .../parallelfor_fan_in/artifacts_simple.yaml | 10 ++-- .../conditional_producer_and_consumers.yaml | 10 ++-- .../nested_with_parameters.yaml | 22 ++++---- .../parameters_complex.yaml | 30 +++++------ .../parallelfor_fan_in/parameters_simple.yaml | 10 ++-- .../pipeline_producer_consumer.yaml | 18 +++---- .../pipelines/pipeline_as_exit_task.yaml | 18 +++---- .../pipelines/pipeline_in_pipeline.yaml | 10 ++-- .../pipeline_in_pipeline_complex.yaml | 10 ++-- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 14 ++--- 
.../pipelines/pipeline_with_condition.yaml | 22 ++++---- ...peline_with_dynamic_importer_metadata.yaml | 6 +-- .../pipelines/pipeline_with_env.yaml | 6 +-- .../pipelines/pipeline_with_exit_handler.yaml | 14 ++--- .../pipeline_with_google_artifact_type.yaml | 10 ++-- .../pipelines/pipeline_with_importer.yaml | 10 ++-- .../pipelines/pipeline_with_loops.yaml | 34 ++++++------ .../pipeline_with_loops_and_conditions.yaml | 54 +++++++++---------- .../pipeline_with_metadata_fields.yaml | 10 ++-- .../pipeline_with_metrics_outputs.yaml | 10 ++-- .../pipeline_with_multiple_exit_handlers.yaml | 30 +++++------ .../pipeline_with_nested_conditions.yaml | 34 ++++++------ .../pipelines/pipeline_with_nested_loops.yaml | 14 ++--- .../pipelines/pipeline_with_outputs.yaml | 10 ++-- ...pipeline_with_parallelfor_parallelism.yaml | 26 ++++----- ...ipeline_with_params_containing_format.yaml | 14 ++--- .../pipelines/pipeline_with_placeholders.yaml | 22 ++++---- .../pipelines/pipeline_with_retry.yaml | 6 +-- .../pipeline_with_task_final_status.yaml | 14 ++--- ...th_task_using_ignore_upstream_failure.yaml | 10 ++-- 116 files changed, 633 insertions(+), 650 deletions(-) rename sdk/python/kfp/components/{yaml_component.py => load_yaml_utilities.py} (59%) rename sdk/python/kfp/components/{yaml_component_test.py => load_yaml_utilities_test.py} (91%) delete mode 100644 sdk/python/kfp/components/test_data/simple_yaml.yaml rename sdk/python/kfp/{components => dsl}/base_component.py (97%) rename sdk/python/kfp/{components => dsl}/base_component_test.py (95%) rename sdk/python/kfp/{components => dsl}/component_decorator.py (99%) rename sdk/python/kfp/{components => dsl}/component_decorator_test.py (97%) rename sdk/python/kfp/{components => dsl}/component_factory.py (95%) rename sdk/python/kfp/{components => dsl}/component_factory_test.py (95%) rename sdk/python/kfp/{components => dsl}/constants.py (100%) rename sdk/python/kfp/{components => dsl}/container_component_artifact_channel.py (80%) rename sdk/python/kfp/{components => dsl}/container_component_artifact_channel_test.py (94%) rename sdk/python/kfp/{components/container_component.py => dsl/container_component_class.py} (94%) rename sdk/python/kfp/{components => dsl}/container_component_decorator.py (91%) rename sdk/python/kfp/{components => dsl}/container_component_decorator_test.py (94%) rename sdk/python/kfp/{components => dsl}/executor.py (98%) rename sdk/python/kfp/{components => dsl}/executor_main.py (96%) rename sdk/python/kfp/{components => dsl}/executor_test.py (98%) rename sdk/python/kfp/{components => dsl}/for_loop.py (99%) rename sdk/python/kfp/{components => dsl}/for_loop_test.py (98%) rename sdk/python/kfp/{components => dsl}/graph_component.py (95%) rename sdk/python/kfp/{components => dsl}/importer_component.py (92%) rename sdk/python/kfp/{components => dsl}/importer_node.py (93%) rename sdk/python/kfp/{components => dsl}/importer_node_test.py (98%) rename sdk/python/kfp/{components => dsl}/kfp_config.py (100%) rename sdk/python/kfp/{components => dsl}/pipeline_channel.py (99%) rename sdk/python/kfp/{components => dsl}/pipeline_channel_test.py (98%) rename sdk/python/kfp/{components => dsl}/pipeline_context.py (97%) rename sdk/python/kfp/{components => dsl}/pipeline_task.py (97%) rename sdk/python/kfp/{components => dsl}/pipeline_task_test.py (98%) rename sdk/python/kfp/{components => dsl}/placeholders.py (99%) rename sdk/python/kfp/{components => dsl}/placeholders_test.py (99%) rename sdk/python/kfp/{components => dsl}/python_component.py (92%) rename 
sdk/python/kfp/{components => dsl}/structures.py (98%) rename sdk/python/kfp/{components => dsl}/structures_test.py (99%) rename sdk/python/kfp/{components => dsl}/task_final_status.py (100%) rename sdk/python/kfp/{components => dsl}/tasks_group.py (97%) rename sdk/python/kfp/{components => dsl}/tasks_group_test.py (95%) rename sdk/python/kfp/{components => dsl}/types/__init__.py (100%) rename sdk/python/kfp/{components => dsl}/types/artifact_types.py (100%) rename sdk/python/kfp/{components => dsl}/types/artifact_types_test.py (98%) rename sdk/python/kfp/{components => dsl}/types/custom_artifact_types.py (98%) rename sdk/python/kfp/{components => dsl}/types/custom_artifact_types_test.py (96%) rename sdk/python/kfp/{components => dsl}/types/test_data/expected_bulk_loaded_confusion_matrix.json (100%) rename sdk/python/kfp/{components => dsl}/types/test_data/expected_confusion_matrix.json (100%) rename sdk/python/kfp/{components => dsl}/types/test_data/expected_io_types_bulk_load_classification_metrics.json (100%) rename sdk/python/kfp/{components => dsl}/types/test_data/expected_io_types_classification_metrics.json (100%) rename sdk/python/kfp/{components => dsl}/types/type_annotations.py (98%) rename sdk/python/kfp/{components => dsl}/types/type_annotations_test.py (94%) rename sdk/python/kfp/{components => dsl}/types/type_utils.py (98%) rename sdk/python/kfp/{components => dsl}/types/type_utils_test.py (98%) rename sdk/python/kfp/{components => dsl}/utils.py (100%) rename sdk/python/kfp/{components => dsl}/utils_test.py (98%) rename sdk/python/kfp/{components => dsl}/v1_components.py (97%) rename sdk/python/kfp/{components => dsl}/v1_modelbase.py (100%) rename sdk/python/kfp/{components => dsl}/v1_structures.py (99%) create mode 100644 sdk/python/kfp/dsl/yaml_component.py diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index d58174a9b1..3c605b82a9 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -20,5 +20,6 @@ TYPE_CHECK = True +from kfp import components from kfp import dsl from kfp.client import Client diff --git a/sdk/python/kfp/cli/compile_.py b/sdk/python/kfp/cli/compile_.py index d265ca6194..2bd3bab18c 100644 --- a/sdk/python/kfp/cli/compile_.py +++ b/sdk/python/kfp/cli/compile_.py @@ -22,8 +22,8 @@ import click from kfp import compiler -from kfp.components import base_component -from kfp.components import graph_component +from kfp.dsl import base_component +from kfp.dsl import graph_component def is_pipeline_func(func: Callable) -> bool: diff --git a/sdk/python/kfp/cli/component.py b/sdk/python/kfp/cli/component.py index e4b8d7e321..e09bd7b794 100644 --- a/sdk/python/kfp/cli/component.py +++ b/sdk/python/kfp/cli/component.py @@ -31,9 +31,9 @@ _DOCKER_IS_PRESENT = False import kfp as kfp -from kfp.components import component_factory -from kfp.components import kfp_config -from kfp.components import utils +from kfp.dsl import component_factory +from kfp.dsl import kfp_config +from kfp.dsl import utils _REQUIREMENTS_TXT = 'runtime-requirements.txt' diff --git a/sdk/python/kfp/client/client.py b/sdk/python/kfp/client/client.py index 8127b1b232..57b4c6d1f9 100644 --- a/sdk/python/kfp/client/client.py +++ b/sdk/python/kfp/client/client.py @@ -32,7 +32,7 @@ from kfp import compiler from kfp.client import auth from kfp.client import set_volume_credentials -from kfp.components import base_component +from kfp.dsl import base_component from kfp.pipeline_spec import pipeline_spec_pb2 import kfp_server_api import yaml diff --git 
a/sdk/python/kfp/compiler/compiler.py b/sdk/python/kfp/compiler/compiler.py index af762ea2b6..a77f606e89 100644 --- a/sdk/python/kfp/compiler/compiler.py +++ b/sdk/python/kfp/compiler/compiler.py @@ -20,8 +20,8 @@ from typing import Any, Dict, Optional from kfp.compiler import pipeline_spec_builder as builder -from kfp.components import base_component -from kfp.components.types import type_utils +from kfp.dsl import base_component +from kfp.dsl.types import type_utils class Compiler: diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 7071ca7688..92b1f6a1b7 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -31,17 +31,17 @@ from kfp.cli import cli from kfp.compiler import compiler from kfp.compiler import compiler_utils -from kfp.components import graph_component -from kfp.components import pipeline_task -from kfp.components import yaml_component -from kfp.components.types import type_utils from kfp.dsl import Artifact from kfp.dsl import ContainerSpec +from kfp.dsl import graph_component from kfp.dsl import Input from kfp.dsl import Model from kfp.dsl import Output from kfp.dsl import OutputPath +from kfp.dsl import pipeline_task from kfp.dsl import PipelineTaskFinalStatus +from kfp.dsl import yaml_component +from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 import yaml @@ -152,18 +152,6 @@ def simple_pipeline(): with open(target_json_file, 'r') as f: f.read() - def test_compile_pipeline_with_dsl_graph_component_should_raise_error(self): - - with self.assertRaisesRegex( - AttributeError, - "module 'kfp.dsl' has no attribute 'graph_component'"): - - @dsl.graph_component - def flip_coin_graph_component(): - flip = flip_coin_op() - with dsl.Condition(flip.output == 'heads'): - flip_coin_graph_component() - def test_compile_pipeline_with_misused_inputvalue_should_raise_error(self): upstream_op = components.load_component_from_text(""" diff --git a/sdk/python/kfp/compiler/compiler_utils.py b/sdk/python/kfp/compiler/compiler_utils.py index 1ae0c326bf..79c4418bdf 100644 --- a/sdk/python/kfp/compiler/compiler_utils.py +++ b/sdk/python/kfp/compiler/compiler_utils.py @@ -17,11 +17,11 @@ from copy import deepcopy from typing import DefaultDict, Dict, List, Mapping, Set, Tuple, Union -from kfp.components import for_loop -from kfp.components import pipeline_channel -from kfp.components import pipeline_context -from kfp.components import pipeline_task -from kfp.components import tasks_group +from kfp.dsl import for_loop +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import pipeline_task +from kfp.dsl import tasks_group GroupOrTaskType = Union[tasks_group.TasksGroup, pipeline_task.PipelineTask] diff --git a/sdk/python/kfp/compiler/compiler_utils_test.py b/sdk/python/kfp/compiler/compiler_utils_test.py index 61866fb740..ec20833b30 100644 --- a/sdk/python/kfp/compiler/compiler_utils_test.py +++ b/sdk/python/kfp/compiler/compiler_utils_test.py @@ -16,7 +16,7 @@ from absl.testing import parameterized from kfp.compiler import compiler_utils -from kfp.components import pipeline_channel +from kfp.dsl import pipeline_channel class TestAdditionalInputNameForPipelineChannel(parameterized.TestCase): diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 67f9786dde..3d63a78c4d 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ 
b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -24,16 +24,16 @@ from google.protobuf import struct_pb2 import kfp from kfp.compiler import compiler_utils -from kfp.components import for_loop -from kfp.components import pipeline_channel -from kfp.components import pipeline_context -from kfp.components import pipeline_task -from kfp.components import placeholders -from kfp.components import structures -from kfp.components import tasks_group -from kfp.components import utils -from kfp.components.types import artifact_types -from kfp.components.types import type_utils +from kfp.dsl import for_loop +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import pipeline_task +from kfp.dsl import placeholders +from kfp.dsl import structures +from kfp.dsl import tasks_group +from kfp.dsl import utils +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 import yaml diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index 8ecdb30c4f..29c76db03e 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -22,9 +22,9 @@ from absl.testing import parameterized from kfp import compiler from kfp import components -from kfp.components import placeholders -from kfp.components import python_component -from kfp.components import structures +from kfp.dsl import placeholders +from kfp.dsl import python_component +from kfp.dsl import structures import yaml _PROJECT_ROOT = os.path.abspath(os.path.join(__file__, *([os.path.pardir] * 5))) diff --git a/sdk/python/kfp/components/__init__.py b/sdk/python/kfp/components/__init__.py index 3f183b6267..005c43f3cd 100644 --- a/sdk/python/kfp/components/__init__.py +++ b/sdk/python/kfp/components/__init__.py @@ -24,10 +24,10 @@ 'YamlComponent', ] -from kfp.components.base_component import BaseComponent -from kfp.components.container_component import ContainerComponent -from kfp.components.python_component import PythonComponent -from kfp.components.yaml_component import load_component_from_file -from kfp.components.yaml_component import load_component_from_text -from kfp.components.yaml_component import load_component_from_url -from kfp.components.yaml_component import YamlComponent +from kfp.components.load_yaml_utilities import load_component_from_file +from kfp.components.load_yaml_utilities import load_component_from_text +from kfp.components.load_yaml_utilities import load_component_from_url +from kfp.dsl.base_component import BaseComponent +from kfp.dsl.container_component_class import ContainerComponent +from kfp.dsl.python_component import PythonComponent +from kfp.dsl.yaml_component import YamlComponent diff --git a/sdk/python/kfp/components/yaml_component.py b/sdk/python/kfp/components/load_yaml_utilities.py similarity index 59% rename from sdk/python/kfp/components/yaml_component.py rename to sdk/python/kfp/components/load_yaml_utilities.py index e60907c6ef..34342d3b0b 100644 --- a/sdk/python/kfp/components/yaml_component.py +++ b/sdk/python/kfp/components/load_yaml_utilities.py @@ -15,49 +15,12 @@ from typing import Optional, Tuple -from google.protobuf import json_format -from kfp import components -from kfp.components import structures -from kfp.pipeline_spec import pipeline_spec_pb2 +from kfp.dsl import structures +from kfp.dsl import yaml_component import requests -class YamlComponent(components.BaseComponent): - """A component loaded from a YAML 
file. - - **Note:** ``YamlComponent`` is not intended to be used to construct components directly. Use ``kfp.components.load_component_from_*()`` instead. - - Attribute: - component_spec: Component definition. - component_yaml: The yaml string that this component is loaded from. - """ - - def __init__( - self, - component_spec: structures.ComponentSpec, - component_yaml: str, - ): - super().__init__(component_spec=component_spec) - self.component_yaml = component_yaml - - @property - def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: - """Returns the pipeline spec of the component.""" - component_dict = structures.load_documents_from_yaml( - self.component_yaml)[0] - is_v1 = 'implementation' in set(component_dict.keys()) - if is_v1: - return self.component_spec.to_pipeline_spec() - else: - return json_format.ParseDict(component_dict, - pipeline_spec_pb2.PipelineSpec()) - - def execute(self, *args, **kwargs): - """Not implemented.""" - raise NotImplementedError - - -def load_component_from_text(text: str) -> YamlComponent: +def load_component_from_text(text: str) -> yaml_component.YamlComponent: """Loads a component from text. Args: @@ -66,12 +29,12 @@ def load_component_from_text(text: str) -> YamlComponent: Returns: Component loaded from YAML. """ - return YamlComponent( + return yaml_component.YamlComponent( component_spec=structures.ComponentSpec.from_yaml_documents(text), component_yaml=text) -def load_component_from_file(file_path: str) -> YamlComponent: +def load_component_from_file(file_path: str) -> yaml_component.YamlComponent: """Loads a component from a file. Args: @@ -91,9 +54,9 @@ def load_component_from_file(file_path: str) -> YamlComponent: return load_component_from_text(component_stream.read()) -def load_component_from_url(url: str, - auth: Optional[Tuple[str, - str]] = None) -> YamlComponent: +def load_component_from_url( + url: str, + auth: Optional[Tuple[str, str]] = None) -> yaml_component.YamlComponent: """Loads a component from a URL. Args: diff --git a/sdk/python/kfp/components/yaml_component_test.py b/sdk/python/kfp/components/load_yaml_utilities_test.py similarity index 91% rename from sdk/python/kfp/components/yaml_component_test.py rename to sdk/python/kfp/components/load_yaml_utilities_test.py index 3e56e3ce79..55ba29cf57 100644 --- a/sdk/python/kfp/components/yaml_component_test.py +++ b/sdk/python/kfp/components/load_yaml_utilities_test.py @@ -11,15 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
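# Editor's illustrative sketch (not part of the patch): as the load_yaml_utilities.py
# hunks above show, the public loader API is unchanged by this refactor. Components
# are still loaded through kfp.components.load_component_from_text/_file/_url, while
# the YamlComponent class they return now lives in kfp.dsl.yaml_component. The inline
# YAML below is a hypothetical v1-style component used only for illustration.
from kfp import components

hello_op = components.load_component_from_text("""
name: say-hello
inputs:
- {name: message, type: String}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: message}]
""")
print(type(hello_op))  # the class defined in kfp/dsl/yaml_component.py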
-"""Tests for kfp.components.yaml_component.""" +"""Tests for kfp.dsl.yaml_component.""" import os import tempfile import textwrap import unittest -from kfp.components import structures -from kfp.components import yaml_component +from kfp import components +from kfp.dsl import structures SAMPLE_YAML = textwrap.dedent("""\ components: @@ -84,10 +84,10 @@ ] -class YamlComponentTest(unittest.TestCase): +class LoadYamlTests(unittest.TestCase): def test_load_component_from_text(self): - component = yaml_component.load_component_from_text(SAMPLE_YAML) + component = components.load_component_from_text(SAMPLE_YAML) self.assertEqual(component.component_spec.name, 'component-1') self.assertEqual(component.component_spec.outputs, {'output1': structures.OutputSpec(type='String')}) @@ -101,7 +101,7 @@ def test_load_component_from_file(self): path = os.path.join(tmpdir, 'sample_yaml.yaml') with open(path, 'w') as f: f.write(SAMPLE_YAML) - component = yaml_component.load_component_from_file(path) + component = components.load_component_from_file(path) self.assertEqual(component.component_spec.name, 'component-1') self.assertEqual(component.component_spec.outputs, {'output1': structures.OutputSpec(type='String')}) @@ -112,7 +112,7 @@ def test_load_component_from_file(self): def test_load_component_from_url(self): component_url = 'https://raw.githubusercontent.com/kubeflow/pipelines/7b49eadf621a9054e1f1315c86f95fb8cf8c17c3/sdk/python/kfp/compiler/test_data/components/identity.yaml' - component = yaml_component.load_component_from_url(component_url) + component = components.load_component_from_url(component_url) self.assertEqual(component.component_spec.name, 'identity') self.assertEqual(component.component_spec.outputs, diff --git a/sdk/python/kfp/components/test_data/simple_yaml.yaml b/sdk/python/kfp/components/test_data/simple_yaml.yaml deleted file mode 100644 index 8db8477f7b..0000000000 --- a/sdk/python/kfp/components/test_data/simple_yaml.yaml +++ /dev/null @@ -1,16 +0,0 @@ -name: component_1 -inputs: - input1: {type: String} -outputs: - output1: {type: String} -implementation: - container: - image: alpine - command: - - sh - - -c - - 'set -ex - - echo "$0" > "$1"' - - {inputValue: input1} - - {outputPath: output1} \ No newline at end of file diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index a70190ca78..d3502a7287 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -57,31 +57,31 @@ from typing import TypeVar -from kfp.components.component_decorator import component -from kfp.components.container_component_decorator import container_component -from kfp.components.for_loop import Collected -from kfp.components.importer_node import importer -from kfp.components.pipeline_context import pipeline -from kfp.components.pipeline_task import PipelineTask -from kfp.components.placeholders import ConcatPlaceholder -from kfp.components.placeholders import IfPresentPlaceholder -from kfp.components.structures import ContainerSpec -from kfp.components.task_final_status import PipelineTaskFinalStatus -from kfp.components.tasks_group import Condition -from kfp.components.tasks_group import ExitHandler -from kfp.components.tasks_group import ParallelFor -from kfp.components.types.artifact_types import Artifact -from kfp.components.types.artifact_types import ClassificationMetrics -from kfp.components.types.artifact_types import Dataset -from kfp.components.types.artifact_types import HTML -from kfp.components.types.artifact_types import Markdown -from 
kfp.components.types.artifact_types import Metrics -from kfp.components.types.artifact_types import Model -from kfp.components.types.artifact_types import SlicedClassificationMetrics -from kfp.components.types.type_annotations import InputAnnotation -from kfp.components.types.type_annotations import InputPath -from kfp.components.types.type_annotations import OutputAnnotation -from kfp.components.types.type_annotations import OutputPath +from kfp.dsl.component_decorator import component +from kfp.dsl.container_component_decorator import container_component +from kfp.dsl.for_loop import Collected +from kfp.dsl.importer_node import importer +from kfp.dsl.pipeline_context import pipeline +from kfp.dsl.pipeline_task import PipelineTask +from kfp.dsl.placeholders import ConcatPlaceholder +from kfp.dsl.placeholders import IfPresentPlaceholder +from kfp.dsl.structures import ContainerSpec +from kfp.dsl.task_final_status import PipelineTaskFinalStatus +from kfp.dsl.tasks_group import Condition +from kfp.dsl.tasks_group import ExitHandler +from kfp.dsl.tasks_group import ParallelFor +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import ClassificationMetrics +from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.artifact_types import HTML +from kfp.dsl.types.artifact_types import Markdown +from kfp.dsl.types.artifact_types import Metrics +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.artifact_types import SlicedClassificationMetrics +from kfp.dsl.types.type_annotations import InputAnnotation +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputAnnotation +from kfp.dsl.types.type_annotations import OutputPath # hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py diff --git a/sdk/python/kfp/components/base_component.py b/sdk/python/kfp/dsl/base_component.py similarity index 97% rename from sdk/python/kfp/components/base_component.py rename to sdk/python/kfp/dsl/base_component.py index 2b62ece34e..25a10f84df 100644 --- a/sdk/python/kfp/components/base_component.py +++ b/sdk/python/kfp/dsl/base_component.py @@ -16,9 +16,9 @@ import abc from typing import List -from kfp.components import pipeline_task -from kfp.components import structures -from kfp.components.types import type_utils +from kfp.dsl import pipeline_task +from kfp.dsl import structures +from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 diff --git a/sdk/python/kfp/components/base_component_test.py b/sdk/python/kfp/dsl/base_component_test.py similarity index 95% rename from sdk/python/kfp/components/base_component_test.py rename to sdk/python/kfp/dsl/base_component_test.py index c9e0277564..48ed1d6065 100644 --- a/sdk/python/kfp/components/base_component_test.py +++ b/sdk/python/kfp/dsl/base_component_test.py @@ -11,18 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
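# Editor's note (illustrative, not part of the patch): only SDK-internal import paths
# change in this commit; the public kfp.dsl surface stays the same. The internal
# modules move roughly as:
#   before: from kfp.components import pipeline_task
#           from kfp.components.types import type_utils
#   after:  from kfp.dsl import pipeline_task
#           from kfp.dsl.types import type_utils
# End-user pipeline code keeps importing public names directly from kfp.dsl:
from kfp import dsl
from kfp.dsl import Artifact, Input, Output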
-"""Tests for kfp.components.base_component.""" +"""Tests for kfp.dsl.base_component.""" import unittest from unittest.mock import patch from kfp import dsl -from kfp.components import pipeline_task -from kfp.components import placeholders -from kfp.components import PythonComponent -from kfp.components import structures +from kfp.dsl import pipeline_task +from kfp.dsl import placeholders +from kfp.dsl import python_component +from kfp.dsl import structures -component_op = PythonComponent( +component_op = python_component.PythonComponent( # dummy python_func not used in behavior that is being tested python_func=lambda: None, component_spec=structures.ComponentSpec( diff --git a/sdk/python/kfp/components/component_decorator.py b/sdk/python/kfp/dsl/component_decorator.py similarity index 99% rename from sdk/python/kfp/components/component_decorator.py rename to sdk/python/kfp/dsl/component_decorator.py index 8bc64f17bc..7c6589589d 100644 --- a/sdk/python/kfp/components/component_decorator.py +++ b/sdk/python/kfp/dsl/component_decorator.py @@ -16,7 +16,7 @@ from typing import Callable, List, Optional import warnings -from kfp.components import component_factory +from kfp.dsl import component_factory def component(func: Optional[Callable] = None, diff --git a/sdk/python/kfp/components/component_decorator_test.py b/sdk/python/kfp/dsl/component_decorator_test.py similarity index 97% rename from sdk/python/kfp/components/component_decorator_test.py rename to sdk/python/kfp/dsl/component_decorator_test.py index e55d79af56..4b51de638f 100644 --- a/sdk/python/kfp/components/component_decorator_test.py +++ b/sdk/python/kfp/dsl/component_decorator_test.py @@ -17,9 +17,9 @@ from typing import Dict, List, NamedTuple import unittest -from kfp.components import python_component -from kfp.components import structures -from kfp.components.component_decorator import component +from kfp.dsl import python_component +from kfp.dsl import structures +from kfp.dsl.component_decorator import component class TestComponentDecorator(unittest.TestCase): diff --git a/sdk/python/kfp/components/component_factory.py b/sdk/python/kfp/dsl/component_factory.py similarity index 95% rename from sdk/python/kfp/components/component_factory.py rename to sdk/python/kfp/dsl/component_factory.py index 6585d0a813..99d34f7828 100644 --- a/sdk/python/kfp/components/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -21,17 +21,17 @@ import warnings import docstring_parser -from kfp.components import container_component -from kfp.components import container_component_artifact_channel -from kfp.components import graph_component -from kfp.components import placeholders -from kfp.components import python_component -from kfp.components import structures -from kfp.components import task_final_status -from kfp.components.types import artifact_types -from kfp.components.types import custom_artifact_types -from kfp.components.types import type_annotations -from kfp.components.types import type_utils +from kfp.dsl import container_component_artifact_channel +from kfp.dsl import container_component_class +from kfp.dsl import graph_component +from kfp.dsl import placeholders +from kfp.dsl import python_component +from kfp.dsl import structures +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import custom_artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils _DEFAULT_BASE_IMAGE = 'python:3.7' @@ -195,7 +195,7 @@ def 
extract_component_interface( passing_style = type_annotations.get_io_artifact_annotation( parameter_type) - # parameter_type is a type like typing_extensions.Annotated[kfp.components.types.artifact_types.Artifact, ] OR typing_extensions.Annotated[typing.List[kfp.components.types.artifact_types.Artifact], ] + # parameter_type is a type like typing_extensions.Annotated[kfp.dsl.types.artifact_types.Artifact, ] OR typing_extensions.Annotated[typing.List[kfp.dsl.types.artifact_types.Artifact], ] is_artifact_list = type_annotations.is_list_of_artifacts( parameter_type.__origin__) @@ -421,7 +421,7 @@ def _get_command_and_args_for_lightweight_component( textwrap.dedent('''\ program_path=$(mktemp -d) printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main \ + python3 -m kfp.dsl.executor_main \ --component_module_path \ "$program_path/ephemeral_component.py" \ "$@" @@ -444,7 +444,7 @@ def _get_command_and_args_for_containerized_component( command = [ 'python3', '-m', - 'kfp.components.executor_main', + 'kfp.dsl.executor_main', ] args = [ @@ -569,7 +569,7 @@ def make_input_for_parameterized_container_component_function( def create_container_component_from_func( - func: Callable) -> container_component.ContainerComponent: + func: Callable) -> container_component_class.ContainerComponent: """Implementation for the @container_component decorator. The decorator is defined under container_component_decorator.py. See @@ -593,7 +593,7 @@ def create_container_component_from_func( component_spec.implementation = structures.Implementation( container_spec_implementation) component_spec._validate_placeholders() - return container_component.ContainerComponent(component_spec, func) + return container_component_class.ContainerComponent(component_spec, func) def create_graph_component_from_func( diff --git a/sdk/python/kfp/components/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py similarity index 95% rename from sdk/python/kfp/components/component_factory_test.py rename to sdk/python/kfp/dsl/component_factory_test.py index 6cfd5b5c8d..8f935ae3f0 100644 --- a/sdk/python/kfp/components/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -16,14 +16,14 @@ import unittest from kfp import dsl -from kfp.components import component_factory -from kfp.components import structures -from kfp.components.component_decorator import component -from kfp.components.types.artifact_types import Artifact -from kfp.components.types.artifact_types import Model -from kfp.components.types.type_annotations import OutputPath +from kfp.dsl import component_factory from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl import structures +from kfp.dsl.component_decorator import component +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.type_annotations import OutputPath class TestGetPackagesToInstallCommand(unittest.TestCase): diff --git a/sdk/python/kfp/components/constants.py b/sdk/python/kfp/dsl/constants.py similarity index 100% rename from sdk/python/kfp/components/constants.py rename to sdk/python/kfp/dsl/constants.py diff --git a/sdk/python/kfp/components/container_component_artifact_channel.py b/sdk/python/kfp/dsl/container_component_artifact_channel.py similarity index 80% rename from sdk/python/kfp/components/container_component_artifact_channel.py rename to sdk/python/kfp/dsl/container_component_artifact_channel.py index 84e0f984ec..322752295f 100644 --- 
a/sdk/python/kfp/components/container_component_artifact_channel.py +++ b/sdk/python/kfp/dsl/container_component_artifact_channel.py @@ -14,8 +14,6 @@ from typing import Union -from kfp.components import placeholders - class ContainerComponentArtifactChannel: """A class for passing in placeholders into container_component decorated @@ -25,13 +23,10 @@ def __init__(self, io_type: str, var_name: str): self._io_type = io_type self._var_name = var_name - def __getattr__( - self, _name: str - ) -> Union[placeholders.InputUriPlaceholder, placeholders - .InputPathPlaceholder, placeholders.OutputUriPlaceholder, - placeholders.OutputPathPlaceholder, - placeholders.InputMetadataPlaceholder, - placeholders.OutputMetadataPlaceholder]: + def __getattr__(self, _name: str) -> Union['placeholders.Placeholder']: + # aviod circular imports + from kfp.dsl import placeholders + attr_to_placeholder_dict = { 'uri': { 'input': placeholders.InputUriPlaceholder, diff --git a/sdk/python/kfp/components/container_component_artifact_channel_test.py b/sdk/python/kfp/dsl/container_component_artifact_channel_test.py similarity index 94% rename from sdk/python/kfp/components/container_component_artifact_channel_test.py rename to sdk/python/kfp/dsl/container_component_artifact_channel_test.py index 35eaeafdd6..fd04a027f0 100644 --- a/sdk/python/kfp/components/container_component_artifact_channel_test.py +++ b/sdk/python/kfp/dsl/container_component_artifact_channel_test.py @@ -14,8 +14,8 @@ import unittest -from kfp.components import container_component_artifact_channel -from kfp.components import placeholders +from kfp.dsl import container_component_artifact_channel +from kfp.dsl import placeholders class TestContainerComponentArtifactChannel(unittest.TestCase): diff --git a/sdk/python/kfp/components/container_component.py b/sdk/python/kfp/dsl/container_component_class.py similarity index 94% rename from sdk/python/kfp/components/container_component.py rename to sdk/python/kfp/dsl/container_component_class.py index 6c83ac3d06..7cd928036a 100644 --- a/sdk/python/kfp/components/container_component.py +++ b/sdk/python/kfp/dsl/container_component_class.py @@ -15,8 +15,8 @@ from typing import Callable -from kfp.components import base_component -from kfp.components import structures +from kfp.dsl import base_component +from kfp.dsl import structures class ContainerComponent(base_component.BaseComponent): diff --git a/sdk/python/kfp/components/container_component_decorator.py b/sdk/python/kfp/dsl/container_component_decorator.py similarity index 91% rename from sdk/python/kfp/components/container_component_decorator.py rename to sdk/python/kfp/dsl/container_component_decorator.py index 7d8901c97c..6ce43094ff 100644 --- a/sdk/python/kfp/components/container_component_decorator.py +++ b/sdk/python/kfp/dsl/container_component_decorator.py @@ -14,12 +14,12 @@ from typing import Callable -from kfp.components import component_factory -from kfp.components import container_component +from kfp.dsl import component_factory +from kfp.dsl import container_component_class def container_component( - func: Callable) -> container_component.ContainerComponent: + func: Callable) -> container_component_class.ContainerComponent: """Decorator for container-based components in KFP v2. 
Args: diff --git a/sdk/python/kfp/components/container_component_decorator_test.py b/sdk/python/kfp/dsl/container_component_decorator_test.py similarity index 94% rename from sdk/python/kfp/components/container_component_decorator_test.py rename to sdk/python/kfp/dsl/container_component_decorator_test.py index a234b16d22..d49253b1e8 100644 --- a/sdk/python/kfp/components/container_component_decorator_test.py +++ b/sdk/python/kfp/dsl/container_component_decorator_test.py @@ -16,8 +16,8 @@ import unittest from kfp import dsl -from kfp.components import container_component from kfp.dsl import Artifact +from kfp.dsl import container_component_class from kfp.dsl import Input from kfp.dsl import Output @@ -36,7 +36,7 @@ def hello_world() -> dsl.ContainerSpec: ) self.assertIsInstance(hello_world, - container_component.ContainerComponent) + container_component_class.ContainerComponent) self.assertIsNone(hello_world.component_spec.inputs) def test_func_with_simple_io(self): @@ -52,7 +52,7 @@ def hello_world_io( args=['--text', text, '--output_path', text_output_path]) self.assertIsInstance(hello_world_io, - container_component.ContainerComponent) + container_component_class.ContainerComponent) def test_func_with_artifact_io(self): @@ -78,7 +78,7 @@ def container_comp_with_artifacts( ]) self.assertIsInstance(container_comp_with_artifacts, - container_component.ContainerComponent) + container_component_class.ContainerComponent) class TestInputValuePlaceholderIrTypeHack(unittest.TestCase): diff --git a/sdk/python/kfp/components/executor.py b/sdk/python/kfp/dsl/executor.py similarity index 98% rename from sdk/python/kfp/components/executor.py rename to sdk/python/kfp/dsl/executor.py index 0f43790999..db8a8a89bd 100644 --- a/sdk/python/kfp/components/executor.py +++ b/sdk/python/kfp/dsl/executor.py @@ -16,10 +16,10 @@ import os from typing import Any, Callable, Dict, List, Optional, Union -from kfp.components import python_component -from kfp.components import task_final_status -from kfp.components.types import artifact_types -from kfp.components.types import type_annotations +from kfp.dsl import python_component +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations class Executor(): diff --git a/sdk/python/kfp/components/executor_main.py b/sdk/python/kfp/dsl/executor_main.py similarity index 96% rename from sdk/python/kfp/components/executor_main.py rename to sdk/python/kfp/dsl/executor_main.py index 96943b0ca4..1836ea5889 100644 --- a/sdk/python/kfp/components/executor_main.py +++ b/sdk/python/kfp/dsl/executor_main.py @@ -17,9 +17,9 @@ import os import sys -from kfp.components import executor as component_executor -from kfp.components import kfp_config -from kfp.components import utils +from kfp.dsl import executor as component_executor +from kfp.dsl import kfp_config +from kfp.dsl import utils def _setup_logging(): diff --git a/sdk/python/kfp/components/executor_test.py b/sdk/python/kfp/dsl/executor_test.py similarity index 98% rename from sdk/python/kfp/components/executor_test.py rename to sdk/python/kfp/dsl/executor_test.py index 87625cc3b0..351e68a17a 100644 --- a/sdk/python/kfp/components/executor_test.py +++ b/sdk/python/kfp/dsl/executor_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
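# Editor's note (illustrative, not part of the patch): as shown earlier in the
# component_factory.py hunks, the entrypoint baked into generated component commands
# now targets the relocated executor module, roughly:
#   python3 -m kfp.dsl.executor_main --component_module_path <module.py> ...
# instead of python3 -m kfp.components.executor_main; containerized components invoke
# the same module via ['python3', '-m', 'kfp.dsl.executor_main'].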
-"""Tests for kfp.components.executor.""" +"""Tests for kfp.dsl.executor.""" import json import os @@ -21,17 +21,17 @@ from unittest import mock from absl.testing import parameterized -from kfp.components import executor -from kfp.components.task_final_status import PipelineTaskFinalStatus -from kfp.components.types import artifact_types -from kfp.components.types.artifact_types import Artifact -from kfp.components.types.artifact_types import Dataset -from kfp.components.types.artifact_types import Metrics -from kfp.components.types.artifact_types import Model -from kfp.components.types.type_annotations import InputPath -from kfp.components.types.type_annotations import OutputPath +from kfp.dsl import executor from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl.task_final_status import PipelineTaskFinalStatus +from kfp.dsl.types import artifact_types +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.artifact_types import Metrics +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputPath class ExecutorTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/for_loop.py b/sdk/python/kfp/dsl/for_loop.py similarity index 99% rename from sdk/python/kfp/components/for_loop.py rename to sdk/python/kfp/dsl/for_loop.py index f838dff257..5381576631 100644 --- a/sdk/python/kfp/components/for_loop.py +++ b/sdk/python/kfp/dsl/for_loop.py @@ -16,7 +16,7 @@ import re from typing import Any, Dict, List, Optional, Union -from kfp.components import pipeline_channel +from kfp.dsl import pipeline_channel ItemList = List[Union[int, float, str, Dict[str, Any]]] diff --git a/sdk/python/kfp/components/for_loop_test.py b/sdk/python/kfp/dsl/for_loop_test.py similarity index 98% rename from sdk/python/kfp/components/for_loop_test.py rename to sdk/python/kfp/dsl/for_loop_test.py index b1e3549c94..7d1559c87b 100644 --- a/sdk/python/kfp/components/for_loop_test.py +++ b/sdk/python/kfp/dsl/for_loop_test.py @@ -15,8 +15,8 @@ import unittest from absl.testing import parameterized -from kfp.components import for_loop -from kfp.components import pipeline_channel +from kfp.dsl import for_loop +from kfp.dsl import pipeline_channel class ForLoopTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/graph_component.py b/sdk/python/kfp/dsl/graph_component.py similarity index 95% rename from sdk/python/kfp/components/graph_component.py rename to sdk/python/kfp/dsl/graph_component.py index 181ae809a8..2b09927dfa 100644 --- a/sdk/python/kfp/components/graph_component.py +++ b/sdk/python/kfp/dsl/graph_component.py @@ -18,10 +18,10 @@ import uuid from kfp.compiler import pipeline_spec_builder as builder -from kfp.components import base_component -from kfp.components import pipeline_channel -from kfp.components import pipeline_context -from kfp.components import structures +from kfp.dsl import base_component +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import structures from kfp.pipeline_spec import pipeline_spec_pb2 diff --git a/sdk/python/kfp/components/importer_component.py b/sdk/python/kfp/dsl/importer_component.py similarity index 92% rename from sdk/python/kfp/components/importer_component.py rename to sdk/python/kfp/dsl/importer_component.py index ac6e14614a..168c7c6f73 100644 --- a/sdk/python/kfp/components/importer_component.py +++ b/sdk/python/kfp/dsl/importer_component.py 
@@ -13,8 +13,8 @@ # limitations under the License. """Importer-based component.""" -from kfp.components import base_component -from kfp.components import structures +from kfp.dsl import base_component +from kfp.dsl import structures class ImporterComponent(base_component.BaseComponent): diff --git a/sdk/python/kfp/components/importer_node.py b/sdk/python/kfp/dsl/importer_node.py similarity index 93% rename from sdk/python/kfp/components/importer_node.py rename to sdk/python/kfp/dsl/importer_node.py index d62edf7917..2a3e676daa 100644 --- a/sdk/python/kfp/components/importer_node.py +++ b/sdk/python/kfp/dsl/importer_node.py @@ -15,14 +15,14 @@ from typing import Any, Dict, Mapping, Optional, Type, Union -from kfp.components import importer_component -from kfp.components import pipeline_channel -from kfp.components import pipeline_task -from kfp.components import placeholders -from kfp.components import structures -from kfp.components import utils -from kfp.components.types import artifact_types -from kfp.components.types import type_utils +from kfp.dsl import importer_component +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_task +from kfp.dsl import placeholders +from kfp.dsl import structures +from kfp.dsl import utils +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_utils URI_KEY = 'uri' OUTPUT_KEY = 'artifact' diff --git a/sdk/python/kfp/components/importer_node_test.py b/sdk/python/kfp/dsl/importer_node_test.py similarity index 98% rename from sdk/python/kfp/components/importer_node_test.py rename to sdk/python/kfp/dsl/importer_node_test.py index d6066b5a4d..0351382b10 100644 --- a/sdk/python/kfp/components/importer_node_test.py +++ b/sdk/python/kfp/dsl/importer_node_test.py @@ -14,8 +14,8 @@ import unittest from kfp import dsl -from kfp.components import importer_node -from kfp.components.types.artifact_types import Dataset +from kfp.dsl import importer_node +from kfp.dsl.types.artifact_types import Dataset class TestImporterSupportsDynamicMetadata(unittest.TestCase): diff --git a/sdk/python/kfp/components/kfp_config.py b/sdk/python/kfp/dsl/kfp_config.py similarity index 100% rename from sdk/python/kfp/components/kfp_config.py rename to sdk/python/kfp/dsl/kfp_config.py diff --git a/sdk/python/kfp/components/pipeline_channel.py b/sdk/python/kfp/dsl/pipeline_channel.py similarity index 99% rename from sdk/python/kfp/components/pipeline_channel.py rename to sdk/python/kfp/dsl/pipeline_channel.py index 26ad27eae3..66616103fb 100644 --- a/sdk/python/kfp/components/pipeline_channel.py +++ b/sdk/python/kfp/dsl/pipeline_channel.py @@ -20,7 +20,7 @@ import re from typing import Dict, List, Optional, Union -from kfp.components.types import type_utils +from kfp.dsl.types import type_utils @dataclasses.dataclass @@ -97,7 +97,7 @@ def __init__( # so that serialization and unserialization remain consistent # (i.e. 
None => '' => None) self.task_name = task_name or None - from kfp.components import pipeline_context + from kfp.dsl import pipeline_context default_pipeline = pipeline_context.Pipeline.get_default_pipeline() if self.task_name is not None and default_pipeline is not None and default_pipeline.tasks: diff --git a/sdk/python/kfp/components/pipeline_channel_test.py b/sdk/python/kfp/dsl/pipeline_channel_test.py similarity index 98% rename from sdk/python/kfp/components/pipeline_channel_test.py rename to sdk/python/kfp/dsl/pipeline_channel_test.py index 060fe4ad23..4de0e84a25 100644 --- a/sdk/python/kfp/components/pipeline_channel_test.py +++ b/sdk/python/kfp/dsl/pipeline_channel_test.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for kfp.components.pipeline_channel.""" +"""Tests for kfp.dsl.pipeline_channel.""" import unittest from absl.testing import parameterized from kfp import dsl -from kfp.components import pipeline_channel +from kfp.dsl import pipeline_channel class PipelineChannelTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/pipeline_context.py b/sdk/python/kfp/dsl/pipeline_context.py similarity index 97% rename from sdk/python/kfp/components/pipeline_context.py rename to sdk/python/kfp/dsl/pipeline_context.py index a41e401da6..c1304c39ba 100644 --- a/sdk/python/kfp/components/pipeline_context.py +++ b/sdk/python/kfp/dsl/pipeline_context.py @@ -16,10 +16,10 @@ import functools from typing import Callable, Optional -from kfp.components import component_factory -from kfp.components import pipeline_task -from kfp.components import tasks_group -from kfp.components import utils +from kfp.dsl import component_factory +from kfp.dsl import pipeline_task +from kfp.dsl import tasks_group +from kfp.dsl import utils def pipeline(func: Optional[Callable] = None, diff --git a/sdk/python/kfp/components/pipeline_task.py b/sdk/python/kfp/dsl/pipeline_task.py similarity index 97% rename from sdk/python/kfp/components/pipeline_task.py rename to sdk/python/kfp/dsl/pipeline_task.py index f1e47bbaf1..f35cdd752b 100644 --- a/sdk/python/kfp/components/pipeline_task.py +++ b/sdk/python/kfp/dsl/pipeline_task.py @@ -20,12 +20,12 @@ from typing import Any, Dict, List, Mapping, Optional, Union import warnings -from kfp.components import constants -from kfp.components import pipeline_channel -from kfp.components import placeholders -from kfp.components import structures -from kfp.components import utils -from kfp.components.types import type_utils +from kfp.dsl import constants +from kfp.dsl import pipeline_channel +from kfp.dsl import placeholders +from kfp.dsl import structures +from kfp.dsl import utils +from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 _register_task_handler = lambda task: utils.maybe_rename_for_k8s( @@ -69,7 +69,7 @@ def __init__( ): """Initilizes a PipelineTask instance.""" # import within __init__ to avoid circular import - from kfp.components.tasks_group import TasksGroup + from kfp.dsl.tasks_group import TasksGroup self.parent_task_group: Union[None, TasksGroup] = None args = args or {} @@ -617,7 +617,7 @@ def my_pipeline(text: str = 'message'): return self -# TODO: this function should ideally be in the function kfp.components.structures.check_placeholder_references_valid_io_name, which does something similar, but this causes the exception to be raised at component 
definition time, rather than compile time. This would break tests that load v1 component YAML, even though that YAML is invalid. +# TODO: this function should ideally be in the function kfp.dsl.structures.check_placeholder_references_valid_io_name, which does something similar, but this causes the exception to be raised at component definition time, rather than compile time. This would break tests that load v1 component YAML, even though that YAML is invalid. def check_primitive_placeholder_is_used_for_correct_io_type( inputs_dict: Dict[str, structures.InputSpec], outputs_dict: Dict[str, structures.OutputSpec], diff --git a/sdk/python/kfp/components/pipeline_task_test.py b/sdk/python/kfp/dsl/pipeline_task_test.py similarity index 98% rename from sdk/python/kfp/components/pipeline_task_test.py rename to sdk/python/kfp/dsl/pipeline_task_test.py index 128a83a349..6e7443fc1a 100644 --- a/sdk/python/kfp/components/pipeline_task_test.py +++ b/sdk/python/kfp/dsl/pipeline_task_test.py @@ -11,16 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for kfp.components.pipeline_task.""" +"""Tests for kfp.dsl.pipeline_task.""" import textwrap import unittest from absl.testing import parameterized from kfp import dsl -from kfp.components import pipeline_task -from kfp.components import placeholders -from kfp.components import structures +from kfp.dsl import pipeline_task +from kfp.dsl import placeholders +from kfp.dsl import structures V2_YAML = textwrap.dedent("""\ components: diff --git a/sdk/python/kfp/components/placeholders.py b/sdk/python/kfp/dsl/placeholders.py similarity index 99% rename from sdk/python/kfp/components/placeholders.py rename to sdk/python/kfp/dsl/placeholders.py index 701f463ada..39a2617cff 100644 --- a/sdk/python/kfp/components/placeholders.py +++ b/sdk/python/kfp/dsl/placeholders.py @@ -18,8 +18,8 @@ import json from typing import Any, Dict, List, Optional, Union -from kfp.components import utils -from kfp.components.types import type_utils +from kfp.dsl import utils +from kfp.dsl.types import type_utils class Placeholder(abc.ABC): diff --git a/sdk/python/kfp/components/placeholders_test.py b/sdk/python/kfp/dsl/placeholders_test.py similarity index 99% rename from sdk/python/kfp/components/placeholders_test.py rename to sdk/python/kfp/dsl/placeholders_test.py index 9c89660621..da89f4cfa8 100644 --- a/sdk/python/kfp/components/placeholders_test.py +++ b/sdk/python/kfp/dsl/placeholders_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
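# Editor's illustrative sketch (not part of the patch): ConcatPlaceholder and
# IfPresentPlaceholder remain exposed through kfp.dsl even though the placeholders
# module itself moved; a minimal container component using one might look like this
# (component name and image are hypothetical):
from kfp import dsl

@dsl.container_component
def say_hello(name: str):
    return dsl.ContainerSpec(
        image='alpine',
        command=['echo'],
        args=[dsl.ConcatPlaceholder(['Hello, ', name])])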
-"""Contains tests for kfp.components.placeholders.""" +"""Contains tests for kfp.dsl.placeholders.""" import os import tempfile from typing import Any, List @@ -19,11 +19,11 @@ from absl.testing import parameterized from kfp import compiler from kfp import dsl -from kfp.components import placeholders from kfp.dsl import Artifact from kfp.dsl import Dataset from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl import placeholders class TestExecutorInputPlaceholder(parameterized.TestCase): diff --git a/sdk/python/kfp/components/python_component.py b/sdk/python/kfp/dsl/python_component.py similarity index 92% rename from sdk/python/kfp/components/python_component.py rename to sdk/python/kfp/dsl/python_component.py index 8106f2e4bf..faa4c44740 100644 --- a/sdk/python/kfp/components/python_component.py +++ b/sdk/python/kfp/dsl/python_component.py @@ -15,11 +15,11 @@ from typing import Callable -from kfp import components -from kfp.components import structures +from kfp.dsl import base_component +from kfp.dsl import structures -class PythonComponent(components.BaseComponent): +class PythonComponent(base_component.BaseComponent): """A component defined via Python function. **Note:** ``PythonComponent`` is not intended to be used to construct components directly. Use ``@kfp.dsl.component`` instead. diff --git a/sdk/python/kfp/components/structures.py b/sdk/python/kfp/dsl/structures.py similarity index 98% rename from sdk/python/kfp/components/structures.py rename to sdk/python/kfp/dsl/structures.py index a39cff4fee..24486e730d 100644 --- a/sdk/python/kfp/components/structures.py +++ b/sdk/python/kfp/dsl/structures.py @@ -23,15 +23,15 @@ from google.protobuf import json_format import kfp -from kfp.components import placeholders -from kfp.components import utils -from kfp.components import v1_components -from kfp.components import v1_structures -from kfp.components.container_component_artifact_channel import \ +from kfp.dsl import placeholders +from kfp.dsl import utils +from kfp.dsl import v1_components +from kfp.dsl import v1_structures +from kfp.dsl.container_component_artifact_channel import \ ContainerComponentArtifactChannel -from kfp.components.types import artifact_types -from kfp.components.types import type_annotations -from kfp.components.types import type_utils +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 import yaml @@ -912,9 +912,9 @@ def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: # import here to aviod circular module dependency from kfp.compiler import compiler_utils from kfp.compiler import pipeline_spec_builder as builder - from kfp.components import pipeline_channel - from kfp.components import pipeline_task - from kfp.components import tasks_group + from kfp.dsl import pipeline_channel + from kfp.dsl import pipeline_task + from kfp.dsl import tasks_group args_dict = {} pipeline_inputs = self.inputs or {} diff --git a/sdk/python/kfp/components/structures_test.py b/sdk/python/kfp/dsl/structures_test.py similarity index 99% rename from sdk/python/kfp/components/structures_test.py rename to sdk/python/kfp/dsl/structures_test.py index 6bacae4e77..ad6274d931 100644 --- a/sdk/python/kfp/components/structures_test.py +++ b/sdk/python/kfp/dsl/structures_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Tests for kfp.components.structures.""" +"""Tests for kfp.dsl.structures.""" import os import tempfile @@ -22,9 +22,9 @@ from kfp import compiler from kfp import components from kfp import dsl -from kfp.components import component_factory -from kfp.components import placeholders -from kfp.components import structures +from kfp.dsl import component_factory +from kfp.dsl import placeholders +from kfp.dsl import structures V1_YAML_IF_PLACEHOLDER = textwrap.dedent("""\ implementation: diff --git a/sdk/python/kfp/components/task_final_status.py b/sdk/python/kfp/dsl/task_final_status.py similarity index 100% rename from sdk/python/kfp/components/task_final_status.py rename to sdk/python/kfp/dsl/task_final_status.py diff --git a/sdk/python/kfp/components/tasks_group.py b/sdk/python/kfp/dsl/tasks_group.py similarity index 97% rename from sdk/python/kfp/components/tasks_group.py rename to sdk/python/kfp/dsl/tasks_group.py index c5390c6899..42d1446a9d 100644 --- a/sdk/python/kfp/components/tasks_group.py +++ b/sdk/python/kfp/dsl/tasks_group.py @@ -16,10 +16,10 @@ import enum from typing import Optional, Union -from kfp.components import for_loop -from kfp.components import pipeline_channel -from kfp.components import pipeline_context -from kfp.components import pipeline_task +from kfp.dsl import for_loop +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import pipeline_task class TasksGroupType(str, enum.Enum): diff --git a/sdk/python/kfp/components/tasks_group_test.py b/sdk/python/kfp/dsl/tasks_group_test.py similarity index 95% rename from sdk/python/kfp/components/tasks_group_test.py rename to sdk/python/kfp/dsl/tasks_group_test.py index c64346b245..09ba5cdbc3 100644 --- a/sdk/python/kfp/components/tasks_group_test.py +++ b/sdk/python/kfp/dsl/tasks_group_test.py @@ -13,9 +13,9 @@ # limitations under the License. 
from absl.testing import parameterized -from kfp.components import for_loop -from kfp.components import pipeline_context -from kfp.components import tasks_group +from kfp.dsl import for_loop +from kfp.dsl import pipeline_context +from kfp.dsl import tasks_group class ParallelForTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/types/__init__.py b/sdk/python/kfp/dsl/types/__init__.py similarity index 100% rename from sdk/python/kfp/components/types/__init__.py rename to sdk/python/kfp/dsl/types/__init__.py diff --git a/sdk/python/kfp/components/types/artifact_types.py b/sdk/python/kfp/dsl/types/artifact_types.py similarity index 100% rename from sdk/python/kfp/components/types/artifact_types.py rename to sdk/python/kfp/dsl/types/artifact_types.py diff --git a/sdk/python/kfp/components/types/artifact_types_test.py b/sdk/python/kfp/dsl/types/artifact_types_test.py similarity index 98% rename from sdk/python/kfp/components/types/artifact_types_test.py rename to sdk/python/kfp/dsl/types/artifact_types_test.py index 517d5f9b4e..917ad95a45 100644 --- a/sdk/python/kfp/components/types/artifact_types_test.py +++ b/sdk/python/kfp/dsl/types/artifact_types_test.py @@ -18,7 +18,7 @@ import unittest from absl.testing import parameterized -from kfp.components.types import artifact_types +from kfp.dsl.types import artifact_types class ArtifactsTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/types/custom_artifact_types.py b/sdk/python/kfp/dsl/types/custom_artifact_types.py similarity index 98% rename from sdk/python/kfp/components/types/custom_artifact_types.py rename to sdk/python/kfp/dsl/types/custom_artifact_types.py index 7174c86aa8..484dfa6508 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types.py +++ b/sdk/python/kfp/dsl/types/custom_artifact_types.py @@ -16,9 +16,9 @@ import inspect from typing import Callable, Dict, List, Union -from kfp.components import component_factory -from kfp.components.types import type_annotations -from kfp.components.types import type_utils +from kfp.dsl import component_factory +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils RETURN_PREFIX = 'return-' diff --git a/sdk/python/kfp/components/types/custom_artifact_types_test.py b/sdk/python/kfp/dsl/types/custom_artifact_types_test.py similarity index 96% rename from sdk/python/kfp/components/types/custom_artifact_types_test.py rename to sdk/python/kfp/dsl/types/custom_artifact_types_test.py index ed856db23b..4a9c279df6 100644 --- a/sdk/python/kfp/components/types/custom_artifact_types_test.py +++ b/sdk/python/kfp/dsl/types/custom_artifact_types_test.py @@ -24,14 +24,14 @@ from absl.testing import parameterized import kfp from kfp import dsl -from kfp.components.types import artifact_types -from kfp.components.types import custom_artifact_types -from kfp.components.types.artifact_types import Artifact -from kfp.components.types.artifact_types import Dataset -from kfp.components.types.type_annotations import InputPath -from kfp.components.types.type_annotations import OutputPath from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl.types import artifact_types +from kfp.dsl.types import custom_artifact_types +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputPath Alias = Artifact artifact_types_alias = artifact_types @@ -219,9 +219,9 @@ def func() -> 
typing.NamedTuple('Outputs', [ class TestGetFullQualnameForArtifact(_TestCaseWithThirdPartyPackage): # only gets called on artifacts, so don't need to test on all types @parameterized.parameters([ - (Alias, 'kfp.components.types.artifact_types.Artifact'), - (Artifact, 'kfp.components.types.artifact_types.Artifact'), - (Dataset, 'kfp.components.types.artifact_types.Dataset'), + (Alias, 'kfp.dsl.types.artifact_types.Artifact'), + (Artifact, 'kfp.dsl.types.artifact_types.Artifact'), + (Dataset, 'kfp.dsl.types.artifact_types.Dataset'), ]) def test(self, obj: Any, expected_qualname: str): self.assertEqual( diff --git a/sdk/python/kfp/components/types/test_data/expected_bulk_loaded_confusion_matrix.json b/sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json similarity index 100% rename from sdk/python/kfp/components/types/test_data/expected_bulk_loaded_confusion_matrix.json rename to sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json diff --git a/sdk/python/kfp/components/types/test_data/expected_confusion_matrix.json b/sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json similarity index 100% rename from sdk/python/kfp/components/types/test_data/expected_confusion_matrix.json rename to sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json diff --git a/sdk/python/kfp/components/types/test_data/expected_io_types_bulk_load_classification_metrics.json b/sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json similarity index 100% rename from sdk/python/kfp/components/types/test_data/expected_io_types_bulk_load_classification_metrics.json rename to sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json diff --git a/sdk/python/kfp/components/types/test_data/expected_io_types_classification_metrics.json b/sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json similarity index 100% rename from sdk/python/kfp/components/types/test_data/expected_io_types_classification_metrics.json rename to sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json diff --git a/sdk/python/kfp/components/types/type_annotations.py b/sdk/python/kfp/dsl/types/type_annotations.py similarity index 98% rename from sdk/python/kfp/components/types/type_annotations.py rename to sdk/python/kfp/dsl/types/type_annotations.py index 138a98198f..aa39d2002e 100644 --- a/sdk/python/kfp/components/types/type_annotations.py +++ b/sdk/python/kfp/dsl/types/type_annotations.py @@ -19,9 +19,9 @@ import re from typing import List, Type, TypeVar, Union -from kfp.components.types import artifact_types -from kfp.components.types import type_annotations -from kfp.components.types import type_utils +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils class OutputPath: diff --git a/sdk/python/kfp/components/types/type_annotations_test.py b/sdk/python/kfp/dsl/types/type_annotations_test.py similarity index 94% rename from sdk/python/kfp/components/types/type_annotations_test.py rename to sdk/python/kfp/dsl/types/type_annotations_test.py index f514cdd2fa..099208c1b1 100644 --- a/sdk/python/kfp/components/types/type_annotations_test.py +++ b/sdk/python/kfp/dsl/types/type_annotations_test.py @@ -11,21 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for kfp.components.types.type_annotations.""" +"""Tests for kfp.dsl.types.type_annotations.""" from typing import Any, Dict, List, Optional import unittest from absl.testing import parameterized -from kfp.components.types import artifact_types -from kfp.components.types import type_annotations -from kfp.components.types.artifact_types import Model -from kfp.components.types.type_annotations import InputAnnotation -from kfp.components.types.type_annotations import InputPath -from kfp.components.types.type_annotations import OutputAnnotation -from kfp.components.types.type_annotations import OutputPath from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.type_annotations import InputAnnotation +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputAnnotation +from kfp.dsl.types.type_annotations import OutputPath class AnnotationsTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/types/type_utils.py b/sdk/python/kfp/dsl/types/type_utils.py similarity index 98% rename from sdk/python/kfp/components/types/type_utils.py rename to sdk/python/kfp/dsl/types/type_utils.py index 92b9328fab..bd724742c5 100644 --- a/sdk/python/kfp/components/types/type_utils.py +++ b/sdk/python/kfp/dsl/types/type_utils.py @@ -20,11 +20,10 @@ import warnings import kfp -from kfp.components import pipeline_channel -from kfp.components import structures -from kfp.components import task_final_status -from kfp.components.types import artifact_types -from kfp.components.types import type_annotations +from kfp.dsl import structures +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations from kfp.pipeline_spec import pipeline_spec_pb2 DEFAULT_ARTIFACT_SCHEMA_VERSION = '0.0.1' @@ -231,9 +230,9 @@ def _get_type_string_from_component_argument( argument_value: Union['pipeline_channel.PipelineChannel', str, bool, int, float, dict, list] ) -> str: - # argument is a PipelineChannel - if isinstance(argument_value, - kfp.components.pipeline_channel.PipelineChannel): + # avoid circular imports + from kfp.dsl import pipeline_channel + if isinstance(argument_value, pipeline_channel.PipelineChannel): return argument_value.channel_type # argument is a constant diff --git a/sdk/python/kfp/components/types/type_utils_test.py b/sdk/python/kfp/dsl/types/type_utils_test.py similarity index 98% rename from sdk/python/kfp/components/types/type_utils_test.py rename to sdk/python/kfp/dsl/types/type_utils_test.py index 6c36483b11..ee2cf16180 100644 --- a/sdk/python/kfp/components/types/type_utils_test.py +++ b/sdk/python/kfp/dsl/types/type_utils_test.py @@ -21,16 +21,16 @@ from kfp import compiler from kfp import components from kfp import dsl -from kfp.components import base_component -from kfp.components import pipeline_channel -from kfp.components import structures -from kfp.components import yaml_component -from kfp.components.types import artifact_types -from kfp.components.types import type_utils -from kfp.components.types.type_utils import InconsistentTypeException +from kfp.dsl import base_component from kfp.dsl import Dataset from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl import pipeline_channel +from kfp.dsl import structures +from kfp.dsl import yaml_component +from kfp.dsl.types import artifact_types +from kfp.dsl.types 
import type_utils +from kfp.dsl.types.type_utils import InconsistentTypeException from kfp.pipeline_spec import pipeline_spec_pb2 as pb _PARAMETER_TYPES = [ diff --git a/sdk/python/kfp/components/utils.py b/sdk/python/kfp/dsl/utils.py similarity index 100% rename from sdk/python/kfp/components/utils.py rename to sdk/python/kfp/dsl/utils.py diff --git a/sdk/python/kfp/components/utils_test.py b/sdk/python/kfp/dsl/utils_test.py similarity index 98% rename from sdk/python/kfp/components/utils_test.py rename to sdk/python/kfp/dsl/utils_test.py index 1a3df52f07..a0be587fc0 100644 --- a/sdk/python/kfp/components/utils_test.py +++ b/sdk/python/kfp/dsl/utils_test.py @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for kfp.components.utils.""" +"""Tests for kfp.dsl.utils.""" import unittest from absl.testing import parameterized -from kfp.components import utils +from kfp.dsl import utils class UtilsTest(parameterized.TestCase): diff --git a/sdk/python/kfp/components/v1_components.py b/sdk/python/kfp/dsl/v1_components.py similarity index 97% rename from sdk/python/kfp/components/v1_components.py rename to sdk/python/kfp/dsl/v1_components.py index f276fa9702..9714d56eef 100644 --- a/sdk/python/kfp/components/v1_components.py +++ b/sdk/python/kfp/dsl/v1_components.py @@ -15,7 +15,7 @@ import hashlib import warnings -from kfp.components import v1_structures +from kfp.dsl import v1_structures import yaml diff --git a/sdk/python/kfp/components/v1_modelbase.py b/sdk/python/kfp/dsl/v1_modelbase.py similarity index 100% rename from sdk/python/kfp/components/v1_modelbase.py rename to sdk/python/kfp/dsl/v1_modelbase.py diff --git a/sdk/python/kfp/components/v1_structures.py b/sdk/python/kfp/dsl/v1_structures.py similarity index 99% rename from sdk/python/kfp/components/v1_structures.py rename to sdk/python/kfp/dsl/v1_structures.py index 90818f260c..661cef196f 100644 --- a/sdk/python/kfp/components/v1_structures.py +++ b/sdk/python/kfp/dsl/v1_structures.py @@ -15,10 +15,9 @@ from collections import OrderedDict from typing import Any, Dict, List, Mapping, Optional, Union +from kfp.dsl.v1_modelbase import ModelBase import yaml -from .v1_modelbase import ModelBase - PrimitiveTypes = Union[str, int, float, bool] PrimitiveTypesIncludingNone = Optional[PrimitiveTypes] diff --git a/sdk/python/kfp/dsl/yaml_component.py b/sdk/python/kfp/dsl/yaml_component.py new file mode 100644 index 0000000000..187fa533f2 --- /dev/null +++ b/sdk/python/kfp/dsl/yaml_component.py @@ -0,0 +1,54 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Component loaded from YAML.""" + +from google.protobuf import json_format +from kfp.dsl import base_component +from kfp.dsl import structures +from kfp.pipeline_spec import pipeline_spec_pb2 + + +class YamlComponent(base_component.BaseComponent): + """A component loaded from a YAML file. 
+ + **Note:** ``YamlComponent`` is not intended to be used to construct components directly. Use ``kfp.components.load_component_from_*()`` instead. + + Attribute: + component_spec: Component definition. + component_yaml: The yaml string that this component is loaded from. + """ + + def __init__( + self, + component_spec: structures.ComponentSpec, + component_yaml: str, + ): + super().__init__(component_spec=component_spec) + self.component_yaml = component_yaml + + @property + def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Returns the pipeline spec of the component.""" + component_dict = structures.load_documents_from_yaml( + self.component_yaml)[0] + is_v1 = 'implementation' in set(component_dict.keys()) + if is_v1: + return self.component_spec.to_pipeline_spec() + else: + return json_format.ParseDict(component_dict, + pipeline_spec_pb2.PipelineSpec()) + + def execute(self, *args, **kwargs): + """Not implemented.""" + raise NotImplementedError diff --git a/sdk/python/test_data/components/add_numbers.yaml b/sdk/python/test_data/components/add_numbers.yaml index 33654302b6..5b5486da36 100644 --- a/sdk/python/test_data/components/add_numbers.yaml +++ b/sdk/python/test_data/components/add_numbers.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -40,7 +40,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -81,4 +81,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/component_with_metadata_fields.yaml b/sdk/python/test_data/components/component_with_metadata_fields.yaml index 16bdc539b1..61a41867cf 100644 --- a/sdk/python/test_data/components/component_with_metadata_fields.yaml +++ b/sdk/python/test_data/components/component_with_metadata_fields.yaml @@ -48,7 +48,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -56,7 +56,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -124,4 +124,4 @@ root: description: The concatenated string. 
parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/component_with_pip_install.yaml b/sdk/python/test_data/components/component_with_pip_install.yaml index d9b8cf7a13..4e4335a204 100644 --- a/sdk/python/test_data/components/component_with_pip_install.yaml +++ b/sdk/python/test_data/components/component_with_pip_install.yaml @@ -19,14 +19,14 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.0-beta.16' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -46,4 +46,4 @@ root: taskInfo: name: component-with-pip-install schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/component_with_task_final_status.yaml b/sdk/python/test_data/components/component_with_task_final_status.yaml index e0a4bda25d..ac138f7055 100644 --- a/sdk/python/test_data/components/component_with_task_final_status.yaml +++ b/sdk/python/test_data/components/component_with_task_final_status.yaml @@ -24,7 +24,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -32,7 +32,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -61,4 +61,4 @@ root: isOptional: true parameterType: TASK_FINAL_STATUS schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/concat_message.yaml b/sdk/python/test_data/components/concat_message.yaml index 4f0e97a25d..5dc62f9620 100644 --- a/sdk/python/test_data/components/concat_message.yaml +++ b/sdk/python/test_data/components/concat_message.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -40,7 +40,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -82,4 +82,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/dict_input.yaml b/sdk/python/test_data/components/dict_input.yaml index 1eeac295b0..977103a338 100644 --- a/sdk/python/test_data/components/dict_input.yaml +++ b/sdk/python/test_data/components/dict_input.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -31,7 +31,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -58,4 +58,4 @@ root: struct: parameterType: STRUCT schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/identity.yaml b/sdk/python/test_data/components/identity.yaml index 17b6b9afef..b8a4551a9f 100644 --- a/sdk/python/test_data/components/identity.yaml +++ b/sdk/python/test_data/components/identity.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -37,7 +37,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -74,4 +74,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/input_artifact.yaml b/sdk/python/test_data/components/input_artifact.yaml index 813ecc83e9..e029dd8161 100644 --- a/sdk/python/test_data/components/input_artifact.yaml +++ b/sdk/python/test_data/components/input_artifact.yaml @@ -25,7 +25,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -33,7 +33,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -63,4 +63,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/nested_return.yaml b/sdk/python/test_data/components/nested_return.yaml index e8224c18da..810215dcf3 100644 --- a/sdk/python/test_data/components/nested_return.yaml +++ b/sdk/python/test_data/components/nested_return.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -31,7 +31,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -61,4 +61,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/output_metrics.yaml b/sdk/python/test_data/components/output_metrics.yaml index 3b67e606fc..6a18a32d0b 100644 --- a/sdk/python/test_data/components/output_metrics.yaml +++ b/sdk/python/test_data/components/output_metrics.yaml @@ -27,7 +27,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -35,7 +35,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -77,4 +77,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/components/preprocess.yaml b/sdk/python/test_data/components/preprocess.yaml index 05fff3f4cf..03c46dbdac 100644 --- a/sdk/python/test_data/components/preprocess.yaml +++ b/sdk/python/test_data/components/preprocess.yaml @@ -56,7 +56,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.16'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -64,7 +64,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,4 +171,4 @@ root: output_parameter_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index aa8dd25973..f53f6ae05d 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -37,7 +37,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -68,4 +68,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index e8611f4712..59ebc83433 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -19,14 +19,14 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -45,4 +45,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index 6294901944..5bcf95a08e 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -134,7 +134,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -155,7 +155,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -163,7 +163,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -237,4 +237,4 @@ root: schemaVersion: 0.0.1 isOptional: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index a57f218574..abc9a2995d 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -78,7 +78,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -86,7 +86,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -130,7 +130,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -138,7 +138,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -238,4 +238,4 @@ root: message: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index f933eb76b2..b7525f874c 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -89,7 +89,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -108,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -116,7 +116,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -135,7 +135,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -143,7 +143,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -162,7 +162,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -170,7 +170,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -273,4 +273,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml index ede6a1e7dd..ad5e32ce02 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml @@ -285,7 +285,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -293,7 +293,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -315,7 +315,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -323,7 +323,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -345,7 +345,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -353,7 +353,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -375,7 +375,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -383,7 +383,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -403,7 +403,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -411,7 +411,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -484,4 +484,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml index 3526dcfd73..55f5c8ae24 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml @@ -90,7 +90,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -98,7 +98,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -136,7 +136,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -144,7 +144,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -209,4 +209,4 @@ root: schemaVersion: 0.0.1 isArtifactList: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index 7a3cbbf6de..c2d8aae620 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -140,7 +140,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -158,7 +158,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -166,7 +166,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -225,4 +225,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index a318183a26..af4379d557 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -150,7 +150,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -158,12 +158,12 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef add(nums: List[int]) -> int:\n import itertools\n return\ - \ sum(itertools.chain(*nums))\n\n" + \ *\n\ndef add(nums: List[List[int]]) -> int:\n import itertools\n \ + \ return sum(itertools.chain(*nums))\n\n" image: python:3.7 exec-add-two-nums: container: @@ -177,7 +177,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -185,7 +185,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -211,7 +211,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -229,7 +229,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -237,7 +237,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -283,4 +283,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index c056ae02ba..b76f1ad5b6 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -232,7 +232,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -251,7 +251,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -277,7 +277,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -285,7 +285,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -303,7 +303,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -311,7 +311,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -330,7 +330,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -338,7 +338,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -357,7 +357,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -365,7 +365,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -383,7 +383,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -391,7 +391,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -477,4 +477,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index f6c8694c0b..9bc16ff5b2 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -75,7 +75,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -83,7 +83,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -111,7 +111,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -119,7 +119,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -180,4 +180,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index 1afb9904a0..18fc3aa052 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -214,7 +214,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -241,7 +241,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -267,7 +267,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.14'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -294,7 +294,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -356,4 +356,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.14 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index 15e7fa8601..42c88e3a68 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,7 +129,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -137,7 +137,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -156,7 +156,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -164,7 +164,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -183,7 +183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -191,7 +191,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -210,7 +210,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -218,7 +218,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -262,4 +262,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index c899cb25c1..9c8f5e0993 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -82,7 +82,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -109,7 +109,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -152,4 +152,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index b52048b78d..63ce9aceb0 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -169,7 +169,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -188,7 +188,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -196,7 +196,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -241,4 +241,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index b90af91c22..ab7d67cac7 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -160,7 +160,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -187,7 +187,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -214,7 +214,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -264,4 +264,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index 436a60bf0d..5eed3984a5 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -88,7 +88,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -96,7 +96,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -116,7 +116,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -124,7 +124,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -144,7 +144,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -152,7 +152,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -264,4 +264,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 29753c24ec..6443b13909 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -94,7 +94,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -102,7 +102,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -181,4 +181,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index 8e9f5d74e9..789a1e975d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -41,7 +41,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -49,7 +49,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -79,4 +79,4 @@ root: taskInfo: name: print-env-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index 60f7602c31..b1c6091fe2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,7 +65,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -73,7 +73,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -92,7 +92,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -100,7 +100,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -119,7 +119,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -127,7 +127,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,4 +171,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index 646b956e21..6753ae29a0 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -57,14 +57,14 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.0-rc.2' 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -90,14 +90,14 @@ deploymentSpec: - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.0-rc.2' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -150,4 +150,4 @@ root: taskInfo: name: model-producer schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index b9ff81a39d..a7678237f6 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -127,7 +127,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -135,7 +135,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -167,7 +167,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -235,4 +235,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index 37bf1b9547..13999d852c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -232,7 +232,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -250,7 +250,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -258,7 +258,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -276,7 +276,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -284,7 +284,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -302,7 +302,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -310,7 +310,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -328,7 +328,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -336,7 +336,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -354,7 +354,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -362,7 +362,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -424,4 +424,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index fc214ebafe..fbf6dd967b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,7 +602,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -610,7 +610,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -631,7 +631,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -639,7 +639,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -660,7 +660,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -668,7 +668,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -688,7 +688,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -696,7 +696,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -714,7 +714,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -722,7 +722,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -741,7 +741,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -749,7 +749,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -768,7 +768,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -776,7 +776,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -795,7 +795,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -803,7 +803,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -822,7 +822,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -830,7 +830,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -849,7 +849,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -857,7 +857,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -876,7 +876,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -884,7 +884,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -903,7 +903,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -911,7 +911,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -930,7 +930,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -938,7 +938,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -1022,4 +1022,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 5f0da89e46..1aa009e344 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -68,7 +68,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -95,7 +95,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -103,7 +103,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -172,4 +172,4 @@ root: schemaVersion: 0.0.1 description: The final concatenated dataset. schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index 355a257b52..d2091815bf 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -68,7 +68,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -89,7 +89,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -97,7 +97,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -148,4 +148,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index db39ef1192..3bbec7526c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,7 +125,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -133,7 +133,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -160,7 +160,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -187,7 +187,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -214,7 +214,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -241,7 +241,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -260,7 +260,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -268,7 +268,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -287,7 +287,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -295,7 +295,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -389,4 +389,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index 2ffa5012bb..e81a303531 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,7 +147,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -155,7 +155,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -175,7 +175,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -183,7 +183,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -211,7 +211,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -239,7 +239,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -267,7 +267,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -294,7 +294,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -313,7 +313,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -321,7 +321,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -340,7 +340,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -348,7 +348,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -426,4 +426,4 @@ root: taskInfo: name: print-op-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index f062c20860..9b601893ed 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -145,7 +145,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -153,7 +153,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -172,7 +172,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -180,7 +180,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -199,7 +199,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -207,7 +207,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -256,4 +256,4 @@ root: isOptional: true parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index b5f0cbb1c9..1cba4dd0a2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,7 +104,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -112,7 +112,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -131,7 +131,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -139,7 +139,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,4 +203,4 @@ root: schemaTitle: system.Artifact schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index 48acb3b4ef..f1f3a5fa23 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -187,7 +187,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -205,7 +205,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -213,7 +213,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -239,7 +239,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -257,7 +257,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -265,7 +265,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -283,7 +283,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -291,7 +291,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -317,7 +317,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -357,4 +357,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index 3445124b9c..6f31bc7deb 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -82,7 +82,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -109,7 +109,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -128,7 +128,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -136,7 +136,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -201,4 +201,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index 9541824312..5a313c4ed4 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,7 +55,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -63,7 +63,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -89,7 +89,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -107,7 +107,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -115,7 +115,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -133,7 +133,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -141,7 +141,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -167,7 +167,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -254,4 +254,4 @@ root: taskInfo: name: print-op-5 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index bfff3d14c6..34c474435b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,7 +30,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -38,7 +38,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -78,4 +78,4 @@ root: isOptional: true parameterType: NUMBER_DOUBLE schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index 729de262d8..e53e19ac60 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -76,7 +76,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -99,7 +99,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -107,7 +107,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -134,7 +134,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -180,4 +180,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index cba9c92beb..385cb4a1d4 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,7 +35,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -43,7 +43,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -62,7 +62,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -70,7 +70,7 @@ deploymentSpec: printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -117,4 +117,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-rc.2 +sdkVersion: kfp-2.0.1 From ba322abb010d832539d57e90c34be0557f550252 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Fri, 7 Jul 2023 17:17:53 -0700 Subject: [PATCH 018/253] chore(frontend): Refactor resource selector for sharable usage in both v1 and v2 (#9693) * Change the logic of ResourceSelector to make it is able to be used in both v1 and v2. Remove ResourceSelectorV2. * Remove snapshot. 
--- frontend/src/components/PipelinesDialog.tsx | 3 +- frontend/src/components/PipelinesDialogV2.tsx | 4 +- frontend/src/pages/NewRun.test.tsx | 20 ++ frontend/src/pages/NewRun.tsx | 20 +- frontend/src/pages/NewRunV2.tsx | 6 +- frontend/src/pages/ResourceSelector.test.tsx | 18 +- frontend/src/pages/ResourceSelector.tsx | 11 +- .../src/pages/ResourceSelectorV2.test.tsx | 180 ------------------ frontend/src/pages/ResourceSelectorV2.tsx | 157 --------------- .../ResourceSelectorV2.test.tsx.snap | 60 ------ 10 files changed, 45 insertions(+), 434 deletions(-) delete mode 100644 frontend/src/pages/ResourceSelectorV2.test.tsx delete mode 100644 frontend/src/pages/ResourceSelectorV2.tsx delete mode 100644 frontend/src/pages/__snapshots__/ResourceSelectorV2.test.tsx.snap diff --git a/frontend/src/components/PipelinesDialog.tsx b/frontend/src/components/PipelinesDialog.tsx index f56fe8a625..992bbb4474 100644 --- a/frontend/src/components/PipelinesDialog.tsx +++ b/frontend/src/components/PipelinesDialog.tsx @@ -72,7 +72,8 @@ const PipelinesDialog: React.FC = (props): JSX.Element | n columns={props.pipelineSelectorColumns} emptyMessage='No pipelines found. Upload a pipeline and then try again.' initialSortColumn={PipelineSortKeys.CREATED_AT} - selectionChanged={(selectedPipeline: ApiPipeline) => { + selectionChanged={async (selectedId: string) => { + const selectedPipeline = await Apis.pipelineServiceApi.getPipeline(selectedId); setUnconfirmedSelectedPipeline(selectedPipeline); }} /> diff --git a/frontend/src/components/PipelinesDialogV2.tsx b/frontend/src/components/PipelinesDialogV2.tsx index dd15074748..be87860a57 100644 --- a/frontend/src/components/PipelinesDialogV2.tsx +++ b/frontend/src/components/PipelinesDialogV2.tsx @@ -21,7 +21,7 @@ import DialogActions from '@material-ui/core/DialogActions'; import { classes } from 'typestyle'; import { padding, commonCss } from 'src/Css'; import DialogContent from '@material-ui/core/DialogContent'; -import ResourceSelectorV2 from 'src/pages/ResourceSelectorV2'; +import ResourceSelector from 'src/pages/ResourceSelector'; import { Apis, PipelineSortKeys } from 'src/lib/Apis'; import { Column } from './CustomTable'; import { V2beta1Pipeline } from 'src/apisv2beta1/pipeline'; @@ -56,7 +56,7 @@ const PipelinesDialogV2: React.FC = (props): JSX.Element function getPipelinesList(): JSX.Element { return ( - { const expectedPipeline = await screen.findByText(newPipeline.name); fireEvent.click(expectedPipeline); + await waitFor(() => { + expect(getPipelineSpy).toHaveBeenCalled(); + }); + const usePipelineButton = screen.getByText('Use this pipeline'); fireEvent.click(usePipelineButton); @@ -774,6 +778,10 @@ describe('NewRun', () => { const expectedPipeline = await screen.findByText(newPipeline.name); fireEvent.click(expectedPipeline); + await waitFor(() => { + expect(getPipelineSpy).toHaveBeenCalled(); + }); + const usePipelineButton = screen.getByText('Use this pipeline'); fireEvent.click(usePipelineButton); @@ -790,6 +798,10 @@ describe('NewRun', () => { const expectedPipelineVersion = await screen.findByText(latestPipelineVersion.name); fireEvent.click(expectedPipelineVersion); + await waitFor(() => { + expect(getPipelineVersionSpy).toHaveBeenCalled(); + }); + const usePipelineVersionBtn = screen.getByText('Use this pipeline version'); fireEvent.click(usePipelineVersionBtn); @@ -1671,6 +1683,10 @@ describe('NewRun', () => { const expectedPipeline = await screen.findByText(pipelineWithParams.name); fireEvent.click(expectedPipeline); + await waitFor(() => { + 
expect(getPipelineSpy).toHaveBeenCalled(); + }); + const usePipelineButton = screen.getByText('Use this pipeline'); fireEvent.click(usePipelineButton); @@ -1729,6 +1745,10 @@ describe('NewRun', () => { const expectedPipeline = await screen.findByText(pipelineWithParams.name); fireEvent.click(expectedPipeline); + await waitFor(() => { + expect(getPipelineSpy).toHaveBeenCalled(); + }); + const usePipelineButton = screen.getByText('Use this pipeline'); fireEvent.click(usePipelineButton); diff --git a/frontend/src/pages/NewRun.tsx b/frontend/src/pages/NewRun.tsx index c376f36286..0d86fe6eef 100644 --- a/frontend/src/pages/NewRun.tsx +++ b/frontend/src/pages/NewRun.tsx @@ -348,9 +348,14 @@ export class NewRun extends Page { columns={this.pipelineVersionSelectorColumns} emptyMessage='No pipeline versions found. Select or upload a pipeline then try again.' initialSortColumn={PipelineVersionSortKeys.CREATED_AT} - selectionChanged={(selectedPipelineVersion: ApiPipelineVersion) => - this.setStateSafe({ unconfirmedSelectedPipelineVersion: selectedPipelineVersion }) - } + selectionChanged={async (selectedId: string) => { + const selectedPipelineVersion = await Apis.pipelineServiceApi.getPipelineVersion( + selectedId, + ); + this.setStateSafe({ + unconfirmedSelectedPipelineVersion: selectedPipelineVersion, + }); + }} toolbarActionMap={buttons .upload(() => this.setStateSafe({ @@ -432,9 +437,12 @@ export class NewRun extends Page { columns={this.experimentSelectorColumns} emptyMessage='No experiments found. Create an experiment and then try again.' initialSortColumn={ExperimentSortKeys.CREATED_AT} - selectionChanged={(selectedExperiment: ApiExperiment) => - this.setStateSafe({ unconfirmedSelectedExperiment: selectedExperiment }) - } + selectionChanged={async (selectedId: string) => { + const selectedExperiment = await Apis.experimentServiceApi.getExperiment( + selectedId, + ); + this.setStateSafe({ unconfirmedSelectedExperiment: selectedExperiment }); + }} /> diff --git a/frontend/src/pages/NewRunV2.tsx b/frontend/src/pages/NewRunV2.tsx index f5cbc1e9b6..68d979e407 100644 --- a/frontend/src/pages/NewRunV2.tsx +++ b/frontend/src/pages/NewRunV2.tsx @@ -51,7 +51,7 @@ import { classes, stylesheet } from 'typestyle'; import { PageProps } from './Page'; import PipelinesDialogV2 from 'src/components/PipelinesDialogV2'; import { V2beta1RecurringRun, RecurringRunMode } from 'src/apisv2beta1/recurringrun'; -import ResourceSelectorV2 from 'src/pages/ResourceSelectorV2'; +import ResourceSelector from 'src/pages/ResourceSelector'; import { convertExperimentToResource, convertPipelineVersionToResource, @@ -823,7 +823,7 @@ function PipelineVersionSelector(props: PipelineVersionSelectorProps) { PaperProps={{ id: 'pipelineVersionSelectorDialog' }} > - - { expect(tree.state('selectedIds')).toEqual([]); (tree.instance() as TestResourceSelector)._selectionChanged([RESOURCES[1].id!]); - expect(selectionChangedCbSpy).toHaveBeenLastCalledWith(RESOURCES[1]); + expect(selectionChangedCbSpy).toHaveBeenLastCalledWith(RESOURCES[1].id!); expect(tree.state('selectedIds')).toEqual([RESOURCES[1].id]); }); @@ -177,20 +177,4 @@ describe('ResourceSelector', () => { RESOURCES[1].id, ]); }); - - it('logs error if selected resource ID is not found in list', async () => { - tree = shallow(); - const consoleSpy = jest.spyOn(console, 'error').mockImplementation(); - await (tree.instance() as TestResourceSelector)._load({}); - - expect(tree.state('selectedIds')).toEqual([]); - - (tree.instance() as 
TestResourceSelector)._selectionChanged(['id-not-in-list']); - - expect(selectionChangedCbSpy).not.toHaveBeenCalled(); - expect(tree.state('selectedIds')).toEqual([]); - expect(consoleSpy).toHaveBeenLastCalledWith( - 'Somehow no resource was found with ID: id-not-in-list', - ); - }); }); diff --git a/frontend/src/pages/ResourceSelector.tsx b/frontend/src/pages/ResourceSelector.tsx index 1b7987d1e5..be81359ede 100644 --- a/frontend/src/pages/ResourceSelector.tsx +++ b/frontend/src/pages/ResourceSelector.tsx @@ -33,6 +33,7 @@ export interface BaseResource { description?: string; name?: string; error?: string; + namespace?: string; } export interface ResourceSelectorProps extends RouteComponentProps { @@ -41,7 +42,7 @@ export interface ResourceSelectorProps extends RouteComponentProps { emptyMessage: string; filterLabel: string; initialSortColumn: any; - selectionChanged: (resource: BaseResource) => void; + selectionChanged: (selectedId: string) => void; title?: string; toolbarActionMap?: ToolbarActionMap; updateDialog: (dialogProps: DialogProps) => void; @@ -107,13 +108,7 @@ class ResourceSelector extends React.Component r.id === selectedIds[0]); - if (selected) { - this.props.selectionChanged(selected); - } else { - logger.error(`Somehow no resource was found with ID: ${selectedIds[0]}`); - return; - } + this.props.selectionChanged(selectedIds[0]); this.setStateSafe({ selectedIds }); } diff --git a/frontend/src/pages/ResourceSelectorV2.test.tsx b/frontend/src/pages/ResourceSelectorV2.test.tsx deleted file mode 100644 index 7b9dd2d52d..0000000000 --- a/frontend/src/pages/ResourceSelectorV2.test.tsx +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2023 The Kubeflow Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import * as React from 'react'; -import ResourceSelectorV2, { ResourceSelectorV2Props, BaseResource } from './ResourceSelectorV2'; -import TestUtils from 'src/TestUtils'; -import { ListRequest } from 'src/lib/Apis'; -import { shallow, ReactWrapper, ShallowWrapper } from 'enzyme'; -import { Row } from 'src/components/CustomTable'; - -class TestResourceSelector extends ResourceSelectorV2 { - public async _load(request: ListRequest): Promise { - return super._load(request); - } - - public _selectionChanged(selectedIds: string[]): void { - return super._selectionChanged(selectedIds); - } - - public _resourcesToRow(resources: BaseResource[]): Row[] { - return super._resourcesToRow(resources); - } -} - -describe('ResourceSelector', () => { - let tree: ReactWrapper | ShallowWrapper; - - const updateDialogSpy = jest.fn(); - const selectionChangedCbSpy = jest.fn(); - const listResourceSpy = jest.fn(); - const RESOURCES: BaseResource[] = [ - { - created_at: new Date(2018, 1, 2, 3, 4, 5), - description: 'test-1 description', - id: 'some-id-1', - name: 'test-1 name', - }, - { - created_at: new Date(2018, 10, 9, 8, 7, 6), - description: 'test-2 description', - id: 'some-2-id', - name: 'test-2 name', - }, - ]; - - const selectorColumns = [ - { label: 'Resource name', flex: 1, sortKey: 'name' }, - { label: 'Description', flex: 1.5 }, - { label: 'Uploaded on', flex: 1, sortKey: 'created_at' }, - ]; - - const testEmptyMessage = 'Test - Sorry, no resources.'; - const testTitle = 'A test selector'; - - function generateProps(): ResourceSelectorV2Props { - return { - columns: selectorColumns, - emptyMessage: testEmptyMessage, - filterLabel: 'test filter label', - history: {} as any, - initialSortColumn: 'created_at', - listApi: listResourceSpy as any, - location: '' as any, - match: {} as any, - selectionChanged: selectionChangedCbSpy, - title: testTitle, - updateDialog: updateDialogSpy, - }; - } - - beforeEach(() => { - listResourceSpy.mockReset(); - listResourceSpy.mockImplementation(() => ({ - nextPageToken: 'test-next-page-token', - resources: RESOURCES, - })); - updateDialogSpy.mockReset(); - selectionChangedCbSpy.mockReset(); - }); - - afterEach(async () => { - // unmount() should be called before resetAllMocks() in case any part of the unmount life cycle - // depends on mocks/spies - await tree.unmount(); - }); - - it('displays resource selector', async () => { - tree = shallow(); - await (tree.instance() as TestResourceSelector)._load({}); - - expect(listResourceSpy).toHaveBeenCalledTimes(1); - expect(listResourceSpy).toHaveBeenLastCalledWith(undefined, undefined, undefined, undefined); - expect(tree.state('resources')).toEqual(RESOURCES); - expect(tree).toMatchSnapshot(); - }); - - it('converts resources into a table rows', async () => { - const props = generateProps(); - const resources: BaseResource[] = [ - { - created_at: new Date(2018, 1, 2, 3, 4, 5), - description: 'a description', - id: 'an-id', - name: 'a name', - }, - ]; - listResourceSpy.mockImplementationOnce(() => ({ resources, nextPageToken: '' })); - props.listApi = listResourceSpy as any; - - tree = shallow(); - await (tree.instance() as TestResourceSelector)._load({}); - - expect(tree.state('rows')).toEqual([ - { - id: 'an-id', - otherFields: ['a name', 'a description', '2/2/2018, 3:04:05 AM'], - }, - ]); - }); - - it('shows error dialog if listing fails', async () => { - TestUtils.makeErrorResponseOnce(listResourceSpy, 'woops!'); - jest.spyOn(console, 'error').mockImplementation(); - - tree = shallow(); - await 
(tree.instance() as TestResourceSelector)._load({}); - - expect(listResourceSpy).toHaveBeenCalledTimes(1); - expect(updateDialogSpy).toHaveBeenLastCalledWith( - expect.objectContaining({ - content: 'List request failed with:\nwoops!', - title: 'Error retrieving resources', - }), - ); - expect(tree.state('resources')).toEqual([]); - }); - - it('calls selection callback when a resource is selected', async () => { - tree = shallow(); - await (tree.instance() as TestResourceSelector)._load({}); - - expect(tree.state('selectedIds')).toEqual([]); - (tree.instance() as TestResourceSelector)._selectionChanged([RESOURCES[1].id!]); - expect(selectionChangedCbSpy).toHaveBeenLastCalledWith(RESOURCES[1].id!); - expect(tree.state('selectedIds')).toEqual([RESOURCES[1].id]); - }); - - it('logs error if more than one resource is selected', async () => { - tree = shallow(); - const consoleSpy = jest.spyOn(console, 'error').mockImplementation(); - await (tree.instance() as TestResourceSelector)._load({}); - - expect(tree.state('selectedIds')).toEqual([]); - - (tree.instance() as TestResourceSelector)._selectionChanged([ - RESOURCES[0].id!, - RESOURCES[1].id!, - ]); - - expect(selectionChangedCbSpy).not.toHaveBeenCalled(); - expect(tree.state('selectedIds')).toEqual([]); - expect(consoleSpy).toHaveBeenLastCalledWith('2 resources were selected somehow', [ - RESOURCES[0].id, - RESOURCES[1].id, - ]); - }); -}); diff --git a/frontend/src/pages/ResourceSelectorV2.tsx b/frontend/src/pages/ResourceSelectorV2.tsx deleted file mode 100644 index 91fe32bae5..0000000000 --- a/frontend/src/pages/ResourceSelectorV2.tsx +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright 2023 The Kubeflow Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Separate the resource selector between v1 and v2 to avoid breaking current v1 behavior -// TODO(jlyaoyuli): consider to merge 2 selectors together (change updatedSelection() in v1) - -import * as React from 'react'; -import CustomTable, { Column, Row } from 'src/components/CustomTable'; -import Toolbar, { ToolbarActionMap } from 'src/components/Toolbar'; -import { ListRequest } from 'src/lib/Apis'; -import { RouteComponentProps } from 'react-router-dom'; -import { logger, errorToMessage, formatDateString } from 'src/lib/Utils'; -import { DialogProps } from 'src/components/Router'; - -interface BaseResponse { - resources: BaseResource[]; - nextPageToken: string; -} - -export interface BaseResource { - id?: string; - created_at?: Date; - description?: string; - name?: string; - error?: string; - nameSpace?: string; -} - -export interface ResourceSelectorV2Props extends RouteComponentProps { - listApi: (...args: any[]) => Promise; - columns: Column[]; - emptyMessage: string; - filterLabel: string; - initialSortColumn: any; - selectionChanged: (selectedId: string) => void; - title?: string; - toolbarActionMap?: ToolbarActionMap; - updateDialog: (dialogProps: DialogProps) => void; -} - -interface ResourceSelectorV2State { - resources: BaseResource[]; - rows: Row[]; - selectedIds: string[]; - toolbarActionMap: ToolbarActionMap; -} - -class ResourceSelectorV2 extends React.Component { - protected _isMounted = true; - - constructor(props: any) { - super(props); - - this.state = { - resources: [], - rows: [], - selectedIds: [], - toolbarActionMap: (props && props.toolbarActionMap) || {}, - }; - } - - public render(): JSX.Element { - const { rows, selectedIds, toolbarActionMap } = this.state; - const { columns, title, filterLabel, emptyMessage, initialSortColumn } = this.props; - - return ( - - {title && } - - - - ); - } - - public componentWillUnmount(): void { - this._isMounted = false; - } - - protected setStateSafe(newState: Partial, cb?: () => void): void { - if (this._isMounted) { - this.setState(newState as any, cb); - } - } - - protected _selectionChanged(selectedIds: string[]): void { - if (!Array.isArray(selectedIds) || selectedIds.length !== 1) { - logger.error(`${selectedIds.length} resources were selected somehow`, selectedIds); - return; - } - this.props.selectionChanged(selectedIds[0]); - this.setStateSafe({ selectedIds }); - } - - protected async _load(request: ListRequest): Promise { - let nextPageToken = ''; - try { - const response = await this.props.listApi( - request.pageToken, - request.pageSize, - request.sortBy, - request.filter, - ); - - this.setStateSafe({ - resources: response.resources, - rows: this._resourcesToRow(response.resources), - }); - - nextPageToken = response.nextPageToken; - } catch (err) { - const errorMessage = await errorToMessage(err); - this.props.updateDialog({ - buttons: [{ text: 'Dismiss' }], - content: 'List request failed with:\n' + errorMessage, - title: 'Error retrieving resources', - }); - logger.error('Could not get requested list of resources', errorMessage); - } - return nextPageToken; - } - - protected _resourcesToRow(resources: BaseResource[]): Row[] { - return resources.map( - r => - ({ - error: (r as any).error, - id: r.id!, - otherFields: [r.name, r.description, formatDateString(r.created_at)], - } as Row), - ); - } -} - -export default ResourceSelectorV2; diff --git a/frontend/src/pages/__snapshots__/ResourceSelectorV2.test.tsx.snap b/frontend/src/pages/__snapshots__/ResourceSelectorV2.test.tsx.snap deleted file mode 100644 
index ca6fdfaa80..0000000000 --- a/frontend/src/pages/__snapshots__/ResourceSelectorV2.test.tsx.snap +++ /dev/null @@ -1,60 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`ResourceSelector displays resource selector 1`] = ` - - - - -`; From e07038700f5930a693cadfb9831e3c120554ce3a Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 10 Jul 2023 13:15:18 -0700 Subject: [PATCH 019/253] chore(components): Migrate AutoML components to preview and v1 as needed docs(components): Revert doc changes from preview sync PiperOrigin-RevId: 546959930 --- .../preview/automl/forecasting/__init__.py | 25 + .../forecasting/forecasting_ensemble.py | 139 + .../forecasting/forecasting_stage_1_tuner.py | 159 + .../forecasting/forecasting_stage_2_tuner.py | 157 + .../learn_to_learn_forecasting_pipeline.yaml | 7790 +++++++++++ ...ence_to_sequence_forecasting_pipeline.yaml | 7749 +++++++++++ ...sion_transformer_forecasting_pipeline.yaml | 7735 +++++++++++ ...es_dense_encoder_forecasting_pipeline.yaml | 7790 +++++++++++ .../preview/automl/forecasting/utils.py | 1023 ++ .../preview/automl/tabular/__init__.py | 35 + ...ml_tabular_feature_selection_pipeline.yaml | 11427 ++++++++++++++++ .../tabular/automl_tabular_v2_pipeline.yaml | 8327 +++++++++++ ..._params_large_data_large_search_space.json | 158 + ...params_large_data_medium_search_space.json | 158 + ..._params_large_data_small_search_space.json | 146 + ...params_medium_data_large_search_space.json | 158 + ...arams_medium_data_medium_search_space.json | 158 + ...params_medium_data_small_search_space.json | 146 + ..._params_small_data_large_search_space.json | 158 + ...params_small_data_medium_search_space.json | 158 + ..._params_small_data_small_search_space.json | 146 + .../tabular/configs/wide_and_deep_params.json | 132 + .../tabular/configs/xgboost_params.json | 309 + .../automl/tabular/feature_selection.py | 179 + .../tabular/feature_transform_engine.py | 976 ++ .../tabnet_hyperparameter_tuning_job.py | 236 + ...et_hyperparameter_tuning_job_pipeline.yaml | 4661 +++++++ .../preview/automl/tabular/tabnet_trainer.py | 300 + .../tabular/tabnet_trainer_pipeline.yaml | 4302 ++++++ .../preview/automl/tabular/utils.py | 3360 +++++ ...wide_and_deep_hyperparameter_tuning_job.py | 236 + ...ep_hyperparameter_tuning_job_pipeline.yaml | 4018 ++++++ .../automl/tabular/wide_and_deep_trainer.py | 281 + .../wide_and_deep_trainer_pipeline.yaml | 4048 ++++++ .../xgboost_hyperparameter_tuning_job.py | 124 + ...st_hyperparameter_tuning_job_pipeline.yaml | 4332 ++++++ .../preview/automl/tabular/xgboost_trainer.py | 77 + .../tabular/xgboost_trainer_pipeline.yaml | 4396 ++++++ .../v1/automl/forecasting/__init__.py | 21 + .../bqml_arima_predict_pipeline.yaml | 1159 ++ .../bqml_arima_train_pipeline.yaml | 5085 +++++++ .../forecasting/prophet_predict_pipeline.yaml | 2150 +++ .../v1/automl/forecasting/prophet_trainer.py | 211 + .../forecasting/prophet_trainer_pipeline.yaml | 2958 ++++ .../v1/automl/forecasting/utils.py | 341 + .../v1/automl/tabular/__init__.py | 37 + .../tabular/automl_tabular_pipeline.yaml | 11149 +++++++++++++++ .../v1/automl/tabular/cv_trainer.py | 166 + .../tabular/deprecated/default_pipeline.json | 7974 +++++++++++ .../v1/automl/tabular/ensemble.py | 167 + .../v1/automl/tabular/finalizer.py | 88 + .../v1/automl/tabular/infra_validator.py | 39 + .../automl/tabular/split_materialized_data.py | 119 + .../v1/automl/tabular/stage_1_tuner.py | 189 + .../automl/tabular/stats_and_example_gen.py | 304 + .../training_configurator_and_validator.py | 285 + 
.../v1/automl/tabular/transform.py | 200 + .../v1/automl/tabular/utils.py | 1435 ++ 58 files changed, 119786 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py create mode 100644 
components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py create mode 100644 
components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py new file mode 100644 index 0000000000..befa20f9ad --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py @@ -0,0 +1,25 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Experimental AutoML forecasting components.""" + +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_1_tuner import automl_forecasting_stage_1_tuner as ForecastingStage1TunerOp +from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_2_tuner import automl_forecasting_stage_2_tuner as ForecastingStage2TunerOp + +__all__ = [ + 'ForecastingStage1TunerOp', + 'ForecastingEnsembleOp', + 'ForecastingStage2TunerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py new file mode 100644 index 0000000000..b7e0580c4e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -0,0 +1,139 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Ensemble component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_ensemble( + project: str, + location: str, + root_dir: str, + transform_output: Input[Artifact], + metadata: Input[Artifact], + tuning_result_input: Input[Artifact], + instance_baseline: Input[Artifact], + instance_schema_path: Input[Artifact], + prediction_image_uri: str, + gcp_resources: dsl.OutputPath(str), + model_architecture: Output[Artifact], + unmanaged_container_model: Output[UnmanagedContainerModel], + explanation_metadata: dsl.OutputPath(dict), + explanation_metadata_artifact: Output[Artifact], + explanation_parameters: dsl.OutputPath(dict), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Ensembles AutoML Forecasting models. + + Args: + project: Project to run the job in. + location: Region to run the job in. + root_dir: The Cloud Storage path to store the output. + transform_output: The transform output artifact. + metadata: The tabular example gen metadata. + tuning_result_input: AutoML Tabular tuning + result. + instance_baseline: The instance baseline + used to calculate explanations. + instance_schema_path: The path to the instance schema, + describing the input data for the tf_model at serving time. + encryption_spec_key_name: Customer-managed encryption key. + prediction_image_uri: URI of the Docker image to be used as the + container for serving predictions. This URI must identify an image in + Artifact Registry or Container Registry. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + model_architecture: The architecture of the output model. + unmanaged_container_model: Model information needed to perform batch prediction. + explanation_metadata: The explanation metadata used by Vertex online and batch explanations. + explanation_metadata_artifact: The explanation metadata used by Vertex online and batch explanations in the format of a KFP Artifact. + explanation_parameters: The explanation parameters used by Vertex online and batch explanations. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_ensemble', + '", "--transform_output_path=', + transform_output.uri, + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--instance_schema_path=', + instance_schema_path.uri, + '", "--prediction_docker_uri=', + prediction_image_uri, + '", "--model_relative_output_path=', + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model', + '", "--explanation_metadata_path=', + explanation_metadata, + ',', + explanation_metadata_artifact.uri, + '", "--explanation_parameters_path=', + explanation_parameters, + '", "--model_architecture_path=', + model_architecture.uri, + '", "--use_json=true', + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py new file mode 100644 index 0000000000..e82e55708b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -0,0 +1,159 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Stage 1 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_stage_1_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + study_spec_parameters_override: Optional[list] = [], + worker_pool_specs_override_json: Optional[list] = [], + reduce_search_space_mode: Optional[str] = 'regular', + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Searches AutoML Forecasting architectures and selects the top trials. + + Args: + project: Project to run hyperparameter tuning. + location: Location for running the hyperparameter tuning. + root_dir: The Cloud Storage location to store the output. + study_spec_parameters_override: JSON study spec. E.g., + [{"parameter_id": "activation","categorical_value_spec": {"values": + ["tanh"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + reduce_search_space_mode: The reduce search space mode. Possible + values: "regular" (default), "minimal", "full". + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + deadline_hours: Number of hours the hyperparameter tuning should + run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The tabular example gen metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained model and architectures. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-forecasting-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_l2l_stage_1_tuner', + '", "--region=', + location, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "--reduce_search_space_mode=', + reduce_search_space_mode, + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + '", "--training_base_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', + '", "--num_parallel_trial=', + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--lro_job_info=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--use_json=true', + '", "--log_level=ERROR', + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py new file mode 100644 index 0000000000..5375f61955 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -0,0 +1,157 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Forecasting Stage 2 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def automl_forecasting_stage_2_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + tuning_result_input_path: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + worker_pool_specs_override_json: Optional[list] = [], + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes AutoML Forecasting models and selects top trials. + + Args: + project: Project to run stage 2 tuner. + location: Cloud region for running the component: us-central1). + root_dir: The Cloud Storage location to store the output. + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + num_selected_trials: Number of selected trials. The number of weak + learners in the final model. + deadline_hours: Number of hours the cross-validation trainer + should run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The forecasting example gen + metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + tuning_result_input_path: Path to the json of hyperparameter + tuning results to use when evaluating models. + + Returns: + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained (private) model artifact paths and their hyperparameters. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-forecasting-stage-2-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + '", "args": ["forecasting_mp_l2l_stage_2_tuner', + '", "--region=', + location, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + '", "--training_base_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train', + '", "--num_parallel_trial=', + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--lro_job_info=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro', + '", "--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', + '", "--metadata_path=', + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--tuning_result_input_path=', + tuning_result_input_path.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + ( + '", "--use_json=true", "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml new file mode 100644 index 0000000000..3d28c0a17f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -0,0 +1,7790 @@ +# PIPELINE DEFINITION +# Name: learn-to-learn-forecasting +# Description: The AutoML Forecasting pipeline. 
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# quantiles: list +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. 
+ parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. 
E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: l2l + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + 
schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: 
gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: 
+ name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + 
model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: l2l + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + 
parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: 
pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + 
group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: l2l + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: l2l + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + componentInputParameter: pipelinechannel--quantiles + split_example_counts: + 
taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + 
parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. 
If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' 
+ isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' 
+ isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." 
+ isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\
+ \ is the same as defined in the file or table, unless\n [included_fields][]\
+ \ is populated.\n * For `jsonl`, the prediction instance format is determined\
+ \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
+ \ each record will be converted to\n an object in the format of `{\"\
+ b64\": }`, where `` is\n the Base64-encoded string of\
+ \ the content of the record.\n * For `file-list`, each file in the list\
+ \ will be converted to an\n object in the format of `{\"b64\": }`,\
+ \ where `` is\n the Base64-encoded string of the content of the\
+ \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
+ \ Base64 is not for this field. --)"
+ isOptional: true
+ parameterType: STRING
+ instances_format:
+ defaultValue: jsonl
+ description: "The format in which instances are\ngiven, must be one of the\
+ \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
+ . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
+ isOptional: true
+ parameterType: STRING
+ job_display_name:
+ description: The user-defined name of this BatchPredictionJob.
+ parameterType: STRING
+ key_field:
+ defaultValue: ''
+ description: "The name of the field that is considered as a key.\nThe values\
+ \ identified by the key field is not included in the\ntransformed instances\
+ \ that is sent to the Model. This is similar to\nspecifying this name\
+ \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
+ \ output will not include the instances. Instead the\noutput will only\
+ \ include the value of the key field, in a field named\n`key` in the output:\n\
+ \ * For `jsonl` output format, the output will have a `key` field\n \
+ \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
+ \ the output will have a `key`\n column instead of the instance\
+ \ feature columns.\nThe input must be JSONL with objects at each line,\
+ \ CSV, BigQuery\nor TfRecord."
+ isOptional: true
+ parameterType: STRING
+ labels:
+ defaultValue: {}
+ description: 'The labels with user-defined metadata to
+
+ organize your BatchPredictionJobs. Label keys and values can be no
+
+ longer than 64 characters (Unicode codepoints), can only contain
+
+ lowercase letters, numeric characters, underscores and dashes.
+
+ International characters are allowed. See https://goo.gl/xmQnxf for
+
+ more information and examples of labels.'
+ isOptional: true
+ parameterType: STRUCT
+ location:
+ defaultValue: us-central1
+ description: 'Location for creating the BatchPredictionJob.
+
+ If not set, default to us-central1.'
+ isOptional: true
+ parameterType: STRING
+ machine_type:
+ defaultValue: ''
+ description: 'The type of machine for running batch
+
+ prediction on dedicated resources. If the Model supports
+
+ DEDICATED_RESOURCES this config may be provided (and the job will use
+
+ these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
+
+ this config must be provided. For more details about the
+
+ BatchDedicatedResources, see
+
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ data_source_bigquery_table_path:
+ parameterType: STRING
+ data_source_csv_filenames:
+ parameterType: STRING
+ model_display_name:
+ parameterType: STRING
+ comp-split-materialized-data:
+ executorLabel: exec-split-materialized-data
+ inputDefinitions:
+ artifacts:
+ materialized_data:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: 'Materialized dataset output by the Feature
+
+ Transform Engine.'
+ outputDefinitions:
+ artifacts:
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized eval split.
+ materialized_test_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized test split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized train split.
+ comp-string-not-empty:
+ executorLabel: exec-string-not-empty
+ inputDefinitions:
+ parameters:
+ value:
+ description: String value to be checked.
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ Output:
+ parameterType: STRING
+ comp-table-to-uri:
+ executorLabel: exec-table-to-uri
+ inputDefinitions:
+ artifacts:
+ table:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ use_bq_prefix:
+ defaultValue: false
+ isOptional: true
+ parameterType: BOOLEAN
+ outputDefinitions:
+ parameters:
+ dataset_id:
+ parameterType: STRING
+ project_id:
+ parameterType: STRING
+ table_id:
+ parameterType: STRING
+ uri:
+ parameterType: STRING
+ comp-table-to-uri-2:
+ executorLabel: exec-table-to-uri-2
+ inputDefinitions:
+ artifacts:
+ table:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ use_bq_prefix:
+ defaultValue: false
+ isOptional: true
+ parameterType: BOOLEAN
+ outputDefinitions:
+ parameters:
+ dataset_id:
+ parameterType: STRING
+ project_id:
+ parameterType: STRING
+ table_id:
+ parameterType: STRING
+ uri:
+ parameterType: STRING
+ comp-training-configurator-and-validator:
+ executorLabel: exec-training-configurator-and-validator
+ inputDefinitions:
+ artifacts:
+ dataset_stats:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Dataset stats generated by
+
+ feature transform engine.'
+ instance_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Schema of input data to the tf_model at
+
+ serving time.'
+ training_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ available_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ context_window:
+ defaultValue: -1.0
+ description: The length of the context window.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ enable_probabilistic_inference:
+ defaultValue: false
+ description: 'If probabilistic inference is
+
+ enabled, the model will fit a distribution that captures the uncertainty
+
+ of a prediction. At inference time, the predictive distribution is used
+
+ to make a point prediction that minimizes the optimization objective.
+
+ For example, the mean of a predictive distribution is the point
+
+ prediction that minimizes RMSE loss. If quantiles are specified, then
+
+ the quantiles of the distribution are also returned.'
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true
+ parameterType: BOOLEAN
+ split_example_counts:
+ description: 'JSON string of data split example counts for
+
+ train, validate, and test splits.'
+ parameterType: STRING
+ stage_1_deadline_hours:
+ description: 'Stage 1 training budget in
+
+ hours.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ stage_2_deadline_hours:
+ description: 'Stage 2 training budget in
+
+ hours.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ target_column:
+ defaultValue: ''
+ description: Target column of input data.
+ isOptional: true
+ parameterType: STRING
+ temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for
+
+ predictions aggregated over the horizon for a single time series.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ time_column:
+ defaultValue: ''
+ description: 'The column that indicates the time. Used by forecasting
+
+ only.'
+ isOptional: true
+ parameterType: STRING
+ time_series_attribute_columns:
+ defaultValue: []
+ description: 'The column names of the time series
+
+ attributes.'
+ isOptional: true
+ parameterType: LIST
+ time_series_identifier_column:
+ defaultValue: ''
+ description: 'Time series identifier column. Used by
+
+ forecasting only.'
+ isOptional: true
+ parameterType: STRING
+ unavailable_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ not available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ weight_column:
+ defaultValue: ''
+ description: Weight column of input data.
+ isOptional: true
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The tabular example gen metadata.
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The AutoML Forecasting pipeline. + name: learn-to-learn-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + 
pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + 
componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. 
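In the root dag above, automl-tabular-finalizer runs with triggerPolicy ALL_UPSTREAM_TASKS_COMPLETED after the exit-handler-1 group, which is the compiled shape of dsl.ExitHandler. A self-contained sketch of that pattern follows, using stub components in place of the real set-optional-inputs, finalizer, and training steps; the names, bodies, and arguments are stand-ins, not the actual GCPC components.

from kfp import compiler, dsl

@dsl.component
def set_optional_inputs_stub(project: str, location: str) -> str:
    # Stand-in for set-optional-inputs, which resolves the data source paths.
    return ''

@dsl.component
def finalizer_stub(project: str, location: str, root_dir: str):
    # Stand-in for automl-tabular-finalizer.
    print(f'cleaning up under {root_dir}')

@dsl.component
def training_stub(data_source: str):
    # Stand-in for the feature-transform / tuning / ensembling tasks.
    print(f'training on {data_source}')

@dsl.pipeline(name='exit-handler-sketch')
def exit_handler_sketch(project: str, location: str, root_dir: str):
    optional_inputs = set_optional_inputs_stub(project=project, location=location)

    # Compiles to a task guarded by triggerPolicy ALL_UPSTREAM_TASKS_COMPLETED,
    # like automl-tabular-finalizer in the root dag above.
    finalizer = finalizer_stub(project=project, location=location, root_dir=root_dir)

    # Everything under the context manager becomes the exit-handler-1 sub-dag;
    # consuming optional_inputs.output also makes the group depend on it.
    with dsl.ExitHandler(exit_task=finalizer):
        training_stub(data_source=optional_inputs.output)

compiler.Compiler().compile(exit_handler_sketch, 'exit_handler_sketch.yaml')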
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' 
+ isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + description: 'A list of time series attribute column names that define the + + time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions + + aggregated over both the horizon and time series in the same hierarchy + + group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated over + + time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + holiday_regions: + description: 'The geographical regions where the holiday effect is + + applied in modeling.' + isOptional: true + parameterType: LIST + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_description: + defaultValue: '' + description: Optional description. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + description: Optional display name for model. + isOptional: true + parameterType: STRING + num_selected_trials: + defaultValue: 10.0 + description: Number of selected trails. + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", + + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + + "minimize-quantile-loss".' + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: '`True` to evaluate the ensembled model on the test split.' + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' 
+ isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 2 trainer worker pool spec.' + isOptional: true + parameterType: LIST + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated + + over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: The column that indicates the time. + parameterType: STRING + time_series_attribute_columns: + description: 'The columns that are invariant across the + + same time series.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + description: 'The column that distinguish the different + + time series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Dict mapping auto and/or type-resolutions to feature + + columns. The supported types are: auto, categorical, numeric, text, and + + timestamp.' + parameterType: STRUCT + unavailable_at_forecast_columns: + description: 'The columns that are unavailable at the + + forecast time.' + isOptional: true + parameterType: LIST + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: 0.0 + description: The maximum number of windows that will be generated. + isOptional: true + parameterType: NUMBER_INTEGER + window_predefined_column: + defaultValue: '' + description: The column that indicate the start of each window. + isOptional: true + parameterType: STRING + window_stride_length: + defaultValue: 0.0 + description: The stride length to generate the window. 
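For reference, a compiled spec with the inputDefinitions listed above is typically run on Vertex AI Pipelines by passing parameter_values keyed on these parameter names. Below is a minimal, illustrative submission sketch using the google-cloud-aiplatform client; the project, bucket, template path, column names, and chosen values are placeholders, and the required vertex_dataset artifact input is not covered by this sketch.

from google.cloud import aiplatform

aiplatform.init(project='my-project', location='us-central1')  # placeholders

job = aiplatform.PipelineJob(
    display_name='learn-to-learn-forecasting',
    template_path='learn_to_learn_forecasting_pipeline.yaml',  # illustrative path to the compiled spec
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values={
        # Inputs without defaults in the inputDefinitions above.
        'project': 'my-project',
        'location': 'us-central1',
        'root_dir': 'gs://my-bucket/pipeline_root',
        'target_column': 'sales',
        'optimization_objective': 'minimize-rmse',
        'time_column': 'date',
        'time_series_identifier_column': 'store_id',
        'train_budget_milli_node_hours': 1000.0,
        'transformations': {'auto': ['sales', 'date', 'store_id']},
        # A few of the optional inputs.
        'forecast_horizon': 30,
        'context_window': 30,
        'quantiles': [0.1, 0.5, 0.9],
        'enable_probabilistic_inference': True,
        'run_evaluation': True,
        'evaluated_examples_bigquery_path': 'bq://my-project.eval_dataset',
    },
)
job.run()
# Note: the required vertex_dataset artifact input is omitted from this sketch.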
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml new file mode 100644 index 0000000000..4f656e1b99 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -0,0 +1,7749 @@ +# PIPELINE DEFINITION +# Name: sequence-to-sequence-forecasting +# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. +# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: 
''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. 
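The two override inputs documented above accept JSON lists of exactly the shapes given in their descriptions. Written out as Python values, taken from the embedded examples (the parameter id, machine types, and four-pool layout are illustrative):

# Shape of study_spec_parameters_override, per the description above.
study_spec_parameters_override = [
    {
        'parameter_id': 'activation',
        'categorical_value_spec': {'values': ['tanh']},
    },
]

# Shape of worker_pool_specs_override_json, per the description above
# (four entries, two of them left empty as in the embedded example).
stage_1_tuner_worker_pool_specs_override = [
    {'machine_spec': {'machine_type': 'n1-standard-16'}},
    {},
    {},
    {'machine_spec': {'machine_type': 'n1-standard-16'}},
]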
+ parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. 
+ parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + 
taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: seq2seq + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
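The should_run_model_evaluation task above is a nested dag whose triggerPolicy compares run_evaluation to true; that is the compiled shape of a dsl.Condition block (dsl.If in newer SDK releases). A self-contained sketch of the pattern, with a stub component standing in for the batch-predict / feature-attribution / evaluation chain:

from kfp import compiler, dsl

@dsl.component
def evaluation_stub(target_column: str):
    # Stand-in for the model-batch-predict / feature-attribution / evaluation tasks.
    print(f'evaluating predictions for {target_column}')

@dsl.pipeline(name='condition-sketch')
def condition_sketch(run_evaluation: bool = False, target_column: str = ''):
    # Compiles to a sub-dag guarded by a triggerPolicy condition comparing
    # run_evaluation to true, like should_run_model_evaluation above.
    # '== True' is intentional: pipeline parameters cannot be used as plain
    # booleans in a condition, only in explicit comparisons.
    with dsl.Condition(run_evaluation == True, name='should_run_model_evaluation'):
        evaluation_stub(target_column=target_column)

compiler.Compiler().compile(condition_sketch, 'condition_sketch.yaml')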
pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: 
pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: 
STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + 
taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + 
pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: seq2seq + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + 
parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - 
feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + 
feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + 
outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + 
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: seq2seq + predefined_split_key: + componentInputParameter: 
pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + runtimeValue: + constant: 0.0 + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: seq2seq + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + runtimeValue: + constant: [] + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: 
pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + 
parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. 
If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... 
] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. 
+ isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
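[Reviewer note] The `comp-importer` definition a few lines up is the shape the KFP v2 SDK emits for a `dsl.importer` call: the `uri` input parameter and the `artifact` output (system.Artifact 0.0.1) map one-to-one to the importer's interface. A minimal authoring-side sketch, assuming the kfp v2 SDK and a purely hypothetical URI that is not part of this test asset:

    from kfp import compiler, dsl

    @dsl.pipeline(name='importer-example')
    def importer_example(uri: str = 'gs://hypothetical-bucket/some/artifact'):
        # Compiles into a `comp-importer` component with a `uri` input
        # parameter and an `artifact` output of type system.Artifact.
        imported = dsl.importer(
            artifact_uri=uri,
            artifact_class=dsl.Artifact,
            reimport=False,
        )

    compiler.Compiler().compile(importer_example, 'importer_example.yaml')
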
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
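+# The `components` section above declares each component's interface: an
+# executorLabel plus its inputDefinitions/outputDefinitions (artifacts and
+# parameters). The `deploymentSpec` below maps each executor label to the
+# container image, command, and args that implement it, and the pipeline's
+# root DAG wires tasks to these component definitions.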
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. + name: sequence-to-sequence-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: 
evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: 
stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. 
+ isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' 
+ isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + description: 'A list of time series attribute column names that define the + + time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions + + aggregated over both the horizon and time series in the same hierarchy + + group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated over + + time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + holiday_regions: + description: 'The geographical regions where the holiday effect is + + applied in modeling.' + isOptional: true + parameterType: LIST + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_description: + defaultValue: '' + description: Optional description. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + description: Optional display name for model. + isOptional: true + parameterType: STRING + num_selected_trials: + defaultValue: 10.0 + description: Number of selected trails. + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", + + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + + "minimize-quantile-loss".' + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: '`True` to evaluate the ensembled model on the test split.' + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 2 trainer worker pool spec.' + isOptional: true + parameterType: LIST + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. 
+ parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated + + over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: The column that indicates the time. + parameterType: STRING + time_series_attribute_columns: + description: 'The columns that are invariant across the + + same time series.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + description: 'The column that distinguish the different + + time series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Dict mapping auto and/or type-resolutions to feature + + columns. The supported types are: auto, categorical, numeric, text, and + + timestamp.' + parameterType: STRUCT + unavailable_at_forecast_columns: + description: 'The columns that are unavailable at the + + forecast time.' + isOptional: true + parameterType: LIST + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: 0.0 + description: The maximum number of windows that will be generated. + isOptional: true + parameterType: NUMBER_INTEGER + window_predefined_column: + defaultValue: '' + description: The column that indicate the start of each window. + isOptional: true + parameterType: STRING + window_stride_length: + defaultValue: 0.0 + description: The stride length to generate the window. + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml new file mode 100644 index 0000000000..6bad578312 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -0,0 +1,7735 @@ +# PIPELINE DEFINITION +# Name: temporal-fusion-transformer-forecasting +# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' 
+ metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' 
+ parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. 
E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + runtimeValue: + constant: 1.0 + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: tft + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + 
artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + 
bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + 
artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + 
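(Editorial illustration, not part of the patch hunk above.) The comp-condition-3 block that ends here, and the comp-condition-4/comp-condition-5 blocks that follow, are compiler-generated condition components: their triggerPolicy.condition expressions (for example inputs.parameter_values['pipelinechannel--run_evaluation'] == true) are what the KFP SDK emits when a pipeline author wraps tasks in a dsl.Condition / dsl.If block. The snippet below is a minimal sketch of that authoring pattern, assuming a current KFP v2 SDK; the component bodies, names, and parameters are hypothetical placeholders and are not the Google Cloud forecasting components defined in this file.

from kfp import compiler, dsl


@dsl.component
def train(project: str) -> str:
    # Placeholder for the real training / ensembling steps.
    return f'gs://{project}/model'


@dsl.component
def evaluate(model_uri: str):
    # Placeholder for the batch-predict / evaluation / import steps.
    print(f'evaluating {model_uri}')


@dsl.pipeline(name='conditional-evaluation-sketch')
def pipeline(project: str, run_evaluation: bool = False):
    train_task = train(project=project)
    # This block compiles into a comp-condition-* sub-DAG whose
    # triggerPolicy.condition tests the pipeline-channel value, as in the
    # YAML above. The explicit `== True` is required: KFP parameters are
    # placeholders at authoring time, so Python truthiness cannot be used.
    with dsl.Condition(run_evaluation == True,
                       name='should-run-model-evaluation'):
        evaluate(model_uri=train_task.output)


if __name__ == '__main__':
    compiler.Compiler().compile(pipeline, 'conditional_evaluation_sketch.yaml')

The same mechanism explains the paired branches further down: condition-2 ("stage_1_tuning_result_artifact_uri_not_empty", gated on string-not-empty-Output == 'true') and condition-4 ("stage_1_tuning_result_artifact_uri_empty", gated on == 'false') are the compiled form of an if/else on whether the user supplied stage_1_tuning_result_artifact_uri, expressed in the SDK as two Condition blocks (or dsl.If/dsl.Else in newer SDK releases).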
comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + runtimeValue: + constant: 1.0 + stage_1_num_parallel_trials: + 
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: tft + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + 
pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + runtimeValue: + constant: [] + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: 
+ constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: 
pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + 
tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: 
feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + 
componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 
'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: tft + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: 
pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + runtimeValue: + constant: 0.0 + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: tft + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + runtimeValue: + constant: [] + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + 
pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + 
parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. 
If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' 
+ isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n  Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n    \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n  weight column may not\ + \ be included as an auto transformation unless\n  users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \  Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n  Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n    \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n    \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n    \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n    \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \  transform function together with FTE's built-in transformations:\ + \ ..\n  code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n    \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n    \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n    \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n    \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n    [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specify\nFTE's TF transformation configurations.\ + \  In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n  Datetime:\ + \ Extracts datetime features from a column containing\n    timestamp\ + \ strings.\n    Example: .. code-block:: python { \"transformation\"\ + :\n      \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n      \"%Y-%m-%d\" }\n    Arguments:\n      input_columns:\ + \ A list with a single column to\n        perform the datetime\ + \ transformation on.\n      output_columns: Names of output\n\ + \        columns, one for each datetime_features element.\n  \ + \    time_format: Datetime format string. Time format is\n  \ + \      a combination of Date + Time Delimiter (optional) + Time\n\ + \        (optional) directives. Valid date directives are as\n\ + \        follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \        2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \        18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \        11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \        11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \        30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \        # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \        30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y'  \ + \ #\n        11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \        are as follows * 'T' * ' ' Valid time directives are\ + \ as\n        follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n        23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n        '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n  \ + \      '%H:%M:%S%z', # 23:59:58+0000\n      datetime_features:\ + \ List of datetime\n        features to be extracted. Each entry\ + \ must be one of *\n        'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n        * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n        'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n        'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \  Log: Performs the natural log on a numeric column.\n    Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n    \ + \  \"input_columns\": [\"feature_1\"] }\n    Arguments:\n  \ + \    input_columns: A list with a single column to\n      \ + \  perform the log transformation on.\n      output_columns:\ + \ A list with a single\n        output column name, corresponding\ + \ to the output of our\n        transformation.\n  ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n    Example:\ + \ .. code-block:: python { \"transformation\":\n      \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n    Arguments:\n    \ + \  input_columns: A list with a single column to\n        \ + \ perform the z-scale transformation on.\n      output_columns:\ + \ A list with a single\n        output column name, corresponding\ + \ to the output of our\n        transformation.\n  Vocabulary:\ + \ Converts strings to integers, where each unique string\n    gets\ + \ a unique integer representation.\n    Example: .. code-block::\ + \ python { \"transformation\":\n      \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n    Arguments:\n      input_columns:\ + \ A list with a single column to\n        perform the vocabulary\ + \ transformation on.\n      output_columns: A list with a single\n\ + \        output column name, corresponding to the output of our\n\ + \        transformation.\n      top_k: Number of the most\ + \ frequent words\n        in the vocabulary to use for generating\ + \ dictionary\n        lookup indices. If not specified, all words\ + \ in the\n        vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n          \ + \ 2.\n      top_k: Number of the most frequent words\n      \ + \ in the vocabulary to use for generating dictionary\n        \ + \ lookup indices. If not specified, all words in the\n        \ + \ vocabulary will be used. Defaults to None.\n          \ + \ frequency_threshold: Limit the\n        dictionary's vocabulary\ + \ only to words whose number of\n        occurrences in the input\ + \ exceeds frequency_threshold. If\n        not specified, all\ + \ words in the vocabulary will be\n        included. If both top_k\ + \ and frequency_threshold are\n        specified, a word must\ + \ satisfy both conditions to be\n        included. Defaults to\ + \ None.\n      separator: Separator to split input string\n    \ + \    into tokens. Defaults to ' '.\n      missing_token:\ + \ Missing token to use when\n        no string is included. Defaults\ + \ to ' _MISSING_ '.\n  Clip: Given a numeric column, clips elements\ + \ such that elements <\n    min_value are assigned min_value, and\ + \ elements > max_value are\n    assigned max_value.\n    Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n      \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n      [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n    Arguments:\n\ + \      input_columns: A list with a single column to\n        \ + \ perform the clip transformation on.\n      output_columns:\ + \ A list with a single\n        output column name, corresponding\ + \ to the output of our\n        transformation.\n          \ + \ min_value: Number where all values below\n        min_value\ + \ are set to min_value. If no min_value is\n        provided,\ + \ min clipping will not occur. Defaults to None.\n      max_value:\ + \ Number where all values above\n        max_value are set to\ + \ max_value. If no max_value is\n        provided, max clipping\ + \ will not occur. Defaults to None.\n  MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n    array column.\n    \ + \ Example: .. code-block:: python { \"transformation\":\n      \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \    of classes is determined by the largest number included in\n\ + \    the input if it is numeric or the total number of unique\n\ + \    values of the input if it is type str. If the input has\n\ + \    type str and an element contains separator tokens, the input\n\ + \    will be split at separator indices, and each element\ + \ of\n    the split list will be considered a separate class.\ + \ For\n    example,\n    Input: .. code-block:: python\ + \ [ [\"foo bar\"],   # Example\n      0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"],        # Example\n      2 [\"bar\"\ + ],        # Example 3 ]\n    Output (with default separator=\"\ + \ \"): .. code-block:: python [\n      [1, 1],          # Example\ + \ 0 [1, 1],          # Example 1\n      [1, 0],          # Example\ + \ 2 [0, 1],          # Example 3 ]\n    Arguments:\n      \ + \ input_columns: A list with a single column to\n        perform\ + \ the multi-hot-encoding on.\n      output_columns: A list with\ + \ a single\n        output column name, corresponding to the output\ + \ of our\n        transformation.\n      top_k: Number\ + \ of the most frequent words\n        in the vocabulary to use\ + \ for generating dictionary\n        lookup indices. If not specified,\ + \ all words in the\n        vocabulary will be used. Defaults\ + \ to None.\n      frequency_threshold: Limit the\n        \ + \ dictionary's vocabulary only to words whose number of\n        \ + \ occurrences in the input exceeds frequency_threshold. If\n        \ + \ not specified, all words in the vocabulary will be\n        \ + \ included. If both top_k and frequency_threshold are\n        \ + \ specified, a word must satisfy both conditions to be\n\ + \        included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + 
inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: 
STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n  of columns is determined\ + \ by the BigQuery column order, unless\n  [included_fields][] is populated.\n\ + \  [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n  [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n  `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n  order of columns\ + \ is the same as defined in the file or table, unless\n  [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n  each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n  an object in the format of `{\"\ + b64\": }`, where `` is\n  the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n  object in the format of `{\"b64\": }`,\ + \ where `` is\n  the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n    aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n  https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field are not included in the\ntransformed instances\ + \ that are sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \  instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have a `key`\n  column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
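Note on the budget math: the `_calculate_training_parameters` lightweight component embedded in the `deploymentSpec` below converts `train_budget_milli_node_hours` into per-stage deadlines and per-trial time limits. A minimal standalone sketch of that arithmetic follows; the 35-parallel-trial / 100-trial inputs are illustrative examples, not this pipeline's defaults.

import math

def stage_budget(train_budget_milli_node_hours: float,
                 num_parallel_trials: int,
                 num_trials: int) -> tuple:
    # 1,000 milli node hours == 1 node hour, so divide to get the deadline in hours.
    deadline_hours = train_budget_milli_node_hours / 1000.0
    # Trials execute in rounds of num_parallel_trials; every round shares the deadline.
    rounds = math.ceil(num_trials / num_parallel_trials)
    # 1.3 is the safety margin used by the embedded component.
    single_run_max_secs = int(deadline_hours * 3600.0 / 1.3 / rounds)
    return deadline_hours, single_run_max_secs

# A 1,000 milli-node-hour budget with 35 parallel trials over 100 stage-1 trials
# yields a 1.0 h deadline and int(3600.0 / 1.3 / 3) == 923 seconds per trial.
print(stage_budget(1000.0, 35, 100))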
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. + name: temporal-fusion-transformer-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: 
evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: 
stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' 
+ isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. 
+ isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + description: 'A list of time series attribute column names that define the + + time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions + + aggregated over both the horizon and time series in the same hierarchy + + group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated over + + time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + holiday_regions: + description: 'The geographical regions where the holiday effect is + + applied in modeling.' + isOptional: true + parameterType: LIST + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_description: + defaultValue: '' + description: Optional description. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + description: Optional display name for model. + isOptional: true + parameterType: STRING + optimization_objective: + description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", + + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + + "minimize-quantile-loss".' + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: '`True` to evaluate the ensembled model on the test split.' + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 2 trainer worker pool spec.' + isOptional: true + parameterType: LIST + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated + + over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: The column that indicates the time. + parameterType: STRING + time_series_attribute_columns: + description: 'The columns that are invariant across the + + same time series.' 
+ isOptional: true + parameterType: LIST + time_series_identifier_column: + description: 'The column that distinguish the different + + time series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Dict mapping auto and/or type-resolutions to feature + + columns. The supported types are: auto, categorical, numeric, text, and + + timestamp.' + parameterType: STRUCT + unavailable_at_forecast_columns: + description: 'The columns that are unavailable at the + + forecast time.' + isOptional: true + parameterType: LIST + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: 0.0 + description: The maximum number of windows that will be generated. + isOptional: true + parameterType: NUMBER_INTEGER + window_predefined_column: + defaultValue: '' + description: The column that indicate the start of each window. + isOptional: true + parameterType: STRING + window_stride_length: + defaultValue: 0.0 + description: The stride length to generate the window. + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml new file mode 100644 index 0000000000..afbf67ec9e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -0,0 +1,7790 @@ +# PIPELINE DEFINITION +# Name: time-series-dense-encoder-forecasting +# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. 
+# Inputs: +# available_at_forecast_columns: list +# context_window: int [Default: 0.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluated_examples_bigquery_path: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 22.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 22.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# evaluation_batch_predict_max_replica_count: int [Default: 25.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 25.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-16'] +# evaluation_dataflow_max_num_workers: int [Default: 25.0] +# evaluation_dataflow_starting_num_workers: int [Default: 22.0] +# fast_testing: bool [Default: False] +# feature_transform_engine_bigquery_staging_full_dataset_id: str [Default: ''] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int [Default: 0.0] +# group_columns: list +# group_temporal_total_weight: float [Default: 0.0] +# group_total_weight: float [Default: 0.0] +# holiday_regions: list +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] +# num_selected_trials: int [Default: 10.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# quantiles: list +# root_dir: str +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_trainer_worker_pool_specs_override: list +# study_spec_parameters_override: list +# target_column: str +# temporal_total_weight: float [Default: 0.0] +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_attribute_columns: list +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transformations: dict +# unavailable_at_forecast_columns: list +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# window_max_count: int [Default: 0.0] +# window_predefined_column: str [Default: ''] +# window_stride_length: int [Default: 0.0] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +components: + comp-automl-forecasting-ensemble: + executorLabel: exec-automl-forecasting-ensemble + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' 
+ instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. + parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-ensemble-2: + executorLabel: exec-automl-forecasting-ensemble-2 + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + instance_schema_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The path to the instance schema, + + describing the input data for the tf_model at serving time.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Region to run the job in. 
+ parameterType: STRING + prediction_image_uri: + description: 'URI of the Docker image to be used as the + + container for serving predictions. This URI must identify an image in + + Artifact Registry or Container Registry.' + parameterType: STRING + project: + description: Project to run the job in. + parameterType: STRING + root_dir: + description: The Cloud Storage path to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The explanation metadata used by Vertex online and batch explanations + in the format of a KFP Artifact. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: Model information needed to perform batch prediction. + parameters: + explanation_metadata: + description: The explanation metadata used by Vertex online and batch explanations. + parameterType: STRUCT + explanation_parameters: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-1-tuner: + executorLabel: exec-automl-forecasting-stage-1-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the hyperparameter tuning should + + run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the hyperparameter tuning. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run hyperparameter tuning. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. 
E.g., + + [{"parameter_id": "activation","categorical_value_spec": {"values": + + ["tanh"]}}]' + isOptional: true + parameterType: LIST + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-forecasting-stage-2-tuner: + executorLabel: exec-automl-forecasting-stage-2-tuner + inputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The forecasting example gen + + metadata.' + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input_path: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Path to the json of hyperparameter + + tuning results to use when evaluating models.' + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Cloud region for running the component: us-central1).' + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model.' + parameterType: NUMBER_INTEGER + project: + description: Project to run stage 2 tuner. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained (private) model artifact paths and their hyperparameters. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + selected_trials: + description: Number of trials that should be selected. + parameterType: NUMBER_INTEGER + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + tasks: + automl-forecasting-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble + dependentTasks: + - automl-forecasting-stage-2-tuner + - get-prediction-image-uri + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-2-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble + automl-forecasting-stage-2-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-2-tuner + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input_path: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-2-tuner + calculate-training-parameters: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-forecasting-ensemble + - model-upload + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description + get-prediction-image-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri + inputs: + parameters: + model_type: + runtimeValue: + constant: tide + taskInfo: + name: get-prediction-image-uri + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: get-hyperparameter-tuning-results + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-forecasting-ensemble + - get-or-create-model-description + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + 
schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: 
gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + finalize-eval-quantile-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters + get-predictions-column: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column + dependentTasks: + - finalize-eval-quantile-parameters + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: 
+ name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation-forecasting: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting + dependentTasks: + - finalize-eval-quantile-parameters + - get-predictions-column + - model-batch-predict + - table-to-uri + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting + 
model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation-forecasting + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + tasks: + automl-forecasting-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-ensemble-2 + dependentTasks: + - automl-forecasting-stage-1-tuner + - get-prediction-image-uri-2 + inputs: + artifacts: + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-instance_baseline + instance_schema_path: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-forecasting-stage-1-tuner + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + prediction_image_uri: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-image-uri-2 + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-forecasting-ensemble-2 + automl-forecasting-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-forecasting-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-metadata + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-forecasting-stage-1-tuner + calculate-training-parameters-2: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-forecasting-ensemble-2 + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: should_run_model_evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + get-or-create-model-description-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-or-create-model-description-2 + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + original_description: + componentInputParameter: pipelinechannel--model_description + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-or-create-model-description-2 + get-prediction-image-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-image-uri-2 + inputs: + parameters: + model_type: + runtimeValue: + constant: tide + taskInfo: + name: get-prediction-image-uri-2 + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + dependentTasks: + - automl-forecasting-ensemble-2 + - get-or-create-model-description-2 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-forecasting-ensemble-2 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-forecasting-ensemble-2 + parameters: + description: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-or-create-model-description-2 + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-forecasting-ensemble-2 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + finalize-eval-quantile-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-finalize-eval-quantile-parameters-2 + inputs: + parameters: + quantiles: + componentInputParameter: pipelinechannel--quantiles + taskInfo: + name: finalize-eval-quantile-parameters-2 + get-predictions-column-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-predictions-column-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + inputs: + parameters: + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: get-predictions-column-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + 
componentInputArtifact: pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + generate_explanation: + runtimeValue: + constant: 0.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-forecasting-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-forecasting-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-forecasting-2 + dependentTasks: + - finalize-eval-quantile-parameters-2 + - get-predictions-column-2 + - model-batch-predict-2 + - table-to-uri-2 + inputs: + artifacts: + predictions_bigquery_source: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_quantiles: + taskOutputParameter: + outputParameterKey: quantiles + producerTask: finalize-eval-quantile-parameters-2 + forecasting_type: + taskOutputParameter: + outputParameterKey: forecasting_type + producerTask: finalize-eval-quantile-parameters-2 + ground_truth_bigquery_source: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + ground_truth_format: + runtimeValue: + constant: bigquery + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-predictions-column-2 + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_field_name: + runtimeValue: + constant: HORIZON__{{$.inputs.parameters['pipelinechannel--target_column']}} + taskInfo: + name: model-evaluation-forecasting-2 + model-evaluation-import-2: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-forecasting-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + forecasting_metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-forecasting-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: Vertex Forecasting pipeline + problem_type: + runtimeValue: + constant: forecasting + taskInfo: + name: model-evaluation-import-2 + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict-2 + parameters: + use_bq_prefix: + runtimeValue: + constant: 1.0 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-forecasting-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-forecasting-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-forecasting-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--string-not-empty-Output: + 
parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_2_trainer_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - split-materialized-data + - string-not-empty + - training-configurator-and-validator + inputs: + artifacts: + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + 
pipelinechannel--training-configurator-and-validator-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + pipelinechannel--training-configurator-and-validator-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: pipelinechannel--evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: pipelinechannel--num_selected_trials + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_evaluation: + componentInputParameter: 
pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecasting_available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + forecasting_context_window: + componentInputParameter: pipelinechannel--context_window + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_holiday_regions: + componentInputParameter: pipelinechannel--holiday_regions + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_predefined_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + group_columns: + componentInputParameter: pipelinechannel--group_columns + 
group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + location: + componentInputParameter: pipelinechannel--location + model_type: + runtimeValue: + constant: tide + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--transformations + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-hyperparameter-tuning-results-are-supplied-by-user + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + componentInputParameter: pipelinechannel--available_at_forecast_columns + context_window: + componentInputParameter: pipelinechannel--context_window + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_model_type: + runtimeValue: + constant: tide + forecasting_transformations: + componentInputParameter: pipelinechannel--transformations + group_columns: + componentInputParameter: pipelinechannel--group_columns + group_temporal_total_weight: + componentInputParameter: pipelinechannel--group_temporal_total_weight + group_total_weight: + componentInputParameter: pipelinechannel--group_total_weight + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + prediction_type: + runtimeValue: + constant: time_series + quantiles: + componentInputParameter: pipelinechannel--quantiles + split_example_counts: + 
taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + temporal_total_weight: + componentInputParameter: pipelinechannel--temporal_total_weight + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_attribute_columns: + componentInputParameter: pipelinechannel--time_series_attribute_columns + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + unavailable_at_forecast_columns: + componentInputParameter: pipelinechannel--unavailable_at_forecast_columns + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--available_at_forecast_columns: + parameterType: LIST + pipelinechannel--context_window: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluated_examples_bigquery_path: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--group_columns: + parameterType: LIST + pipelinechannel--group_temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--group_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--holiday_regions: + parameterType: LIST + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + 
parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--temporal_total_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_attribute_columns: + parameterType: LIST + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transformations: + parameterType: STRUCT + pipelinechannel--unavailable_at_forecast_columns: + parameterType: LIST + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_predefined_column: + parameterType: STRING + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. 
If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' 
+ isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' 
+ isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. 
code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." 
+ isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-finalize-eval-quantile-parameters: + executorLabel: exec-finalize-eval-quantile-parameters + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-finalize-eval-quantile-parameters-2: + executorLabel: exec-finalize-eval-quantile-parameters-2 + inputDefinitions: + parameters: + quantiles: + isOptional: true + parameterType: LIST + outputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + quantiles: + parameterType: LIST + comp-get-or-create-model-description: + executorLabel: exec-get-or-create-model-description + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-or-create-model-description-2: + executorLabel: exec-get-or-create-model-description-2 + inputDefinitions: + parameters: + location: + parameterType: STRING + original_description: + defaultValue: '' + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri: + executorLabel: exec-get-prediction-image-uri + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-prediction-image-uri-2: + executorLabel: exec-get-prediction-image-uri-2 + inputDefinitions: + parameters: + model_type: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column: + executorLabel: exec-get-predictions-column + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-predictions-column-2: + executorLabel: exec-get-predictions-column-2 + inputDefinitions: + parameters: + forecasting_type: + parameterType: STRING + target_column: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + 
unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + 
parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. 
If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' 
+ isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation-forecasting: + executorLabel: exec-model-evaluation-forecasting + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-forecasting-2: + executorLabel: exec-model-evaluation-forecasting-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + forecasting_quantiles: + defaultValue: + - 0.5 + isOptional: true + parameterType: LIST + forecasting_type: + defaultValue: point + isOptional: true + parameterType: STRING + ground_truth_bigquery_source: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + point_evaluation_quantile: + defaultValue: 0.5 + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + target_field_name: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
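+# The comp-table-to-uri / comp-table-to-uri-2 entries above describe a small
+# helper component that reads a BigQuery table artifact and returns its
+# project, dataset, and table IDs plus a URI. A minimal, hypothetical sketch of
+# a KFP lightweight component with that interface follows; the function body
+# and the metadata keys (projectId, datasetId, tableId, mirroring the
+# placeholders used by the feature-attribution executors later in this spec)
+# are assumptions for illustration, not the shipped
+# google-cloud-pipeline-components implementation.
+#
+#   from typing import NamedTuple
+#
+#   from kfp import dsl
+#
+#   @dsl.component(base_image='python:3.7')
+#   def table_to_uri(
+#       table: dsl.Input[dsl.Artifact],
+#       use_bq_prefix: bool = False,
+#   ) -> NamedTuple('Outputs', [('project_id', str), ('dataset_id', str),
+#                               ('table_id', str), ('uri', str)]):
+#       """Builds a BigQuery URI from the table artifact's metadata (assumed keys)."""
+#       import collections
+#
+#       project_id = table.metadata['projectId']
+#       dataset_id = table.metadata['datasetId']
+#       table_id = table.metadata['tableId']
+#       uri = f'{project_id}.{dataset_id}.{table_id}'
+#       if use_bq_prefix:
+#           # Same bq:// prefixing convention as the Concat placeholders used
+#           # for BigQuery sources elsewhere in this spec.
+#           uri = 'bq://' + uri
+#       return collections.namedtuple(
+#           'Outputs', ['project_id', 'dataset_id', 'table_id', 'uri'],
+#       )(project_id, dataset_id, table_id, uri)
+#
+# Compiling a function like this with the KFP SDK yields a paired comp-*/exec-*
+# entry of the kind shown throughout this file.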
+deploymentSpec: + executors: + exec-automl-forecasting-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", 
\"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", + "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", + "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", + ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", + \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--use_json=true", "\", \"--log_level=ERROR", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 
+ - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-forecasting-stage-2-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-forecasting-stage-2-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", + "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", + "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", + "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro", + "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--materialized_train_split=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input_path''].uri}}", + "\", \"--kms_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--tuning_result_output_path=", "{{$.outputs.artifacts[''tuning_result_output''].uri}}", + "\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 
1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n selected_trials: int,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_single_run_max_secs',\ + \ int),\n ('stage_2_deadline_hours', float),\n ('stage_2_single_run_max_secs',\ + \ int),\n ],\n):\n \"\"\"Calculates training parameters.\n\n Args:\n\ + \ stage_1_num_parallel_trials: Number of parallel trails for stage 1.\n\ + \ train_budget_milli_node_hours: The train budget of creating this model,\n\ + \ expressed in milli node hours i.e. 1,000 value in this field means\ + \ 1 node\n hour.\n stage_2_num_parallel_trials: Number of parallel\ + \ trails for stage 2.\n selected_trials: Number of trials that should\ + \ be selected.\n is_skip_architecture_search: If component is being called\ + \ in the\n skip_architecture_search pipeline.\n fast_testing: Internal\ + \ flag used for presubmit tests.\n\n Returns:\n stage_1_deadline_hours:\ + \ Maximum number of hours to run stage 1.\n stage_1_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 1\n training\ + \ trial.\n stage_2_deadline_hours: Maximum number of hours to run stage\ + \ 2.\n stage_2_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 2\n training trial.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n stage_1_deadline_hours = -1.0\n stage_1_single_run_max_secs = -1\n\ + \ stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs = -1\n\n\ + \ if is_skip_architecture_search:\n stage_2_deadline_hours = train_budget_milli_node_hours\ + \ / 1000.0\n rounds = math.ceil(selected_trials / stage_2_num_parallel_trials)\n\ + \ stage_2_single_run_max_secs = int(\n stage_2_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n else:\n stage_1_deadline_hours =\ + \ train_budget_milli_node_hours / 1000.0\n rounds = math.ceil(100 / stage_1_num_parallel_trials)\n\ + \ stage_1_single_run_max_secs = int(\n stage_1_deadline_hours\ + \ * 3600.0 / 1.3 / rounds\n )\n if fast_testing:\n stage_1_deadline_hours\ + \ = 0.2\n stage_1_single_run_max_secs = 1\n stage_2_deadline_hours\ + \ = 0.2\n stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_single_run_max_secs',\n 'stage_2_deadline_hours',\n\ + \ 'stage_2_single_run_max_secs',\n ],\n )(\n stage_1_deadline_hours,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id 
+ - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - 
'{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", 
"{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": 
["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-finalize-eval-quantile-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-finalize-eval-quantile-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - finalize_eval_quantile_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef finalize_eval_quantile_parameters(\n quantiles: Optional[list]\ + \ = None, # pylint: disable=g-bare-generic\n) -> NamedTuple('Outputs',\ + \ [('forecasting_type', str), ('quantiles', list)]):\n \"\"\"Infers quantile-specific\ + \ evaluation parameters.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ if not quantiles or quantiles == '[]':\n quantiles = []\n forecasting_type\ + \ = 'point'\n else:\n forecasting_type = 'quantile'\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n (\n 'forecasting_type',\n 'quantiles',\n\ + \ ),\n )(forecasting_type, quantiles)\n\n" + image: python:3.7 + exec-get-or-create-model-description: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-or-create-model-description-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_or_create_model_description + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_or_create_model_description(\n location: str,\n project:\ + \ str,\n original_description: str = '',\n) -> str:\n \"\"\"Creates\ + \ a useful model description if one is not provided.\"\"\"\n # Note: {{$.pipeline_job_name}}\ + \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ + \ actual template format doesn't get injected since\n # the Python isn't\ + \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ + \ location=location, project=project\n )\n # Note: URL should match\ + \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ + \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ + \n if original_description:\n return f'{original_description} From:\ + \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ + \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ + \ {pipeline_url}'\n\n" + image: python:3.7 + exec-get-prediction-image-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-prediction-image-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_image_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_image_uri(model_type: str) -> str:\n \"\"\"\ + Returns the prediction image corresponding to the given model type.\"\"\"\ + \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ + \ must be hardcoded without any breaks in the code so string\n # replacement\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ + \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ + \ )\n return images[model_type]\n\n" + image: python:3.7 + exec-get-predictions-column: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-get-predictions-column-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_predictions_column + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_predictions_column(forecasting_type: str, target_column:\ + \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ + \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ + \ return f'predicted_{target_column}.value'\n\n" + image: python:3.7-slim + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: 
gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", 
\"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation-forecasting: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - 
'{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-forecasting-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - forecasting + - --forecasting_type + - '{{$.inputs.parameters[''forecasting_type'']}}' + - --forecasting_quantiles + - '{{$.inputs.parameters[''forecasting_quantiles'']}}' + - --point_evaluation_quantile + - '{{$.inputs.parameters[''point_evaluation_quantile'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + "bq://{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}.{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --target_field_name + - instance.{{$.inputs.parameters['target_field_name']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - 
--dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + 
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - 
--project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n 
)(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - 
'{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. + name: time-series-dense-encoder-forecasting +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--available_at_forecast_columns: + componentInputParameter: available_at_forecast_columns + pipelinechannel--context_window: + componentInputParameter: context_window + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluated_examples_bigquery_path: + componentInputParameter: evaluated_examples_bigquery_path + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: 
evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_transform_engine_bigquery_staging_full_dataset_id: + componentInputParameter: feature_transform_engine_bigquery_staging_full_dataset_id + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--group_columns: + componentInputParameter: group_columns + pipelinechannel--group_temporal_total_weight: + componentInputParameter: group_temporal_total_weight + pipelinechannel--group_total_weight: + componentInputParameter: group_total_weight + pipelinechannel--holiday_regions: + componentInputParameter: holiday_regions + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--num_selected_trials: + componentInputParameter: num_selected_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + 
pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_trainer_worker_pool_specs_override: + componentInputParameter: stage_2_trainer_worker_pool_specs_override + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--temporal_total_weight: + componentInputParameter: temporal_total_weight + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_attribute_columns: + componentInputParameter: time_series_attribute_columns + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--unavailable_at_forecast_columns: + componentInputParameter: unavailable_at_forecast_columns + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_predefined_column: + componentInputParameter: window_predefined_column + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + available_at_forecast_columns: + description: 'The columns that are available at the + + forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: 0.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: The full service account name. 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: The dataflow subnetwork. + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: '`True` to enable dataflow public IPs.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluated_examples_bigquery_path: + defaultValue: '' + description: 'The bigquery dataset to write the + + predicted examples into for evaluation, in the format + + `bq://project.dataset`. Only necessary if evaluation is enabled.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 22.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 22.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the batch prediction + + job in evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The maximum count of replicas + + the batch prediction job can scale to.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'Number of replicas to use + + in the batch prediction cluster at startup time.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: The disk space in GB for dataflow. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'Machine type for the dataflow job in + + evaluation, such as ''n1-standard-16''.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 25.0 + description: Maximum number of dataflow workers. + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 22.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_transform_engine_bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The full id of + + the feature transform engine staging dataset.' 
+ isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size of the + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type of + + the feature transform engine.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of + + dataflow workers of the feature transform engine.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + defaultValue: 0.0 + description: The length of the horizon. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + description: 'A list of time series attribute column names that define the + + time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions + + aggregated over both the horizon and time series in the same hierarchy + + group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated over + + time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + holiday_regions: + description: 'The geographical regions where the holiday effect is + + applied in modeling.' + isOptional: true + parameterType: LIST + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_description: + defaultValue: '' + description: Optional description. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + description: Optional display name for model. + isOptional: true + parameterType: STRING + num_selected_trials: + defaultValue: 10.0 + description: Number of selected trails. + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: '"minimize-rmse", "minimize-mae", "minimize-rmsle", + + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + + "minimize-quantile-loss".' + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: '`True` to evaluate the ensembled model on the test split.' + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' 
+ isOptional: true + parameterType: LIST + stage_1_tuning_result_artifact_uri: + defaultValue: '' + description: 'The stage 1 tuning result artifact GCS + + URI.' + isOptional: true + parameterType: STRING + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trails for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 2 trainer worker pool spec.' + isOptional: true + parameterType: LIST + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for predictions aggregated + + over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: The column that indicates the time. + parameterType: STRING + time_series_attribute_columns: + description: 'The columns that are invariant across the + + same time series.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + description: 'The column that distinguish the different + + time series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Dict mapping auto and/or type-resolutions to feature + + columns. The supported types are: auto, categorical, numeric, text, and + + timestamp.' + parameterType: STRUCT + unavailable_at_forecast_columns: + description: 'The columns that are unavailable at the + + forecast time.' + isOptional: true + parameterType: LIST + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: 0.0 + description: The maximum number of windows that will be generated. + isOptional: true + parameterType: NUMBER_INTEGER + window_predefined_column: + defaultValue: '' + description: The column that indicate the start of each window. + isOptional: true + parameterType: STRING + window_stride_length: + defaultValue: 0.0 + description: The stride length to generate the window. 
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py new file mode 100644 index 0000000000..2cf4444e5a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py @@ -0,0 +1,1023 @@ +"""Util functions for Vertex Forecasting pipelines.""" + +import os +import pathlib +from typing import Any, Dict, FrozenSet, List, Optional, Tuple + +_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() + +_RETAIL_MODEL_DISABLED_OPTIONS = frozenset([ + 'quantiles', + 'enable_probabilistic_inference', +]) + + +def _get_base_forecasting_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = 
None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, + group_columns: Optional[List[str]] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, + fields_to_exclude: FrozenSet[str] = frozenset(), +) -> Dict[str, Any]: + """Formats a set of parameters common across Vertex forecasting pipelines.""" + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not stage_2_trainer_worker_pool_specs_override: + stage_2_trainer_worker_pool_specs_override = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'dataflow_service_account': dataflow_service_account, + 'evaluated_examples_bigquery_path': evaluated_examples_bigquery_path, + 'target_column': target_column, + 'optimization_objective': optimization_objective, + 'transformations': transformations, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'time_series_attribute_columns': time_series_attribute_columns, + 'available_at_forecast_columns': available_at_forecast_columns, + 'unavailable_at_forecast_columns': unavailable_at_forecast_columns, + 'forecast_horizon': forecast_horizon, + 'context_window': context_window, + 'window_predefined_column': window_predefined_column, + 'window_stride_length': window_stride_length, + 'window_max_count': window_max_count, + 'holiday_regions': holiday_regions, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'num_selected_trials': num_selected_trials, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'weight_column': weight_column, + 'dataflow_subnetwork': dataflow_subnetwork, + 'feature_transform_engine_dataflow_machine_type': ( + feature_transform_engine_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + feature_transform_engine_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + feature_transform_engine_dataflow_disk_size_gb + ), + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'feature_transform_engine_bigquery_staging_full_dataset_id': ( + feature_transform_engine_bigquery_staging_full_dataset_id + ), + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + 
stage_1_tuner_worker_pool_specs_override + ), + 'stage_2_trainer_worker_pool_specs_override': ( + stage_2_trainer_worker_pool_specs_override + ), + 'quantiles': quantiles, + 'encryption_spec_key_name': encryption_spec_key_name, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + 'run_evaluation': run_evaluation, + 'group_columns': group_columns, + 'group_total_weight': group_total_weight, + 'temporal_total_weight': temporal_total_weight, + 'group_temporal_total_weight': group_temporal_total_weight, + } + + # Filter out empty values and those excluded from the particular pipeline. + # (example: TFT and Seq2Seq don't support `quantiles`.) + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None and param not in fields_to_exclude + } + ) + return parameter_values + + +def get_learn_to_learn_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = 
True,
+ group_columns: Optional[List[str]] = None,
+ group_total_weight: float = 0.0,
+ temporal_total_weight: float = 0.0,
+ group_temporal_total_weight: float = 0.0,
+) -> Tuple[str, Dict[str, Any]]:
+ """Returns l2l_forecasting pipeline and formatted parameters.
+
+ Args:
+ project: The GCP project that runs the pipeline components.
+ location: The GCP region that runs the pipeline components.
+ root_dir: The root GCS directory for the pipeline components.
+ target_column: The target column name.
+ optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle",
+ "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or
+ "minimize-quantile-loss".
+ transformations: Dict mapping auto and/or type-resolutions to feature
+ columns. The supported types are: auto, categorical, numeric, text, and
+ timestamp.
+ train_budget_milli_node_hours: The train budget of creating this model,
+ expressed in milli node hours i.e. 1,000 value in this field means 1 node
+ hour.
+ time_column: The column that indicates the time.
+ time_series_identifier_column: The column which distinguishes different time
+ series.
+ time_series_attribute_columns: The columns that are invariant across the
+ same time series.
+ available_at_forecast_columns: The columns that are available at the
+ forecast time.
+ unavailable_at_forecast_columns: The columns that are unavailable at the
+ forecast time.
+ forecast_horizon: The length of the horizon.
+ context_window: The length of the context window.
+ evaluated_examples_bigquery_path: The bigquery dataset to write the
+ predicted examples into for evaluation, in the format
+ `bq://project.dataset`.
+ window_predefined_column: The column that indicates the start of each window.
+ window_stride_length: The stride length to generate the window.
+ window_max_count: The maximum number of windows that will be generated.
+ holiday_regions: The geographical regions where the holiday effect is
+ applied in modeling.
+ stage_1_num_parallel_trials: Number of parallel trials for stage 1.
+ stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS
+ URI.
+ stage_2_num_parallel_trials: Number of parallel trials for stage 2.
+ num_selected_trials: Number of selected trials.
+ data_source_csv_filenames: A string that represents a list of comma
+ separated CSV filenames.
+ data_source_bigquery_table_path: The BigQuery table path of format
+ bq://bq_project.bq_dataset.bq_table
+ predefined_split_key: The predefined_split column name.
+ timestamp_split_key: The timestamp_split column name.
+ training_fraction: The training fraction.
+ validation_fraction: The validation fraction.
+ test_fraction: The test fraction.
+ weight_column: The weight column name.
+ dataflow_service_account: The full service account name.
+ dataflow_subnetwork: The dataflow subnetwork.
+ dataflow_use_public_ips: `True` to enable dataflow public IPs.
+ feature_transform_engine_bigquery_staging_full_dataset_id: The full id of
+ the feature transform engine staging dataset.
+ feature_transform_engine_dataflow_machine_type: The dataflow machine type of
+ the feature transform engine.
+ feature_transform_engine_dataflow_max_num_workers: The max number of
+ dataflow workers of the feature transform engine.
+ feature_transform_engine_dataflow_disk_size_gb: The disk size of the
+ dataflow workers of the feature transform engine.
+ evaluation_batch_predict_machine_type: Machine type for the batch prediction
+ job in evaluation, such as 'n1-standard-16'.
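# A rough sketch of how the data-source and split arguments described above
# might be filled in. The bucket, project, dataset, and fraction values are
# placeholders for illustration only, and bundling them in a plain dict of
# keyword arguments is just one convenient way to pass them along.
example_data_args = dict(
    # Either a BigQuery table...
    data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
    # ...or comma separated CSV filenames, e.g.
    # data_source_csv_filenames='gs://my-bucket/a.csv,gs://my-bucket/b.csv'
    training_fraction=0.8,
    validation_fraction=0.1,
    test_fraction=0.1,
)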
+ evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. + evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. If quantiles are specified, then the quantiles of the + distribution are also returned. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + group_columns: A list of time series attribute column names that define the + time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group. 
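# A minimal end-to-end sketch of using this helper. The project, bucket, and
# column names below are placeholders, and submitting the returned template
# with google.cloud.aiplatform.PipelineJob is only one possible way to run it;
# nothing in this module requires that client.
from google.cloud import aiplatform

template_path, parameter_values = (
    get_learn_to_learn_forecasting_pipeline_and_parameters(
        project='my-project',
        location='us-central1',
        root_dir='gs://my-bucket/pipeline_root',
        target_column='sales',
        optimization_objective='minimize-rmse',
        transformations={'auto': ['sales'], 'numeric': ['price'], 'timestamp': ['date']},
        train_budget_milli_node_hours=1000,
        time_column='date',
        time_series_identifier_column='store_id',
        forecast_horizon=30,
        context_window=30,
        data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
    )
)

# Arguments left as None are dropped by _get_base_forecasting_parameters, so
# parameter_values carries only the values that were actually provided or have
# non-None defaults.
aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='l2l-forecasting',
    template_path=template_path,
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values=parameter_values,
)
job.submit()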
+ """ + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + quantiles=quantiles, + encryption_spec_key_name=encryption_spec_key_name, + enable_probabilistic_inference=enable_probabilistic_inference, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + group_columns=group_columns, + group_total_weight=group_total_weight, + temporal_total_weight=temporal_total_weight, + group_temporal_total_weight=group_temporal_total_weight, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'learn_to_learn_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: 
str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + enable_probabilistic_inference: bool = False, + quantiles: Optional[List[float]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, + group_columns: Optional[List[str]] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +) -> Tuple[str, Dict[str, Any]]: + """Returns timeseries_dense_encoder_forecasting pipeline and parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + time_column: The column that indicates the time. 
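# A sketch of the `transformations` mapping described above: keys are the
# resolution types named in the docstring ('auto', 'categorical', 'numeric',
# 'text', 'timestamp') and values are lists of feature columns. The column
# names here are placeholders.
example_transformations = {
    'auto': ['sales'],
    'numeric': ['price'],
    'categorical': ['store_id'],
    'timestamp': ['date'],
}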
+ time_series_identifier_column: The column which distinguishes different time
+ series.
+ time_series_attribute_columns: The columns that are invariant across the
+ same time series.
+ available_at_forecast_columns: The columns that are available at the
+ forecast time.
+ unavailable_at_forecast_columns: The columns that are unavailable at the
+ forecast time.
+ forecast_horizon: The length of the horizon.
+ context_window: The length of the context window.
+ evaluated_examples_bigquery_path: The bigquery dataset to write the
+ predicted examples into for evaluation, in the format
+ `bq://project.dataset`.
+ window_predefined_column: The column that indicates the start of each window.
+ window_stride_length: The stride length to generate the window.
+ window_max_count: The maximum number of windows that will be generated.
+ holiday_regions: The geographical regions where the holiday effect is
+ applied in modeling.
+ stage_1_num_parallel_trials: Number of parallel trials for stage 1.
+ stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS
+ URI.
+ stage_2_num_parallel_trials: Number of parallel trials for stage 2.
+ num_selected_trials: Number of selected trials.
+ data_source_csv_filenames: A string that represents a list of comma
+ separated CSV filenames.
+ data_source_bigquery_table_path: The BigQuery table path of format
+ bq://bq_project.bq_dataset.bq_table
+ predefined_split_key: The predefined_split column name.
+ timestamp_split_key: The timestamp_split column name.
+ training_fraction: The training fraction.
+ validation_fraction: The validation fraction.
+ test_fraction: The test fraction.
+ weight_column: The weight column name.
+ dataflow_service_account: The full service account name.
+ dataflow_subnetwork: The dataflow subnetwork.
+ dataflow_use_public_ips: `True` to enable dataflow public IPs.
+ feature_transform_engine_bigquery_staging_full_dataset_id: The full id of
+ the feature transform engine staging dataset.
+ feature_transform_engine_dataflow_machine_type: The dataflow machine type of
+ the feature transform engine.
+ feature_transform_engine_dataflow_max_num_workers: The max number of
+ dataflow workers of the feature transform engine.
+ feature_transform_engine_dataflow_disk_size_gb: The disk size of the
+ dataflow workers of the feature transform engine.
+ evaluation_batch_predict_machine_type: Machine type for the batch prediction
+ job in evaluation, such as 'n1-standard-16'.
+ evaluation_batch_predict_starting_replica_count: Number of replicas to use
+ in the batch prediction cluster at startup time.
+ evaluation_batch_predict_max_replica_count: The maximum count of replicas
+ the batch prediction job can scale to.
+ evaluation_dataflow_machine_type: Machine type for the dataflow job in
+ evaluation, such as 'n1-standard-16'.
+ evaluation_dataflow_max_num_workers: Maximum number of dataflow workers.
+ evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow.
+ study_spec_parameters_override: The list for overriding study spec.
+ stage_1_tuner_worker_pool_specs_override: The dictionary for overriding
+ stage 1 tuner worker pool spec.
+ stage_2_trainer_worker_pool_specs_override: The dictionary for overriding
+ stage 2 trainer worker pool spec.
+ enable_probabilistic_inference: If probabilistic inference is enabled, the
+ model will fit a distribution that captures the uncertainty of a
+ prediction. If quantiles are specified, then the quantiles of the
+ distribution are also returned.
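# When probabilistic inference is enabled for this pipeline, quantiles can be
# requested as well; per the argument descriptions they must be unique values
# strictly between 0 and 1, with at most five entries. The specific values
# below are placeholders.
example_probabilistic_args = dict(
    enable_probabilistic_inference=True,
    quantiles=[0.1, 0.5, 0.9],
)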
+ quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + group_columns: A list of time series attribute column names that define the + time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group. + """ + + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + 
stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + quantiles=quantiles, + encryption_spec_key_name=encryption_spec_key_name, + enable_probabilistic_inference=enable_probabilistic_inference, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + group_columns=group_columns, + group_total_weight=group_total_weight, + temporal_total_weight=temporal_total_weight, + group_temporal_total_weight=group_temporal_total_weight, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'time_series_dense_encoder_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, +): + """Returns tft_forecasting pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. 
+ root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + time_column: The column that indicates the time. + time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicate the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'. + evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. 
+ evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + encryption_spec_key_name: The KMS key name. + model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + """ + # TFT should only have 1 selected trial to freeze the ensemble size at 1. + excluded_parameters = _RETAIL_MODEL_DISABLED_OPTIONS.union({ + 'num_selected_trials', + }) + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + encryption_spec_key_name=encryption_spec_key_name, + 
model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + fields_to_exclude=excluded_parameters, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'temporal_fusion_transformer_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_sequence_to_sequence_forecasting_pipeline_and_parameters( + *, + project: str, + location: str, + root_dir: str, + target_column: str, + optimization_objective: str, + transformations: Dict[str, List[str]], + train_budget_milli_node_hours: float, + time_column: str, + time_series_identifier_column: str, + time_series_attribute_columns: Optional[List[str]] = None, + available_at_forecast_columns: Optional[List[str]] = None, + unavailable_at_forecast_columns: Optional[List[str]] = None, + forecast_horizon: Optional[int] = None, + context_window: Optional[int] = None, + evaluated_examples_bigquery_path: Optional[str] = None, + window_predefined_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + stage_1_num_parallel_trials: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + stage_2_num_parallel_trials: Optional[int] = None, + num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + feature_transform_engine_bigquery_staging_full_dataset_id: str = '', + feature_transform_engine_dataflow_machine_type: str = 'n1-standard-16', + feature_transform_engine_dataflow_max_num_workers: int = 10, + feature_transform_engine_dataflow_disk_size_gb: int = 40, + evaluation_batch_predict_machine_type: str = 'n1-standard-16', + evaluation_batch_predict_starting_replica_count: int = 25, + evaluation_batch_predict_max_replica_count: int = 25, + evaluation_dataflow_machine_type: str = 'n1-standard-16', + evaluation_dataflow_max_num_workers: int = 25, + evaluation_dataflow_disk_size_gb: int = 50, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + stage_2_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + model_description: Optional[str] = None, + run_evaluation: bool = True, +): + """Returns seq2seq forecasting pipeline and formatted parameters. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + optimization_objective: "minimize-rmse", "minimize-mae", "minimize-rmsle", + "minimize-rmspe", "minimize-wape-mae", "minimize-mape", or + "minimize-quantile-loss". + transformations: Dict mapping auto and/or type-resolutions to feature + columns. The supported types are: auto, categorical, numeric, text, and + timestamp. 
+ train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + time_column: The column that indicates the time. + time_series_identifier_column: The column which distinguishes different time + series. + time_series_attribute_columns: The columns that are invariant across the + same time series. + available_at_forecast_columns: The columns that are available at the + forecast time. + unavailable_at_forecast_columns: The columns that are unavailable at the + forecast time. + forecast_horizon: The length of the horizon. + context_window: The length of the context window. + evaluated_examples_bigquery_path: The bigquery dataset to write the + predicted examples into for evaluation, in the format + `bq://project.dataset`. + window_predefined_column: The column that indicate the start of each window. + window_stride_length: The stride length to generate the window. + window_max_count: The maximum number of windows that will be generated. + holiday_regions: The geographical regions where the holiday effect is + applied in modeling. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + num_selected_trials: Number of selected trails. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: The test fraction. + weight_column: The weight column name. + dataflow_service_account: The full service account name. + dataflow_subnetwork: The dataflow subnetwork. + dataflow_use_public_ips: `True` to enable dataflow public IPs. + feature_transform_engine_bigquery_staging_full_dataset_id: The full id of + the feature transform engine staging dataset. + feature_transform_engine_dataflow_machine_type: The dataflow machine type of + the feature transform engine. + feature_transform_engine_dataflow_max_num_workers: The max number of + dataflow workers of the feature transform engine. + feature_transform_engine_dataflow_disk_size_gb: The disk size of the + dataflow workers of the feature transform engine. + evaluation_batch_predict_machine_type: Machine type for the batch prediction + job in evaluation, such as 'n1-standard-16'. + evaluation_batch_predict_starting_replica_count: Number of replicas to use + in the batch prediction cluster at startup time. + evaluation_batch_predict_max_replica_count: The maximum count of replicas + the batch prediction job can scale to. + evaluation_dataflow_machine_type: Machine type for the dataflow job in + evaluation, such as 'n1-standard-16'. + evaluation_dataflow_max_num_workers: Maximum number of dataflow workers. + evaluation_dataflow_disk_size_gb: The disk space in GB for dataflow. + study_spec_parameters_override: The list for overriding study spec. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding + stage 1 tuner worker pool spec. + stage_2_trainer_worker_pool_specs_override: The dictionary for overriding + stage 2 trainer worker pool spec. + encryption_spec_key_name: The KMS key name. 
+ model_display_name: Optional display name for model. + model_description: Optional description. + run_evaluation: `True` to evaluate the ensembled model on the test split. + """ + + parameter_values = _get_base_forecasting_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + evaluated_examples_bigquery_path=evaluated_examples_bigquery_path, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + time_column=time_column, + dataflow_service_account=dataflow_service_account, + time_series_identifier_column=time_series_identifier_column, + time_series_attribute_columns=time_series_attribute_columns, + available_at_forecast_columns=available_at_forecast_columns, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + forecast_horizon=forecast_horizon, + context_window=context_window, + window_predefined_column=window_predefined_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + num_selected_trials=num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + dataflow_use_public_ips=dataflow_use_public_ips, + dataflow_subnetwork=dataflow_subnetwork, + feature_transform_engine_bigquery_staging_full_dataset_id=feature_transform_engine_bigquery_staging_full_dataset_id, + feature_transform_engine_dataflow_machine_type=feature_transform_engine_dataflow_machine_type, + feature_transform_engine_dataflow_max_num_workers=feature_transform_engine_dataflow_max_num_workers, + feature_transform_engine_dataflow_disk_size_gb=feature_transform_engine_dataflow_disk_size_gb, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + study_spec_parameters_override=study_spec_parameters_override, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + stage_2_trainer_worker_pool_specs_override=stage_2_trainer_worker_pool_specs_override, + encryption_spec_key_name=encryption_spec_key_name, + model_display_name=model_display_name, + model_description=model_description, + run_evaluation=run_evaluation, + fields_to_exclude=_RETAIL_MODEL_DISABLED_OPTIONS, + ) + + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, + 'sequence_to_sequence_forecasting_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py new file mode 
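
A minimal usage sketch for the forecasting helpers added above (editor's illustration, not part of the patch): each getter returns a (template path, parameter values) pair that can be handed to a Vertex AI PipelineJob. The import path for the helper module and every project, bucket, and column value below are assumptions chosen for illustration, not taken from this diff.

from google.cloud import aiplatform  # assumed to be installed alongside the components package

# Assumed import location for the helpers defined in this patch.
from google_cloud_pipeline_components.preview.automl.forecasting import utils

template_path, parameter_values = (
    utils.get_sequence_to_sequence_forecasting_pipeline_and_parameters(
        project='my-project',                     # placeholder GCP project
        location='us-central1',                   # placeholder region
        root_dir='gs://my-bucket/pipeline_root',  # placeholder GCS root
        target_column='sales',
        optimization_objective='minimize-rmse',
        transformations={'auto': ['sales', 'date', 'store_id']},
        train_budget_milli_node_hours=1000,       # i.e. 1 node hour
        time_column='date',
        time_series_identifier_column='store_id',
    )
)

# The returned YAML path is submitted as a prebuilt template with the formatted parameters.
job = aiplatform.PipelineJob(
    display_name='seq2seq-forecasting',            # placeholder
    template_path=template_path,
    parameter_values=parameter_values,
    pipeline_root='gs://my-bucket/pipeline_root',  # placeholder
)
job.run()
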
100644 index 0000000000..764539056a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -0,0 +1,35 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Preview AutoML tabular components.""" + +from google_cloud_pipeline_components.preview.automl.tabular.feature_selection import tabular_feature_ranking_and_selection as FeatureSelectionOp +from google_cloud_pipeline_components.preview.automl.tabular.feature_transform_engine import feature_transform_engine as FeatureTransformEngineOp +from google_cloud_pipeline_components.preview.automl.tabular.tabnet_hyperparameter_tuning_job import tabnet_hyperparameter_tuning_job as TabNetHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.tabnet_trainer import tabnet_trainer as TabNetTrainerOp +from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_hyperparameter_tuning_job import wide_and_deep_hyperparameter_tuning_job as WideAndDeepHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_trainer import wide_and_deep_trainer as WideAndDeepTrainerOp +from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import xgboost_hyperparameter_tuning_job as XGBoostHyperparameterTuningJobOp +from google_cloud_pipeline_components.preview.automl.tabular.xgboost_trainer import xgboost_trainer as XGBoostTrainerOp + +__all__ = [ + 'FeatureSelectionOp', + 'WideAndDeepHyperparameterTuningJobOp', + 'WideAndDeepTrainerOp', + 'TabNetHyperparameterTuningJobOp', + 'TabNetTrainerOp', + 'FeatureTransformEngineOp', + 'XGBoostHyperparameterTuningJobOp', + 'XGBoostTrainerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml new file mode 100644 index 0000000000..b10b4b421a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -0,0 +1,11427 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-feature-selection-pipeline +# Description: The AutoML Tabular pipeline. 
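
Similarly, a hedged sketch of how the new preview tabular package might be consumed (editor's illustration, not part of the patch): the __init__.py above re-exports the component ops under short aliases, and the automl_tabular_feature_selection_pipeline.yaml added below ships in the same directory, so it can be located relative to the package and submitted as a prebuilt template. Only the import path, the __all__ list, and the YAML filename come from this patch; all display names, project and bucket values, and parameter values are placeholders, and the parameter set shown is only a subset of the pipeline's inputs.

import os

from google.cloud import aiplatform
import google_cloud_pipeline_components.preview.automl.tabular as preview_tabular

# The re-exported component aliases listed in __all__ are importable for custom pipelines.
print(preview_tabular.__all__)

# The feature-selection pipeline YAML added by this patch sits next to __init__.py,
# so it can be resolved relative to the installed package.
template_path = os.path.join(
    os.path.dirname(preview_tabular.__file__),
    'automl_tabular_feature_selection_pipeline.yaml',
)

job = aiplatform.PipelineJob(
    display_name='automl-tabular-feature-selection',  # placeholder
    template_path=template_path,
    pipeline_root='gs://my-bucket/pipeline_root',      # placeholder
    parameter_values={
        # A subset of the pipeline's header inputs; all values are illustrative placeholders.
        'project': 'my-project',
        'location': 'us-central1',
        'root_dir': 'gs://my-bucket/pipeline_root',
        'prediction_type': 'classification',
        'optimization_objective': 'maximize-au-roc',
        'target_column': 'label',
        'transformations': 'gs://my-bucket/transformations.json',
        'train_budget_milli_node_hours': 1000.0,
    },
)
job.run()
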
+# Inputs: +# additional_experiments: dict +# apply_feature_selection_tuning: bool [Default: False] +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# disable_early_stopping: bool [Default: False] +# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# distill_batch_predict_max_replica_count: int [Default: 25.0] +# distill_batch_predict_starting_replica_count: int [Default: 25.0] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# location: str +# max_selected_features: int [Default: 1000.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] +# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] +# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# transformations: str +# validation_fraction: float [Default: -1.0] +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-3-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-3-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + 
schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-3: + executorLabel: exec-automl-tabular-ensemble-3 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. 
+ parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-3: + executorLabel: exec-automl-tabular-infra-validator-3 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-stage-1-tuner-2: + executorLabel: exec-automl-tabular-stage-1-tuner-2 + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform: + executorLabel: exec-automl-tabular-transform + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform-2: + executorLabel: exec-automl-tabular-transform-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. 
If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-2: + executorLabel: exec-bool-identity-2 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-3: + executorLabel: exec-bool-identity-3 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. 
+ parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trails for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-check-if-binary-classification: + executorLabel: exec-check-if-binary-classification + inputDefinitions: + artifacts: + example_gen_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: metadata generated by example gen. 
+ outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + 
outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + runtimeValue: + constant: '' + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + 
schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + 
predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + 
pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-7 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-7 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: 
pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + feature_ranking: + componentInputArtifact: pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking + materialized_eval_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + 
num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + tune_feature_selection_rate: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity-2 + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_distillation + taskInfo: + name: bool-identity-3 + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: 
pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + condition-7: + componentRef: + name: comp-condition-7 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + - calculate-training-parameters-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + pipelinechannel--purge-unused-features-output_metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + 
pipelinechannel--tabular-stats-and-example-gen-eval_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + pipelinechannel--tabular-stats-and-example-gen-test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + pipelinechannel--tabular-stats-and-example-gen-train_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + parameters: + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: distill_stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: is-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'true' + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 
+ pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + 
componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: 
pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + 
parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-7: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-8 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-8 + tasks: + automl-tabular-ensemble-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-3 + dependentTasks: + - automl-tabular-stage-1-tuner-2 + - automl-tabular-transform-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-3 + automl-tabular-infra-validator-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-3 + dependentTasks: + - automl-tabular-ensemble-3 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + taskInfo: + name: automl-tabular-infra-validator-3 + automl-tabular-stage-1-tuner-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner-2 + dependentTasks: + - automl-tabular-transform-2 + inputs: + artifacts: + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform-2 + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform-2 + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + parameters: + deadline_hours: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + 
componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + runtimeValue: + constant: 1.0 + single_run_max_secs: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner-2 + automl-tabular-transform-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform-2 + dependentTasks: + - write-bp-result-path + - write-bp-result-path-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + eval_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path-2 + metadata: + componentInputArtifact: pipelinechannel--purge-unused-features-output_metadata + test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + train_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform-2 + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - automl-tabular-ensemble-3 + - model-upload-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + pipelinechannel--model-upload-3-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-3 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + 
componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-batch-predict-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-3 + dependentTasks: + - read-input-uri + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + 
runtimeValue: + constant: batch-predict-train-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-3 + model-batch-predict-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-4 + dependentTasks: + - read-input-uri-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri-2 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-eval-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-4 + model-upload-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-3 + dependentTasks: + - automl-tabular-ensemble-3 + - automl-tabular-infra-validator-3 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + parameters: + display_name: + runtimeValue: + constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-3 + read-input-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + taskInfo: + name: read-input-uri + read-input-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri-2 + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + taskInfo: + name: read-input-uri-2 + write-bp-result-path: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path + 
dependentTasks: + - model-batch-predict-3 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-3 + taskInfo: + name: write-bp-result-path + write-bp-result-path-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path-2 + dependentTasks: + - model-batch-predict-4 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-4 + taskInfo: + name: write-bp-result-path-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--purge-unused-features-output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + 
pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-8: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-3 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-3 + tasks: + feature-attribution-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-3 + dependentTasks: + - model-batch-explanation-3 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-3 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-3 + model-batch-explanation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-3 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + 
componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-3 + model-batch-predict-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-5 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-5 + model-evaluation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-3 + dependentTasks: + - model-batch-predict-5 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-5 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: 
pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-3 + model-evaluation-import-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-3 + dependentTasks: + - feature-attribution-3 + - model-evaluation-3 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-3 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-3 + model: + componentInputArtifact: pipelinechannel--model-upload-3-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-3 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-3-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-transform: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform + dependentTasks: + - purge-unused-features + - tabular-stats-and-example-gen + inputs: + artifacts: + dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform + check-if-binary-classification: + cachingOptions: + enableCache: true + componentRef: + name: comp-check-if-binary-classification + dependentTasks: + - tabular-stats-and-example-gen + inputs: + artifacts: + example_gen_metadata: + 
taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + taskInfo: + name: check-if-binary-classification + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - purge-unused-features + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--purge-unused-features-output_metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - purge-unused-features + - string-not-empty + - tabular-feature-ranking-and-selection + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--purge-unused-features-output_metadata: + taskOutputArtifact: + outputArtifactKey: output_metadata + producerTask: purge-unused-features + pipelinechannel--tabular-feature-ranking-and-selection-feature_ranking: + taskOutputArtifact: + outputArtifactKey: feature_ranking + producerTask: tabular-feature-ranking-and-selection + 
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + 
pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--model_display_name: + componentInputParameter: pipelinechannel--model_display_name + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - automl-tabular-transform + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + taskInfo: + name: merge-materialized-splits + purge-unused-features: + cachingOptions: + enableCache: true + componentRef: + name: comp-purge-unused-features + dependentTasks: + - tabular-feature-ranking-and-selection + - 
tabular-stats-and-example-gen + inputs: + artifacts: + selected_features: + taskOutputArtifact: + outputArtifactKey: selected_features + producerTask: tabular-feature-ranking-and-selection + unpurged_metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + taskInfo: + name: purge-unused-features + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + runtimeValue: + constant: '' + taskInfo: + name: string-not-empty + tabular-feature-ranking-and-selection: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-feature-ranking-and-selection + dependentTasks: + - check-if-binary-classification + - tabular-stats-and-example-gen + inputs: + artifacts: + data_source: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + binary_classification: + taskOutputParameter: + outputParameterKey: Output + producerTask: check-if-binary-classification + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column_name: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: tabular-feature-ranking-and-selection + tabular-stats-and-example-gen: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-stats-and-example-gen + inputs: + parameters: + additional_experiments_json: + componentInputParameter: pipelinechannel--additional_experiments + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + encryption_spec_key_name: + componentInputParameter: 
pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + quantiles: + componentInputParameter: pipelinechannel--quantiles + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column_name: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + transformations: + runtimeValue: + constant: '[]' + transformations_path: + componentInputParameter: pipelinechannel--transformations + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column_name: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabular-stats-and-example-gen + inputDefinitions: + parameters: + pipelinechannel--additional_experiments: + parameterType: STRUCT + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--model_display_name: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + parameterType: STRING + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--transformations: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + 
schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. 
+ parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' 
+ isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-3: + executorLabel: exec-feature-attribution-3 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. 
+ outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-3: + executorLabel: exec-model-batch-explanation-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-3: + executorLabel: exec-model-batch-predict-3 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. 
In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. 
If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' 
+ isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-4: + executorLabel: exec-model-batch-predict-4 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-5: + executorLabel: exec-model-batch-predict-5 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-3: + executorLabel: exec-model-evaluation-3 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + 
parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' 
+ isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-3: + executorLabel: exec-model-evaluation-import-3 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. 
+ isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + 
parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-3: + executorLabel: exec-model-upload-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-purge-unused-features: + executorLabel: exec-purge-unused-features + inputDefinitions: + artifacts: + selected_features: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: selected feature names separated by comma. + unpurged_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: metadata generated by example gen. + outputDefinitions: + artifacts: + output_metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-read-input-uri: + executorLabel: exec-read-input-uri + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-read-input-uri-2: + executorLabel: exec-read-input-uri-2 + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-tabular-feature-ranking-and-selection: + executorLabel: exec-tabular-feature-ranking-and-selection + inputDefinitions: + artifacts: + data_source: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + algorithm: + defaultValue: AMI + isOptional: true + parameterType: STRING + binary_classification: + defaultValue: 'false' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key. + + If this is set, then all resources will be encrypted with the provided + + encryption key. data_source(Dataset): The input dataset artifact which + + references csv, BigQuery, or TF Records. target_column_name(str): Target + + column name of the input dataset.' + isOptional: true + parameterType: STRING + location: + description: 'Location for running the feature selection. If not set, + + default to us-central1.' + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'number of features to select by the + + algorithm. If not set, default to 1000.' + isOptional: true + parameterType: NUMBER_INTEGER + prediction_type: + defaultValue: unknown + isOptional: true + parameterType: STRING + project: + description: Project to run feature selection. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + target_column_name: + parameterType: STRING + outputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: the dictionary of feature names and feature ranking values. + selected_features: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A json array of selected feature names. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-tabular-stats-and-example-gen: + executorLabel: exec-tabular-stats-and-example-gen + inputDefinitions: + parameters: + additional_experiments: + defaultValue: '' + isOptional: true + parameterType: STRING + additional_experiments_json: + defaultValue: {} + isOptional: true + parameterType: STRUCT + data_source_bigquery_table_path: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Location for running dataset statistics and example + + generation.' + parameterType: STRING + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_type: + description: 'The prediction type. Supported values: + + "classification", "regression".' + parameterType: STRING + project: + description: 'Project to run dataset statistics and example + + generation.' + parameterType: STRING + quantiles: + defaultValue: [] + isOptional: true + parameterType: LIST + request_type: + defaultValue: COLUMN_STATS_ONLY + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + target_column_name: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Quote escaped JSON string for transformations. Each + + transformation will apply transform function to given input column. And + + the result will be used for training. When creating transformation for + + BigQuery Struct column, the column should be flattened using "." as the + + delimiter.' + parameterType: STRING + transformations_path: + defaultValue: '' + description: 'Path to a GCS file containing JSON + + string for transformations.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column_name: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The instance baseline used to calculate explanations. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + downsampled_test_split_json: + description: The downsampled test split JSON object. + parameterType: LIST + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + test_split_json: + description: The test split JSON object. + parameterType: LIST + comp-write-bp-result-path: + executorLabel: exec-write-bp-result-path + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-write-bp-result-path-2: + executorLabel: exec-write-bp-result-path-2 + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. 
+ outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + 
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-3: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-3: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", 
"{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-stage-1-tuner-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. 
Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-check-if-binary-classification: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _check_if_binary_classification + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _check_if_binary_classification(\n example_gen_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + ) -> str:\n \"\"\"Construct Dataset based on the batch prediction job.\n\ + \n Args:\n example_gen_metadata: metadata generated by example gen.\n\ + \n Returns:\n \"true\" if binary classification, \"false\" otherwise.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(example_gen_metadata, 'r') as f:\n metadata_path = f.read()\n\ + \ metadata = json.loads(metadata_path)\n return str(metadata['objective']\ + \ == 'binary_classification').lower()\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + 
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + 
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-3: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-importer: + importer: + artifactUri: + constant: '' + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", 
"{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", 
"{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-4: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - 
google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-5: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - 
'{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-3: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": 
["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - 
--dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-3: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", 
"{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-3: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-purge-unused-features: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _purge_unused_features + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _purge_unused_features(\n unpurged_metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + \ selected_features: dsl.InputPath('SelectedFeatures'),\n output_metadata:\ + \ dsl.OutputPath('TabularExampleGenMetadata'),\n):\n \"\"\"Purge features\ + \ from metadata if not included in selected features.\n\n Args:\n unpurged_metadata:\ + \ metadata generated by example gen.\n selected_features: selected feature\ + \ names separated by comma.\n output_metadata: purged metadata.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(unpurged_metadata, 'r') as f:\n metadata_path = f.read()\n\ + \ metadata = json.loads(metadata_path)\n\n with open(selected_features,\ + \ 'r') as f:\n selected_features_path = f.read()\n features = json.loads(selected_features_path)\n\ + \n train_spec = metadata['train_spec']\n\n features_set = set(features)\n\ + \n purged_transformation_list = []\n for transformation in train_spec['transformations']:\n\ + \ if 'numeric' in transformation:\n if transformation['numeric']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'categorical' in transformation:\n if transformation['categorical']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'timestamp' in transformation:\n if transformation['timestamp']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'text' in transformation:\n if transformation['text']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_numeric' in transformation:\n if transformation['repeated_numeric']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_categorical' in transformation:\n if transformation['repeated_categorical']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ elif 'repeated_text' in transformation:\n if transformation['repeated_text']['column_name']\ + \ in features_set:\n purged_transformation_list.append(transformation)\n\ + \ else:\n raise ValueError(f'unsupported transformation: {transformation}')\n\ + \n train_spec['transformations'] = purged_transformation_list\n metadata['train_spec']\ + \ = train_spec\n\n with open(output_metadata, 'w') as f:\n f.write(json.dumps(metadata))\n\ + \n" + image: python:3.7 + exec-read-input-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-read-input-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-tabular-feature-ranking-and-selection: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", + "\", \"--binary_classification=", "{{$.inputs.parameters[''binary_classification'']}}", + "\", \"--algorithm=", "{{$.inputs.parameters[''algorithm'']}}", "\", \"--feature_selection_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection/\", + \"--job_name=tabular-feature-selection-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", 
"{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}", + "\", \"--feature_selection_result_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}", + "\", \"--selected_features_path=", "{{$.outputs.artifacts[''selected_features''].uri}}", + "\", \"--parse_json=true\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-tabular-stats-and-example-gen: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": + \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": + \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": + \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": + \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": + ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": + ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": + ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", + \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", + "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", + "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", + "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", + "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", + "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", + "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", + "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", + "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", + \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", + "\", \"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", + "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", + \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", + \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", + "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", + "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", + "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", + "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", + \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", + \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", + "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", + "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-write-bp-result-path: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 + exec-write-bp-result-path-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 +pipelineInfo: + description: The AutoML Tabular pipeline. 
+ name: automl-tabular-feature-selection-pipeline +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--additional_experiments: + componentInputParameter: additional_experiments + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: distill_batch_predict_starting_replica_count + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: 
evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--model_display_name: + componentInputParameter: model_display_name + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + componentInputParameter: stats_and_example_gen_dataflow_machine_type + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + componentInputParameter: stats_and_example_gen_dataflow_max_num_workers + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: 
train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--transformations: + componentInputParameter: transformations + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + additional_experiments: + description: Use this field to config private preview features. + isOptional: true + parameterType: STRUCT + apply_feature_selection_tuning: + defaultValue: false + description: tuning feature selection rate if true. + isOptional: true + parameterType: BOOLEAN + cv_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding stage + + cv trainer worker pool spec.' + isOptional: true + parameterType: LIST + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + disable_early_stopping: + defaultValue: false + description: If disable early stopping. + isOptional: true + parameterType: BOOLEAN + distill_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'The prediction server machine type for + + batch predict component in the model distillation.' + isOptional: true + parameterType: STRING + distill_batch_predict_max_replica_count: + defaultValue: 25.0 + description: 'The max number of prediction server + + for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + distill_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'The initial number of + + prediction server for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. 
If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: number of features to select for training. + isOptional: true + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + optimization_objective: + description: 'For binary classification, "maximize-au-roc", + + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", + or + + "maximize-recall-at-precision". For multi class classification, + + "minimize-log-loss". 
For regression, "minimize-rmse", "minimize-mae", or + + "minimize-rmsle".' + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when optimization_objective + + is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when optimization_objective is + + ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + quantiles: + description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles + + are allowed of values between 0 and 1, exclusive. Represents the quantiles + + to use for that objective. Quantiles must be unique.' + isOptional: true + parameterType: LIST + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + stage_1_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trials for stage 1. + isOptional: true + parameterType: NUMBER_INTEGER + stage_1_tuner_worker_pool_specs_override: + description: 'The dictionary for overriding + + stage 1 tuner worker pool spec.' + isOptional: true + parameterType: LIST + stage_2_num_parallel_trials: + defaultValue: 35.0 + description: Number of parallel trials for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stage_2_num_selected_trials: + defaultValue: 5.0 + description: Number of selected trials for stage 2. + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in + + GB for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stats_and_example_gen_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for + + stats_and_example_gen component.' + isOptional: true + parameterType: STRING + stats_and_example_gen_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow + + workers for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: The stratified_split column name. + isOptional: true + parameterType: STRING + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. 
+ isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transformations: + description: 'The path to a GCS file containing the transformations to + + apply.' + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml new file mode 100644 index 0000000000..c625e042bc --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -0,0 +1,8327 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-v2 +# Description: The AutoML Tabular pipeline v2. 
+# Inputs: +# apply_feature_selection_tuning: bool [Default: False] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_early_stopping: bool [Default: False] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# feature_selection_algorithm: str [Default: 'AMI'] +# feature_transform_engine_dataflow_disk_size_gb: int [Default: 40.0] +# feature_transform_engine_dataflow_machine_type: str [Default: 'n1-standard-16'] +# feature_transform_engine_dataflow_max_num_workers: int [Default: 25.0] +# legacy_transformations_path: str [Default: ''] +# location: str +# max_selected_features: int [Default: 1000.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# num_selected_features: int [Default: 0.0] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact 
+ schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. 
+ isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-2: + executorLabel: exec-bool-identity-2 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-3: + executorLabel: exec-bool-identity-3 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. 
+ isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-calculate-training-parameters-2: + executorLabel: exec-calculate-training-parameters-2 + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trials for stage 1. + parameterType: NUMBER_INTEGER + stage_2_num_parallel_trials: + description: Number of parallel trials for stage 2. + parameterType: NUMBER_INTEGER + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' 
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + - training-configurator-and-validator + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + - training-configurator-and-validator + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + 
componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: check-if-is-eval + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - calculate-training-parameters + inputs: + artifacts: + dataset_stats: + componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats + instance_schema: + 
componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + training_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema + parameters: + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + quantiles: + componentInputParameter: pipelinechannel--quantiles + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts + stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters + stage_2_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: 
NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-split_example_counts: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + 
componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + 
dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + - training-configurator-and-validator-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + - 
training-configurator-and-validator-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator-2 + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + - training-configurator-and-validator-2 + inputs: + artifacts: + feature_ranking: + componentInputArtifact: pipelinechannel--feature-transform-engine-feature_ranking + materialized_eval_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--split-materialized-data-materialized_train_split + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + 
componentInputParameter: pipelinechannel--study_spec_parameters_override + tune_feature_selection_rate: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: check-if-is-eval + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + runtimeValue: + constant: 0.0 + taskInfo: + name: check-if-is-distillation + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + 
pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + training-configurator-and-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator-2 + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + dataset_stats: + componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats + instance_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + training_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-training_schema + parameters: + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + quantiles: + componentInputParameter: pipelinechannel--quantiles + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + componentInputParameter: pipelinechannel--feature-transform-engine-split_example_counts + stage_1_deadline_hours: + 
taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + stage_2_deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator-2 + inputDefinitions: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--split-materialized-data-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-split_example_counts: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + 
pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + 
pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + 
schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: 
pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + 
pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + 
parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + 
pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - split-materialized-data + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-feature_ranking: + taskOutputArtifact: + outputArtifactKey: feature_ranking + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: 
pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + 
pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + 
feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + legacy_transformations_path: + componentInputParameter: pipelinechannel--legacy_transformations_path + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distill: + componentInputParameter: pipelinechannel--run_distillation + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - split-materialized-data + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + taskInfo: + name: merge-materialized-splits + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-is-stage-1-tuning-result-artifact-uri-not-empty + inputDefinitions: + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--enable_probabilistic_inference: + 
parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--legacy_transformations_path: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + 
pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' 
+ isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. 
For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' 
+ isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. 
+ isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. 
For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ \ 2.\n top_k: Number of the most frequent words\n \ in the vocabulary to use for generating dictionary\n \ lookup indices. If not specified, all words in the\n \ vocabulary will be used. Defaults to None.\n \ frequency_threshold: Limit the\n dictionary's vocabulary\ \ only to words whose number of\n occurrences in the input\ \ exceeds frequency_threshold. If\n not specified, all\ \ words in the vocabulary will be\n included. If both top_k\ \ and frequency_threshold are\n specified, a word must\ \ satisfy both conditions to be\n included. Defaults to\ \ None.\n separator: Separator to split input string\n \ \ into tokens. Defaults to ' '.\n missing_token:\ \ Missing token to use when\n no string is included. Defaults\ \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ \ such that elements <\n min_value are assigned min_value, and\ \ elements > max_value are\n assigned max_value.\n Example:\ \ .. code-block:: python { \"transformation\": \"Clip\",\n \ \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ \ input_columns: A list with a single column to\n \ \ perform the n-gram transformation on.\n output_columns:\ \ A list with a single\n output column name, corresponding\ \ to the output of our\n transformation.\n \ \ min_value: Number where all values below\n min_value\ \ are set to min_value. If no min_value is\n provided,\ \ min clipping will not occur. Defaults to None.\n max_value:\ \ Number where all values above\n max_value are set to\ \ max_value If no max_value is\n provided, max clipping\ \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ \ multi-hot encoding on a categorical\n array column.\n \ \ Example: .. code-block:: python { \"transformation\":\n \ \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ \ of classes is determined by the largest number included in\n\ \ the input if it is numeric or the total number of unique\n\ \ values of the input if it is type str. If the input has\n\ \ type str and an element contains separator tokens, the input\n\ \ will be split at separator indices, and each element\ \ of\n the split list will be considered a separate class.\ \ For\n example,\n Input: .. code-block:: python\ \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ ], # Example 3 ]\n Output (with default separator=\"\ \ \"): .. code-block:: python [\n [1, 1], # Example\ \ 0 [1, 1], # Example 1\n [1, 0], # Example\ \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ \ input_columns: A list with a single column to\n perform\ \ the multi-hot-encoding on.\n output_columns: A list with\ \ a single\n output column name, corresponding to the output\ \ of our\n transformation.\n top_k: Number\ \ of the most frequent words\n in the vocabulary to use\ \ for generating dictionary\n lookup indices. If not specified,\ \ all words in the\n vocabulary will be used. Defaults\ \ to None.\n frequency_threshold: Limit the\n \ \ dictionary's vocabulary only to words whose number of\n \ \ occurrences in the input exceeds frequency_threshold. If\n \ \ not specified, all words in the vocabulary will be\n \ \ included. If both top_k and frequency_threshold are\n \ \ specified, a word must satisfy both conditions to be\n\ \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
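+ # Illustrative sketch only, not emitted by the KFP compiler: a transformation
+ # config combining entries documented in this component's transformation
+ # parameter description above might look like the following JSON list (both
+ # example entries are taken from that description):
+ # [ {"transformation": "Categorical", "input_columns": ["feature_1"], "top_k": 10},
+ #   {"transformation": "Clip", "input_columns": ["col1"], "output_columns": ["col1_clipped"],
+ #    "min_value": 1.0, "max_value": 10.0} ]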
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. + outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. 
Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. 
+ + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
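+ # Illustrative values only (hypothetical bucket names, not part of this spec):
+ # a GCS-based run of this batch prediction component would typically pair
+ #   gcs_source_uris: ["gs://example-bucket/instances/*.jsonl"]
+ # with instances_format: jsonl and a gcs_destination_output_uri_prefix such as
+ #   gs://example-bucket/batch-prediction-output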
+ isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. 
The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. 
For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
+ isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' 
+ isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. 
Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. 
Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + 
isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' 
+ isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. 
Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. 
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ data_source_bigquery_table_path:
+ parameterType: STRING
+ data_source_csv_filenames:
+ parameterType: STRING
+ model_display_name:
+ parameterType: STRING
+ comp-split-materialized-data:
+ executorLabel: exec-split-materialized-data
+ inputDefinitions:
+ artifacts:
+ materialized_data:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: 'Materialized dataset output by the Feature
+
+ Transform Engine.'
+ outputDefinitions:
+ artifacts:
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized eval split.
+ materialized_test_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized test split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized train split.
+ comp-string-not-empty:
+ executorLabel: exec-string-not-empty
+ inputDefinitions:
+ parameters:
+ value:
+ description: String value to be checked.
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ Output:
+ parameterType: STRING
+ comp-training-configurator-and-validator:
+ executorLabel: exec-training-configurator-and-validator
+ inputDefinitions:
+ artifacts:
+ dataset_stats:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Dataset stats generated by
+
+ feature transform engine.'
+ instance_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Schema of input data to the tf_model at
+
+ serving time.'
+ training_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ available_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ context_window:
+ defaultValue: -1.0
+ description: The length of the context window.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ enable_probabilistic_inference:
+ defaultValue: false
+ description: 'If probabilistic inference is
+
+ enabled, the model will fit a distribution that captures the uncertainty
+
+ of a prediction. At inference time, the predictive distribution is used
+
+ to make a point prediction that minimizes the optimization objective.
+
+ For example, the mean of a predictive distribution is the point
+
+ prediction that minimizes RMSE loss. If quantiles are specified, then
+
+ the quantiles of the distribution are also returned.'
+ isOptional: true
+ parameterType: BOOLEAN
+ forecast_horizon:
+ defaultValue: -1.0
+ description: The length of the forecast horizon.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ forecasting_model_type:
+ defaultValue: ''
+ description: The model types, e.g. l2l, seq2seq, tft.
+ isOptional: true
+ parameterType: STRING
+ forecasting_transformations:
+ defaultValue: {}
+ description: 'Dict mapping auto and/or type-resolutions to
+
+ feature columns. The supported types are auto, categorical, numeric,
+
+ text, and timestamp.'
+ isOptional: true
+ parameterType: STRUCT
+ group_columns:
+ description: 'A list of time series attribute column
+
+ names that define the time series hierarchy.'
+ isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-training-configurator-and-validator-2: + executorLabel: exec-training-configurator-and-validator-2 + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' 
+ isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", 
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", 
"{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", 
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. 
Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 
1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - 
/main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", 
"{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": 
["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", 
"{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": 
+ \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", 
\"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - 
'{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - 
'{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + 
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": 
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' 
data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": 
["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-training-configurator-and-validator-2: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": 
["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The AutoML Tabular pipeline v2. + name: automl-tabular-v2 +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: apply_feature_selection_tuning + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: 
dataset_level_transformations + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: feature_transform_engine_dataflow_max_num_workers + pipelinechannel--legacy_transformations_path: + componentInputParameter: legacy_transformations_path + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--num_selected_features: + componentInputParameter: num_selected_features + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + 
pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: train_budget_milli_node_hours + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + apply_feature_selection_tuning: + defaultValue: false + description: tuning feature selection rate if true. + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + cv_trainer_worker_pool_specs_override: + description: 'The dictionary for overriding stage + + cv trainer worker pool spec.' 
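The automl-tabular-finalizer task above, with trigger strategy ALL_UPSTREAM_TASKS_COMPLETED and the pipelinechannel--* parameters fanned into exit-handler-1, is the compiled shape of a dsl.ExitHandler block. A minimal sketch under assumed component names (cleanup and train are placeholders, not the real AutoML Tabular components):

from kfp import dsl


@dsl.component
def cleanup(project: str, location: str, root_dir: str):
    print(f'cleaning up {root_dir} in {project}/{location}')


@dsl.component
def train(root_dir: str):
    print(f'training with artifacts under {root_dir}')


@dsl.pipeline(name='exit-handler-sketch')
def pipeline(project: str, location: str, root_dir: str):
    finalizer = cleanup(project=project, location=location, root_dir=root_dir)
    # Everything inside the context manager becomes the exit-handler sub-DAG;
    # the exit task runs once that sub-DAG finishes, regardless of its status.
    with dsl.ExitHandler(exit_task=finalizer):
        train(root_dir=root_dir)

Pipeline parameters referenced inside the handler are what surface as the pipelinechannel--* inputs on the compiled exit-handler component.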
+ isOptional: true + parameterType: LIST + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_early_stopping: + defaultValue: false + description: If disable easly stopping. + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size + + in GB for feature transform engine component.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_transform_engine_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type + + for feature transform engine component.' + isOptional: true + parameterType: STRING + feature_transform_engine_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of + + Dataflow workers for feature transform engine component.' + isOptional: true + parameterType: NUMBER_INTEGER + legacy_transformations_path: + defaultValue: '' + description: Path to train spec transformations json. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features for feature selection, + + defaults to None, in which case all features are used.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: 'For binary classification, "maximize-au-roc", + + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", + or + + "maximize-recall-at-precision". For multi class classification, + + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + + "minimize-rmsle".' + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when optimization_objective + + is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' 
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ optimization_objective_recall_value:
+ defaultValue: -1.0
+ description: 'Required when optimization_objective is
+
+ ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ predefined_split_key:
+ defaultValue: ''
+ description: The predefined_split column name.
+ isOptional: true
+ parameterType: STRING
+ prediction_type:
+ description: 'The type of prediction the model is to produce.
+
+ "classification" or "regression".'
+ parameterType: STRING
+ project:
+ description: The GCP project that runs the pipeline components.
+ parameterType: STRING
+ quantiles:
+ description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles
+
+ are allowed of values between 0 and 1, exclusive. Represents the quantiles
+
+ to use for that objective. Quantiles must be unique.'
+ isOptional: true
+ parameterType: LIST
+ root_dir:
+ description: The root GCS directory for the pipeline components.
+ parameterType: STRING
+ run_distillation:
+ defaultValue: false
+ description: 'Whether the distillation should be applied to the
+
+ training.'
+ isOptional: true
+ parameterType: BOOLEAN
+ run_evaluation:
+ defaultValue: false
+ description: Whether to run evaluation steps during training.
+ isOptional: true
+ parameterType: BOOLEAN
+ run_feature_selection:
+ defaultValue: false
+ description: Whether to apply feature selection or not.
+ isOptional: true
+ parameterType: BOOLEAN
+ stage_1_num_parallel_trials:
+ defaultValue: 35.0
+ description: Number of parallel trials for stage 1.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ stage_1_tuner_worker_pool_specs_override:
+ description: 'The dictionary for overriding
+
+ stage 1 tuner worker pool spec.'
+ isOptional: true
+ parameterType: LIST
+ stage_1_tuning_result_artifact_uri:
+ defaultValue: ''
+ description: 'The stage 1 tuning result artifact GCS
+
+ URI.'
+ isOptional: true
+ parameterType: STRING
+ stage_2_num_parallel_trials:
+ defaultValue: 35.0
+ description: Number of parallel trials for stage 2.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ stage_2_num_selected_trials:
+ defaultValue: 5.0
+ description: Number of selected trials for stage 2.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ stratified_split_key:
+ defaultValue: ''
+ description: The stratified_split column name.
+ isOptional: true
+ parameterType: STRING
+ study_spec_parameters_override:
+ description: The list for overriding study spec.
+ isOptional: true
+ parameterType: LIST
+ target_column:
+ description: The target column name.
+ parameterType: STRING
+ test_fraction:
+ defaultValue: -1.0
+ description: The test fraction.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ tf_auto_transform_features:
+ description: 'List of auto transform features in the
+
+ comma-separated string format.'
+ isOptional: true
+ parameterType: STRUCT
+ tf_custom_transformation_definitions:
+ description: 'TF custom transformation definitions
+
+ in string format.'
+ isOptional: true
+ parameterType: LIST
+ tf_transformations_path:
+ defaultValue: ''
+ description: Path to TF transformation configuration.
+ isOptional: true
+ parameterType: STRING
+ train_budget_milli_node_hours:
+ description: 'The train budget of creating this model,
+
+ expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+ hour.'
+ parameterType: NUMBER_DOUBLE
+ training_fraction:
+ defaultValue: -1.0
+ description: The training fraction.
+ isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json new file mode 100644 index 0000000000..65e64d953d --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 70000, 90000, 110000, 130000, 150000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [4096, 8192, 16384, 32768, 65536] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 700 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + 
"parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json new file mode 100644 index 0000000000..e7346ea9ae --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [4096, 8192, 16384, 32768] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 200, + "max_value": 500 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + 
"double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json new file mode 100644 index 0000000000..90ed01db8f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_large_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [8192, 16384, 32768] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0002, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 400 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 3, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + 
"scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 10.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json new file mode 100644 index 0000000000..b9350f33b6 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [50000, 60000, 70000, 80000, 90000, 100000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 500 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", 
"focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json new file mode 100644 index 0000000000..e7143fae84 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 400 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 4, + "max_value": 10 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": 
"alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json new file mode 100644 index 0000000000..46968c00c8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_medium_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 4096, 8192, 16384] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 100, + "max_value": 300 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 10000, + "max_value": 50000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] 
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json new file mode 100644 index 0000000000..40d2e7f85b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_large_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.00007, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [3, 5, 10] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [5, 10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 300 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.05, + "max_value": 3.2 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 10000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.0000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false", "true"] + } + } +] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json new file mode 100644 index 0000000000..3a75145edf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_medium_search_space.json @@ -0,0 +1,158 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.03 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 200 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 10000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.0625, 0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.2, + "max_value": 0.8 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy", "focal_loss"] + } + }, + { + "parameter_id": "alpha_focal_loss", + "discrete_value_spec": { + "values": [0.1, 0.25, 0.5, 0.75, 0.9, 0.99] + } + }, + { + "parameter_id": "gamma_focal_loss", + "discrete_value_spec": { + "values": [0.0, 0.5, 1.0, 2.0, 3.0, 4.0] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json new file mode 100644 index 0000000000..eb7a4c99f7 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/tabnet_params_small_data_small_search_space.json @@ -0,0 +1,146 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 15000, 20000, 25000, 30000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [512, 1024, 2048, 4096] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.02 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "large_category_dim", + "discrete_value_spec": { + "values": [5] + } + }, + { + "parameter_id": "large_category_thresh", + "discrete_value_spec": { + "values": [10] + } + }, + { + "parameter_id": "feature_dim", + "integer_value_spec": { + "min_value": 50, + "max_value": 200 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "feature_dim_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_decision_steps", + "integer_value_spec": { + "min_value": 2, + "max_value": 6 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "relaxation_factor", + "double_value_spec": { + "min_value": 1.2, + "max_value": 2.5 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "decay_rate", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.999 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "decay_every", + "integer_value_spec": { + "min_value": 1000, + "max_value": 5000 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "sparsity_loss_weight", + "double_value_spec": { + "min_value": 0.000001, + "max_value": 0.001 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + { + "parameter_id": "batch_momentum", + "double_value_spec": { + "min_value": 0.5, + "max_value": 0.95 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "batch_size_ratio", + "discrete_value_spec": { + "values": [0.125, 0.25, 0.5] + } + }, + { + "parameter_id": "num_transformer_layers", + "integer_value_spec": { + "min_value": 2, + "max_value": 4 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "num_transformer_layers_ratio", + "double_value_spec": { + "min_value": 0.3, + "max_value": 0.7 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "class_weight", + "double_value_spec": { + "min_value": 1.0, + "max_value": 100.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "loss_function_type", + "categorical_value_spec": { + "values": ["weighted_cross_entropy"] + } + }, + { + "parameter_id": "yeo_johnson_transform", + "categorical_value_spec": { + "values": ["false"] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json new file mode 100644 index 0000000000..6458b992d0 --- /dev/null +++ 
b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/wide_and_deep_params.json @@ -0,0 +1,132 @@ +[ + { + "parameter_id": "max_steps", + "discrete_value_spec": { + "values": [5000, 10000, 20000, 30000, 40000, 50000] + } + }, + { + "parameter_id": "max_train_secs", + "discrete_value_spec": { + "values": [-1] + } + }, + { + "parameter_id": "learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.0005 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "optimizer_type", + "categorical_value_spec": { + "values": ["adam", "ftrl", "sgd"] + } + }, + { + "parameter_id": "l1_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "l2_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "l2_shrinkage_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "beta_1", + "discrete_value_spec": { + "values": [0.7, 0.8, 0.9] + } + }, + { + "parameter_id": "beta_2", + "discrete_value_spec": { + "values": [0.8, 0.9, 0.999] + } + }, + { + "parameter_id": "hidden_units", + "categorical_value_spec": { + "values": ["30,30,30"] + } + }, + { + "parameter_id": "use_wide", + "categorical_value_spec": { + "values": ["true", "false"] + } + }, + { + "parameter_id": "embed_categories", + "categorical_value_spec": { + "values": ["true", "false"] + } + }, + { + "parameter_id": "dnn_dropout", + "discrete_value_spec": { + "values": [0, 0.1, 0.2] + } + }, + { + "parameter_id": "dnn_learning_rate", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 0.0005 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + { + "parameter_id": "dnn_optimizer_type", + "categorical_value_spec": { + "values": ["adam", "ftrl", "sgd"] + } + }, + { + "parameter_id": "dnn_l1_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_l2_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_l2_shrinkage_regularization_strength", + "discrete_value_spec": { + "values": [0, 0.01, 0.02] + } + }, + { + "parameter_id": "dnn_beta_1", + "discrete_value_spec": { + "values": [0.7, 0.8, 0.9] + } + }, + { + "parameter_id": "dnn_beta_2", + "discrete_value_spec": { + "values": [0.8, 0.9, 0.999] + } + }, + { + "parameter_id": "batch_size", + "discrete_value_spec": { + "values": [1024, 2048, 4096, 8192, 16384] + } + } +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json new file mode 100644 index 0000000000..245a738beb --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/configs/xgboost_params.json @@ -0,0 +1,309 @@ +[{ + "parameter_id": "num_boost_round", + "discrete_value_spec": { + "values": [1, 5, 10, 15, 20] + } +}, { + "parameter_id": "early_stopping_rounds", + "discrete_value_spec": { + "values": [3, 5, 10] + } +}, { + "parameter_id": "base_score", + "discrete_value_spec": { + "values": [0.5] + } +}, { + "parameter_id": "booster", + "categorical_value_spec": { + "values": ["gbtree", "gblinear", "dart"] + }, + "conditional_parameter_specs": [{ + "parameter_spec": { + "parameter_id": "eta", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + 
"scale_type": "UNIT_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "gamma", + "discrete_value_spec": { + "values": [0, 10, 50, 100, 500, 1000] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_depth", + "integer_value_spec": { + "min_value": 6, + "max_value": 10 + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "min_child_weight", + "double_value_spec": { + "min_value": 0.0, + "max_value": 10.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_delta_step", + "discrete_value_spec": { + "values": [0.0, 1.0, 3.0, 5.0, 7.0, 9.0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "subsample", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bytree", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bylevel", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "colsample_bynode", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LINEAR_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "lambda", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_REVERSE_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart", "gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "alpha", + "double_value_spec": { + "min_value": 0.0001, + "max_value": 1.0 + }, + "scale_type": "UNIT_LOG_SCALE" + }, + "parent_categorical_values": { + "values": ["gbtree", "dart", "gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "tree_method", + "categorical_value_spec": { + "values": ["auto"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "scale_pos_weight", + "discrete_value_spec": { + "values": [1.0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "refresh_leaf", + "discrete_value_spec": { + "values": [1] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "process_type", + "categorical_value_spec": { + "values": ["default"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "grow_policy", + "categorical_value_spec": { + "values": ["depthwise"] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "sampling_method", + "categorical_value_spec": { + "values": ["uniform"] + } + }, + 
"parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "sample_type", + "categorical_value_spec": { + "values": ["uniform"] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "normalize_type", + "categorical_value_spec": { + "values": ["tree"] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "rate_drop", + "discrete_value_spec": { + "values": [0.0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "one_drop", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "skip_drop", + "discrete_value_spec": { + "values": [0.0] + } + }, + "parent_categorical_values": { + "values": ["dart"] + } + }, { + "parameter_spec": { + "parameter_id": "num_parallel_tree", + "discrete_value_spec": { + "values": [1] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "feature_selector", + "categorical_value_spec": { + "values": ["cyclic"] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "top_k", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["gblinear"] + } + }, { + "parameter_spec": { + "parameter_id": "max_leaves", + "discrete_value_spec": { + "values": [0] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }, { + "parameter_spec": { + "parameter_id": "max_bin", + "discrete_value_spec": { + "values": [256] + } + }, + "parent_categorical_values": { + "values": ["gbtree", "dart"] + } + }] +}] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py new file mode 100644 index 0000000000..c1f753bd03 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -0,0 +1,179 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Feature Ranking and Selection component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Input +from kfp.dsl import Output + + +# pylint: disable=dangerous-default-value,g-bare-generic,g-doc-args,unused-argument +@dsl.container_component +def tabular_feature_ranking_and_selection( + project: str, + location: str, + root_dir: str, + data_source: Input[Dataset], + target_column_name: str, + feature_ranking: Output[Artifact], + selected_features: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', + algorithm: Optional[str] = 'AMI', + prediction_type: Optional[str] = 'unknown', + binary_classification: Optional[str] = 'false', + max_selected_features: Optional[int] = 1000, +): + # fmt: off + """Launches a feature selection task to pick top features. + + Args: + project: Project to run feature selection. + location: Location for running the feature selection. If not set, + default to us-central1. + root_dir: The Cloud Storage location to store the output. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More + details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + If this is set, then all resources will be encrypted with the provided + encryption key. data_source(Dataset): The input dataset artifact which + references csv, BigQuery, or TF Records. target_column_name(str): Target + column name of the input dataset. + max_selected_features: number of features to select by the + algorithm. If not set, default to 1000. + + Returns: + feature_ranking: the dictionary of feature names and feature ranking values. + selected_features: A json array of selected feature names. + gcp_resources: GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["feature_selection", "--data_source=', + data_source.uri, + '", "--target_column=', + target_column_name, + '", "--prediction_type=', + prediction_type, + '", "--binary_classification=', + binary_classification, + '", "--algorithm=', + algorithm, + '", "--feature_selection_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection/",' + f' "--job_name=tabular-feature-selection-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", "--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--max_selected_features=', + max_selected_features, + '", "--feature_selection_result_path=', + feature_ranking.uri, + '", "--selected_features_path=', + selected_features.uri, + '", "--parse_json=true"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py new file mode 100644 index 0000000000..4f93bbf285 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -0,0 +1,976 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Feature Transform Engine component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Output + + +@dsl.container_component +def feature_transform_engine( + root_dir: str, + project: str, + location: str, + dataset_stats: Output[Artifact], + materialized_data: Output[Dataset], + transform_output: Output[Artifact], + split_example_counts: dsl.OutputPath(str), + instance_schema: Output[Artifact], + training_schema: Output[Artifact], + bigquery_train_split_uri: dsl.OutputPath(str), + bigquery_validation_split_uri: dsl.OutputPath(str), + bigquery_test_split_uri: dsl.OutputPath(str), + bigquery_downsampled_test_split_uri: dsl.OutputPath(str), + feature_ranking: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + dataset_level_custom_transformation_definitions: Optional[list] = [], + dataset_level_transformations: Optional[list] = [], + forecasting_time_column: Optional[str] = '', + forecasting_time_series_identifier_column: Optional[str] = '', + forecasting_time_series_attribute_columns: Optional[list] = [], + forecasting_unavailable_at_forecast_columns: Optional[list] = [], + forecasting_available_at_forecast_columns: Optional[list] = [], + forecasting_forecast_horizon: Optional[int] = -1, + forecasting_context_window: Optional[int] = -1, + forecasting_predefined_window_column: Optional[str] = '', + forecasting_window_stride_length: Optional[int] = -1, + forecasting_window_max_count: Optional[int] = -1, + forecasting_holiday_regions: Optional[list] = [], + forecasting_apply_windowing: Optional[bool] = True, + predefined_split_key: Optional[str] = '', + stratified_split_key: Optional[str] = '', + timestamp_split_key: Optional[str] = '', + training_fraction: Optional[float] = -1, + validation_fraction: Optional[float] = -1, + test_fraction: Optional[float] = -1, + tf_transform_execution_engine: Optional[str] = 'dataflow', + tf_auto_transform_features: Optional[dict] = {}, + tf_custom_transformation_definitions: Optional[list] = [], + tf_transformations_path: Optional[str] = '', + legacy_transformations_path: Optional[str] = '', + target_column: Optional[str] = '', + weight_column: Optional[str] = '', + prediction_type: Optional[str] = '', + model_type: Optional[str] = None, + multimodal_image_columns: Optional[list] = [], + multimodal_text_columns: Optional[list] = [], + run_distill: Optional[bool] = False, + run_feature_selection: Optional[bool] = False, + feature_selection_algorithm: Optional[str] = 'AMI', + materialized_examples_format: Optional[str] = 'tfrecords_gzip', + max_selected_features: Optional[int] = 1000, + data_source_csv_filenames: Optional[str] = '', + data_source_bigquery_table_path: Optional[str] = '', + bigquery_staging_full_dataset_id: Optional[str] = '', + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + 
encryption_spec_key_name: Optional[str] = '', + autodetect_csv_schema: Optional[bool] = False, + group_columns: Optional[list] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +): + # fmt: off + """Transforms raw data to engineered features. + + FTE performs dataset level transformations, data splitting, data statistic + generation, and TensorFlow-based row level transformations on the input + dataset based on the provided transformation configuration. + + Args: + root_dir: The Cloud Storage location to store the output. + project: Project to run feature transform engine. + location: Location for the created GCP services. + dataset_level_custom_transformation_definitions: List of dataset-level custom transformation definitions. Custom, + bring-your-own dataset-level transform functions, where users can define + and import their own transform function and use it with FTE's built-in + transformations. Using custom transformations is an experimental feature + and it is currently not supported during batch prediction. + Example: .. code-block:: python [ { "transformation": "ConcatCols", + "module_path": "/path/to/custom_transform_fn_dlt.py", + "function_name": "concat_cols" } ] Using custom transform function + together with FTE's built-in transformations: .. code-block:: + python [ { "transformation": "Join", "right_table_uri": + "bq://test-project.dataset_test.table", "join_keys": + [["join_key_col", "join_key_col"]] },{ "transformation": + "ConcatCols", "cols": ["feature_1", "feature_2"], "output_col": + "feature_1_2" } ] + dataset_level_transformations: List of dataset-level + transformations. + Example: .. code-block:: python [ { "transformation": "Join", + "right_table_uri": "bq://test-project.dataset_test.table", + "join_keys": [["join_key_col", "join_key_col"]] }, ... ] Additional + information about FTE's currently supported built-in + transformations: + Join: Joins features from right_table_uri. For each join key, the + left table keys will be included and the right table keys will + be dropped. + Example: .. code-block:: python { "transformation": "Join", + "right_table_uri": "bq://test-project.dataset_test.table", + "join_keys": [["join_key_col", "join_key_col"]] } + Arguments: + right_table_uri: Right table BigQuery uri to join + with input_full_table_id. + join_keys: Features to join on. For each + nested list, the first element is a left table column + and the second is its corresponding right table column. + TimeAggregate: Creates a new feature composed of values of an + existing feature from a fixed time period ago or in the future. + Ex: A feature for sales by store 1 year ago. + Example: .. code-block:: python { "transformation": + "TimeAggregate", "time_difference": 40, + "time_difference_units": "DAY", + "time_series_identifier_columns": ["store_id"], + "time_column": "time_col", "time_difference_target_column": + "target_col", "output_column": "output_col" } + Arguments: + time_difference: Number of time_difference_units to + look back or into the future on our + time_difference_target_column. + time_difference_units: Units of time_difference to + look back or into the future on our + time_difference_target_column. Must be one of * 'DAY' * + 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * + 'YEAR' + time_series_identifier_columns: Names of the + time series identifier columns. + time_column: Name of the time column. 
+ time_difference_target_column: Column we wish to get + the value of time_difference time_difference_units in + the past or future. + output_column: Name of our new time aggregate + feature. + is_future: Whether we wish to look + forward in time. Defaults to False. + PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum: + Performs a partition by reduce operation (one of max, + min, avg, or sum) with a fixed historic time period. Ex: + Getting avg sales (the reduce column) for each store + (partition_by_column) over the previous 5 days + (time_column, time_ago_units, and time_ago). + Example: .. code-block:: python { "transformation": + "PartitionByMax", "reduce_column": "sell_price", + "partition_by_columns": ["store_id", "state_id"], + "time_column": "date", "time_ago": 1, "time_ago_units": + "WEEK", "output_column": "partition_by_reduce_max_output" } + Arguments: + reduce_column: Column to apply the reduce operation + on. Reduce operations include the + following: Max, Min, Avg, Sum. + partition_by_columns: List of columns to + partition by. + time_column: Time column for the partition by + operation's window function. + time_ago: Number of time_ago_units to look back on + our target_column, starting from time_column + (inclusive). + time_ago_units: Units of time_ago to look back on + our target_column. Must be one of * 'DAY' * 'WEEK' + output_column: Name of our output feature. + forecasting_time_column: Forecasting time column. + forecasting_time_series_identifier_column: Forecasting + time series identifier column. + forecasting_time_series_attribute_columns: Forecasting + time series attribute columns. + forecasting_unavailable_at_forecast_columns: Forecasting + unavailable at forecast columns. + forecasting_available_at_forecast_columns: Forecasting + available at forecast columns. + forecasting_forecast_horizon: Forecasting horizon. + forecasting_context_window: Forecasting context window. + forecasting_predefined_window_column: Forecasting predefined window column. + forecasting_window_stride_length: Forecasting window stride length. + forecasting_window_max_count: Forecasting window max count. + forecasting_holiday_regions: The geographical region based on which the + holiday effect is applied in modeling by adding holiday categorical + array feature that include all holidays matching the date. This option + only allowed when data granularity is day. By default, holiday effect + modeling is disabled. To turn it on, specify the holiday region using + this option. + Top level: * 'GLOBAL' + Second level: continental regions: * 'NA': North America + * 'JAPAC': Japan and Asia Pacific + * 'EMEA': Europe, the Middle East and Africa + * 'LAC': Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. + Valid regions: * 'GLOBAL' * 'NA' * 'JAPAC' * 'EMEA' * 'LAC' * 'AE' + * 'AR' * 'AT' * 'AU' * 'BE' * 'BR' * 'CA' * 'CH' * 'CL' * 'CN' * 'CO' + * 'CZ' * 'DE' * 'DK' * 'DZ' * 'EC' * 'EE' * 'EG' * 'ES' * 'FI' * 'FR' + * 'GB' * 'GR' * 'HK' * 'HU' * 'ID' * 'IE' * 'IL' * 'IN' * 'IR' * 'IT' + * 'JP' * 'KR' * 'LV' * 'MA' * 'MX' * 'MY' * 'NG' * 'NL' * 'NO' * 'NZ' + * 'PE' * 'PH' * 'PK' * 'PL' * 'PT' * 'RO' * 'RS' * 'RU' * 'SA' * 'SE' + * 'SG' * 'SI' * 'SK' * 'TH' * 'TR' * 'TW' * 'UA' * 'US' * 'VE' * 'VN' + * 'ZA' + forecasting_apply_windowing: Whether to apply window strategy. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + timestamp_split_key: Timestamp split key. 
+ training_fraction: Fraction of input data for training. + validation_fraction: Fraction of input data for validation. + test_fraction: Fraction of input data for testing. + tf_transform_execution_engine: Execution engine to perform + row-level TF transformations. Can be one of: "dataflow" (by default) or + "bigquery". Using "bigquery" as the execution engine is experimental and + is for allowlisted customers only. In addition, executing on "bigquery" + only supports auto transformations (i.e., specified by + tf_auto_transform_features) and will raise an error when + tf_custom_transformation_definitions or tf_transformations_path is set. + tf_auto_transform_features: Dict mapping auto and/or type-resolutions to + TF transform features. FTE will automatically configure a set of + built-in transformations for each feature based on its data statistics. + If users do not want auto type resolution, but want the set of + transformations for a given type to be automatically generated, they + may specify pre-resolved transformation types. The following type hint + dict keys are supported: * 'auto' * 'categorical' * 'numeric' * 'text' + * 'timestamp' + Example: .. code-block:: python { "auto": ["feature1"], + "categorical": ["feature2", "feature3"], } Note that the target and + weight column may not be included as an auto transformation unless + users are running forecasting. + tf_custom_transformation_definitions: List of + TensorFlow-based custom transformation definitions. Custom, + bring-your-own transform functions, where users can define and import + their own transform function and use it with FTE's built-in + transformations. + Example: .. code-block:: python [ { "transformation": "PlusOne", + "module_path": "gs://bucket/custom_transform_fn.py", + "function_name": "plus_one_transform" }, { "transformation": + "MultiplyTwo", "module_path": "gs://bucket/custom_transform_fn.py", + "function_name": "multiply_two_transform" } ] Using custom + transform function together with FTE's built-in transformations: .. + code-block:: python [ { "transformation": "CastToFloat", + "input_columns": ["feature_1"], "output_columns": ["feature_1"] },{ + "transformation": "PlusOne", "input_columns": ["feature_1"], + "output_columns": ["feature_1_plused_one"] },{ "transformation": + "MultiplyTwo", "input_columns": ["feature_1"], "output_columns": + ["feature_1_multiplied_two"] } ] + tf_transformations_path: Path to TensorFlow-based + transformation configuration. Path to a JSON file used to specify + FTE's TF transformation configurations. In the following, we provide + some sample transform configurations to demonstrate FTE's capabilities. + All transformations on input columns are explicitly specified with FTE's + built-in transformations. Chaining of multiple transformations on a + single column is also supported. For example: .. code-block:: python [ + { "transformation": "ZScale", "input_columns": ["feature_1"] }, { + "transformation": "ZScale", "input_columns": ["feature_2"] } ] + Additional information about FTE's currently supported built-in + transformations: + Datetime: Extracts datetime features from a column containing + timestamp strings. + Example: .. code-block:: python { "transformation": + "Datetime", "input_columns": ["feature_1"], "time_format": + "%Y-%m-%d" } + Arguments: + input_columns: A list with a single column to + perform the datetime transformation on. + output_columns: Names of output + columns, one for each datetime_features element. + time_format: Datetime format string. 
Time format is + a combination of Date + Time Delimiter (optional) + Time + (optional) directives. Valid date directives are as + follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # + 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' # + 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' # + 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # + 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' # + 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y' + # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # + 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' # + 11302018 * '%Y%m%d' # 20181130 Valid time delimiters + are as follows * 'T' * ' ' Valid time directives are as + follows * '%H:%M' # 23:59 * '%H:%M:%S' # + 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * + '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 * + '%H:%M:%S%z', # 23:59:58+0000 + datetime_features: List of datetime + features to be extract. Each entry must be one of * + 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR' + * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * + 'SECOND' Defaults to ['YEAR', 'MONTH', 'DAY', + 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR'] + Log: Performs the natural log on a numeric column. + Example: .. code-block:: python { "transformation": "Log", + "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the log transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + ZScale: Performs Z-scale normalization on a numeric column. + Example: .. code-block:: python { "transformation": + "ZScale", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the z-scale transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + Vocabulary: Converts strings to integers, where each unique string + gets a unique integer representation. + Example: .. code-block:: python { "transformation": + "Vocabulary", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to + perform the vocabulary transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. Defaults to None. + frequency_threshold: Limit the vocabulary + only to words whose number of occurrences in the input + exceeds frequency_threshold. If not specified, all words + in the vocabulary will be included. If both top_k and + frequency_threshold are specified, a word must satisfy + both conditions to be included. Defaults to None. + Categorical: Transforms categorical columns to integer columns. + Example: .. code-block:: python { "transformation": + "Categorical", "input_columns": ["feature_1"], "top_k": 10 } + Arguments: + input_columns: A list with a single column to + perform the categorical transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. + frequency_threshold: Limit the vocabulary + only to words whose number of occurrences in the input + exceeds frequency_threshold. 
If not specified, all words + in the vocabulary will be included. If both top_k and + frequency_threshold are specified, a word must satisfy + both conditions to be included. + Reduce: Given a column where each entry is a numeric array, + reduces arrays according to our reduce_mode. + Example: .. code-block:: python { "transformation": + "Reduce", "input_columns": ["feature_1"], "reduce_mode": + "MEAN", "output_columns": ["feature_1_mean"] } + Arguments: + input_columns: A list with a single column to + perform the reduce transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + reduce_mode: One of * 'MAX' * 'MIN' * + 'MEAN' * 'LAST_K' Defaults to 'MEAN'. + last_k: The number of last k elements when + 'LAST_K' reduce mode is used. Defaults to 1. + SplitString: Given a column of strings, splits strings into token + arrays. + Example: .. code-block:: python { "transformation": + "SplitString", "input_columns": ["feature_1"], "separator": + "$" } + Arguments: + input_columns: A list with a single column to + perform the split string transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + separator: Separator to split input string + into tokens. Defaults to ' '. + missing_token: Missing token to use when + no string is included. Defaults to ' _MISSING_ '. + NGram: Given a column of strings, splits strings into token arrays + where each token is an integer. + Example: .. code-block:: python { "transformation": "NGram", + "input_columns": ["feature_1"], "min_ngram_size": 1, + "max_ngram_size": 2, "separator": " " } + Arguments: + input_columns: A list with a single column to + perform the n-gram transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + min_ngram_size: Minimum n-gram size. Must + be a positive number and <= max_ngram_size. Defaults to + 1. + max_ngram_size: Maximum n-gram size. Must + be a positive number and >= min_ngram_size. Defaults to + 2. + top_k: Number of the most frequent words + in the vocabulary to use for generating dictionary + lookup indices. If not specified, all words in the + vocabulary will be used. Defaults to None. + frequency_threshold: Limit the + dictionary's vocabulary only to words whose number of + occurrences in the input exceeds frequency_threshold. If + not specified, all words in the vocabulary will be + included. If both top_k and frequency_threshold are + specified, a word must satisfy both conditions to be + included. Defaults to None. + separator: Separator to split input string + into tokens. Defaults to ' '. + missing_token: Missing token to use when + no string is included. Defaults to ' _MISSING_ '. + Clip: Given a numeric column, clips elements such that elements < + min_value are assigned min_value, and elements > max_value are + assigned max_value. + Example: .. code-block:: python { "transformation": "Clip", + "input_columns": ["col1"], "output_columns": + ["col1_clipped"], "min_value": 1., "max_value": 10., } + Arguments: + input_columns: A list with a single column to + perform the n-gram transformation on. + output_columns: A list with a single + output column name, corresponding to the output of our + transformation. + min_value: Number where all values below + min_value are set to min_value. If no min_value is + provided, min clipping will not occur. Defaults to None. 
+              max_value: Number where all values above
+                  max_value are set to max_value. If no max_value is
+                  provided, max clipping will not occur. Defaults to None.
+          MultiHotEncoding: Performs multi-hot encoding on a categorical
+              array column.
+              Example: .. code-block:: python { "transformation":
+                  "MultiHotEncoding", "input_columns": ["col1"], } The number
+                  of classes is determined by the largest number included in
+                  the input if it is numeric or the total number of unique
+                  values of the input if it is type str. If the input has
+                  type str and an element contains separator tokens, the input
+                  will be split at separator indices, and each element of
+                  the split list will be considered a separate class. For
+                  example,
+              Input: .. code-block:: python [ ["foo bar"], # Example
+                  0 ["foo", "bar"], # Example 1 ["foo"], # Example
+                  2 ["bar"], # Example 3 ]
+              Output (with default separator=" "): .. code-block:: python [
+                  [1, 1], # Example 0 [1, 1], # Example 1
+                  [1, 0], # Example 2 [0, 1], # Example 3 ]
+              Arguments:
+                  input_columns: A list with a single column to
+                      perform the multi-hot-encoding on.
+                  output_columns: A list with a single
+                      output column name, corresponding to the output of our
+                      transformation.
+                  top_k: Number of the most frequent words
+                      in the vocabulary to use for generating dictionary
+                      lookup indices. If not specified, all words in the
+                      vocabulary will be used. Defaults to None.
+                  frequency_threshold: Limit the
+                      dictionary's vocabulary only to words whose number of
+                      occurrences in the input exceeds frequency_threshold. If
+                      not specified, all words in the vocabulary will be
+                      included. If both top_k and frequency_threshold are
+                      specified, a word must satisfy both conditions to be
+                      included. Defaults to None.
+                  separator: Separator to split input string
+                      into tokens. Defaults to ' '.
+          MaxAbsScale: Performs maximum absolute scaling on a numeric
+              column.
+              Example: .. code-block:: python { "transformation":
+                  "MaxAbsScale", "input_columns": ["col1"], "output_columns":
+                  ["col1_max_abs_scaled"] }
+              Arguments:
+                  input_columns: A list with a single column to
+                      perform max-abs-scale on.
+                  output_columns: A list with a single
+                      output column name, corresponding to the output of our
+                      transformation.
+          Custom: Transformations defined in
+              tf_custom_transformation_definitions are included here in the
+              TensorFlow-based transformation configuration. For example,
+              given the following tf_custom_transformation_definitions: ..
+              code-block:: python [ { "transformation": "PlusX",
+              "module_path": "gs://bucket/custom_transform_fn.py",
+              "function_name": "plus_one_transform" } ] We can include the
+              following transformation: .. code-block:: python {
+              "transformation": "PlusX", "input_columns": ["col1"],
+              "output_columns": ["col1_max_abs_scaled"], "x": 5 } Note that
+              input_columns must still be included in our arguments and
+              output_columns is optional. All other arguments are those
+              defined in custom_transform_fn.py, which includes `"x"` in this
+              case. See tf_custom_transformation_definitions above.
+      legacy_transformations_path (Optional[str]): Deprecated. Prefer
+          tf_auto_transform_features. Path to a GCS file containing JSON
+          string for legacy style transformations. Note that
+          legacy_transformations_path and tf_auto_transform_features
+          cannot both be specified.
+      target_column: Target column of input data.
+      weight_column: Weight column of input data.
+      prediction_type: Model prediction type. One of
+          "classification", "regression", "time_series".
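[Editor's note: pulling a few of the built-in transformations documented above together, here is a hedged sketch of the list that would be serialized to JSON and referenced via tf_transformations_path. The transformation names and argument keys come from the docstring above; the column names and clip bounds are illustrative only, and the chaining of Clip into ZScale is just one possible layout.]

    import json

    # Sketch of a chained TF transformation config. In practice the resulting
    # JSON file would live on GCS and be passed as tf_transformations_path.
    tf_transformations = [
        # Extract calendar features from a timestamp-string column.
        {"transformation": "Datetime", "input_columns": ["order_date"],
         "time_format": "%Y-%m-%d"},
        # Clip an outlier-prone numeric column, then z-scale the clipped output.
        {"transformation": "Clip", "input_columns": ["price"],
         "output_columns": ["price_clipped"], "min_value": 0.0, "max_value": 1000.0},
        {"transformation": "ZScale", "input_columns": ["price_clipped"]},
    ]

    with open("transform_config.json", "w") as f:
        json.dump(tf_transformations, f)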
+ run_distill: Whether the distillation should be applied + to the training. + run_feature_selection: Whether the feature selection + should be applied to the dataset. + feature_selection_algorithm: The algorithm of feature + selection. One of "AMI", "CMIM", "JMIM", "MRMR", default to be "AMI". + The algorithms available are: AMI(Adjusted Mutual Information): + Reference: + https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html + Arrays are not yet supported in this algorithm. CMIM(Conditional + Mutual Information Maximization): Reference paper: Mohamed + Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using + Joint Mutual Information Maximisation,” Expert Systems with + Applications, vol. 42, issue 22, 1 December 2015, Pages + 8520-8532. JMIM(Joint Mutual Information Maximization): Reference + paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature + selection using Joint Mutual Information Maximisation,” Expert + Systems with Applications, vol. 42, issue 22, 1 December 2015, + Pages 8520-8532. MRMR(MIQ Minimum-redundancy + Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long, + and Chris Ding. "Feature selection based on mutual information + criteria of max-dependency, max-relevance, and min-redundancy." + IEEE Transactions on pattern analysis and machine intelligence + 27, no. + 8: 1226-1238. + materialized_examples_format: The format to use for the + materialized examples. Should be either 'tfrecords_gzip' (default) or + 'parquet'. + max_selected_features: Maximum number of features to + select. If specified, the transform config will be purged by only using + the selected features that ranked top in the feature ranking, which has + the ranking value for all supported features. If the number of input + features is smaller than max_selected_features specified, we will still + run the feature selection process and generate the feature ranking, no + features will be excluded. The value will be set to 1000 by default if + run_feature_selection is enabled. + data_source_csv_filenames: CSV input data source to run + feature transform on. + data_source_bigquery_table_path: BigQuery input data + source to run feature transform on. + bigquery_staging_full_dataset_id: Dataset in + "projectId.datasetId" format for storing intermediate-FTE BigQuery + tables. If the specified dataset does not exist in BigQuery, FTE will + create the dataset. If no bigquery_staging_full_dataset_id is specified, + all intermediate tables will be stored in a dataset created under the + provided project in the input data source's location during FTE + execution called + "vertex_feature_transform_engine_staging_{location.replace('-', '_')}". + All tables generated by FTE will have a 30 day TTL. + model_type: Model type, which we wish to engineer features + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or + tide. Defaults to the empty value, `None`. + multimodal_image_columns: List of multimodal image + columns. Defaults to an empty list. + multimodal_text_columns: List of multimodal text + columns. Defaults to an empty list. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. 
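[Editor's note: the "AMI" feature selection option above is defined in terms of scikit-learn's adjusted mutual information score. The standalone snippet below only illustrates how that score behaves on a synthetic binary target; it is not the component's actual feature-selection implementation.]

    import numpy as np
    from sklearn.metrics import adjusted_mutual_info_score

    rng = np.random.default_rng(seed=0)
    target = rng.integers(0, 2, size=1000)
    # A feature that mostly tracks the target (10% of labels flipped), and pure noise.
    informative = np.where(rng.random(1000) < 0.1, 1 - target, target)
    noise = rng.integers(0, 2, size=1000)

    print(adjusted_mutual_info_score(target, informative))  # well above 0
    print(adjusted_mutual_info_score(target, noise))        # approximately 0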
+ dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + Dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + autodetect_csv_schema: If True, infers the column types + when importing CSVs into BigQuery. + + Returns: + dataset_stats: The stats of the dataset. + materialized_data: The materialized dataset. + transform_output: The transform output artifact. + split_example_counts: JSON string of data split example counts for train, + validate, and test splits. + bigquery_train_split_uri: BigQuery URI for the train split to pass to the + batch prediction component during distillation. + bigquery_validation_split_uri: BigQuery URI for the validation split to + pass to the batch prediction component during distillation. + bigquery_test_split_uri: BigQuery URI for the test split to pass to the + batch prediction component during evaluation. + bigquery_downsampled_test_split_uri: BigQuery URI for the downsampled test + split to pass to the batch prediction component during batch explain. + instance_schema_path: Schema of input data to the tf_model at serving + time. + training_schema_path: Schema of input data to the tf_model at training + time. + feature_ranking: The ranking of features, all features supported in the + dataset will be included. For "AMI" algorithm, array features won't be + available in the ranking as arrays are not supported yet. + gcp_resources: GCP resources created by this component. For more details, + see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + group_columns: A list of time series attribute column names that define + the time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over + time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated + over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions + aggregated over both the horizon and time series in the same hierarchy + group. 
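[Editor's note: as a small illustration of consuming one of the outputs listed above, here is a hypothetical downstream component that parses the split_example_counts JSON string. The flat {split_name: count} layout is an assumption of this sketch, not a documented contract, and the component is not part of this change.]

    from kfp import dsl

    @dsl.component
    def log_split_counts(split_example_counts: str) -> int:
        """Parses the JSON split-count string and returns the total example count."""
        import json  # imported inside the function so it is available at component runtime

        counts = json.loads(split_example_counts)  # assumed flat {split_name: count} dict
        total = sum(int(v) for v in counts.values())
        print(f'split example counts: {counts} (total: {total})')
        return total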
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + command=[], + args=[ + 'feature_transform_engine', + dsl.ConcatPlaceholder(items=['--project=', project]), + dsl.ConcatPlaceholder(items=['--location=', location]), + dsl.ConcatPlaceholder( + items=[ + '--dataset_level_custom_transformation_definitions=', + dataset_level_custom_transformation_definitions, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataset_level_transformations=', + dataset_level_transformations, + ] + ), + dsl.ConcatPlaceholder( + items=['--forecasting_time_column=', forecasting_time_column] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_time_series_identifier_column=', + forecasting_time_series_identifier_column, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_time_series_attribute_columns=', + forecasting_time_series_attribute_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_unavailable_at_forecast_columns=', + forecasting_unavailable_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_available_at_forecast_columns=', + forecasting_available_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_forecast_horizon=', + forecasting_forecast_horizon, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_context_window=', + forecasting_context_window, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_predefined_window_column=', + forecasting_predefined_window_column, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_window_stride_length=', + forecasting_window_stride_length, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_window_max_count=', + forecasting_window_max_count, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_holiday_regions=', + forecasting_holiday_regions, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_apply_windowing=', + forecasting_apply_windowing, + ] + ), + dsl.ConcatPlaceholder( + items=['--predefined_split_key=', predefined_split_key] + ), + dsl.ConcatPlaceholder( + items=['--stratified_split_key=', stratified_split_key] + ), + dsl.ConcatPlaceholder( + items=['--timestamp_split_key=', timestamp_split_key] + ), + dsl.ConcatPlaceholder( + items=['--training_fraction=', training_fraction] + ), + dsl.ConcatPlaceholder( + items=['--validation_fraction=', validation_fraction] + ), + dsl.ConcatPlaceholder(items=['--test_fraction=', test_fraction]), + dsl.ConcatPlaceholder( + items=[ + '--tf_transform_execution_engine=', + tf_transform_execution_engine, + ] + ), + dsl.IfPresentPlaceholder( + input_name='tf_auto_transform_features', + then=dsl.ConcatPlaceholder( + items=[ + '--tf_auto_transform_features=', + tf_auto_transform_features, + ] + ), + ), + dsl.ConcatPlaceholder( + items=[ + '--tf_custom_transformation_definitions=', + tf_custom_transformation_definitions, + ] + ), + dsl.ConcatPlaceholder( + items=['--tf_transformations_path=', tf_transformations_path] + ), + dsl.ConcatPlaceholder( + items=[ + '--legacy_transformations_path=', + legacy_transformations_path, + ] + ), + dsl.ConcatPlaceholder( + items=['--data_source_csv_filenames=', data_source_csv_filenames] + ), + dsl.ConcatPlaceholder( + items=[ + '--data_source_bigquery_table_path=', + data_source_bigquery_table_path, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_staging_full_dataset_id=', + bigquery_staging_full_dataset_id, + ] + ), + 
dsl.ConcatPlaceholder(items=['--target_column=', target_column]), + dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), + dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), + dsl.IfPresentPlaceholder( + input_name='model_type', + then=dsl.ConcatPlaceholder(items=['--model_type=', model_type]), + ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_image_columns=', + multimodal_image_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_text_columns=', + multimodal_text_columns, + ] + ), + dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), + dsl.ConcatPlaceholder( + items=['--run_feature_selection=', run_feature_selection] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_examples_format=', + materialized_examples_format, + ] + ), + dsl.ConcatPlaceholder( + items=['--max_selected_features=', max_selected_features] + ), + dsl.ConcatPlaceholder( + items=[ + '--feature_selection_staging_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/feature_selection_staging_dir', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--feature_selection_algorithm=', + feature_selection_algorithm, + ] + ), + dsl.ConcatPlaceholder( + items=['--feature_ranking_path=', feature_ranking.uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.txt', + ] + ), + dsl.ConcatPlaceholder( + items=['--stats_result_path=', dataset_stats.uri] + ), + dsl.ConcatPlaceholder( + items=['--transform_output_artifact_path=', transform_output.uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--transform_output_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_examples_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--export_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/export', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized_data', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_artifact_path=', + materialized_data.uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_train_split_uri_path=', + bigquery_train_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_validation_split_uri_path=', + bigquery_validation_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=['--bigquery_test_split_uri_path=', bigquery_test_split_uri] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_downsampled_test_split_uri_path=', + bigquery_downsampled_test_split_uri, + ] + ), + dsl.ConcatPlaceholder( + items=['--split_example_counts_path=', split_example_counts] + ), + dsl.ConcatPlaceholder( + items=['--instance_schema_path=', instance_schema.path] + ), + dsl.ConcatPlaceholder( + items=['--training_schema_path=', training_schema.path] + ), + f'--job_name=feature-transform-engine-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + dsl.ConcatPlaceholder(items=['--dataflow_project=', project]), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_staging_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging', + ] + ), + 
dsl.ConcatPlaceholder( + items=[ + '--dataflow_tmp_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp', + ] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_max_num_workers=', dataflow_max_num_workers] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_machine_type=', dataflow_machine_type] + ), + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + dsl.ConcatPlaceholder( + items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + ] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_use_public_ips=', dataflow_use_public_ips] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_service_account=', dataflow_service_account] + ), + dsl.ConcatPlaceholder( + items=['--dataflow_kms_key=', encryption_spec_key_name] + ), + dsl.ConcatPlaceholder( + items=['--autodetect_csv_schema=', autodetect_csv_schema] + ), + dsl.ConcatPlaceholder(items=['--gcp_resources_path=', gcp_resources]), + dsl.IfPresentPlaceholder( + input_name='group_columns', + then=dsl.ConcatPlaceholder( + items=['--group_columns=', group_columns] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_total_weight', + then=dsl.ConcatPlaceholder( + items=['--group_total_weight=', group_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=['--temporal_total_weight=', temporal_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=[ + '--group_temporal_total_weight=', + group_temporal_total_weight, + ] + ), + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..a9b09479a8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -0,0 +1,236 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
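[Editor's note: both container components in this change, the feature transform engine above and the TabNet hyperparameter tuning job below, assemble their container args from dsl.ConcatPlaceholder and dsl.IfPresentPlaceholder. The following is a minimal, self-contained sketch of that pattern using only the public kfp.dsl API; the component and flag names are hypothetical.]

    from typing import Optional
    from kfp import dsl

    @dsl.container_component
    def echo_flags(required_value: str, optional_value: Optional[str] = None):
        return dsl.ContainerSpec(
            image='alpine',
            command=['echo'],
            args=[
                # Always rendered, e.g. "--required=foo".
                dsl.ConcatPlaceholder(items=['--required=', required_value]),
                # Rendered only when optional_value is supplied to the task.
                dsl.IfPresentPlaceholder(
                    input_name='optional_value',
                    then=dsl.ConcatPlaceholder(items=['--optional=', optional_value]),
                ),
            ],
        )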
+ +"""AutoML Tabnet Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input + + +@dsl.container_component +def tabnet_hyperparameter_tuning_job( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + instance_schema_uri: dsl.OutputPath(str), + prediction_schema_uri: dsl.OutputPath(str), + trials: dsl.OutputPath(str), + prediction_docker_uri_output: dsl.OutputPath(str), + execution_metrics: dsl.OutputPath(dict), + weight_column: Optional[str] = '', + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + eval_frequency_secs: Optional[int] = 600, + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes TabNet hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + study_spec_metric_id: Metric to optimize, possible + values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. 
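[Editor's note: the study_spec_parameters_override argument described above is a list of parameter specifications. Below is a hedged sketch of such a list following the Vertex AI StudySpec.ParameterSpec field names; the parameter ids, ranges, and values are placeholders, not recommended defaults.]

    # Illustrative-only override list; adjust ids and ranges to the actual study.
    study_spec_parameters_override = [
        {
            'parameter_id': 'learning_rate',
            'double_value_spec': {'min_value': 1e-4, 'max_value': 1e-1},
            'scale_type': 'UNIT_LOG_SCALE',
        },
        {
            'parameter_id': 'max_steps',
            'discrete_value_spec': {'values': [1000, 2000, 5000]},
        },
    ]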
+ study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: Amount of time in seconds to run the trainer for. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + instance_schema_uri: The path to the instance schema. + prediction_schema_uri: The path to the prediction schema. + trials: The path to the hyperparameter tuning trials + prediction_docker_uri_output: The URI of the prediction container. + execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJobWithMetrics', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--execution_metrics', + execution_metrics, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabnet-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ( + ', "trial_job_spec": {"worker_pool_specs":' + ' [{"replica_count":"' + ), + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--prediction_docker_uri_artifact_path=', + prediction_docker_uri_output, + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--instance_schema_path=', + instance_schema_uri, + '", "--prediction_schema_path=', + prediction_schema_uri, + '", 
"--trials_path=', + trials, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--measurement_selection_type=', + study_spec_measurement_selection_type, + '", "--metric_goal=', + study_spec_metric_goal, + '", "--eval_steps=', + eval_steps, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '", "--generate_feature_importance=true"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..e687acd6bf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4661 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-tabnet-hyperparameter-tuning-job +# Description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. +# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# 
transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: 
batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - model-evaluation + inputs: + artifacts: + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: TabNet Hyperparameter Tuning + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + 
parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + - model-upload + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + 
producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + 
componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - tabnet-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: tabnet-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_uri + producerTask: tabnet-hyperparameter-tuning-job + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_output + producerTask: tabnet-hyperparameter-tuning-job + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_uri + producerTask: tabnet-hyperparameter-tuning-job + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials + producerTask: tabnet-hyperparameter-tuning-job + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-tabnet-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-tabnet-study-spec-parameters + dependentTasks: + - training-configurator-and-validator + inputs: + artifacts: + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + parameters: + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-tabnet-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + tabnet-hyperparameter-tuning-job: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-tabnet-hyperparameter-tuning-job + dependentTasks: + - feature-transform-engine + - get-tabnet-study-spec-parameters + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + cache_data: + componentInputParameter: pipelinechannel--cache_data + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-tabnet-study-spec-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabnet-hyperparameter-tuning-job + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + 
outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + 
parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
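The dataset-level transformation schemas documented above (Join, TimeAggregate, PartitionByMax/Min/Avg/Sum) are only given as inline prose examples; the following is a minimal, hypothetical sketch of how a caller might assemble such a list in Python before passing it as `dataset_level_transformations`. The table URI and column names are placeholders, not values from this patch.

# Hypothetical dataset-level transformation config, following the schemas
# documented for `dataset_level_transformations` above. Table URIs and
# column names are placeholders.
dataset_level_transformations = [
    {
        # Join features from a second BigQuery table; for each join key the
        # left table column is kept and the right table column is dropped.
        "transformation": "Join",
        "right_table_uri": "bq://my-project.my_dataset.side_table",
        "join_keys": [["join_key_col", "join_key_col"]],
    },
    {
        # Partition-by reduce over a fixed historic window, e.g. max sell
        # price per store over the previous week.
        "transformation": "PartitionByMax",
        "reduce_column": "sell_price",
        "partition_by_columns": ["store_id", "state_id"],
        "time_column": "date",
        "time_ago": 1,
        "time_ago_units": "WEEK",
        "output_column": "partition_by_reduce_max_output",
    },
]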
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
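The forecasting-related inputs above are individual scalar/list parameters rather than a single config object; a minimal sketch of the kind of values a caller might supply, using the parameter names documented above, is shown here. Column names and window sizes are placeholders, not values from this patch.

# Hypothetical forecasting arguments for the feature transform engine,
# using the parameter names documented above. All values are placeholders.
forecasting_kwargs = dict(
    forecasting_time_column="date",
    forecasting_time_series_identifier_column="store_id",
    forecasting_forecast_horizon=30,         # predict 30 steps ahead
    forecasting_context_window=90,           # look back 90 steps
    forecasting_apply_windowing=True,
    forecasting_holiday_regions=["GLOBAL"],  # top-level holiday region
)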
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
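The `tf_transformations_path` description above specifies the JSON shape for FTE's built-in row-level transformations (ZScale, Vocabulary, NGram, ...) and for custom transforms declared through `tf_custom_transformation_definitions`. Below is a minimal, hypothetical config assembled in Python and written to a local JSON file; feature names are placeholders, and in practice the file would be staged to GCS and its URI passed as `tf_transformations_path`.

import json

# Hypothetical row-level TF transformation config in the JSON shape that
# `tf_transformations_path` expects, per the built-in transformation
# schemas documented above. Feature names are placeholders.
transformations = [
    {"transformation": "ZScale", "input_columns": ["feature_1"]},
    {"transformation": "Vocabulary", "input_columns": ["feature_2"], "top_k": 100},
    {
        "transformation": "NGram",
        "input_columns": ["feature_3"],
        "min_ngram_size": 1,
        "max_ngram_size": 2,
        "separator": " ",
    },
    {
        # Custom transform declared in `tf_custom_transformation_definitions`
        # (see the "PlusX" example above); extra keys such as "x" are
        # forwarded to the user-defined transform function.
        "transformation": "PlusX",
        "input_columns": ["feature_4"],
        "x": 5,
    },
]

# In practice this file would live on GCS (e.g. gs://<bucket>/transform_config.json)
# and that URI would be passed as `tf_transformations_path`.
with open("transform_config.json", "w") as f:
    json.dump(transformations, f, indent=2)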
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-tabnet-study-spec-parameters: + executorLabel: exec-get-tabnet-study-spec-parameters + inputDefinitions: + artifacts: + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Metadata generated by example gen. + parameters: + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to produce. + + ''classification'' or ''regression''.' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
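Per the component definition above, get-tabnet-study-spec-parameters takes the user-supplied `study_spec_parameters_override` together with `max_trial_count` and `prediction_type` and returns the final parameter list fed to the hyperparameter tuning job. A hypothetical override is sketched below; the dictionary field names assume the Vertex AI StudySpec ParameterSpec shape (parameter_id plus a *_value_spec), which is an assumption here, and the hyperparameter names and ranges are placeholders.

# Hypothetical `study_spec_parameters_override` entries, assuming the
# Vertex AI StudySpec ParameterSpec dictionary shape. Names and ranges
# are placeholders, not values from this patch.
study_spec_parameters_override = [
    {
        "parameter_id": "learning_rate",
        "double_value_spec": {"min_value": 1e-4, "max_value": 1e-1},
        "scale_type": "UNIT_LOG_SCALE",
    },
    {
        "parameter_id": "max_steps",
        "discrete_value_spec": {"values": [5000, 10000, 20000]},
        "scale_type": "UNIT_LINEAR_SCALE",
    },
]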
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
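When `generate_explanation` is enabled, the `explanation_parameters` and `explanation_metadata` inputs described above are forwarded to the BatchPredictionJob. The sketch below is hypothetical: the field names follow the Vertex AI ExplanationSpec REST shape linked in the descriptions and are an assumption, not values taken from this patch.

# Hypothetical explanation config for a BatchPredictionJob with
# generate_explanation=True. Field names assume the Vertex AI
# ExplanationSpec REST shape; values are placeholders.
explanation_parameters = {
    # Sampled Shapley attribution with 10 feature permutations per instance.
    "sampledShapleyAttribution": {"pathCount": 10}
}
# Leaving explanation_metadata empty inherits the corresponding fields from
# Model.explanation_metadata, per the description above.
explanation_metadata = {}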
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. 
The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' 
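+          # Illustrative shape only (an assumption, not read from this spec):
+          #   [{"machine_spec": {"machine_type": "n1-standard-16"},
+          #     "disk_spec": {"boot_disk_size_gb": 200}}]
+          # The key names here merely mirror the machine/disk spec outputs
+          # declared below; the component defines the authoritative schema.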
+ parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-tabnet-hyperparameter-tuning-job: + executorLabel: exec-tabnet-hyperparameter-tuning-job + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to a JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized validation split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized train split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Amount of time in seconds to run the trainer for. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to transform output. + parameters: + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to + + ''auto'', caching is determined based on the dataset size.' + isOptional: true + parameterType: STRING + enable_profiler: + defaultValue: false + description: 'Enables profiling and saves a trace + + during evaluation.' 
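+          # Hedged note (not from the component docs): the profiler trace is
+          # typically enabled only for debugging runs, since writing it adds
+          # overhead while evaluation is running.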
+ isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and + + checkpointing will take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not + + specified or negative, it means run evaluation on the whole validation + + dataset. If set to 0, it means run evaluation for a fixed number of + + samples.' + isOptional: true + parameterType: NUMBER_INTEGER + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that + + need to be seen before failing the HyperparameterTuningJob. If set to + 0, + + Vertex AI decides how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + parallel_trial_count: + description: 'The desired number of trials to run + + in parallel.' + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for + + the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or + + ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement + + to use if/when the service automatically selects the final measurement + + from previously reported intermediate measurements. One of + + "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, + + possible values: "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible + + values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', + ''auc'', ''precision'', ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' 
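+          # Illustrative override (an assumption): {"machine_type": "n1-standard-32"}.
+          # When omitted, the default above (machine_type: c2-standard-16) is
+          # used; the linked machine-types page lists the available options.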
+ isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + execution_metrics: + description: Core metrics in dictionary of hyperparameter tuning job execution. + parameterType: STRUCT + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + instance_schema_uri: + description: The path to the instance schema. + parameterType: STRING + prediction_docker_uri_output: + description: The URI of the prediction container. + parameterType: STRING + prediction_schema_uri: + description: The path to the prediction schema. + parameterType: STRING + trials: + description: The path to the hyperparameter tuning trials + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. 
The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' 
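+          # Illustrative value (an assumption): a column such as "store_id"
+          # that uniquely identifies each series; left as the default empty
+          # string for classification and regression runs.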
+ isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-get-best-hyperparameter-tuning-job-trial: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_best_hyperparameter_tuning_job_trial + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-tabnet-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_tabnet_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_tabnet_study_spec_parameters(\n metadata: dsl.InputPath('TabularExampleGenMetadata'),\n\ + \ max_trial_count: int,\n prediction_type: str,\n study_spec_parameters_override:\ + \ list, # Required for KFP validation; pylint:disable=g-bare-generic\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ + \ study_spec_parameters for a TabNet hyperparameter tuning job.\n\n Args:\n\ + \ metadata: Metadata generated by example gen.\n max_trial_count:\ + \ The desired total number of trials.\n prediction_type: The type of\ + \ prediction the model is to produce.\n 'classification' or 'regression'.\n\ + \ study_spec_parameters_override: List of dictionaries representing parameters\n\ + \ to optimize. 
The dictionary key is the parameter_id, which is passed\ + \ to\n training job as a command line argument, and the dictionary\ + \ value is the\n parameter specification of the metric.\n\n Returns:\n\ + \ List of final Vizier study_spec_parameters of type ParameterSpec.\n\ + \ \"\"\"\n # Define different search space constants\n tabnet_params_small_data_small_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [5000, 10000, 15000, 20000, 25000, 30000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 5000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.125,\ + \ 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 4},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {'values':\ + \ ['weighted_cross_entropy']},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_small_data_medium_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ 
{'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 200},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ + \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {\n \ + \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ + \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ + \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_small_data_large_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n \ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [512, 1024, 2048, 4096]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.00007,\ + \ 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n {\n\ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 
10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 300},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 7},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 1000, 'max_value': 10000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'sparsity_loss_weight',\n 'double_value_spec': {'min_value':\ + \ 0.0000001, 'max_value': 0.001},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'batch_momentum',\n \ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'batch_size_ratio',\n 'discrete_value_spec': {'values': [0.0625,\ + \ 0.125, 0.25, 0.5]},\n },\n {\n 'parameter_id': 'num_transformer_layers',\n\ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'class_weight',\n \ + \ 'double_value_spec': {'min_value': 1.0, 'max_value': 100.0},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'loss_function_type',\n 'categorical_value_spec': {\n \ + \ 'values': ['weighted_cross_entropy', 'focal_loss']\n },\n\ + \ },\n {\n 'parameter_id': 'alpha_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.1, 0.25, 0.5, 0.75, 0.9, 0.99]},\n\ + \ },\n {\n 'parameter_id': 'gamma_focal_loss',\n \ + \ 'discrete_value_spec': {'values': [0.0, 0.5, 1.0, 2.0, 3.0, 4.0]},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false', 'true']},\n \ + \ },\n ]\n tabnet_params_medium_data_small_search_space = [\n \ + \ {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 4096, 8192, 16384]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0001,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 300},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.3, 'max_value': 0.7},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 
'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_medium_data_medium_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [5000, 10000, 20000, 30000, 40000, 50000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 400},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 
0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_medium_data_large_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [1024, 2048, 4096, 8192, 16384]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ + \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 50, 'max_value': 500},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 2, 'max_value': 8},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 
'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false', 'true']},\n },\n ]\n tabnet_params_large_data_small_search_space\ + \ = [\n {\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [8192, 16384, 32768]},\n },\n {\n 'parameter_id':\ + \ 'learning_rate',\n 'double_value_spec': {'min_value': 0.0002,\ + \ 'max_value': 0.02},\n 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 400},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 6},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 2, 'max_value': 6},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.3, 'max_value': 0.7},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': 
{'min_value':\ + \ 1.0, 'max_value': 10.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {'values': ['weighted_cross_entropy']},\n\ + \ },\n {\n 'parameter_id': 'yeo_johnson_transform',\n\ + \ 'categorical_value_spec': {'values': ['false']},\n },\n\ + \ ]\n tabnet_params_large_data_medium_search_space = [\n {\n \ + \ 'parameter_id': 'max_steps',\n 'discrete_value_spec': {\n\ + \ 'values': [50000, 60000, 70000, 80000, 90000, 100000]\n \ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [4096, 8192, 16384, 32768]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec': {'min_value':\ + \ 0.0001, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [5]},\n },\n {\n \ + \ 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 200, 'max_value': 500},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 7},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.2,\ + \ 'max_value': 2.5},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false']},\n },\n ]\n tabnet_params_large_data_large_search_space\ + \ = [\n 
{\n 'parameter_id': 'max_steps',\n 'discrete_value_spec':\ + \ {\n 'values': [50000, 70000, 90000, 110000, 130000, 150000]\n\ + \ },\n },\n {\n 'parameter_id': 'max_train_secs',\n\ + \ 'discrete_value_spec': {'values': [-1]},\n },\n {\n\ + \ 'parameter_id': 'batch_size',\n 'discrete_value_spec':\ + \ {'values': [4096, 8192, 16384, 32768, 65536]},\n },\n {\n \ + \ 'parameter_id': 'learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.00007, 'max_value': 0.03},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'large_category_dim',\n \ + \ 'discrete_value_spec': {'values': [3, 5, 10]},\n },\n \ + \ {\n 'parameter_id': 'large_category_thresh',\n 'discrete_value_spec':\ + \ {'values': [5, 10]},\n },\n {\n 'parameter_id': 'feature_dim',\n\ + \ 'integer_value_spec': {'min_value': 100, 'max_value': 700},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'feature_dim_ratio',\n 'double_value_spec':\ + \ {'min_value': 0.2, 'max_value': 0.8},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_decision_steps',\n \ + \ 'integer_value_spec': {'min_value': 3, 'max_value': 8},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n 'parameter_id':\ + \ 'relaxation_factor',\n 'double_value_spec': {'min_value': 1.05,\ + \ 'max_value': 3.2},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n {\n 'parameter_id': 'decay_rate',\n 'double_value_spec':\ + \ {'min_value': 0.5, 'max_value': 0.999},\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n {\n 'parameter_id': 'decay_every',\n \ + \ 'integer_value_spec': {'min_value': 10000, 'max_value': 50000},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'sparsity_loss_weight',\n 'double_value_spec':\ + \ {'min_value': 0.0000001, 'max_value': 0.001},\n 'scale_type':\ + \ 'UNIT_LOG_SCALE',\n },\n {\n 'parameter_id': 'batch_momentum',\n\ + \ 'double_value_spec': {'min_value': 0.5, 'max_value': 0.95},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'batch_size_ratio',\n 'discrete_value_spec':\ + \ {'values': [0.0625, 0.125, 0.25, 0.5]},\n },\n {\n \ + \ 'parameter_id': 'num_transformer_layers',\n 'integer_value_spec':\ + \ {'min_value': 4, 'max_value': 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'num_transformer_layers_ratio',\n\ + \ 'double_value_spec': {'min_value': 0.2, 'max_value': 0.8},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'class_weight',\n 'double_value_spec': {'min_value':\ + \ 1.0, 'max_value': 100.0},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'loss_function_type',\n \ + \ 'categorical_value_spec': {\n 'values': ['weighted_cross_entropy',\ + \ 'focal_loss']\n },\n },\n {\n 'parameter_id':\ + \ 'alpha_focal_loss',\n 'discrete_value_spec': {'values': [0.1,\ + \ 0.25, 0.5, 0.75, 0.9, 0.99]},\n },\n {\n 'parameter_id':\ + \ 'gamma_focal_loss',\n 'discrete_value_spec': {'values': [0.0,\ + \ 0.5, 1.0, 2.0, 3.0, 4.0]},\n },\n {\n 'parameter_id':\ + \ 'yeo_johnson_transform',\n 'categorical_value_spec': {'values':\ + \ ['false', 'true']},\n },\n ]\n search_spaces = {\n 'tabnet_params_small_data_small_search_space':\ + \ (\n tabnet_params_small_data_small_search_space\n ),\n \ + \ 'tabnet_params_small_data_medium_search_space': (\n tabnet_params_small_data_medium_search_space\n\ + \ ),\n 'tabnet_params_small_data_large_search_space': (\n \ + \ tabnet_params_small_data_large_search_space\n ),\n 'tabnet_params_medium_data_small_search_space':\ + \ (\n 
tabnet_params_medium_data_small_search_space\n ),\n\ + \ 'tabnet_params_medium_data_medium_search_space': (\n tabnet_params_medium_data_medium_search_space\n\ + \ ),\n 'tabnet_params_medium_data_large_search_space': (\n \ + \ tabnet_params_medium_data_large_search_space\n ),\n 'tabnet_params_large_data_small_search_space':\ + \ (\n tabnet_params_large_data_small_search_space\n ),\n \ + \ 'tabnet_params_large_data_medium_search_space': (\n tabnet_params_large_data_medium_search_space\n\ + \ ),\n 'tabnet_params_large_data_large_search_space': (\n \ + \ tabnet_params_large_data_large_search_space\n ),\n }\n\n #\ + \ pylint: disable=g-import-not-at-top,import-outside-toplevel\n import\ + \ json\n import warnings\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \ with open(metadata, 'r') as f:\n metadata_path = f.read()\n metadata\ + \ = json.loads(metadata_path)\n # Calculate dataset size bucket. One of\ + \ 'small' (< 1M rows),\n # 'medium' (1M - 100M rows), or 'large' (> 100M\ + \ rows)\n num_rows = (\n metadata['num_examples']['train']\n \ + \ + metadata['num_examples']['valid']\n + metadata['num_examples']['test']\n\ + \ )\n dataset_size_bucket = 'medium'\n if num_rows < 10000000:\n dataset_size_bucket\ + \ = 'small'\n elif num_rows > 100000000:\n dataset_size_bucket = 'large'\n\ + \n # Calculate search space bucket using max_trial_count.\n # One of 'small'\ + \ (< 10), medium (1 - 100), and large (> 100).\n search_space = 'medium'\n\ + \ if max_trial_count < 10:\n search_space = 'small'\n elif max_trial_count\ + \ > 100:\n search_space = 'large'\n\n # Get params for classification.\n\ + \ params = search_spaces[\n f'tabnet_params_{dataset_size_bucket}_data_{search_space}_search_space'\n\ + \ ]\n\n # Format for regression. To get regression study_spec_parameters,\ + \ we need\n # to set `loss_function_type` to \u2018mae\u2019 (\u2018mae\u2019\ + \ and \u2018mse\u2019 for 'large'\n # search space), remove the `alpha_focal_loss`,\ + \ `gamma_focal_loss`\n # and `class_weight` parameters and increase the\ + \ max for\n # `sparsity_loss_weight` to 100.\n if prediction_type == 'regression':\n\ + \ formatted_params = []\n for param in params:\n if param['parameter_id']\ + \ in [\n 'alpha_focal_loss',\n 'gamma_focal_loss',\n \ + \ 'class_weight',\n ]:\n continue\n elif param['parameter_id']\ + \ == 'sparsity_loss_weight':\n param['double_value_spec']['max_value']\ + \ = 100\n elif param['parameter_id'] == 'loss_function_type':\n \ + \ if search_space == 'large':\n param['categorical_value_spec']['values']\ + \ = ['mae', 'mse']\n else:\n param['categorical_value_spec']['values']\ + \ = ['mae']\n formatted_params.append(param)\n else:\n formatted_params\ + \ = params\n\n # Create parameter_id -> parameter_config dictionary for\ + \ params to override\n # and override parameters.\n override_params =\ + \ {}\n for param in study_spec_parameters_override:\n override_params[param['parameter_id']]\ + \ = param\n\n study_spec_parameters = []\n for param in formatted_params:\n\ + \ study_spec_parameters.append(\n override_params.get(param['parameter_id'],\ + \ param)\n )\n\n extra_overrides = set(override_params) - set(\n \ + \ p['parameter_id'] for p in params\n )\n if extra_overrides:\n extra_override_str\ + \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ + \ {extra_override_str} were not found in the params and '\n 'will\ + \ be ignored.'\n )\n\n return study_spec_parameters\n\n" + image: python:3.7-slim + exec-model-batch-predict: + container: + args: + - --type + 
- BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - 
'{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - 
'{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-tabnet-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJobWithMetrics + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --execution_metrics + - '{{$.outputs.parameters[''execution_metrics''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabnet-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", 
"{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", + "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", + "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", + "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", + \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": 
["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. 
+ name: automl-tabular-tabnet-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + 
pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 
The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' 
+ parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible values: [ ''loss'', + + ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', + + ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py new file mode 100644 index 0000000000..e0ceeb08f9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -0,0 +1,300 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabnet Trainer component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def tabnet_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument + weight_column: Optional[str] = '', + max_steps: Optional[int] = -1, + max_train_secs: Optional[int] = -1, + large_category_dim: Optional[int] = 1, + large_category_thresh: Optional[int] = 300, + yeo_johnson_transform: Optional[bool] = True, + feature_dim: Optional[int] = 64, + feature_dim_ratio: Optional[float] = 0.5, + num_decision_steps: Optional[int] = 6, + relaxation_factor: Optional[float] = 1.5, + decay_every: Optional[float] = 100, + decay_rate: Optional[float] = 0.95, + gradient_thresh: Optional[float] = 2000, + sparsity_loss_weight: Optional[float] = 1e-05, + batch_momentum: Optional[float] = 0.95, + batch_size_ratio: Optional[float] = 0.25, + num_transformer_layers: Optional[int] = 4, + num_transformer_layers_ratio: Optional[float] = 0.25, + class_weight: Optional[float] = 1.0, + loss_function_type: Optional[str] = 'default', + alpha_focal_loss: Optional[float] = 0.25, + gamma_focal_loss: Optional[float] = 2.0, + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + batch_size: Optional[int] = 100, + measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + optimization_metric: Optional[str] = '', + eval_frequency_secs: Optional[int] = 600, + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains a TabNet model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the + trainer for. + learning_rate: The learning rate used by the linear optimizer. + large_category_dim: Embedding dimension for categorical + feature with large number of categories. 
+ large_category_thresh: Threshold for number of categories
+ to apply large_category_dim embedding dimension to.
+ yeo_johnson_transform: Enables trainable Yeo-Johnson
+ power transform.
+ feature_dim: Dimensionality of the hidden representation
+ in feature transformation block.
+ feature_dim_ratio: The ratio of output dimension
+ (dimensionality of the outputs of each decision step) to feature
+ dimension.
+ num_decision_steps: Number of sequential decision steps.
+ relaxation_factor: Relaxation factor that promotes the
+ reuse of each feature at different decision steps. When it is 1, a
+ feature is enforced to be used only at one decision step and as it
+ increases, more flexibility is provided to use a feature at multiple
+ decision steps.
+ decay_every: Number of iterations for periodically
+ applying learning rate decay.
+ decay_rate: The learning rate decay rate.
+ gradient_thresh: Threshold for the norm of gradients for clipping.
+ sparsity_loss_weight: Weight of the loss for sparsity
+ regularization (increasing it will yield more sparse feature selection).
+ batch_momentum: Momentum in ghost batch normalization.
+ batch_size_ratio: The ratio of virtual batch size (size
+ of the ghost batch normalization) to batch size.
+ num_transformer_layers: The number of transformer layers
+ for each decision step.
+ num_transformer_layers_ratio: The ratio of shared
+ transformer layers to transformer layers.
+ class_weight: The class weight is used to compute a
+ weighted cross entropy, which is helpful for classifying imbalanced
+ datasets. Only used for classification.
+ loss_function_type: Loss function type. Loss function in
+ classification [cross_entropy, weighted_cross_entropy, focal_loss],
+ default is cross_entropy. Loss function in regression: [rmse, mae, mse],
+ default is mse.
+ alpha_focal_loss: Alpha value (balancing factor) in
+ focal_loss function. Only used for classification.
+ gamma_focal_loss: Gamma value (modulating factor) for
+ the focal loss function. Only used for classification.
+ enable_profiler: Enables profiling and saves a trace
+ during evaluation.
+ cache_data: Whether to cache data or not. If set to
+ 'auto', caching is determined based on the dataset size.
+ seed: Seed to be used for this run.
+ eval_steps: Number of steps to run evaluation for. If not
+ specified or negative, it means run evaluation on the whole validation
+ dataset. If set to 0, it means run evaluation for a fixed number of
+ samples.
+ batch_size: Batch size for training.
+ measurement_selection_type: Which measurement to use
+ if/when the service automatically selects the final measurement from
+ previously reported intermediate measurements. One of "BEST_MEASUREMENT"
+ or "LAST_MEASUREMENT".
+ optimization_metric: Optimization metric used for
+ `measurement_selection_type`. Default is "rmse" for regression and "auc"
+ for classification.
+ eval_frequency_secs: Frequency at which evaluation and
+ checkpointing will take place.
+ training_machine_spec: The training machine
+ spec. See https://cloud.google.com/compute/docs/machine-types for
+ options.
+ training_disk_spec: The training disk spec.
+ instance_baseline: The path to a JSON file for baseline values.
+ metadata: The metadata artifact produced by the training
+ configurator and validator.
+ materialized_train_split: The path to the materialized train split.
+ materialized_eval_split: The path to the materialized validation split.
+ transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + unmanaged_container_model: The UnmanagedContainerModel artifact. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabnet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--max_steps=', + max_steps, + '", "--max_train_secs=', + max_train_secs, + '", "--learning_rate=', + learning_rate, + '", "--large_category_dim=', + large_category_dim, + '", "--large_category_thresh=', + large_category_thresh, + '", "--yeo_johnson_transform=', + yeo_johnson_transform, + '", "--feature_dim=', + feature_dim, + '", "--feature_dim_ratio=', + feature_dim_ratio, + '", "--num_decision_steps=', + num_decision_steps, + '", "--relaxation_factor=', + relaxation_factor, + '", "--decay_every=', + decay_every, + '", "--decay_rate=', + decay_rate, + '", "--gradient_thresh=', + gradient_thresh, + '", "--sparsity_loss_weight=', + sparsity_loss_weight, + '", "--batch_momentum=', + batch_momentum, + '", "--batch_size_ratio=', + batch_size_ratio, + '", "--num_transformer_layers=', + num_transformer_layers, + '", "--num_transformer_layers_ratio=', + num_transformer_layers_ratio, + '", "--class_weight=', + class_weight, + '", "--loss_function_type=', + loss_function_type, + '", "--alpha_focal_loss=', + alpha_focal_loss, + '", "--gamma_focal_loss=', + gamma_focal_loss, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--batch_size=', + batch_size, + '", "--measurement_selection_type=', + measurement_selection_type, + '", "--optimization_metric=', + optimization_metric, + '", "--eval_frequency_secs=', + eval_frequency_secs, + ( + '", "--generate_feature_importance=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml new file mode 100644 index 0000000000..32f5b41c9e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -0,0 +1,4302 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-tabnet-trainer +# Description: The TabNet training pipeline. +# Inputs: +# alpha_focal_loss: float [Default: 0.25] +# batch_momentum: float [Default: 0.95] +# batch_size: int [Default: 100.0] +# batch_size_ratio: float [Default: 0.25] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# class_weight: float [Default: 1.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# decay_every: float [Default: 100.0] +# decay_rate: float [Default: 0.95] +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_dim: int [Default: 64.0] +# feature_dim_ratio: float [Default: 0.5] +# feature_selection_algorithm: str [Default: 'AMI'] +# gamma_focal_loss: float [Default: 2.0] +# gradient_thresh: float [Default: 2000.0] +# large_category_dim: int [Default: 1.0] +# large_category_thresh: int [Default: 300.0] +# learning_rate: float +# location: str +# loss_function_type: str [Default: 'default'] +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_selected_features: int [Default: -1.0] +# max_steps: int [Default: -1.0] +# max_train_secs: int [Default: -1.0] +# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# num_decision_steps: int [Default: 6.0] +# num_transformer_layers: int [Default: 4.0] +# num_transformer_layers_ratio: float [Default: 0.25] +# optimization_metric: str [Default: ''] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# relaxation_factor: float [Default: 1.5] +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# sparsity_loss_weight: float [Default: 1e-05] +# stratified_split_key: str [Default: ''] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# 
transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# yeo_johnson_transform: bool [Default: True] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--tabnet-trainer-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + 
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - model-evaluation + inputs: + artifacts: + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: TabNet Trainer + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + pipelinechannel--tabnet-trainer-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + 
parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - tabnet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - model-upload + - tabnet-trainer + inputs: + artifacts: + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + pipelinechannel--tabnet-trainer-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + 
componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: 
feature-transform-engine + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - tabnet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: tabnet-trainer + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + tabnet-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabnet-trainer + dependentTasks: + - feature-transform-engine + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + alpha_focal_loss: + componentInputParameter: pipelinechannel--alpha_focal_loss + batch_momentum: + componentInputParameter: pipelinechannel--batch_momentum + batch_size: + componentInputParameter: pipelinechannel--batch_size + batch_size_ratio: + componentInputParameter: pipelinechannel--batch_size_ratio + cache_data: + componentInputParameter: pipelinechannel--cache_data + class_weight: + componentInputParameter: pipelinechannel--class_weight + decay_every: + componentInputParameter: pipelinechannel--decay_every + decay_rate: + componentInputParameter: pipelinechannel--decay_rate + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + feature_dim: + componentInputParameter: pipelinechannel--feature_dim + feature_dim_ratio: + 
componentInputParameter: pipelinechannel--feature_dim_ratio + gamma_focal_loss: + componentInputParameter: pipelinechannel--gamma_focal_loss + gradient_thresh: + componentInputParameter: pipelinechannel--gradient_thresh + large_category_dim: + componentInputParameter: pipelinechannel--large_category_dim + large_category_thresh: + componentInputParameter: pipelinechannel--large_category_thresh + learning_rate: + componentInputParameter: pipelinechannel--learning_rate + location: + componentInputParameter: pipelinechannel--location + loss_function_type: + componentInputParameter: pipelinechannel--loss_function_type + max_steps: + componentInputParameter: pipelinechannel--max_steps + max_train_secs: + componentInputParameter: pipelinechannel--max_train_secs + measurement_selection_type: + componentInputParameter: pipelinechannel--measurement_selection_type + num_decision_steps: + componentInputParameter: pipelinechannel--num_decision_steps + num_transformer_layers: + componentInputParameter: pipelinechannel--num_transformer_layers + num_transformer_layers_ratio: + componentInputParameter: pipelinechannel--num_transformer_layers_ratio + optimization_metric: + componentInputParameter: pipelinechannel--optimization_metric + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + relaxation_factor: + componentInputParameter: pipelinechannel--relaxation_factor + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + sparsity_loss_weight: + componentInputParameter: pipelinechannel--sparsity_loss_weight + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + yeo_johnson_transform: + componentInputParameter: pipelinechannel--yeo_johnson_transform + taskInfo: + name: tabnet-trainer + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + pipelinechannel--alpha_focal_loss: + parameterType: NUMBER_DOUBLE + pipelinechannel--batch_momentum: + parameterType: NUMBER_DOUBLE + pipelinechannel--batch_size: + parameterType: NUMBER_INTEGER + 
pipelinechannel--batch_size_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--class_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--decay_every: + parameterType: NUMBER_DOUBLE + pipelinechannel--decay_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_dim: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_dim_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--gamma_focal_loss: + parameterType: NUMBER_DOUBLE + pipelinechannel--gradient_thresh: + parameterType: NUMBER_DOUBLE + pipelinechannel--large_category_dim: + parameterType: NUMBER_INTEGER + pipelinechannel--large_category_thresh: + parameterType: NUMBER_INTEGER + pipelinechannel--learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--location: + parameterType: STRING + pipelinechannel--loss_function_type: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--max_train_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--measurement_selection_type: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_decision_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--num_transformer_layers: + parameterType: NUMBER_INTEGER + pipelinechannel--num_transformer_layers_ratio: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_metric: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--relaxation_factor: + parameterType: NUMBER_DOUBLE + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + 
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--sparsity_loss_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + pipelinechannel--yeo_johnson_transform: + parameterType: BOOLEAN + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
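The transformation entries documented above are plain JSON objects, and the file referenced by `tf_transformations_path` is a JSON list of them. A minimal sketch of assembling such a config in Python, using only the built-in formats shown above (the column names and output path are illustrative, not taken from this pipeline):

.. code-block:: python

    import json

    # Chain two built-in FTE transformations on one column, then add a
    # Vocabulary transformation on another, following the formats given in
    # the component description above.
    transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Log", "input_columns": ["feature_1"]},
        {"transformation": "Vocabulary", "input_columns": ["feature_2"], "top_k": 100},
    ]

    # Written locally here for simplicity; in practice the file would be
    # uploaded to GCS and its gs:// URI passed as `tf_transformations_path`.
    with open("transform_config.json", "w") as f:
        json.dump(transformations, f, indent=2)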
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
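The comp-model-batch-predict inputs above match the GCPC batch prediction component; in pipeline source such a step is usually created with the google-cloud-pipeline-components SDK rather than hand-written YAML. A minimal sketch, assuming the v1 `ModelBatchPredictOp` wrapper; the bucket URIs, display name, and machine settings are illustrative only:

.. code-block:: python

    from kfp import dsl
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


    @dsl.pipeline(name="batch-predict-sketch")
    def batch_predict_pipeline(project: str, location: str = "us-central1"):
        # In a real pipeline either `model=` or `unmanaged_container_model=`
        # must be wired from an upstream step; it is omitted here for brevity.
        ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name="example-batch-predict",
            gcs_source_uris=["gs://my-bucket/instances.jsonl"],
            instances_format="jsonl",
            gcs_destination_output_uri_prefix="gs://my-bucket/predictions",
            predictions_format="jsonl",
            machine_type="n1-standard-4",
            starting_replica_count=1,
            max_replica_count=2,
        )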
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' 
+ question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
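The `unmanaged_container_model` example embedded in the comp-model-upload description above is easier to read as standalone pipeline code. A lightly cleaned-up sketch, assuming the GCPC v1 `ModelUploadOp` wrapper; the artifact URI and serving image come from that description, while the pipeline and display names are illustrative:

.. code-block:: python

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.model import ModelUploadOp


    @dsl.pipeline(name="model-upload-sketch")
    def upload_pipeline(project: str, location: str = "us-central1"):
        # Import a model produced outside the pipeline as an
        # UnmanagedContainerModel artifact.
        importer_task = dsl.importer(
            artifact_uri="gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model",
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                "containerSpec": {
                    "imageUri": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod"
                }
            },
        )

        # Upload it to the Vertex AI Model Registry.
        ModelUploadOp(
            project=project,
            location=location,
            display_name="example-uploaded-model",
            unmanaged_container_model=importer_task.output,
        )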
+ isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' 
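comp-parse-worker-pool-specs-override, defined just above, splits one override list into separate training and evaluation machine/disk specs. The spec does not document the list layout, so the shape below is an assumption for illustration only; the machine_spec/disk_spec key names mirror the defaults shown in comp-tabnet-trainer further down.

.. code-block:: python

    # Hypothetical worker_pool_specs_override value. The positional
    # convention (which entries map to training vs. evaluation) is an
    # assumption here, not taken from this spec.
    worker_pool_specs_override = [
        {   # assumed training worker pool override
            "machine_spec": {"machine_type": "n1-highmem-32"},
            "disk_spec": {"boot_disk_type": "pd-ssd", "boot_disk_size_gb": 200},
        },
        {},  # pools left empty keep their defaults
        {},
        {   # assumed evaluation worker pool override
            "machine_spec": {"machine_type": "n1-standard-8"},
            "replica_count": 1,
        },
    ]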
+ outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-tabnet-trainer: + executorLabel: exec-tabnet-trainer + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to a JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized validation split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized train split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Amount of time in seconds to run the trainer for. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to transform output. + parameters: + alpha_focal_loss: + defaultValue: 0.25 + description: 'Alpha value (balancing factor) in + + focal_loss function. Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + batch_momentum: + defaultValue: 0.95 + description: Momentum in ghost batch normalization. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 100.0 + description: Batch size for training. + isOptional: true + parameterType: NUMBER_INTEGER + batch_size_ratio: + defaultValue: 0.25 + description: 'The ratio of virtual batch size (size + + of the ghost batch normalization) to batch size.' + isOptional: true + parameterType: NUMBER_DOUBLE + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to + + ''auto'', caching is determined based on the dataset size.' + isOptional: true + parameterType: STRING + class_weight: + defaultValue: 1.0 + description: 'The class weight is used to computes a + + weighted cross entropy which is helpful in classify imbalanced dataset. + + Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + decay_every: + defaultValue: 100.0 + description: 'Number of iterations for periodically + + applying learning rate decaying.' + isOptional: true + parameterType: NUMBER_DOUBLE + decay_rate: + defaultValue: 0.95 + description: Learning rate decaying. + isOptional: true + parameterType: NUMBER_DOUBLE + enable_profiler: + defaultValue: false + description: 'Enables profiling and saves a trace + + during evaluation.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and + + checkpointing will take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. 
If not + + specified or negative, it means run evaluation on the whole validation + + dataset. If set to 0, it means run evaluation for a fixed number of + + samples.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_dim: + defaultValue: 64.0 + description: 'Dimensionality of the hidden representation + + in feature transformation block.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_dim_ratio: + defaultValue: 0.5 + description: 'The ratio of output dimension + + (dimensionality of the outputs of each decision step) to feature + + dimension.' + isOptional: true + parameterType: NUMBER_DOUBLE + gamma_focal_loss: + defaultValue: 2.0 + description: 'Gamma value (modulating factor) for + + focal loss for focal loss. Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + gradient_thresh: + defaultValue: 2000.0 + description: Threshold for the norm of gradients for clipping. + isOptional: true + parameterType: NUMBER_DOUBLE + large_category_dim: + defaultValue: 1.0 + description: 'Embedding dimension for categorical + + feature with large number of categories.' + isOptional: true + parameterType: NUMBER_INTEGER + large_category_thresh: + defaultValue: 300.0 + description: 'Threshold for number of categories + + to apply large_category_dim embedding dimension to.' + isOptional: true + parameterType: NUMBER_INTEGER + learning_rate: + description: The learning rate used by the linear optimizer. + parameterType: NUMBER_DOUBLE + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + loss_function_type: + defaultValue: default + description: 'Loss function type. Loss function in + + classification [cross_entropy, weighted_cross_entropy, focal_loss], + + default is cross_entropy. Loss function in regression: [rmse, mae, mse], + + default is mse.' + isOptional: true + parameterType: STRING + max_steps: + defaultValue: -1.0 + description: Number of steps to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + max_train_secs: + defaultValue: -1.0 + description: 'Amount of time in seconds to run the + + trainer for.' + isOptional: true + parameterType: NUMBER_INTEGER + measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement to use + + if/when the service automatically selects the final measurement from + + previously reported intermediate measurements. One of "BEST_MEASUREMENT" + + or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + num_decision_steps: + defaultValue: 6.0 + description: Number of sequential decision steps. + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers: + defaultValue: 4.0 + description: 'The number of transformer layers + + for each decision step. used only at one decision step and as it + + increases, more flexibility is provided to use a feature at multiple + + decision steps.' + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers_ratio: + defaultValue: 0.25 + description: 'The ratio of shared + + transformer layer to transformer layers.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_metric: + defaultValue: '' + description: 'Optimization metric used for + + `measurement_selection_type`. Default is "rmse" for regression and "auc" + + for classification.' + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' 
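`alpha_focal_loss` and `gamma_focal_loss` above are the standard balancing and modulating factors of focal loss, and their defaults (0.25 and 2.0) match the component defaults. A small NumPy sketch of how the two factors shape the per-example loss, purely for intuition and not the trainer's actual implementation:

.. code-block:: python

    import numpy as np

    def binary_focal_loss(y_true, p_pred, alpha=0.25, gamma=2.0):
        """Per-example binary focal loss: -alpha_t * (1 - p_t)**gamma * log(p_t)."""
        p_pred = np.clip(p_pred, 1e-7, 1 - 1e-7)
        p_t = np.where(y_true == 1, p_pred, 1 - p_pred)    # prob. of the true class
        alpha_t = np.where(y_true == 1, alpha, 1 - alpha)  # class-balancing factor
        return -alpha_t * (1 - p_t) ** gamma * np.log(p_t)

    # A confident correct prediction (0.95) is down-weighted far more than a
    # borderline one (0.55), which is the point of the modulating factor gamma.
    print(binary_focal_loss(np.array([1, 1]), np.array([0.95, 0.55])))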
+ parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + relaxation_factor: + defaultValue: 1.5 + description: 'Relaxation factor that promotes the + + reuse of each feature at different decision steps. When it is 1, a + + feature is enforced to be used only at one decision step and as it + + increases, more flexibility is provided to use a feature at multiple + + decision steps.' + isOptional: true + parameterType: NUMBER_DOUBLE + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + sparsity_loss_weight: + defaultValue: 1.0e-05 + description: 'Weight of the loss for sparsity + + regularization (increasing it will yield more sparse feature selection).' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + yeo_johnson_transform: + defaultValue: true + description: 'Enables trainable Yeo-Johnson + + power transform.' + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' 
+ isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' 
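The optimization_objective description above lists the default objective for each prediction type. A tiny illustrative helper that mirrors those documented defaults (this function is not part of the pipeline itself):

.. code-block:: python

    def default_objective(prediction_type: str, multi_class: bool = False) -> str:
        """Return the default optimization objective documented above."""
        if prediction_type == "classification":
            return "minimize-log-loss" if multi_class else "maximize-au-roc"
        if prediction_type == "regression":
            return "minimize-rmse"
        raise ValueError(f"unsupported prediction_type: {prediction_type}")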
+ isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - 
instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - 
'{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ 
worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = 
dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-tabnet-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabnet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", + "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", + "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", + "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--large_category_dim=", + "{{$.inputs.parameters[''large_category_dim'']}}", "\", \"--large_category_thresh=", + "{{$.inputs.parameters[''large_category_thresh'']}}", "\", \"--yeo_johnson_transform=", + "{{$.inputs.parameters[''yeo_johnson_transform'']}}", "\", \"--feature_dim=", + "{{$.inputs.parameters[''feature_dim'']}}", "\", \"--feature_dim_ratio=", + "{{$.inputs.parameters[''feature_dim_ratio'']}}", "\", \"--num_decision_steps=", + "{{$.inputs.parameters[''num_decision_steps'']}}", "\", \"--relaxation_factor=", + "{{$.inputs.parameters[''relaxation_factor'']}}", "\", \"--decay_every=", + "{{$.inputs.parameters[''decay_every'']}}", "\", \"--decay_rate=", "{{$.inputs.parameters[''decay_rate'']}}", + "\", \"--gradient_thresh=", "{{$.inputs.parameters[''gradient_thresh'']}}", + "\", \"--sparsity_loss_weight=", "{{$.inputs.parameters[''sparsity_loss_weight'']}}", + "\", \"--batch_momentum=", "{{$.inputs.parameters[''batch_momentum'']}}", + "\", \"--batch_size_ratio=", "{{$.inputs.parameters[''batch_size_ratio'']}}", + "\", \"--num_transformer_layers=", "{{$.inputs.parameters[''num_transformer_layers'']}}", + "\", \"--num_transformer_layers_ratio=", "{{$.inputs.parameters[''num_transformer_layers_ratio'']}}", + "\", \"--class_weight=", "{{$.inputs.parameters[''class_weight'']}}", "\", + \"--loss_function_type=", "{{$.inputs.parameters[''loss_function_type'']}}", + "\", \"--alpha_focal_loss=", 
"{{$.inputs.parameters[''alpha_focal_loss'']}}", + "\", \"--gamma_focal_loss=", "{{$.inputs.parameters[''gamma_focal_loss'']}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", + "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", + "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--generate_feature_importance=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", 
"{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 +pipelineInfo: + description: The TabNet training pipeline. + name: automl-tabular-tabnet-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--alpha_focal_loss: + componentInputParameter: alpha_focal_loss + pipelinechannel--batch_momentum: + componentInputParameter: batch_momentum + pipelinechannel--batch_size: + componentInputParameter: batch_size + pipelinechannel--batch_size_ratio: + componentInputParameter: batch_size_ratio + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--class_weight: + componentInputParameter: class_weight + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--decay_every: + componentInputParameter: decay_every + pipelinechannel--decay_rate: + componentInputParameter: decay_rate + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + 
componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_dim: + componentInputParameter: feature_dim + pipelinechannel--feature_dim_ratio: + componentInputParameter: feature_dim_ratio + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--gamma_focal_loss: + componentInputParameter: gamma_focal_loss + pipelinechannel--gradient_thresh: + componentInputParameter: gradient_thresh + pipelinechannel--large_category_dim: + componentInputParameter: large_category_dim + pipelinechannel--large_category_thresh: + componentInputParameter: large_category_thresh + pipelinechannel--learning_rate: + componentInputParameter: learning_rate + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--loss_function_type: + componentInputParameter: loss_function_type + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_steps: + componentInputParameter: max_steps + pipelinechannel--max_train_secs: + componentInputParameter: max_train_secs + pipelinechannel--measurement_selection_type: + componentInputParameter: measurement_selection_type + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--num_decision_steps: + componentInputParameter: num_decision_steps + pipelinechannel--num_transformer_layers: + componentInputParameter: num_transformer_layers + pipelinechannel--num_transformer_layers_ratio: + componentInputParameter: num_transformer_layers_ratio + pipelinechannel--optimization_metric: + componentInputParameter: optimization_metric + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--relaxation_factor: + componentInputParameter: relaxation_factor + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: 
+ taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--sparsity_loss_weight: + componentInputParameter: sparsity_loss_weight + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + pipelinechannel--yeo_johnson_transform: + componentInputParameter: yeo_johnson_transform + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + alpha_focal_loss: + defaultValue: 0.25 + description: 'Alpha value (balancing factor) in focal_loss function. + + Only used for classification.' + isOptional: true + parameterType: NUMBER_DOUBLE + batch_momentum: + defaultValue: 0.95 + description: Momentum in ghost batch normalization. + isOptional: true + parameterType: NUMBER_DOUBLE + batch_size: + defaultValue: 100.0 + description: Batch size for training. + isOptional: true + parameterType: NUMBER_INTEGER + batch_size_ratio: + defaultValue: 0.25 + description: 'The ratio of virtual batch size (size of the ghost batch + + normalization) to batch size.' + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. 
If set to ''auto'', caching is
+
+ determined based on the dataset size.'
+ isOptional: true
+ parameterType: STRING
+ class_weight:
+ defaultValue: 1.0
+ description: 'The class weight is used to compute a weighted cross entropy,
+
+ which is helpful for classifying imbalanced datasets. Only used for
+
+ classification.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ data_source_bigquery_table_path:
+ defaultValue: ''
+ description: 'The BigQuery table path of format
+
+ bq://bq_project.bq_dataset.bq_table'
+ isOptional: true
+ parameterType: STRING
+ data_source_csv_filenames:
+ defaultValue: ''
+ description: 'A string that represents a list of comma
+
+ separated CSV filenames.'
+ isOptional: true
+ parameterType: STRING
+ dataflow_service_account:
+ defaultValue: ''
+ description: Custom service account to run dataflow jobs.
+ isOptional: true
+ parameterType: STRING
+ dataflow_subnetwork:
+ defaultValue: ''
+ description: 'Dataflow''s fully qualified subnetwork name, when empty
+
+ the default subnetwork will be used. Example:
+
+ https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
+ isOptional: true
+ parameterType: STRING
+ dataflow_use_public_ips:
+ defaultValue: true
+ description: 'Specifies whether Dataflow workers use public IP
+
+ addresses.'
+ isOptional: true
+ parameterType: BOOLEAN
+ dataset_level_custom_transformation_definitions:
+ description: 'Dataset-level custom
+
+ transformation definitions in string format.'
+ isOptional: true
+ parameterType: LIST
+ dataset_level_transformations:
+ description: 'Dataset-level transformation configuration in
+
+ string format.'
+ isOptional: true
+ parameterType: LIST
+ decay_every:
+ defaultValue: 100.0
+ description: 'Number of iterations for periodically applying learning rate
+
+ decaying.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ decay_rate:
+ defaultValue: 0.95
+ description: Learning rate decaying.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ enable_profiler:
+ defaultValue: false
+ description: Enables profiling and saves a trace during evaluation.
+ isOptional: true
+ parameterType: BOOLEAN
+ encryption_spec_key_name:
+ defaultValue: ''
+ description: The KMS key name.
+ isOptional: true
+ parameterType: STRING
+ eval_frequency_secs:
+ defaultValue: 600.0
+ description: 'Frequency at which evaluation and checkpointing will
+
+ take place.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ eval_steps:
+ defaultValue: 0.0
+ description: 'Number of steps to run evaluation for. If not specified or
+
+ negative, it means run evaluation on the whole validation dataset. If set
+
+ to 0, it means run evaluation for a fixed number of samples.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ evaluation_batch_predict_machine_type:
+ defaultValue: n1-highmem-8
+ description: 'The prediction server machine type
+
+ for batch predict components during evaluation.'
+ isOptional: true
+ parameterType: STRING
+ evaluation_batch_predict_max_replica_count:
+ defaultValue: 20.0
+ description: 'The max number of prediction
+
+ server for batch predict components during evaluation.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ evaluation_batch_predict_starting_replica_count:
+ defaultValue: 20.0
+ description: 'The initial number of
+
+ prediction server for batch predict components during evaluation.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ evaluation_dataflow_disk_size_gb:
+ defaultValue: 50.0
+ description: 'Dataflow worker''s disk size in GB for
+
+ evaluation components.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ evaluation_dataflow_machine_type:
+ defaultValue: n1-standard-4
+ description: 'The dataflow machine type for evaluation
+
+ components.'
+ isOptional: true
+ parameterType: STRING
+ evaluation_dataflow_max_num_workers:
+ defaultValue: 100.0
+ description: 'The max number of Dataflow workers for
+
+ evaluation components.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ evaluation_dataflow_starting_num_workers:
+ defaultValue: 10.0
+ description: 'The initial number of Dataflow
+
+ workers for evaluation components.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ feature_dim:
+ defaultValue: 64.0
+ description: 'Dimensionality of the hidden representation in feature
+
+ transformation block.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ feature_dim_ratio:
+ defaultValue: 0.5
+ description: 'The ratio of output dimension (dimensionality of the
+
+ outputs of each decision step) to feature dimension.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ feature_selection_algorithm:
+ defaultValue: AMI
+ description: Feature selection algorithm.
+ isOptional: true
+ parameterType: STRING
+ gamma_focal_loss:
+ defaultValue: 2.0
+ description: 'Gamma value (modulating factor) for focal loss. Only used for
+
+ classification.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ gradient_thresh:
+ defaultValue: 2000.0
+ description: Threshold for the norm of gradients for clipping.
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ large_category_dim:
+ defaultValue: 1.0
+ description: 'Embedding dimension for categorical feature with large
+
+ number of categories.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ large_category_thresh:
+ defaultValue: 300.0
+ description: 'Threshold for number of categories to apply
+
+ large_category_dim embedding dimension to.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ learning_rate:
+ description: The learning rate used by the linear optimizer.
+ parameterType: NUMBER_DOUBLE
+ location:
+ description: The GCP region that runs the pipeline components.
+ parameterType: STRING
+ loss_function_type:
+ defaultValue: default
+ description: 'Loss function type. Loss function in classification:
+
+ [cross_entropy, weighted_cross_entropy, focal_loss], default is
+
+ cross_entropy. Loss function in regression: [rmse, mae, mse], default is
+
+ mse.'
+ isOptional: true
+ parameterType: STRING
+ materialized_examples_format:
+ defaultValue: tfrecords_gzip
+ description: The format for the materialized examples.
+ isOptional: true
+ parameterType: STRING
+ max_selected_features:
+ defaultValue: -1.0
+ description: Maximum number of features to select.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_steps:
+ defaultValue: -1.0
+ description: Number of steps to run the trainer for.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_train_secs:
+ defaultValue: -1.0
+ description: Amount of time in seconds to run the trainer for.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ measurement_selection_type:
+ defaultValue: BEST_MEASUREMENT
+ description: 'Which measurement to use if/when the service
+
+ automatically selects the final measurement from previously reported
+
+ intermediate measurements. One of "BEST_MEASUREMENT" or
+
+ "LAST_MEASUREMENT".'
+ isOptional: true + parameterType: STRING + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + num_decision_steps: + defaultValue: 6.0 + description: Number of sequential decision steps. + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers: + defaultValue: 4.0 + description: 'The number of transformer layers for each decision + + step. used only at one decision step and as it increases, more flexibility + + is provided to use a feature at multiple decision steps.' + isOptional: true + parameterType: NUMBER_INTEGER + num_transformer_layers_ratio: + defaultValue: 0.25 + description: 'The ratio of shared transformer layer to + + transformer layers.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_metric: + defaultValue: '' + description: 'Optimization metric used for + + `measurement_selection_type`. Default is "rmse" for regression and "auc" + + for classification.' + isOptional: true + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + relaxation_factor: + defaultValue: 1.5 + description: 'Relaxation factor that promotes the reuse of each feature + + at different decision steps. When it is 1, a feature is enforced to be + + used only at one decision step and as it increases, more flexibility is + + provided to use a feature at multiple decision steps.' + isOptional: true + parameterType: NUMBER_DOUBLE + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + sparsity_loss_weight: + defaultValue: 1.0e-05 + description: 'Weight of the loss for sparsity regularization + + (increasing it will yield more sparse feature selection).' + isOptional: true + parameterType: NUMBER_DOUBLE + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. 
Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + yeo_johnson_transform: + defaultValue: true + description: Enables trainable Yeo-Johnson power transform. + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py new file mode 100644 index 0000000000..096c5e378c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py @@ -0,0 +1,3360 @@ +"""Util functions for AutoML Tabular pipeline.""" + +import json +import os +import pathlib +from typing import Any, Dict, List, Optional, Tuple, Union +import uuid +import warnings + +_DEFAULT_NUM_PARALLEL_TRAILS = 35 +_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 +_NUM_FOLDS = 5 +_DISTILL_TOTAL_TRIALS = 100 +_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 +_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 +_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 +_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 +_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' +_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 +_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 +_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 + +# Needed because we reference the AutoML Tabular V1 pipeline. 
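+# _GCPC_STAGING_PATH resolves four levels up from this file to the
+# google_cloud_pipeline_components package root, so _GCPC_GA_TABULAR_PATH
+# points at the v1/automl/tabular directory containing the referenced
+# AutoML Tabular V1 pipeline.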
+_GCPC_STAGING_PATH = pathlib.Path( + __file__ +).parent.parent.parent.parent.resolve() +_GCPC_GA_TABULAR_PATH = str(_GCPC_STAGING_PATH / 'v1' / 'automl' / 'tabular') + + +def _update_parameters( + parameter_values: Dict[str, Any], new_params: Dict[str, Any] +): + parameter_values.update( + {param: value for param, value in new_params.items() if value is not None} + ) + + +def _generate_model_display_name() -> str: + """Automatically generates a model_display_name. + + Returns: + model_display_name. + """ + return f'tabular-workflow-model-{uuid.uuid4()}' + + +# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag +# to signify FTE usage instead of the presence of num_selected_features. +def _get_default_pipeline_params( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[float] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + max_selected_features: Optional[int] = None, + apply_feature_selection_tuning: bool = False, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: 
Optional[int] = None,
+    stage_1_tuning_result_artifact_uri: Optional[str] = None,
+    quantiles: Optional[List[float]] = None,
+    enable_probabilistic_inference: bool = False,
+    num_selected_features: Optional[int] = None,
+    model_display_name: str = '',
+    model_description: str = '',
+) -> Dict[str, Any]:
+  """Get the default pipeline parameters for the AutoML Tabular v1 pipeline.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region that runs the pipeline components.
+    root_dir: The root GCS directory for the pipeline components.
+    target_column: The target column name.
+    prediction_type: The type of prediction the model is to produce.
+      "classification" or "regression".
+    optimization_objective: For binary classification, "maximize-au-roc",
+      "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or
+      "maximize-recall-at-precision". For multi-class classification,
+      "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or
+      "minimize-rmsle".
+    transformations: The path to a GCS file containing the transformations to
+      apply.
+    train_budget_milli_node_hours: The train budget of creating this model,
+      expressed in milli node hours, i.e., a value of 1,000 in this field means
+      1 node hour.
+    stage_1_num_parallel_trials: Number of parallel trials for stage 1.
+    stage_2_num_parallel_trials: Number of parallel trials for stage 2.
+    stage_2_num_selected_trials: Number of selected trials for stage 2.
+    data_source_csv_filenames: The CSV data source.
+    data_source_bigquery_table_path: The BigQuery data source.
+    predefined_split_key: The predefined_split column name.
+    timestamp_split_key: The timestamp_split column name.
+    stratified_split_key: The stratified_split column name.
+    training_fraction: The training fraction.
+    validation_fraction: The validation fraction.
+    test_fraction: The test fraction.
+    weight_column: The weight column name.
+    study_spec_parameters_override: The list for overriding study spec. The list
+      should be of format
+      https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181.
+    optimization_objective_recall_value: Required when optimization_objective is
+      "maximize-precision-at-recall". Must be between 0 and 1, inclusive.
+    optimization_objective_precision_value: Required when optimization_objective
+      is "maximize-recall-at-precision". Must be between 0 and 1, inclusive.
+    stage_1_tuner_worker_pool_specs_override: The dictionary for overriding the
+      stage 1 tuner worker pool spec. The dictionary should be of format
+      https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
+    cv_trainer_worker_pool_specs_override: The dictionary for overriding the
+      stage 2 cv trainer worker pool spec. The dictionary should be of format
+      https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
+    export_additional_model_without_custom_ops: Whether to export additional
+      model without custom TensorFlow operators.
+    stats_and_example_gen_dataflow_machine_type: The dataflow machine type for
+      stats_and_example_gen component.
+    stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow
+      workers for stats_and_example_gen component.
+    stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in
+      GB for stats_and_example_gen component.
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + max_selected_features: number of features to select for training, + apply_feature_selection_tuning: tuning feature selection rate if true. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. 
+ num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. If specified, + enable_probabilistic_inference and run_distillation cannot be enabled. + model_display_name: The display name of the uploaded Vertex model. + model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not cv_trainer_worker_pool_specs_override: + cv_trainer_worker_pool_specs_override = [] + if not quantiles: + quantiles = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'optimization_objective': optimization_objective, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'weight_column': weight_column, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': ( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'dataflow_service_account': dataflow_service_account, + 'encryption_spec_key_name': encryption_spec_key_name, + 'max_selected_features': max_selected_features, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'quantiles': quantiles, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + } + parameter_values.update( + {param: value for param, value in parameters.items() if value is not None} + ) + + if run_evaluation: + eval_parameters = { + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_batch_explain_machine_type': ( + evaluation_batch_explain_machine_type + ), + 'evaluation_batch_explain_starting_replica_count': ( + evaluation_batch_explain_starting_replica_count + ), + 'evaluation_batch_explain_max_replica_count': ( + evaluation_batch_explain_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 
'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + } + parameter_values.update( + { + param: value + for param, value in eval_parameters.items() + if value is not None + } + ) + + # V1 pipeline without FTE + if num_selected_features is None: + if not additional_experiments: + additional_experiments = {} + + parameters = { + 'transformations': transformations, + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'additional_experiments': additional_experiments, + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + if apply_feature_selection_tuning: + parameter_values.update({ + 'apply_feature_selection_tuning': apply_feature_selection_tuning, + }) + + if run_distillation: + distillation_parameters = { + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + } + parameter_values.update( + { + param: value + for param, value in distillation_parameters.items() + if value is not None + } + ) + + # V2 pipeline (with FTE) + else: + if run_distillation: + raise ValueError( + 'Distillation is currently not supported' + ' when num_selected_features is specified.' 
+ ) + + parameters = { + 'num_selected_features': num_selected_features, + 'dataset_level_custom_transformation_definitions': [], + 'dataset_level_transformations': [], + 'tf_auto_transform_features': {}, + 'tf_custom_transformation_definitions': [], + 'legacy_transformations_path': transformations, + 'feature_transform_engine_dataflow_machine_type': ( + transform_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + transform_dataflow_disk_size_gb + ), + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + return parameter_values + + +def get_automl_tabular_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: 
Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. 
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. + model_display_name: The display name of the uploaded Vertex model. 
+ model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=quantiles, + 
enable_probabilistic_inference=enable_probabilistic_inference, + num_selected_features=num_selected_features, + model_display_name=model_display_name, + model_description=model_description, + ) + + # V1 pipeline without FTE + if num_selected_features is None: + pipeline_definition_path = os.path.join( + _GCPC_GA_TABULAR_PATH, 'automl_tabular_pipeline.yaml' + ) + + # V2 pipeline with FTE + else: + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'automl_tabular_v2_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_automl_tabular_feature_selection_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + max_selected_features: int = 1000, + apply_feature_selection_tuning: bool = False, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + 
"""Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. 
+ dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + max_selected_features: number of features to select for training, + apply_feature_selection_tuning: tuning feature selection rate if true. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + model_display_name: The display name of the uploaded Vertex model. + model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. 
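+
+  Example:
+    A minimal sketch of a call; the project, bucket, column, and file names
+    below are placeholders rather than values shipped with this module. The
+    returned template path and parameter values are what a caller would
+    submit to Vertex AI Pipelines.
+
+      template_path, parameter_values = (
+          get_automl_tabular_feature_selection_pipeline_and_parameters(
+              project='my-project',
+              location='us-central1',
+              root_dir='gs://my-bucket/pipeline_root',
+              target_column='label',
+              prediction_type='classification',
+              optimization_objective='maximize-au-roc',
+              transformations='gs://my-bucket/transformations.json',
+              train_budget_milli_node_hours=1000,
+              max_selected_features=500,
+          )
+      )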
+ """ + model_display_name = ( + model_display_name + if model_display_name + else _generate_model_display_name() + ) + + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + max_selected_features=max_selected_features, + apply_feature_selection_tuning=apply_feature_selection_tuning, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + model_display_name=model_display_name, + 
model_description=model_description, + ) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'automl_tabular_feature_selection_pipeline.yaml', + ) + return pipeline_definition_path, parameter_values + + +def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: + """Convert json input dict to encoded parameter string. + + This function is required due to the limitation on YAML component definition + that YAML definition does not have a keyword for apply quote escape, so the + JSON argument's quote must be manually escaped using this function. + + Args: + input_dict: The input json dictionary. + + Returns: + The encoded string used for parameter. + """ + if not input_dict: + return '' + out = json.dumps(json.dumps(input_dict)) + return out[1:-1] # remove the outside quotes, e.g., "foo" -> foo + + +def get_skip_architecture_search_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_tuning_result_artifact_uri: str, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that skips architecture search. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. 
+ root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. 
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + + return get_automl_tabular_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=None, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=[], + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override={}, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + 
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=None, + distill_batch_predict_machine_type=None, + distill_batch_predict_starting_replica_count=None, + distill_batch_predict_max_replica_count=None, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=[], + enable_probabilistic_inference=False, + ) + + +def get_wide_and_deep_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + dnn_learning_rate: float, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + optimizer_type: str = 'adam', + max_steps: int = -1, + max_train_secs: int = -1, + l1_regularization_strength: float = 0, + l2_regularization_strength: float = 0, + l2_shrinkage_regularization_strength: float = 0, + beta_1: float = 0.9, + beta_2: float = 0.999, + hidden_units: str = '30,30,30', + use_wide: bool = True, + embed_categories: bool = True, + dnn_dropout: float = 0, + dnn_optimizer_type: str = 'adam', + dnn_l1_regularization_strength: float = 0, + dnn_l2_regularization_strength: float = 0, + dnn_l2_shrinkage_regularization_strength: float = 0, + dnn_beta_1: float = 0.9, + dnn_beta_2: float = 0.999, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + batch_size: int = 100, + measurement_selection_type: Optional[str] = None, + optimization_metric: Optional[str] = None, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the Wide & Deep training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + 'classification' or 'regression'. + learning_rate: The learning rate used by the linear optimizer. + dnn_learning_rate: The learning rate for training the deep part of the + model. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + optimizer_type: The type of optimizer to use. Choices are "adam", "ftrl" and + "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the trainer for. + l1_regularization_strength: L1 regularization strength for + optimizer_type="ftrl". + l2_regularization_strength: L2 regularization strength for + optimizer_type="ftrl". + l2_shrinkage_regularization_strength: L2 shrinkage regularization strength + for optimizer_type="ftrl". + beta_1: Beta 1 value for optimizer_type="adam". + beta_2: Beta 2 value for optimizer_type="adam". + hidden_units: Hidden layer sizes to use for DNN feature columns, provided in + comma-separated layers. + use_wide: If set to true, the categorical columns will be used in the wide + part of the DNN model. + embed_categories: If set to true, the categorical columns will be used + embedded and used in the deep part of the model. Embedding size is the + square root of the column cardinality. + dnn_dropout: The probability we will drop out a given coordinate. + dnn_optimizer_type: The type of optimizer to use for the deep part of the + model. Choices are "adam", "ftrl" and "sgd". for the Adam, FTRL, and + Gradient Descent Optimizers, respectively. 
+ dnn_l1_regularization_strength: L1 regularization strength for + dnn_optimizer_type="ftrl". + dnn_l2_regularization_strength: L2 regularization strength for + dnn_optimizer_type="ftrl". + dnn_l2_shrinkage_regularization_strength: L2 shrinkage regularization + strength for dnn_optimizer_type="ftrl". + dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". + dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use if/when the service + automatically selects the final measurement from previously reported + intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. 
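+
+    Example (illustrative sketch only; the project, bucket and table names
+      below are placeholders rather than defaults of this function):
+
+        # All literal values in this snippet are placeholders.
+        template_path, parameter_values = (
+            get_wide_and_deep_trainer_pipeline_and_parameters(
+                project='my-project',
+                location='us-central1',
+                root_dir='gs://my-bucket/pipeline_root',
+                target_column='label',
+                prediction_type='classification',
+                learning_rate=0.01,
+                dnn_learning_rate=0.01,
+                data_source_bigquery_table_path='bq://my-project.my_dataset.train',
+            )
+        )
+        # The returned template path and parameter values can then be handed to
+        # a pipeline runner, for example google.cloud.aiplatform.PipelineJob(
+        #     template_path=template_path, parameter_values=parameter_values,
+        #     display_name=...).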
+ + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = {} + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'learning_rate': learning_rate, + 'dnn_learning_rate': dnn_learning_rate, + 'optimizer_type': optimizer_type, + 'max_steps': max_steps, + 'max_train_secs': max_train_secs, + 'l1_regularization_strength': l1_regularization_strength, + 'l2_regularization_strength': l2_regularization_strength, + 'l2_shrinkage_regularization_strength': ( + l2_shrinkage_regularization_strength + ), + 'beta_1': beta_1, + 'beta_2': beta_2, + 'hidden_units': hidden_units, + 'use_wide': use_wide, + 'embed_categories': embed_categories, + 'dnn_dropout': dnn_dropout, + 'dnn_optimizer_type': dnn_optimizer_type, + 'dnn_l1_regularization_strength': dnn_l1_regularization_strength, + 'dnn_l2_regularization_strength': dnn_l2_regularization_strength, + 'dnn_l2_shrinkage_regularization_strength': ( + dnn_l2_shrinkage_regularization_strength + ), + 'dnn_beta_1': dnn_beta_1, + 'dnn_beta_2': dnn_beta_2, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'batch_size': batch_size, + 'measurement_selection_type': measurement_selection_type, + 'optimization_metric': optimization_metric, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if 
dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'wide_and_deep_trainer_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + algorithm: str, + enable_profiler: bool = False, + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + 
evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the built-in algorithm HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + algorithm: Algorithm to train. One of "tabnet" and "wide_and_deep". + enable_profiler: Enables profiling and saves a trace during evaluation. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. 
One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'This method is deprecated. Please use' + ' get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters or' + ' get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters' + ' instead.' 
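+      # warnings.warn is called without an explicit category, so this surfaces
+      # as a UserWarning and the call does not fail; the arguments are then
+      # forwarded unchanged to the algorithm-specific helper chosen below.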
+ ) + + if algorithm == 'tabnet': + return get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + study_spec_metric_id=study_spec_metric_id, + study_spec_metric_goal=study_spec_metric_goal, + study_spec_parameters_override=study_spec_parameters_override, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + transform_config=transform_config, + dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, + dataset_level_transformations=dataset_level_transformations, + predefined_split_key=predefined_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + tf_transform_execution_engine=tf_transform_execution_engine, + tf_auto_transform_features=tf_auto_transform_features, + tf_custom_transformation_definitions=tf_custom_transformation_definitions, + tf_transformations_path=tf_transformations_path, + enable_profiler=enable_profiler, + seed=seed, + eval_steps=eval_steps, + eval_frequency_secs=eval_frequency_secs, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, + weight_column=weight_column, + max_failed_trial_count=max_failed_trial_count, + study_spec_algorithm=study_spec_algorithm, + study_spec_measurement_selection_type=study_spec_measurement_selection_type, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + worker_pool_specs_override=worker_pool_specs_override, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + elif algorithm == 'wide_and_deep': + return get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + study_spec_metric_id=study_spec_metric_id, + study_spec_metric_goal=study_spec_metric_goal, + study_spec_parameters_override=study_spec_parameters_override, + max_trial_count=max_trial_count, + parallel_trial_count=parallel_trial_count, + transform_config=transform_config, + dataset_level_custom_transformation_definitions=dataset_level_custom_transformation_definitions, + dataset_level_transformations=dataset_level_transformations, + predefined_split_key=predefined_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + 
test_fraction=test_fraction, + tf_transform_execution_engine=tf_transform_execution_engine, + tf_auto_transform_features=tf_auto_transform_features, + tf_custom_transformation_definitions=tf_custom_transformation_definitions, + tf_transformations_path=tf_transformations_path, + enable_profiler=enable_profiler, + seed=seed, + eval_steps=eval_steps, + eval_frequency_secs=eval_frequency_secs, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + bigquery_staging_full_dataset_id=bigquery_staging_full_dataset_id, + weight_column=weight_column, + max_failed_trial_count=max_failed_trial_count, + study_spec_algorithm=study_spec_algorithm, + study_spec_measurement_selection_type=study_spec_measurement_selection_type, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + worker_pool_specs_override=worker_pool_specs_override, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + else: + raise ValueError( + 'Invalid algorithm provided. Supported values are "tabnet" and' + ' "wide_and_deep".' 
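+        # Reached only when `algorithm` is neither 'tabnet' nor 'wide_and_deep'.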
+ ) + + +def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the TabNet HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. 
The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. 
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'study_spec_parameters_override': study_spec_parameters_override, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': 
dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'tabnet_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: List[Dict[str, Any]], + max_trial_count: int, + parallel_trial_count: int, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + 
weight_column: str = '', + max_failed_trial_count: int = 0, + study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: str = 'BEST_MEASUREMENT', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the Wide & Deep algorithm HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', + 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', + 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. 
If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of + "ALGORITHM_UNSPECIFIED", "GRID_SEARCH", or "RANDOM_SEARCH". + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' 
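+        # Both the deprecated transform_config and the newer
+        # tf_transformations_path were supplied; only one source of TF
+        # transformations is accepted.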
+ ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'study_spec_parameters_override': study_spec_parameters_override, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + 
data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'wide_and_deep_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def get_tabnet_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + transform_config: Optional[str] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: bool = False, + feature_selection_algorithm: Optional[str] = None, + materialized_examples_format: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_transform_execution_engine: Optional[str] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + max_steps: int = -1, + max_train_secs: int = -1, + large_category_dim: int = 1, + large_category_thresh: int = 300, + yeo_johnson_transform: bool = True, + feature_dim: int = 64, + feature_dim_ratio: float = 0.5, + num_decision_steps: int = 6, + relaxation_factor: float = 1.5, + decay_every: float = 100, + decay_rate: float = 0.95, + gradient_thresh: float = 2000, + sparsity_loss_weight: float = 0.00001, + batch_momentum: float = 0.95, + batch_size_ratio: float = 0.25, + num_transformer_layers: int = 4, + num_transformer_layers_ratio: float = 0.25, + class_weight: float = 1.0, + loss_function_type: str = 'default', + alpha_focal_loss: float = 0.25, + gamma_focal_loss: float = 2.0, + enable_profiler: bool = False, + cache_data: str = 'auto', + seed: int = 1, + eval_steps: int = 0, + batch_size: int = 100, + measurement_selection_type: Optional[str] = None, + optimization_metric: Optional[str] = None, + eval_frequency_secs: int = 600, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: str = '', + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + worker_pool_specs_override: Optional[Dict[str, Any]] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = _EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_starting_num_workers: int = _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + 
evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the TabNet training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + learning_rate: The learning rate used by the linear optimizer. + transform_config: Path to v1 TF transformation configuration. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + materialized_examples_format: The format for the materialized examples. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_transform_execution_engine: The execution engine used to execute TF-based + transformations. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the trainer for. + large_category_dim: Embedding dimension for categorical feature with large + number of categories. + large_category_thresh: Threshold for number of categories to apply + large_category_dim embedding dimension to. + yeo_johnson_transform: Enables trainable Yeo-Johnson power transform. + feature_dim: Dimensionality of the hidden representation in feature + transformation block. + feature_dim_ratio: The ratio of output dimension (dimensionality of the + outputs of each decision step) to feature dimension. + num_decision_steps: Number of sequential decision steps. + relaxation_factor: Relaxation factor that promotes the reuse of each feature + at different decision steps. When it is 1, a feature is enforced to be + used only at one decision step and as it increases, more flexibility is + provided to use a feature at multiple decision steps. + decay_every: Number of iterations for periodically applying learning rate + decaying. + decay_rate: Learning rate decaying. + gradient_thresh: Threshold for the norm of gradients for clipping. + sparsity_loss_weight: Weight of the loss for sparsity regularization + (increasing it will yield more sparse feature selection). + batch_momentum: Momentum in ghost batch normalization. + batch_size_ratio: The ratio of virtual batch size (size of the ghost batch + normalization) to batch size. + num_transformer_layers: The number of transformer layers for each decision + step.
+ num_transformer_layers_ratio: The ratio of shared transformer layers to + transformer layers. + class_weight: The class weight is used to compute a weighted cross entropy, + which is helpful for classifying imbalanced datasets. Only used for + classification. + loss_function_type: Loss function type. Loss function in classification + [cross_entropy, weighted_cross_entropy, focal_loss], default is + cross_entropy. Loss function in regression: [rmse, mae, mse], default is + mse. + alpha_focal_loss: Alpha value (balancing factor) in focal_loss function. + Only used for classification. + gamma_focal_loss: Gamma value (modulating factor) for focal loss. + Only used for classification. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is + determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not specified or + negative, it means run evaluation on the whole validation dataset. If set + to 0, it means run evaluation for a fixed number of samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use if/when the service + automatically selects the final measurement from previously reported + intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + worker_pool_specs_override: The dictionary for overriding training and + evaluation worker pool specs. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used.
Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + if transform_config and tf_transformations_path: + raise ValueError( + 'Only one of transform_config and tf_transformations_path can ' + 'be specified.' + ) + + elif transform_config: + warnings.warn( + 'transform_config parameter is deprecated. ' + 'Please use the flattened transform config arguments instead.' + ) + tf_transformations_path = transform_config + + if not worker_pool_specs_override: + worker_pool_specs_override = [] + + parameter_values = {} + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'learning_rate': learning_rate, + 'max_steps': max_steps, + 'max_train_secs': max_train_secs, + 'large_category_dim': large_category_dim, + 'large_category_thresh': large_category_thresh, + 'yeo_johnson_transform': yeo_johnson_transform, + 'feature_dim': feature_dim, + 'feature_dim_ratio': feature_dim_ratio, + 'num_decision_steps': num_decision_steps, + 'relaxation_factor': relaxation_factor, + 'decay_every': decay_every, + 'decay_rate': decay_rate, + 'gradient_thresh': gradient_thresh, + 'sparsity_loss_weight': sparsity_loss_weight, + 'batch_momentum': batch_momentum, + 'batch_size_ratio': batch_size_ratio, + 'num_transformer_layers': num_transformer_layers, + 'num_transformer_layers_ratio': num_transformer_layers_ratio, + 'class_weight': class_weight, + 'loss_function_type': loss_function_type, + 'alpha_focal_loss': alpha_focal_loss, + 'gamma_focal_loss': gamma_focal_loss, + 'enable_profiler': enable_profiler, + 'cache_data': cache_data, + 'seed': seed, + 'eval_steps': eval_steps, + 'batch_size': batch_size, + 'measurement_selection_type': measurement_selection_type, + 'optimization_metric': optimization_metric, + 'eval_frequency_secs': eval_frequency_secs, + 'weight_column': weight_column, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'worker_pool_specs_override': worker_pool_specs_override, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 
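+      # Feature Transform Engine (FTE) parameters. Optional list/dict arguments
+      # are normalized to empty containers below, and materialized_examples_format /
+      # tf_transform_execution_engine fall back to 'tfrecords_gzip' and
+      # 'dataflow' when unset.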
'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + 'materialized_examples_format': ( + materialized_examples_format + if materialized_examples_format + else 'tfrecords_gzip' + ), + 'tf_transform_execution_engine': ( + tf_transform_execution_engine + if tf_transform_execution_engine + else 'dataflow' + ), + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'tabnet_trainer_pipeline.yaml' + ) + + return pipeline_definition_path, parameter_values + + +def get_tabnet_study_spec_parameters_override( + dataset_size_bucket: str, prediction_type: str, training_budget_bucket: str +) -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for a TabNet hyperparameter tuning job. + + Args: + dataset_size_bucket: Size of the dataset. One of "small" (< 1M rows), + "medium" (1M - 100M rows), or "large" (> 100M rows). + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + training_budget_bucket: Bucket of the estimated training budget. One of + "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This + parameter is only used as a hint for the hyperparameter search space, + unrelated to the real cost. + + Returns: + List of study_spec_parameters_override. + """ + + if dataset_size_bucket not in ['small', 'medium', 'large']: + raise ValueError( + 'Invalid dataset_size_bucket provided. Supported values ' + ' are "small", "medium" or "large".' + ) + if training_budget_bucket not in ['small', 'medium', 'large']: + raise ValueError( + 'Invalid training_budget_bucket provided. Supported values ' + 'are "small", "medium" or "large".' 
+ ) + + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + f'configs/tabnet_params_{dataset_size_bucket}_data_{training_budget_bucket}_search_space.json', + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + if prediction_type == 'regression': + return _format_tabnet_regression_study_spec_parameters_override( + params, training_budget_bucket + ) + return params + + +def _format_tabnet_regression_study_spec_parameters_override( + params: List[Dict[str, Any]], training_budget_bucket: str +) -> List[Dict[str, Any]]: + """Get regression study_spec_parameters_override for a TabNet hyperparameter tuning job. + + Args: + params: List of dictionaries representing parameters to optimize. The + dictionary key is the parameter_id, which is passed to training job as a + command line argument, and the dictionary value is the parameter + specification of the metric. + training_budget_bucket: Bucket of the estimated training budget. One of + "small" (< $600), "medium" ($600 - $2400), or "large" (> $2400). This + parameter is only used as a hint for the hyperparameter search space, + unrelated to the real cost. + + Returns: + List of study_spec_parameters_override for regression. + """ + + # To get regression study_spec_parameters, we need to set + # `loss_function_type` to ‘mae’ (‘mae’ and ‘mse’ for "large" search space), + # remove the `alpha_focal_loss`, `gamma_focal_loss` + # and `class_weight` parameters and increase the max for + # `sparsity_loss_weight` to 100. + formatted_params = [] + for param in params: + if param['parameter_id'] in [ + 'alpha_focal_loss', + 'gamma_focal_loss', + 'class_weight', + ]: + continue + elif param['parameter_id'] == 'sparsity_loss_weight': + param['double_value_spec']['max_value'] = 100 + elif param['parameter_id'] == 'loss_function_type': + if training_budget_bucket == 'large': + param['categorical_value_spec']['values'] = ['mae', 'mse'] + else: + param['categorical_value_spec']['values'] = ['mae'] + + formatted_params.append(param) + + return formatted_params + + +def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for a Wide & Deep hyperparameter tuning job. + + Returns: + List of study_spec_parameters_override. + """ + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'configs/wide_and_deep_params.json', + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + return params + + +def get_xgboost_study_spec_parameters_override() -> List[Dict[str, Any]]: + """Get study_spec_parameters_override for an XGBoost hyperparameter tuning job. + + Returns: + List of study_spec_parameters_override. 
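+
+  Example (illustrative sketch; assumes this module is imported as `utils` and
+  that the default search space is passed on to the companion
+  get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters helper, e.g.
+  after inspecting or customizing it):
+
+      overrides = utils.get_xgboost_study_spec_parameters_override()
+      template_path, parameter_values = (
+          utils.get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters(
+              project='my-project',  # placeholder values
+              location='us-central1',
+              root_dir='gs://my-bucket/pipeline_root',
+              target_column='label',
+              objective='binary:logistic',
+              study_spec_metric_id='logloss',
+              study_spec_metric_goal='MINIMIZE',
+              max_trial_count=10,
+              parallel_trial_count=3,
+              study_spec_parameters_override=overrides,
+          )
+      )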
+ """ + param_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'configs/xgboost_params.json' + ) + with open(param_path, 'r') as f: + param_content = f.read() + params = json.loads(param_content) + + return params + + +def get_xgboost_trainer_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + objective: str, + eval_metric: Optional[str] = None, + num_boost_round: Optional[int] = None, + early_stopping_rounds: Optional[int] = None, + base_score: Optional[float] = None, + disable_default_eval_metric: Optional[int] = None, + seed: Optional[int] = None, + seed_per_iteration: Optional[bool] = None, + booster: Optional[str] = None, + eta: Optional[float] = None, + gamma: Optional[float] = None, + max_depth: Optional[int] = None, + min_child_weight: Optional[float] = None, + max_delta_step: Optional[float] = None, + subsample: Optional[float] = None, + colsample_bytree: Optional[float] = None, + colsample_bylevel: Optional[float] = None, + colsample_bynode: Optional[float] = None, + reg_lambda: Optional[float] = None, + reg_alpha: Optional[float] = None, + tree_method: Optional[str] = None, + scale_pos_weight: Optional[float] = None, + updater: Optional[str] = None, + refresh_leaf: Optional[int] = None, + process_type: Optional[str] = None, + grow_policy: Optional[str] = None, + sampling_method: Optional[str] = None, + monotone_constraints: Optional[str] = None, + interaction_constraints: Optional[str] = None, + sample_type: Optional[str] = None, + normalize_type: Optional[str] = None, + rate_drop: Optional[float] = None, + one_drop: Optional[int] = None, + skip_drop: Optional[float] = None, + num_parallel_tree: Optional[int] = None, + feature_selector: Optional[str] = None, + top_k: Optional[int] = None, + max_cat_to_onehot: Optional[int] = None, + max_leaves: Optional[int] = None, + max_bin: Optional[int] = None, + tweedie_variance_power: Optional[float] = None, + huber_slope: Optional[float] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: Optional[bool] = None, + feature_selection_algorithm: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: Optional[str] = None, + training_machine_type: Optional[str] = None, + training_total_replica_count: Optional[int] = None, + training_accelerator_type: Optional[str] = None, + training_accelerator_count: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + run_evaluation: Optional[bool] = None, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + 
evaluation_batch_predict_max_replica_count: Optional[int] = None,
+    evaluation_dataflow_machine_type: Optional[str] = None,
+    evaluation_dataflow_starting_num_workers: Optional[int] = None,
+    evaluation_dataflow_max_num_workers: Optional[int] = None,
+    evaluation_dataflow_disk_size_gb: Optional[int] = None,
+    dataflow_service_account: Optional[str] = None,
+    dataflow_subnetwork: Optional[str] = None,
+    dataflow_use_public_ips: Optional[bool] = None,
+    encryption_spec_key_name: Optional[str] = None,
+):
+  """Get the XGBoost training pipeline.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region that runs the pipeline components.
+    root_dir: The root GCS directory for the pipeline components.
+    target_column: The target column name.
+    objective: Specifies the learning task and the learning objective. Must be
+      one of [reg:squarederror, reg:squaredlogerror,
+      reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror,
+      binary:logistic, multi:softprob].
+    eval_metric: Evaluation metrics for validation data represented as a
+      comma-separated string.
+    num_boost_round: Number of boosting iterations.
+    early_stopping_rounds: Activates early stopping. Validation error needs to
+      decrease at least every early_stopping_rounds round(s) to continue
+      training.
+    base_score: The initial prediction score of all instances, global bias.
+    disable_default_eval_metric: Flag to disable default metric. Set to >0 to
+      disable. Default to 0.
+    seed: Random seed.
+    seed_per_iteration: Seed PRNG deterministically via iteration number.
+    booster: Which booster to use, can be gbtree, gblinear or dart. gbtree and
+      dart use tree based model while gblinear uses linear function.
+    eta: Learning rate.
+    gamma: Minimum loss reduction required to make a further partition on a leaf
+      node of the tree.
+    max_depth: Maximum depth of a tree.
+    min_child_weight: Minimum sum of instance weight (hessian) needed in a child.
+    max_delta_step: Maximum delta step we allow each tree's weight estimation to
+      be.
+    subsample: Subsample ratio of the training instance.
+    colsample_bytree: Subsample ratio of columns when constructing each tree.
+    colsample_bylevel: Subsample ratio of columns for each split, in each level.
+    colsample_bynode: Subsample ratio of columns for each node (split).
+    reg_lambda: L2 regularization term on weights.
+    reg_alpha: L1 regularization term on weights.
+    tree_method: The tree construction algorithm used in XGBoost. Choices:
+      ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].
+    scale_pos_weight: Control the balance of positive and negative weights.
+    updater: A comma separated string defining the sequence of tree updaters to
+      run.
+    refresh_leaf: Refresh updater plugin. Update tree leaf and node stats if
+      True. When it is False, only node stats are updated.
+    process_type: A type of boosting process to run. Choices: ["default",
+      "update"].
+    grow_policy: Controls the way new nodes are added to the tree. Only
+      supported if tree_method is hist. Choices: ["depthwise", "lossguide"].
+    sampling_method: The method to use to sample the training instances.
+    monotone_constraints: Constraint of variable monotonicity.
+    interaction_constraints: Constraints for interaction representing permitted
+      interactions.
+    sample_type: [dart booster only] Type of sampling algorithm.
+      Choices: ["uniform", "weighted"].
+    normalize_type: [dart booster only] Type of normalization algorithm.
+      Choices: ["tree", "forest"].
+    rate_drop: [dart booster only] Dropout rate.
+ one_drop: [dart booster only] When this flag is enabled, at least one tree + is always dropped during the dropout (allows Binomial-plus-one or + epsilon-dropout from the original DART paper). + skip_drop: [dart booster only] Probability of skipping the dropout procedure + during a boosting iteration. + num_parallel_tree: Number of parallel trees constructed during each + iteration. This option is used to support boosted random forest. + feature_selector: [linear booster only] Feature selection and ordering + method. + top_k: The number of top features to select in greedy and thrifty feature + selector. The value of 0 means using all the features. + max_cat_to_onehot: A threshold for deciding whether XGBoost should use + one-hot encoding based split for categorical data. + max_leaves: Maximum number of nodes to be added. + max_bin: Maximum number of discrete bins to bucket continuous features. + tweedie_variance_power: Parameter that controls the variance of the Tweedie + distribution. + huber_slope: A parameter used for Pseudo-Huber loss to define the delta + term. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. + training_machine_type: Machine type. + training_total_replica_count: Number of workers. + training_accelerator_type: Accelerator type. + training_accelerator_count: Accelerator count. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. 
+ evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = {} + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'objective': objective, + 'eval_metric': eval_metric, + 'num_boost_round': num_boost_round, + 'early_stopping_rounds': early_stopping_rounds, + 'base_score': base_score, + 'disable_default_eval_metric': disable_default_eval_metric, + 'seed': seed, + 'seed_per_iteration': seed_per_iteration, + 'booster': booster, + 'eta': eta, + 'gamma': gamma, + 'max_depth': max_depth, + 'min_child_weight': min_child_weight, + 'max_delta_step': max_delta_step, + 'subsample': subsample, + 'colsample_bytree': colsample_bytree, + 'colsample_bylevel': colsample_bylevel, + 'colsample_bynode': colsample_bynode, + 'reg_lambda': reg_lambda, + 'reg_alpha': reg_alpha, + 'tree_method': tree_method, + 'scale_pos_weight': scale_pos_weight, + 'updater': updater, + 'refresh_leaf': refresh_leaf, + 'process_type': process_type, + 'grow_policy': grow_policy, + 'sampling_method': sampling_method, + 'monotone_constraints': monotone_constraints, + 'interaction_constraints': interaction_constraints, + 'sample_type': sample_type, + 'normalize_type': normalize_type, + 'rate_drop': rate_drop, + 'one_drop': one_drop, + 'skip_drop': skip_drop, + 'num_parallel_tree': num_parallel_tree, + 'feature_selector': feature_selector, + 'top_k': top_k, + 'max_cat_to_onehot': max_cat_to_onehot, + 'max_leaves': max_leaves, + 'max_bin': max_bin, + 'tweedie_variance_power': tweedie_variance_power, + 'huber_slope': huber_slope, + 'weight_column': weight_column, + 'training_machine_type': training_machine_type, + 'training_total_replica_count': training_total_replica_count, + 'training_accelerator_type': training_accelerator_type, + 'training_accelerator_count': training_accelerator_count, + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 
'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'xgboost_trainer_pipeline.yaml' + ) + + return pipeline_definition_path, parameter_values + + +def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + objective: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + max_trial_count: int, + parallel_trial_count: int, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + eval_metric: Optional[str] = None, + disable_default_eval_metric: Optional[int] = None, + seed: Optional[int] = None, + seed_per_iteration: Optional[bool] = None, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: Optional[bool] = None, + feature_selection_algorithm: Optional[str] = None, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + tf_custom_transformation_definitions: Optional[List[Dict[str, Any]]] = None, + tf_transformations_path: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + weight_column: Optional[str] = None, + max_failed_trial_count: Optional[int] = None, + training_machine_type: Optional[str] = None, + training_total_replica_count: Optional[int] = None, + training_accelerator_type: Optional[str] = None, + training_accelerator_count: Optional[int] = None, + study_spec_algorithm: Optional[str] = None, + study_spec_measurement_selection_type: Optional[str] = 
None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + run_evaluation: Optional[bool] = None, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + dataflow_service_account: Optional[str] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: Optional[bool] = None, + encryption_spec_key_name: Optional[str] = None, +): + """Get the XGBoost HyperparameterTuningJob pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + objective: Specifies the learning task and the learning objective. Must be + one of [reg:squarederror, reg:squaredlogerror, + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + binary:logistic, multi:softprob]. + study_spec_metric_id: Metric to optimize. For options, please look under + 'eval_metric' at + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. + study_spec_metric_goal: Optimization goal of the metric, possible values: + "MAXIMIZE", "MINIMIZE". + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run in parallel. + study_spec_parameters_override: List of dictionaries representing parameters + to optimize. The dictionary key is the parameter_id, which is passed to + training job as a command line argument, and the dictionary value is the + parameter specification of the metric. + eval_metric: Evaluation metrics for validation data represented as a + comma-separated string. + disable_default_eval_metric: Flag to disable default metric. Set to >0 to + disable. Default to 0. + seed: Random seed. + seed_per_iteration: Seed PRNG determnisticly via iterator number. + dataset_level_custom_transformation_definitions: Dataset-level custom + transformation definitions in string format. + dataset_level_transformations: Dataset-level transformation configuration in + string format. + run_feature_selection: Whether to enable feature selection. + feature_selection_algorithm: Feature selection algorithm. + max_selected_features: Maximum number of features to select. + predefined_split_key: Predefined split key. + stratified_split_key: Stratified split key. + training_fraction: Training fraction. + validation_fraction: Validation fraction. + test_fraction: Test fraction. + tf_auto_transform_features: List of auto transform features in the + comma-separated string format. + tf_custom_transformation_definitions: TF custom transformation definitions + in string format. + tf_transformations_path: Path to TF transformation configuration. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + bigquery_staging_full_dataset_id: The BigQuery staging full dataset id for + storing intermediate tables. + weight_column: The weight column name. 
+ max_failed_trial_count: The number of failed trials that need to be seen + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + how many trials must fail before the whole job fails. + training_machine_type: Machine type. + training_total_replica_count: Number of workers. + training_accelerator_type: Accelerator type. + training_accelerator_count: Accelerator count. + study_spec_algorithm: The search algorithm specified for the study. One of + 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement to use if/when the + service automatically selects the final measurement from previously + reported intermediate measurements. One of "BEST_MEASUREMENT" or + "LAST_MEASUREMENT". + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + run_evaluation: Whether to run evaluation steps during training. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + + Returns: + Tuple of pipeline_definition_path and parameter_values. 
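+
+  Example (illustrative sketch; assumes the google-cloud-aiplatform SDK is
+  installed and initialized for the target project, and that the GCS paths
+  shown are placeholders):
+
+      from google.cloud import aiplatform
+
+      template_path, parameter_values = (
+          get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters(
+              ...  # required arguments as documented above
+          )
+      )
+      job = aiplatform.PipelineJob(
+          display_name='xgboost-hpt',
+          template_path=template_path,
+          parameter_values=parameter_values,
+          pipeline_root='gs://my-bucket/pipeline_root',  # placeholder
+      )
+      job.run()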
+ """ + parameter_values = {} + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + training_and_eval_parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'objective': objective, + 'eval_metric': eval_metric, + 'study_spec_metric_id': study_spec_metric_id, + 'study_spec_metric_goal': study_spec_metric_goal, + 'max_trial_count': max_trial_count, + 'parallel_trial_count': parallel_trial_count, + 'study_spec_parameters_override': ( + study_spec_parameters_override + if study_spec_parameters_override + else [] + ), + 'disable_default_eval_metric': disable_default_eval_metric, + 'seed': seed, + 'seed_per_iteration': seed_per_iteration, + 'weight_column': weight_column, + 'max_failed_trial_count': max_failed_trial_count, + 'training_machine_type': training_machine_type, + 'training_total_replica_count': training_total_replica_count, + 'training_accelerator_type': training_accelerator_type, + 'training_accelerator_count': training_accelerator_count, + 'study_spec_algorithm': study_spec_algorithm, + 'study_spec_measurement_selection_type': ( + study_spec_measurement_selection_type + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + _update_parameters(parameter_values, training_and_eval_parameters) + + fte_params = { + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': ( + tf_auto_transform_features if tf_auto_transform_features else {} + ), + 'tf_custom_transformation_definitions': ( + tf_custom_transformation_definitions + if tf_custom_transformation_definitions + else [] + ), + 'tf_transformations_path': tf_transformations_path, + } + _update_parameters(parameter_values, fte_params) + + data_source_and_split_parameters = { + 'data_source_csv_filenames': data_source_csv_filenames, + 
'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + } + _update_parameters(parameter_values, data_source_and_split_parameters) + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'xgboost_hyperparameter_tuning_job_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..6f76075d48 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -0,0 +1,236 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Wide and Deep Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input + + +@dsl.container_component +def wide_and_deep_hyperparameter_tuning_job( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + instance_schema_uri: dsl.OutputPath(str), + prediction_schema_uri: dsl.OutputPath(str), + trials: dsl.OutputPath(str), + prediction_docker_uri_output: dsl.OutputPath(str), + execution_metrics: dsl.OutputPath(dict), + weight_column: Optional[str] = '', + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + eval_frequency_secs: Optional[int] = 600, + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes Wide & Deep hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. 
"classification" or "regression". + weight_column: The weight column name. + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + study_spec_metric_id: Metric to optimize, , possible + values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: Amount of time in seconds to run the trainer for. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + instance_schema_uri: The path to the instance schema. + prediction_schema_uri: The path to the prediction schema. + trials: The path to the hyperparameter tuning trials + prediction_docker_uri_output: The URI of the prediction container. + execution_metrics: Core metrics in dictionary of hyperparameter tuning job execution. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJobWithMetrics', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--execution_metrics', + execution_metrics, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "wide-and-deep-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ( + ', "trial_job_spec": {"worker_pool_specs":' + ' [{"replica_count":"' + ), + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--prediction_docker_uri_artifact_path=', + prediction_docker_uri_output, + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--instance_schema_path=', + instance_schema_uri, + '", "--prediction_schema_path=', + prediction_schema_uri, + '", "--trials_path=', + trials, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--measurement_selection_type=', + study_spec_measurement_selection_type, + '", "--metric_goal=', + study_spec_metric_goal, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..f6c3308c7f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4018 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-wide-and-deep-hyperparameter-tuning-job +# Description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
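+# Note: this file is a compiled KFP PipelineSpec (machine generated); it is
+# intended to be loaded through the corresponding get_*_pipeline_and_parameters
+# helper rather than edited by hand.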
+# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + 
constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + 
pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: 
pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - wide-and-deep-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: wide-and-deep-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_uri + producerTask: wide-and-deep-hyperparameter-tuning-job + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_output + producerTask: wide-and-deep-hyperparameter-tuning-job + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_uri + producerTask: wide-and-deep-hyperparameter-tuning-job + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials + producerTask: wide-and-deep-hyperparameter-tuning-job + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-wide-and-deep-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-wide-and-deep-study-spec-parameters + inputs: + parameters: + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-wide-and-deep-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + 
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + wide-and-deep-hyperparameter-tuning-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-wide-and-deep-hyperparameter-tuning-job + dependentTasks: + - feature-transform-engine + - get-wide-and-deep-study-spec-parameters + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + cache_data: + componentInputParameter: pipelinechannel--cache_data + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: 
pipelinechannel--eval_frequency_secs + eval_steps: + componentInputParameter: pipelinechannel--eval_steps + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-wide-and-deep-study-spec-parameters + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: wide-and-deep-hyperparameter-tuning-job + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + 
pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' 
+ isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. 
For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. 
CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' 
+ isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. 
+ isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. 
For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-wide-and-deep-study-spec-parameters: + executorLabel: exec-get-wide-and-deep-study-spec-parameters + inputDefinitions: + parameters: + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' 
+ isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. 
Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. 
- + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. 
+ parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. 
+ parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' 
+ isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' 
+ isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-wide-and-deep-hyperparameter-tuning-job: + executorLabel: exec-wide-and-deep-hyperparameter-tuning-job + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to a JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized validation split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the materialized train split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Amount of time in seconds to run the trainer for. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The path to transform output. 
+ parameters: + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to + + ''auto'', caching is determined based on the dataset size.' + isOptional: true + parameterType: STRING + enable_profiler: + defaultValue: false + description: 'Enables profiling and saves a trace + + during evaluation.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and + + checkpointing will take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not + + specified or negative, it means run evaluation on the whole validation + + dataset. If set to 0, it means run evaluation for a fixed number of + + samples.' + isOptional: true + parameterType: NUMBER_INTEGER + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that + + need to be seen before failing the HyperparameterTuningJob. If set to + 0, + + Vertex AI decides how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + parallel_trial_count: + description: 'The desired number of trials to run + + in parallel.' + parameterType: NUMBER_INTEGER + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for + + the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or + + ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement + + to use if/when the service automatically selects the final measurement + + from previously reported intermediate measurements. One of + + "BEST_MEASUREMENT" or "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, + + possible values: "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, , possible + + values: [ ''loss'', ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', + ''auc'', ''precision'', ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + target_column: + description: The target column name. 
+ parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + execution_metrics: + description: Core metrics in dictionary of hyperparameter tuning job execution. + parameterType: STRUCT + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING + instance_schema_uri: + description: The path to the instance schema. + parameterType: STRING + prediction_docker_uri_output: + description: The URI of the prediction container. + parameterType: STRING + prediction_schema_uri: + description: The path to the prediction schema. + parameterType: STRING + trials: + description: The path to the hyperparameter tuning trials + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-get-best-hyperparameter-tuning-job-trial: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_best_hyperparameter_tuning_job_trial + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-wide-and-deep-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_wide_and_deep_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_wide_and_deep_study_spec_parameters(\n study_spec_parameters_override:\ + \ list # Required for KFP validation; pylint:disable=g-bare-generic\n)\ + \ -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Get\ + \ study_spec_parameters for a Wide & Deep hyperparameter tuning job.\n\n\ + \ Args:\n study_spec_parameters_override: List of dictionaries representing\ + \ parameters\n to optimize. 
The dictionary key is the parameter_id,\ + \ which is passed to\n training job as a command line argument, and\ + \ the dictionary value is the\n parameter specification of the metric.\n\ + \n Returns:\n List of final Vizier study_spec_parameters of type ParameterSpec.\n\ + \ \"\"\"\n default_params = [\n {\n 'parameter_id': 'max_steps',\n\ + \ 'discrete_value_spec': {\n 'values': [5000, 10000,\ + \ 20000, 30000, 40000, 50000]\n },\n },\n {\n \ + \ 'parameter_id': 'max_train_secs',\n 'discrete_value_spec':\ + \ {'values': [-1]},\n },\n {\n 'parameter_id': 'learning_rate',\n\ + \ 'double_value_spec': {'min_value': 0.0001, 'max_value': 0.0005},\n\ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n },\n {\n \ + \ 'parameter_id': 'optimizer_type',\n 'categorical_value_spec':\ + \ {'values': ['adam', 'ftrl', 'sgd']},\n },\n {\n 'parameter_id':\ + \ 'l1_regularization_strength',\n 'discrete_value_spec': {'values':\ + \ [0, 0.01, 0.02]},\n },\n {\n 'parameter_id': 'l2_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'l2_shrinkage_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'beta_1',\n 'discrete_value_spec':\ + \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ + \ 'beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9, 0.999]},\n\ + \ },\n {\n 'parameter_id': 'hidden_units',\n \ + \ 'categorical_value_spec': {'values': ['30,30,30']},\n },\n \ + \ {\n 'parameter_id': 'use_wide',\n 'categorical_value_spec':\ + \ {'values': ['true', 'false']},\n },\n {\n 'parameter_id':\ + \ 'embed_categories',\n 'categorical_value_spec': {'values': ['true',\ + \ 'false']},\n },\n {\n 'parameter_id': 'dnn_dropout',\n\ + \ 'discrete_value_spec': {'values': [0, 0.1, 0.2]},\n },\n\ + \ {\n 'parameter_id': 'dnn_learning_rate',\n 'double_value_spec':\ + \ {'min_value': 0.0001, 'max_value': 0.0005},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n {\n 'parameter_id': 'dnn_optimizer_type',\n \ + \ 'categorical_value_spec': {'values': ['adam', 'ftrl', 'sgd']},\n\ + \ },\n {\n 'parameter_id': 'dnn_l1_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_l2_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_l2_shrinkage_regularization_strength',\n\ + \ 'discrete_value_spec': {'values': [0, 0.01, 0.02]},\n },\n\ + \ {\n 'parameter_id': 'dnn_beta_1',\n 'discrete_value_spec':\ + \ {'values': [0.7, 0.8, 0.9]},\n },\n {\n 'parameter_id':\ + \ 'dnn_beta_2',\n 'discrete_value_spec': {'values': [0.8, 0.9,\ + \ 0.999]},\n },\n {\n 'parameter_id': 'batch_size',\n\ + \ 'discrete_value_spec': {'values': [1024, 2048, 4096, 8192, 16384]},\n\ + \ },\n ]\n # pylint:disable=g-import-not-at-top,redefined-outer-name\n\ + \ import warnings\n # pylint:enable=g-import-not-at-top,redefined-outer-name\n\ + \n override_params = {}\n for param in study_spec_parameters_override:\n\ + \ override_params[param['parameter_id']] = param\n\n study_spec_parameters\ + \ = []\n for param in default_params:\n study_spec_parameters.append(\n\ + \ override_params.get(param['parameter_id'], param)\n )\n\n extra_overrides\ + \ = set(override_params) - set(\n p['parameter_id'] for p in default_params\n\ + \ )\n if extra_overrides:\n extra_override_str = ', '.join(extra_overrides)\n\ + \ warnings.warn(\n f'The overrides {extra_override_str} were not\ + \ found in the params and '\n 'will be 
ignored.'\n )\n\n return\ + \ study_spec_parameters\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - 
'{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-wide-and-deep-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJobWithMetrics + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --execution_metrics + - '{{$.outputs.parameters[''execution_metrics''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"wide-and-deep-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], 
\"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", "{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", + "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--instance_schema_path=", "{{$.outputs.parameters[''instance_schema_uri''].output_file}}", + "\", \"--prediction_schema_path=", "{{$.outputs.parameters[''prediction_schema_uri''].output_file}}", + "\", \"--trials_path=", "{{$.outputs.parameters[''trials''].output_file}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", "\", + \"--metric_goal=", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\", \"--seed=", "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", + "{{$.inputs.parameters[''eval_steps'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The Wide & Deep built-in algorithm HyperparameterTuningJob pipeline. 
+ name: automl-tabular-wide-and-deep-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + 
pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 
The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' + isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' 
+ parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize, possible values: [ ''loss'', + + ''average_loss'', ''rmse'', ''mae'', ''mql'', ''accuracy'', ''auc'', ''precision'', + + ''recall''].' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py new file mode 100644 index 0000000000..19eaddb481 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -0,0 +1,281 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Wide and Deep Trainer component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def wide_and_deep_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + learning_rate: float, + dnn_learning_rate: float, + instance_baseline: Input[Artifact], + metadata: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + transform_output: Input[Artifact], + training_schema_uri: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], # pylint: disable=unused-argument + weight_column: Optional[str] = '', + max_steps: Optional[int] = -1, + max_train_secs: Optional[int] = -1, + optimizer_type: Optional[str] = 'adam', + l1_regularization_strength: Optional[float] = 0, + l2_regularization_strength: Optional[float] = 0, + l2_shrinkage_regularization_strength: Optional[float] = 0, + beta_1: Optional[float] = 0.9, + beta_2: Optional[float] = 0.999, + hidden_units: Optional[str] = '30,30,30', + use_wide: Optional[bool] = True, + embed_categories: Optional[bool] = True, + dnn_dropout: Optional[float] = 0, + dnn_optimizer_type: Optional[str] = 'ftrl', + dnn_l1_regularization_strength: Optional[float] = 0, + dnn_l2_regularization_strength: Optional[float] = 0, + dnn_l2_shrinkage_regularization_strength: Optional[float] = 0, + dnn_beta_1: Optional[float] = 0.9, + dnn_beta_2: Optional[float] = 0.999, + enable_profiler: Optional[bool] = False, + cache_data: Optional[str] = 'auto', + seed: Optional[int] = 1, + eval_steps: Optional[int] = 0, + batch_size: Optional[int] = 100, + measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + optimization_metric: Optional[str] = '', + eval_frequency_secs: Optional[int] = 600, + training_machine_spec: Optional[dict] = {'machine_type': 'c2-standard-16'}, + training_disk_spec: Optional[dict] = { + 'boot_disk_type': 'pd-ssd', + 'boot_disk_size_gb': 100, + }, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains a Wide & Deep model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to + produce. "classification" or "regression". + weight_column: The weight column name. + max_steps: Number of steps to run the trainer for. + max_train_secs: Amount of time in seconds to run the + trainer for. + learning_rate: The learning rate used by the linear optimizer. + optimizer_type: The type of optimizer to use. Choices are + "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent + Optimizers, respectively. 
+ l1_regularization_strength: L1 regularization strength + for optimizer_type="ftrl". + l2_regularization_strength: L2 regularization strength + for optimizer_type="ftrl". + l2_shrinkage_regularization_strength: L2 shrinkage + regularization strength for optimizer_type="ftrl". + beta_1: Beta 1 value for optimizer_type="adam". + beta_2: Beta 2 value for optimizer_type="adam". + hidden_units: Hidden layer sizes to use for DNN feature + columns, provided in comma-separated layers. + use_wide: If set to true, the categorical columns will be + used in the wide part of the DNN model. + embed_categories: If set to true, the categorical columns + will be embedded and used in the deep part of the model. Embedding + size is the square root of the column cardinality. + dnn_dropout: The probability we will drop out a given + coordinate. + dnn_learning_rate: The learning rate for training the + deep part of the model. + dnn_optimizer_type: The type of optimizer to use for the + deep part of the model. Choices are "adam", "ftrl" and "sgd" for the + Adam, FTRL, and Gradient Descent Optimizers, respectively. + dnn_l1_regularization_strength: L1 regularization + strength for dnn_optimizer_type="ftrl". + dnn_l2_regularization_strength: L2 regularization + strength for dnn_optimizer_type="ftrl". + dnn_l2_shrinkage_regularization_strength: L2 shrinkage + regularization strength for dnn_optimizer_type="ftrl". + dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". + dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". + enable_profiler: Enables profiling and saves a trace + during evaluation. + cache_data: Whether to cache data or not. If set to + 'auto', caching is determined based on the dataset size. + seed: Seed to be used for this run. + eval_steps: Number of steps to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + batch_size: Batch size for training. + measurement_selection_type: Which measurement to use + if/when the service automatically selects the final measurement from + previously reported intermediate measurements. One of "BEST_MEASUREMENT" + or "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for + `measurement_selection_type`. Default is "rmse" for regression and "auc" + for classification. + eval_frequency_secs: Frequency at which evaluation and + checkpointing will take place. + training_machine_spec: The training machine + spec. See https://cloud.google.com/compute/docs/machine-types for + options. + training_disk_spec: The training disk spec. + instance_baseline: The path to a JSON file for baseline values. + metadata: The path to the training metadata. + materialized_train_split: The path to the materialized train split. + materialized_eval_split: The path to the materialized validation split. + transform_output: The path to transform output. + training_schema_uri: The path to the training schema. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training job. + unmanaged_container_model: The UnmanagedContainerModel artifact.
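+
+  Example:
+    A minimal sketch of wiring this trainer inside a `@dsl.pipeline`
+    function. It assumes upstream tasks named `fte` (feature-transform-engine),
+    `split` (split-materialized-data) and `configurator`
+    (training-configurator-and-validator) produce the referenced outputs; the
+    task names and the project/location/bucket values are placeholders:
+
+      trainer = wide_and_deep_trainer(
+          project='my-project',
+          location='us-central1',
+          root_dir='gs://my-bucket/pipeline_root',
+          target_column='label',
+          prediction_type='classification',
+          learning_rate=0.01,
+          dnn_learning_rate=0.01,
+          instance_baseline=configurator.outputs['instance_baseline'],
+          metadata=configurator.outputs['metadata'],
+          materialized_train_split=split.outputs['materialized_train_split'],
+          materialized_eval_split=split.outputs['materialized_eval_split'],
+          transform_output=fte.outputs['transform_output'],
+          training_schema_uri=fte.outputs['training_schema'],
+      )
+
+    The task's `unmanaged_container_model` output can then be passed to infra
+    validation and model upload, as in the accompanying pipeline YAML.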
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "wide-and-deep-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":"', + '1', + '", "machine_spec": ', + training_machine_spec, + ', "disk_spec": ', + training_disk_spec, + ', "container_spec": {"image_uri":"', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + '", "args": ["--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--model_type=', + prediction_type, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--baseline_path=', + instance_baseline.uri, + '", "--metadata_path=', + metadata.uri, + '", "--transform_output_path=', + transform_output.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + '", "--job_dir=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--training_data_path=' + ), + materialized_train_split.uri, + '", "--validation_data_path=', + materialized_eval_split.uri, + '", "--max_steps=', + max_steps, + '", "--max_train_secs=', + max_train_secs, + '", "--learning_rate=', + learning_rate, + '", "--optimizer_type=', + optimizer_type, + '", "--l1_regularization_strength=', + l1_regularization_strength, + '", "--l2_regularization_strength=', + l2_regularization_strength, + '", "--l2_shrinkage_regularization_strength=', + l2_shrinkage_regularization_strength, + '", "--beta_1=', + beta_1, + '", "--beta_2=', + beta_2, + '", "--hidden_units=', + hidden_units, + '", "--use_wide=', + use_wide, + '", "--embed_categories=', + embed_categories, + '", "--dnn_dropout=', + dnn_dropout, + '", "--dnn_learning_rate=', + dnn_learning_rate, + '", "--dnn_optimizer_type=', + dnn_optimizer_type, + '", "--dnn_l1_regularization_strength=', + dnn_l1_regularization_strength, + '", "--dnn_l2_regularization_strength=', + dnn_l2_regularization_strength, + '", "--dnn_l2_shrinkage_regularization_strength=', + dnn_l2_shrinkage_regularization_strength, + '", "--dnn_beta_1=', + dnn_beta_1, + '", "--dnn_beta_2=', + dnn_beta_2, + '", "--enable_profiler=', + enable_profiler, + '", "--cache_data=', + cache_data, + '", "--seed=', + seed, + '", "--eval_steps=', + eval_steps, + '", "--batch_size=', + batch_size, + '", "--measurement_selection_type=', + measurement_selection_type, + '", "--optimization_metric=', + optimization_metric, + '", "--eval_frequency_secs=', + eval_frequency_secs, + '", "--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml new file mode 100644 index 0000000000..748711a0dd --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -0,0 +1,4048 @@ +# 
PIPELINE DEFINITION +# Name: automl-tabular-wide-and-deep-trainer +# Description: The Wide & Deep training pipeline. +# Inputs: +# batch_size: int [Default: 100.0] +# beta_1: float [Default: 0.9] +# beta_2: float [Default: 0.999] +# bigquery_staging_full_dataset_id: str [Default: ''] +# cache_data: str [Default: 'auto'] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# dnn_beta_1: float [Default: 0.9] +# dnn_beta_2: float [Default: 0.999] +# dnn_dropout: float [Default: 0.0] +# dnn_l1_regularization_strength: float [Default: 0.0] +# dnn_l2_regularization_strength: float [Default: 0.0] +# dnn_l2_shrinkage_regularization_strength: float [Default: 0.0] +# dnn_learning_rate: float +# dnn_optimizer_type: str [Default: 'adam'] +# embed_categories: bool [Default: True] +# enable_profiler: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# eval_frequency_secs: int [Default: 600.0] +# eval_steps: int [Default: 0.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# hidden_units: str [Default: '30,30,30'] +# l1_regularization_strength: float [Default: 0.0] +# l2_regularization_strength: float [Default: 0.0] +# l2_shrinkage_regularization_strength: float [Default: 0.0] +# learning_rate: float +# location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] +# max_selected_features: int [Default: -1.0] +# max_steps: int [Default: -1.0] +# max_train_secs: int [Default: -1.0] +# measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_metric: str [Default: ''] +# optimizer_type: str [Default: 'adam'] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 1.0] +# stratified_split_key: str [Default: ''] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] +# tf_transformations_path: str [Default: ''] +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# use_wide: bool [Default: True] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# worker_pool_specs_override: list +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + 
defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--wide-and-deep-trainer-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + 
dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - wide-and-deep-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - wide-and-deep-trainer + 
inputs: + artifacts: + pipelinechannel--wide-and-deep-trainer-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + 
componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: neural_network + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-infra-validator + - wide-and-deep-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: wide-and-deep-trainer + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + parse-worker-pool-specs-override: + cachingOptions: + enableCache: true + componentRef: + name: comp-parse-worker-pool-specs-override + inputs: + parameters: + worker_pool_specs_override: + componentInputParameter: pipelinechannel--worker_pool_specs_override + taskInfo: + name: parse-worker-pool-specs-override + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + 
producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + wide-and-deep-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-wide-and-deep-trainer + dependentTasks: + - feature-transform-engine + - parse-worker-pool-specs-override + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + batch_size: + componentInputParameter: pipelinechannel--batch_size + beta_1: + componentInputParameter: pipelinechannel--beta_1 + beta_2: + componentInputParameter: pipelinechannel--beta_2 + cache_data: + componentInputParameter: pipelinechannel--cache_data + dnn_beta_1: + componentInputParameter: pipelinechannel--dnn_beta_1 + dnn_beta_2: + componentInputParameter: pipelinechannel--dnn_beta_2 + dnn_dropout: + componentInputParameter: pipelinechannel--dnn_dropout + dnn_l1_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l1_regularization_strength + dnn_l2_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l2_regularization_strength + dnn_l2_shrinkage_regularization_strength: + componentInputParameter: pipelinechannel--dnn_l2_shrinkage_regularization_strength + dnn_learning_rate: + componentInputParameter: pipelinechannel--dnn_learning_rate + dnn_optimizer_type: + componentInputParameter: pipelinechannel--dnn_optimizer_type + embed_categories: + componentInputParameter: pipelinechannel--embed_categories + enable_profiler: + componentInputParameter: pipelinechannel--enable_profiler + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + eval_frequency_secs: + componentInputParameter: pipelinechannel--eval_frequency_secs + 
eval_steps: + componentInputParameter: pipelinechannel--eval_steps + hidden_units: + componentInputParameter: pipelinechannel--hidden_units + l1_regularization_strength: + componentInputParameter: pipelinechannel--l1_regularization_strength + l2_regularization_strength: + componentInputParameter: pipelinechannel--l2_regularization_strength + l2_shrinkage_regularization_strength: + componentInputParameter: pipelinechannel--l2_shrinkage_regularization_strength + learning_rate: + componentInputParameter: pipelinechannel--learning_rate + location: + componentInputParameter: pipelinechannel--location + max_steps: + componentInputParameter: pipelinechannel--max_steps + max_train_secs: + componentInputParameter: pipelinechannel--max_train_secs + measurement_selection_type: + componentInputParameter: pipelinechannel--measurement_selection_type + optimization_metric: + componentInputParameter: pipelinechannel--optimization_metric + optimizer_type: + componentInputParameter: pipelinechannel--optimizer_type + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + seed: + componentInputParameter: pipelinechannel--seed + target_column: + componentInputParameter: pipelinechannel--target_column + training_disk_spec: + taskOutputParameter: + outputParameterKey: training_disk_spec + producerTask: parse-worker-pool-specs-override + training_machine_spec: + taskOutputParameter: + outputParameterKey: training_machine_spec + producerTask: parse-worker-pool-specs-override + use_wide: + componentInputParameter: pipelinechannel--use_wide + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: wide-and-deep-trainer + inputDefinitions: + parameters: + pipelinechannel--batch_size: + parameterType: NUMBER_INTEGER + pipelinechannel--beta_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--beta_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cache_data: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--dnn_beta_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_beta_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_dropout: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l1_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l2_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_l2_shrinkage_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--dnn_optimizer_type: + parameterType: STRING + pipelinechannel--embed_categories: + parameterType: BOOLEAN + pipelinechannel--enable_profiler: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_frequency_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--eval_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + 
pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--hidden_units: + parameterType: STRING + pipelinechannel--l1_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--l2_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--l2_shrinkage_regularization_strength: + parameterType: NUMBER_DOUBLE + pipelinechannel--learning_rate: + parameterType: NUMBER_DOUBLE + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_steps: + parameterType: NUMBER_INTEGER + pipelinechannel--max_train_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--measurement_selection_type: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_metric: + parameterType: STRING + pipelinechannel--optimizer_type: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--use_wide: + parameterType: BOOLEAN + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + pipelinechannel--worker_pool_specs_override: + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + 
autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. 
Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key.
Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-parse-worker-pool-specs-override: + executorLabel: exec-parse-worker-pool-specs-override + inputDefinitions: + parameters: + worker_pool_specs_override: + description: 'The list of dictionaries for overriding training + + and evaluation worker pool specs.' + parameterType: LIST + outputDefinitions: + parameters: + eval_machine_spec: + description: The eval machine spec. + parameterType: STRUCT + eval_replica_count: + description: The replica count for eval. + parameterType: NUMBER_INTEGER + training_disk_spec: + description: The training disk spec. + parameterType: STRUCT + training_machine_spec: + description: The training machine spec. + parameterType: STRUCT + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. 
+ parameterType: STRING
+ project:
+ description: The GCP project that runs the pipeline components.
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ data_source_bigquery_table_path:
+ parameterType: STRING
+ data_source_csv_filenames:
+ parameterType: STRING
+ model_display_name:
+ parameterType: STRING
+ comp-split-materialized-data:
+ executorLabel: exec-split-materialized-data
+ inputDefinitions:
+ artifacts:
+ materialized_data:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: 'Materialized dataset output by the Feature
+
+ Transform Engine.'
+ outputDefinitions:
+ artifacts:
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized eval split.
+ materialized_test_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized test split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized train split.
+ comp-training-configurator-and-validator:
+ executorLabel: exec-training-configurator-and-validator
+ inputDefinitions:
+ artifacts:
+ dataset_stats:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Dataset stats generated by
+
+ feature transform engine.'
+ instance_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: 'Schema of input data to the tf_model at
+
+ serving time.'
+ training_schema:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ parameters:
+ available_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ context_window:
+ defaultValue: -1.0
+ description: The length of the context window.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ enable_probabilistic_inference:
+ defaultValue: false
+ description: 'If probabilistic inference is
+
+ enabled, the model will fit a distribution that captures the uncertainty
+
+ of a prediction. At inference time, the predictive distribution is used
+
+ to make a point prediction that minimizes the optimization objective.
+
+ For example, the mean of a predictive distribution is the point
+
+ prediction that minimizes RMSE loss. If quantiles are specified, then
+
+ the quantiles of the distribution are also returned.'
+ isOptional: true
+ parameterType: BOOLEAN
+ forecast_horizon:
+ defaultValue: -1.0
+ description: The length of the forecast horizon.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ forecasting_model_type:
+ defaultValue: ''
+ description: The model types, e.g. l2l, seq2seq, tft.
+ isOptional: true
+ parameterType: STRING
+ forecasting_transformations:
+ defaultValue: {}
+ description: 'Dict mapping auto and/or type-resolutions to
+
+ feature columns. The supported types are auto, categorical, numeric,
+
+ text, and timestamp.'
+ isOptional: true
+ parameterType: STRUCT
+ group_columns:
+ description: 'A list of time series attribute column
+
+ names that define the time series hierarchy.'
+ isOptional: true
+ parameterType: LIST
+ group_temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for
+
+ predictions aggregated over both the horizon and time series in the same
+
+ hierarchy group.'
+ isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' 
+ isOptional: true
+ parameterType: STRING
+ time_series_attribute_columns:
+ defaultValue: []
+ description: 'The column names of the time series
+
+ attributes.'
+ isOptional: true
+ parameterType: LIST
+ time_series_identifier_column:
+ defaultValue: ''
+ description: 'Time series identifier column. Used by
+
+ forecasting only.'
+ isOptional: true
+ parameterType: STRING
+ unavailable_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ not available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ weight_column:
+ defaultValue: ''
+ description: Weight column of input data.
+ isOptional: true
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The tabular example gen metadata.
+ comp-wide-and-deep-trainer:
+ executorLabel: exec-wide-and-deep-trainer
+ inputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to a JSON file for baseline values.
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the materialized validation split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the materialized train split.
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The tabular example gen metadata.
+ training_schema_uri:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to the training schema.
+ transform_output:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The path to transform output.
+ parameters:
+ batch_size:
+ defaultValue: 100.0
+ description: Batch size for training.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ beta_1:
+ defaultValue: 0.9
+ description: Beta 1 value for optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ beta_2:
+ defaultValue: 0.999
+ description: Beta 2 value for optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ cache_data:
+ defaultValue: auto
+ description: 'Whether to cache data or not. If set to
+
+ ''auto'', caching is determined based on the dataset size.'
+ isOptional: true
+ parameterType: STRING
+ dnn_beta_1:
+ defaultValue: 0.9
+ description: Beta 1 value for dnn_optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_beta_2:
+ defaultValue: 0.999
+ description: Beta 2 value for dnn_optimizer_type="adam".
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_dropout:
+ defaultValue: 0.0
+ description: 'The probability we will drop out a given
+
+ coordinate.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l1_regularization_strength:
+ defaultValue: 0.0
+ description: 'L1 regularization
+
+ strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l2_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 regularization
+
+ strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_l2_shrinkage_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 shrinkage
+
+ regularization strength for dnn_optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ dnn_learning_rate:
+ description: 'The learning rate for training the
+
+ deep part of the model.'
+ parameterType: NUMBER_DOUBLE
+ dnn_optimizer_type:
+ defaultValue: ftrl
+ description: 'The type of optimizer to use for the
+
+ deep part of the model. Choices are "adam", "ftrl" and "sgd" for the
+
+ Adam, FTRL, and Gradient Descent Optimizers, respectively.'
+ isOptional: true
+ parameterType: STRING
+ embed_categories:
+ defaultValue: true
+ description: 'If set to true, the categorical columns
+
+ will be embedded and used in the deep part of the model. Embedding
+
+ size is the square root of the column cardinality.'
+ isOptional: true
+ parameterType: BOOLEAN
+ enable_profiler:
+ defaultValue: false
+ description: 'Enables profiling and saves a trace
+
+ during evaluation.'
+ isOptional: true
+ parameterType: BOOLEAN
+ encryption_spec_key_name:
+ defaultValue: ''
+ description: The KMS key name.
+ isOptional: true
+ parameterType: STRING
+ eval_frequency_secs:
+ defaultValue: 600.0
+ description: 'Frequency at which evaluation and
+
+ checkpointing will take place.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ eval_steps:
+ defaultValue: 0.0
+ description: 'Number of steps to run evaluation for. If not
+
+ specified or negative, it means run evaluation on the whole validation
+
+ dataset. If set to 0, it means run evaluation for a fixed number of
+
+ samples.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ hidden_units:
+ defaultValue: 30,30,30
+ description: 'Hidden layer sizes to use for DNN feature
+
+ columns, provided in comma-separated layers.'
+ isOptional: true
+ parameterType: STRING
+ l1_regularization_strength:
+ defaultValue: 0.0
+ description: 'L1 regularization strength
+
+ for optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ l2_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 regularization strength
+
+ for optimizer_type="ftrl"'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ l2_shrinkage_regularization_strength:
+ defaultValue: 0.0
+ description: 'L2 shrinkage
+
+ regularization strength for optimizer_type="ftrl".'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ learning_rate:
+ description: The learning rate used by the linear optimizer.
+ parameterType: NUMBER_DOUBLE
+ location:
+ description: The GCP region that runs the pipeline components.
+ parameterType: STRING
+ max_steps:
+ defaultValue: -1.0
+ description: Number of steps to run the trainer for.
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_train_secs:
+ defaultValue: -1.0
+ description: 'Amount of time in seconds to run the
+
+ trainer for.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ measurement_selection_type:
+ defaultValue: BEST_MEASUREMENT
+ description: 'Which measurement to use
+
+ if/when the service automatically selects the final measurement from
+
+ previously reported intermediate measurements. One of "BEST_MEASUREMENT"
+
+ or "LAST_MEASUREMENT".'
+ isOptional: true
+ parameterType: STRING
+ optimization_metric:
+ defaultValue: ''
+ description: 'Optimization metric used for
+
+ `measurement_selection_type`. Default is "rmse" for regression and "auc"
+
+ for classification.'
+ isOptional: true
+ parameterType: STRING
+ optimizer_type:
+ defaultValue: adam
+ description: 'The type of optimizer to use. Choices are
+
+ "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent
+
+ Optimizers, respectively.'
+ isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to + + produce. "classification" or "regression".' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + target_column: + description: The target column name. + parameterType: STRING + training_disk_spec: + defaultValue: + boot_disk_size_gb: 100.0 + boot_disk_type: pd-ssd + description: The training disk spec. + isOptional: true + parameterType: STRUCT + training_machine_spec: + defaultValue: + machine_type: c2-standard-16 + description: 'The training machine + + spec. See https://cloud.google.com/compute/docs/machine-types for + + options.' + isOptional: true + parameterType: STRUCT + use_wide: + defaultValue: true + description: 'If set to true, the categorical columns will be + + used in the wide part of the DNN model.' + isOptional: true + parameterType: BOOLEAN + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: Serialized gcp_resources proto tracking the custom training + job. + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - 
instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-parse-worker-pool-specs-override: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _parse_worker_pool_specs_override + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _parse_worker_pool_specs_override(\n worker_pool_specs_override:\ + \ list, # pylint:disable=g-bare-generic\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('training_machine_spec', dict), # pylint:disable=g-bare-generic\n\ + \ ('training_disk_spec', dict),\n ('eval_machine_spec', dict),\ + \ # pylint:disable=g-bare-generic\n ('eval_replica_count', int),\n\ + \ ],\n):\n \"\"\"Parses worker_pool_specs_override and returns training\ + \ and evaluation machine specifications.\n\n Args:\n worker_pool_specs_override:\ + \ The list of dictionaries for overriding training\n and evaluation\ + \ worker pool specs.\n\n Returns:\n training_machine_spec: The training\ + \ machine spec.\n training_disk_spec: The training disk spec.\n \ + \ eval_machine_spec: The eval machine spec.\n eval_replica_count:\ + \ The replica count for eval.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n training_machine_spec = {'machine_type': 'c2-standard-16'}\n training_disk_spec\ + \ = {'boot_disk_type': 'pd-ssd', 'boot_disk_size_gb': 100}\n eval_machine_spec\ + \ = {'machine_type': 'c2-standard-8'}\n eval_replica_count = 1\n\n if\ + \ worker_pool_specs_override:\n if len(worker_pool_specs_override) >=\ + \ 1 and isinstance(\n worker_pool_specs_override[0], dict\n ):\n\ + \ training_machine_spec = worker_pool_specs_override[0].get(\n \ + \ 'machine_spec', training_machine_spec\n )\n training_disk_spec\ + \ = worker_pool_specs_override[0].get(\n 'disk_spec', training_disk_spec\n\ + \ )\n if len(worker_pool_specs_override) == 4 and isinstance(\n\ + \ worker_pool_specs_override[3], dict\n ):\n eval_machine_spec\ + \ = worker_pool_specs_override[3].get(\n 'machine_spec', eval_machine_spec\n\ + \ )\n eval_replica_count = worker_pool_specs_override[3].get(\n\ + \ 'replica_count', eval_replica_count\n )\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'training_machine_spec',\n \ + \ 'training_disk_spec',\n 'eval_machine_spec',\n 'eval_replica_count',\n\ + \ ],\n )(\n training_machine_spec,\n training_disk_spec,\n\ + \ eval_machine_spec,\n eval_replica_count,\n )\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-wide-and-deep-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"wide-and-deep-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", + "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", + ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", + ", \"container_spec\": {\"image_uri\":\"", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", + "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_schema_path=", "{{$.inputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--training_data_path=", "{{$.inputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--validation_data_path=", "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--max_steps=", "{{$.inputs.parameters[''max_steps'']}}", "\", \"--max_train_secs=", + "{{$.inputs.parameters[''max_train_secs'']}}", "\", \"--learning_rate=", + "{{$.inputs.parameters[''learning_rate'']}}", "\", \"--optimizer_type=", + "{{$.inputs.parameters[''optimizer_type'']}}", "\", \"--l1_regularization_strength=", + "{{$.inputs.parameters[''l1_regularization_strength'']}}", "\", \"--l2_regularization_strength=", + "{{$.inputs.parameters[''l2_regularization_strength'']}}", "\", \"--l2_shrinkage_regularization_strength=", + "{{$.inputs.parameters[''l2_shrinkage_regularization_strength'']}}", "\", + \"--beta_1=", "{{$.inputs.parameters[''beta_1'']}}", "\", \"--beta_2=", + "{{$.inputs.parameters[''beta_2'']}}", "\", \"--hidden_units=", "{{$.inputs.parameters[''hidden_units'']}}", + "\", \"--use_wide=", "{{$.inputs.parameters[''use_wide'']}}", "\", \"--embed_categories=", + "{{$.inputs.parameters[''embed_categories'']}}", "\", \"--dnn_dropout=", + "{{$.inputs.parameters[''dnn_dropout'']}}", "\", \"--dnn_learning_rate=", + "{{$.inputs.parameters[''dnn_learning_rate'']}}", "\", \"--dnn_optimizer_type=", + "{{$.inputs.parameters[''dnn_optimizer_type'']}}", "\", \"--dnn_l1_regularization_strength=", + "{{$.inputs.parameters[''dnn_l1_regularization_strength'']}}", "\", \"--dnn_l2_regularization_strength=", + "{{$.inputs.parameters[''dnn_l2_regularization_strength'']}}", "\", \"--dnn_l2_shrinkage_regularization_strength=", + "{{$.inputs.parameters[''dnn_l2_shrinkage_regularization_strength'']}}", + "\", \"--dnn_beta_1=", "{{$.inputs.parameters[''dnn_beta_1'']}}", "\", \"--dnn_beta_2=", + "{{$.inputs.parameters[''dnn_beta_2'']}}", "\", \"--enable_profiler=", "{{$.inputs.parameters[''enable_profiler'']}}", + "\", \"--cache_data=", "{{$.inputs.parameters[''cache_data'']}}", "\", \"--seed=", + "{{$.inputs.parameters[''seed'']}}", "\", \"--eval_steps=", "{{$.inputs.parameters[''eval_steps'']}}", + "\", \"--batch_size=", "{{$.inputs.parameters[''batch_size'']}}", "\", \"--measurement_selection_type=", + "{{$.inputs.parameters[''measurement_selection_type'']}}", "\", \"--optimization_metric=", + "{{$.inputs.parameters[''optimization_metric'']}}", "\", \"--eval_frequency_secs=", + "{{$.inputs.parameters[''eval_frequency_secs'']}}", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: 
gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The Wide & Deep training pipeline. + name: automl-tabular-wide-and-deep-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--batch_size: + componentInputParameter: batch_size + pipelinechannel--beta_1: + componentInputParameter: beta_1 + pipelinechannel--beta_2: + componentInputParameter: beta_2 + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--cache_data: + componentInputParameter: cache_data + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--dnn_beta_1: + componentInputParameter: dnn_beta_1 + pipelinechannel--dnn_beta_2: + componentInputParameter: dnn_beta_2 + pipelinechannel--dnn_dropout: + componentInputParameter: dnn_dropout + pipelinechannel--dnn_l1_regularization_strength: + componentInputParameter: dnn_l1_regularization_strength + pipelinechannel--dnn_l2_regularization_strength: + componentInputParameter: dnn_l2_regularization_strength + pipelinechannel--dnn_l2_shrinkage_regularization_strength: + componentInputParameter: dnn_l2_shrinkage_regularization_strength + pipelinechannel--dnn_learning_rate: + componentInputParameter: dnn_learning_rate + pipelinechannel--dnn_optimizer_type: + componentInputParameter: dnn_optimizer_type + pipelinechannel--embed_categories: + componentInputParameter: embed_categories + pipelinechannel--enable_profiler: + componentInputParameter: enable_profiler + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_frequency_secs: + componentInputParameter: eval_frequency_secs + pipelinechannel--eval_steps: + componentInputParameter: eval_steps + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: 
evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--hidden_units: + componentInputParameter: hidden_units + pipelinechannel--l1_regularization_strength: + componentInputParameter: l1_regularization_strength + pipelinechannel--l2_regularization_strength: + componentInputParameter: l2_regularization_strength + pipelinechannel--l2_shrinkage_regularization_strength: + componentInputParameter: l2_shrinkage_regularization_strength + pipelinechannel--learning_rate: + componentInputParameter: learning_rate + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_steps: + componentInputParameter: max_steps + pipelinechannel--max_train_secs: + componentInputParameter: max_train_secs + pipelinechannel--measurement_selection_type: + componentInputParameter: measurement_selection_type + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--optimization_metric: + componentInputParameter: optimization_metric + pipelinechannel--optimizer_type: + componentInputParameter: optimizer_type + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: 
+ componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--use_wide: + componentInputParameter: use_wide + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + pipelinechannel--worker_pool_specs_override: + componentInputParameter: worker_pool_specs_override + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + batch_size: + defaultValue: 100.0 + description: Batch size for training. + isOptional: true + parameterType: NUMBER_INTEGER + beta_1: + defaultValue: 0.9 + description: Beta 1 value for optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + beta_2: + defaultValue: 0.999 + description: Beta 2 value for optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: Staging directory for BigQuery tables. + isOptional: true + parameterType: STRING + cache_data: + defaultValue: auto + description: 'Whether to cache data or not. If set to ''auto'', caching is + + determined based on the dataset size.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + dnn_beta_1: + defaultValue: 0.9 + description: Beta 1 value for dnn_optimizer_type='adam'. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + dnn_beta_2: + defaultValue: 0.999 + description: Beta 2 value for dnn_optimizer_type='adam'. + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_dropout: + defaultValue: 0.0 + description: The probability we will drop out a given coordinate. + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l1_regularization_strength: + defaultValue: 0.0 + description: 'L1 regularization strength for + + dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l2_regularization_strength: + defaultValue: 0.0 + description: 'L2 regularization strength for + + dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_l2_shrinkage_regularization_strength: + defaultValue: 0.0 + description: 'L2 shrinkage regularization + + strength for dnn_optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + dnn_learning_rate: + description: 'The learning rate for training the deep part of the + + model.' + parameterType: NUMBER_DOUBLE + dnn_optimizer_type: + defaultValue: adam + description: 'The type of optimizer to use for the deep part of the + + model. Choices are ''adam'', ''ftrl'' and ''sgd''. for the Adam, FTRL, and + + Gradient Descent Optimizers, respectively.' + isOptional: true + parameterType: STRING + embed_categories: + defaultValue: true + description: 'If set to true, the categorical columns will be used + + embedded and used in the deep part of the model. Embedding size is the + + square root of the column cardinality.' + isOptional: true + parameterType: BOOLEAN + enable_profiler: + defaultValue: false + description: Enables profiling and saves a trace during evaluation. + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_frequency_secs: + defaultValue: 600.0 + description: 'Frequency at which evaluation and checkpointing will + + take place.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_steps: + defaultValue: 0.0 + description: 'Number of steps to run evaluation for. If not specified or + + negative, it means run evaluation on the whole validation dataset. If set + + to 0, it means run evaluation for a fixed number of samples.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' 
+ isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + hidden_units: + defaultValue: 30,30,30 + description: 'Hidden layer sizes to use for DNN feature columns, provided + in + + comma-separated layers.' + isOptional: true + parameterType: STRING + l1_regularization_strength: + defaultValue: 0.0 + description: 'L1 regularization strength for + + optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + l2_regularization_strength: + defaultValue: 0.0 + description: 'L2 regularization strength for + + optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + l2_shrinkage_regularization_strength: + defaultValue: 0.0 + description: 'L2 shrinkage regularization strength + + for optimizer_type=''ftrl''.' + isOptional: true + parameterType: NUMBER_DOUBLE + learning_rate: + description: The learning rate used by the linear optimizer. + parameterType: NUMBER_DOUBLE + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_steps: + defaultValue: -1.0 + description: Number of steps to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + max_train_secs: + defaultValue: -1.0 + description: Amount of time in seconds to run the trainer for. + isOptional: true + parameterType: NUMBER_INTEGER + measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: 'Which measurement to use if/when the service + + automatically selects the final measurement from previously reported + + intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + optimization_metric: + defaultValue: '' + description: 'Optimization metric used for + + `measurement_selection_type`. Default is "rmse" for regression and "auc" + + for classification.' + isOptional: true + parameterType: STRING + optimizer_type: + defaultValue: adam + description: 'The type of optimizer to use. Choices are "adam", "ftrl" and + + "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively.' + isOptional: true + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + description: 'The type of prediction the model is to produce. + + "classification" or "regression".' 
+ parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 1.0 + description: Seed to be used for this run. + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: List of auto transform features. + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + use_wide: + defaultValue: true + description: 'If set to true, the categorical columns will be used in the + wide + + part of the DNN model.' + isOptional: true + parameterType: BOOLEAN + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + worker_pool_specs_override: + description: 'The dictionary for overriding training and + + evaluation worker pool specs. The dictionary should be of format + + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.' 
+ isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py new file mode 100644 index 0000000000..a96e46d984 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py @@ -0,0 +1,124 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML XGBoost Hyperparameter Tuning component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def xgboost_hyperparameter_tuning_job( + project: str, + location: str, + study_spec_metric_id: str, + study_spec_metric_goal: str, + study_spec_parameters_override: list, + max_trial_count: int, + parallel_trial_count: int, + worker_pool_specs: list, + gcp_resources: dsl.OutputPath(str), + max_failed_trial_count: Optional[int] = 0, + study_spec_algorithm: Optional[str] = 'ALGORITHM_UNSPECIFIED', + study_spec_measurement_selection_type: Optional[str] = 'BEST_MEASUREMENT', + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes XGBoost hyperparameters using Vertex HyperparameterTuningJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + study_spec_metric_id: Metric to optimize. For options, + please look under 'eval_metric' at + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. + study_spec_metric_goal: Optimization goal of the metric, + possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries + representing parameters to optimize. The dictionary key is the + parameter_id, which is passed to training job as a command line + argument, and the dictionary value is the parameter specification of the + metric. + max_trial_count: The desired total number of trials. + parallel_trial_count: The desired number of trials to run + in parallel. + max_failed_trial_count: The number of failed trials that + need to be seen before failing the HyperparameterTuningJob. If set to 0, + Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for + the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or + 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement + to use if/when the service automatically selects the final measurement + from previously reported intermediate measurements. One of + "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + worker_pool_specs: The worker pool specs. 
+ encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher', + ], + args=[ + '--type', + 'HyperparameterTuningJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "xgboost-hyperparameter-tuning-job-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "study_spec": {"metrics": [{"metric_id": "', + study_spec_metric_id, + '", "goal": "', + study_spec_metric_goal, + '"}], "parameters": ', + study_spec_parameters_override, + ', "algorithm": "', + study_spec_algorithm, + '", "measurement_selection_type": "', + study_spec_measurement_selection_type, + '"}, "max_trial_count": ', + max_trial_count, + ', "parallel_trial_count": ', + parallel_trial_count, + ', "max_failed_trial_count": ', + max_failed_trial_count, + ', "trial_job_spec": {"worker_pool_specs": ', + worker_pool_specs, + '}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml new file mode 100644 index 0000000000..8c3017aa09 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -0,0 +1,4332 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-xgboost-hyperparameter-tuning-job +# Description: The XGBoost HyperparameterTuningJob pipeline. 
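As a rough orientation for readers of the container component defined above, the following is a minimal sketch, not part of this patch, of how `xgboost_hyperparameter_tuning_job` could be wired into a KFP pipeline. The import path is derived from the file location added above; the metric, study-parameter, and worker-pool values are illustrative placeholders, and `gcp_resources` is an output path that the caller never supplies.

# A minimal sketch (not part of this patch) of calling the container component
# above from a KFP pipeline. Metric, study-parameter, and worker-pool values
# are illustrative placeholders for a user-provided XGBoost training container.
from kfp import compiler, dsl

from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import (
    xgboost_hyperparameter_tuning_job,
)


@dsl.pipeline(name='xgboost-hpt-demo')
def xgboost_hpt_demo(project: str, location: str = 'us-central1'):
    # `gcp_resources` is declared as dsl.OutputPath(str), so it is produced by
    # the task rather than passed in here.
    xgboost_hyperparameter_tuning_job(
        project=project,
        location=location,
        study_spec_metric_id='rmse',
        study_spec_metric_goal='MINIMIZE',
        # Entries shaped like Vertex StudySpec.ParameterSpec messages
        # (assumed format; adjust to the parameters your trainer accepts).
        study_spec_parameters_override=[{
            'parameter_id': 'max_depth',
            'integer_value_spec': {'min_value': 3, 'max_value': 10},
            'scale_type': 'UNIT_LINEAR_SCALE',
        }],
        max_trial_count=10,
        parallel_trial_count=3,
        # One Vertex WorkerPoolSpec per pool; the image URI is a placeholder.
        worker_pool_specs=[{
            'machine_spec': {'machine_type': 'c2-standard-16'},
            'replica_count': 1,
            'container_spec': {'image_uri': 'gcr.io/my-project/xgboost-trainer'},
        }],
    )


if __name__ == '__main__':
    compiler.Compiler().compile(xgboost_hpt_demo, 'xgboost_hpt_demo.yaml')

The component itself only assembles the HyperparameterTuningJob payload via ConcatPlaceholder and hands it to the GCPC launcher, so everything trial-specific lives in the study spec and worker pool spec values passed here.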
+# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_default_eval_metric: int [Default: 0.0] +# encryption_spec_key_name: str [Default: ''] +# eval_metric: str [Default: ''] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# location: str +# max_failed_trial_count: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# max_trial_count: int +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# objective: str +# parallel_trial_count: int +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: False] +# run_feature_selection: bool [Default: False] +# seed: int [Default: 0.0] +# seed_per_iteration: bool [Default: False] +# stratified_split_key: str [Default: ''] +# study_spec_algorithm: str [Default: 'ALGORITHM_UNSPECIFIED'] +# study_spec_measurement_selection_type: str [Default: 'BEST_MEASUREMENT'] +# study_spec_metric_goal: str +# study_spec_metric_id: str +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# training_accelerator_count: int [Default: 0.0] +# training_accelerator_type: str [Default: ''] +# training_fraction: float [Default: -1.0] +# training_machine_type: str [Default: 'c2-standard-16'] +# training_total_replica_count: int [Default: 1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + 
pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--get-prediction-type-for-xgboost-Output: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - get-best-hyperparameter-tuning-job-trial + - get-prediction-type-for-xgboost + inputs: + artifacts: + pipelinechannel--get-best-hyperparameter-tuning-job-trial-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--get-prediction-type-for-xgboost-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - get-prediction-type-for-xgboost + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + 
componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + generate-xgboost-hyperparameter-tuning-worker-pool-specs: + cachingOptions: + enableCache: true + componentRef: + name: comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs + dependentTasks: + - feature-transform-engine + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + accelerator_count: + componentInputParameter: pipelinechannel--training_accelerator_count + accelerator_type: + componentInputParameter: pipelinechannel--training_accelerator_type + disable_default_eval_metric: + componentInputParameter: pipelinechannel--disable_default_eval_metric + eval_metric: + componentInputParameter: pipelinechannel--eval_metric + machine_type: + componentInputParameter: pipelinechannel--training_machine_type + objective: + componentInputParameter: pipelinechannel--objective + seed: + componentInputParameter: pipelinechannel--seed + seed_per_iteration: + componentInputParameter: pipelinechannel--seed_per_iteration + target_column: + componentInputParameter: pipelinechannel--target_column + total_replica_count: + componentInputParameter: pipelinechannel--training_total_replica_count + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: generate-xgboost-hyperparameter-tuning-worker-pool-specs + get-best-hyperparameter-tuning-job-trial: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-best-hyperparameter-tuning-job-trial + dependentTasks: + - generate-xgboost-hyperparameter-tuning-worker-pool-specs + - xgboost-hyperparameter-tuning-job + inputs: + parameters: + gcp_resources: + taskOutputParameter: + outputParameterKey: gcp_resources + producerTask: xgboost-hyperparameter-tuning-job + instance_schema_uri: + taskOutputParameter: + outputParameterKey: instance_schema_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + prediction_docker_uri: + taskOutputParameter: + outputParameterKey: prediction_docker_uri_artifact_path + 
producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + prediction_schema_uri: + taskOutputParameter: + outputParameterKey: prediction_schema_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + read_value_from_file: + runtimeValue: + constant: 1.0 + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + trials_dir: + taskOutputParameter: + outputParameterKey: trials_path + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + taskInfo: + name: get-best-hyperparameter-tuning-job-trial + get-prediction-type-for-xgboost: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-type-for-xgboost + inputs: + parameters: + objective: + componentInputParameter: pipelinechannel--objective + taskInfo: + name: get-prediction-type-for-xgboost + get-xgboost-study-spec-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-xgboost-study-spec-parameters + inputs: + parameters: + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + taskInfo: + name: get-xgboost-study-spec-parameters + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - get-best-hyperparameter-tuning-job-trial + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: get-best-hyperparameter-tuning-job-trial + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + - get-prediction-type-for-xgboost + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + xgboost-hyperparameter-tuning-job: + cachingOptions: + enableCache: 
true + componentRef: + name: comp-xgboost-hyperparameter-tuning-job + dependentTasks: + - generate-xgboost-hyperparameter-tuning-worker-pool-specs + - get-xgboost-study-spec-parameters + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + max_failed_trial_count: + componentInputParameter: pipelinechannel--max_failed_trial_count + max_trial_count: + componentInputParameter: pipelinechannel--max_trial_count + parallel_trial_count: + componentInputParameter: pipelinechannel--parallel_trial_count + project: + componentInputParameter: pipelinechannel--project + study_spec_algorithm: + componentInputParameter: pipelinechannel--study_spec_algorithm + study_spec_measurement_selection_type: + componentInputParameter: pipelinechannel--study_spec_measurement_selection_type + study_spec_metric_goal: + componentInputParameter: pipelinechannel--study_spec_metric_goal + study_spec_metric_id: + componentInputParameter: pipelinechannel--study_spec_metric_id + study_spec_parameters_override: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-xgboost-study-spec-parameters + worker_pool_specs: + taskOutputParameter: + outputParameterKey: worker_pool_specs + producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs + taskInfo: + name: xgboost-hyperparameter-tuning-job + inputDefinitions: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_default_eval_metric: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eval_metric: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_failed_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--max_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--objective: + parameterType: STRING + pipelinechannel--parallel_trial_count: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + 
parameterType: BOOLEAN + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--seed_per_iteration: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_algorithm: + parameterType: STRING + pipelinechannel--study_spec_measurement_selection_type: + parameterType: STRING + pipelinechannel--study_spec_metric_goal: + parameterType: STRING + pipelinechannel--study_spec_metric_id: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--training_accelerator_count: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_type: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_machine_type: + parameterType: STRING + pipelinechannel--training_total_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. 
JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' 
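The folded YAML above makes the embedded `.. code-block:: python` examples for `dataset_level_custom_transformation_definitions` and `dataset_level_transformations` hard to read. Below is a minimal sketch that restates those quoted examples as plain Python literals; every module path, table URI, and column name is the placeholder value taken from the descriptions, not a real resource:

```python
# Illustrative reconstruction of the examples quoted in the parameter
# descriptions above; all values are the placeholders from those descriptions.

# dataset_level_custom_transformation_definitions: bring-your-own
# dataset-level transform function (experimental, per the description).
dataset_level_custom_transformation_definitions = [
    {
        "transformation": "ConcatCols",
        "module_path": "/path/to/custom_transform_fn_dlt.py",
        "function_name": "concat_cols",
    },
]

# dataset_level_transformations: a built-in Join, the custom ConcatCols
# defined above, and a TimeAggregate feature, as quoted in the description.
dataset_level_transformations = [
    {
        "transformation": "Join",
        "right_table_uri": "bq://test-project.dataset_test.table",
        "join_keys": [["join_key_col", "join_key_col"]],
    },
    {
        "transformation": "ConcatCols",
        "cols": ["feature_1", "feature_2"],
        "output_col": "feature_1_2",
    },
    {
        "transformation": "TimeAggregate",
        "time_difference": 40,
        "time_difference_units": "DAY",
        "time_series_identifier_columns": ["store_id"],
        "time_column": "time_col",
        "time_difference_target_column": "target_col",
        "output_column": "output_col",
    },
]
```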
+ isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. 
+ isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. 
code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. 
Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. 
Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. 
Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' 
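The `tf_auto_transform_features` and `tf_transformations_path` descriptions above likewise quote small examples that are hard to read once folded. The sketch below rebuilds the quoted auto-transform mapping and ZScale configuration and writes the latter out with the standard `json` module; the local filename is a placeholder, and copying the file to the GCS location passed as `tf_transformations_path` is left out:

```python
import json

# tf_auto_transform_features: the mapping quoted in the description,
# from type hints to feature names.
tf_auto_transform_features = {
    "auto": ["feature1"],
    "categorical": ["feature2", "feature3"],
}

# tf_transformations_path: per the description, a JSON file holding the
# explicit per-column transformation list. This rebuilds the ZScale example
# from the description and writes it to a local placeholder path.
tf_transformations = [
    {"transformation": "ZScale", "input_columns": ["feature_1"]},
    {"transformation": "ZScale", "input_columns": ["feature_2"]},
]

with open("transform_config.json", "w") as f:
    json.dump(tf_transformations, f, indent=2)
```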
+ parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-generate-xgboost-hyperparameter-tuning-worker-pool-specs: + executorLabel: exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path to JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized validation + + split.' + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized train + + split.' + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to transform output. + parameters: + accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + objective: + description: Required. Specifies the learning task and the learning objective. + parameterType: STRING + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + target_column: + description: Required. Target column name. + parameterType: STRING + total_replica_count: + description: Number of workers. + parameterType: NUMBER_INTEGER + weight_column: + defaultValue: '' + description: Weight column name. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + job_dir: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + instance_schema_path: + parameterType: STRING + instance_schema_uri: + parameterType: STRING + prediction_docker_uri_artifact_path: + parameterType: STRING + prediction_docker_uri_output: + parameterType: STRING + prediction_schema_path: + parameterType: STRING + prediction_schema_uri: + parameterType: STRING + trials: + parameterType: STRING + trials_path: + parameterType: STRING + worker_pool_specs: + parameterType: LIST + comp-get-best-hyperparameter-tuning-job-trial: + executorLabel: exec-get-best-hyperparameter-tuning-job-trial + inputDefinitions: + parameters: + gcp_resources: + description: Proto tracking the hyperparameter tuning job. + parameterType: STRING + instance_schema_uri: + defaultValue: '' + description: The instance schema uri. + isOptional: true + parameterType: STRING + prediction_docker_uri: + defaultValue: '' + description: The prediction docker container uri. + isOptional: true + parameterType: STRING + prediction_schema_uri: + defaultValue: '' + description: The prediction schema_uri. + isOptional: true + parameterType: STRING + read_value_from_file: + defaultValue: false + description: If true, read file to get the relevant value. + isOptional: true + parameterType: BOOLEAN + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + trials_dir: + defaultValue: '' + description: The path to the hyperparameter tuning trials. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-get-prediction-type-for-xgboost: + executorLabel: exec-get-prediction-type-for-xgboost + inputDefinitions: + parameters: + objective: + description: The XGBoost training objective + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-xgboost-study-spec-parameters: + executorLabel: exec-get-xgboost-study-spec-parameters + inputDefinitions: + parameters: + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. 
Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. 
All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. 
- `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. 
In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. 
If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' 
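The `comp-model-batch-predict` definition above exposes the same inputs as the batch-prediction op in the google-cloud-pipeline-components SDK, from which this compiled spec is generated. A sketch of wiring a few of those inputs inside a KFP pipeline; the import path is an assumption about that SDK, and all project, bucket, and display-name values are placeholders:

```python
from kfp import dsl
# Assumption: import path as shipped in the google-cloud-pipeline-components
# SDK, which the compiled comp-model-batch-predict spec above corresponds to.
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


@dsl.pipeline(name="batch-predict-example")
def batch_predict_pipeline():
    # Only a subset of the inputs defined above is shown; values are
    # placeholders. Either `model` or `unmanaged_container_model` must also
    # be supplied (see the importer sketch below).
    ModelBatchPredictOp(
        project="my-project",
        location="us-central1",                    # spec default
        job_display_name="example-batch-predict",
        gcs_source_uris=["gs://my-bucket/instances/*.jsonl"],
        instances_format="jsonl",                  # spec default
        predictions_format="jsonl",
        gcs_destination_output_uri_prefix="gs://my-bucket/predictions",
        machine_type="n1-standard-4",
    )
```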
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
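The `unmanaged_container_model` description above embeds an importer example whose formatting (and a doubled package segment in its import line) got mangled in the folded YAML. A cleaned-up sketch of that same example follows; the GCS URI and prediction image are the placeholder values quoted there, and the shorter `google_cloud_pipeline_components.types` import path is an assumption:

```python
from kfp import dsl
# Assumption: the doubled package segment in the quoted example looks like a
# docstring artifact; artifact_types is normally exposed at this path.
from google_cloud_pipeline_components.types import artifact_types


@dsl.pipeline(name="unmanaged-model-import-example")
def import_unmanaged_model():
    # dsl.importer must be called inside a pipeline body; the URI and image
    # are the placeholder values quoted in the description above.
    importer_spec = dsl.importer(
        artifact_uri="gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model",
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod"
            }
        },
    )
    # importer_spec.output can then feed the unmanaged_container_model input
    # of the model-upload (or batch-predict) component defined above.
```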
+ parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. 
+ isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' 
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ stage_2_deadline_hours:
+ description: 'Stage 2 training budget in
+
+ hours.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ target_column:
+ defaultValue: ''
+ description: Target column of input data.
+ isOptional: true
+ parameterType: STRING
+ temporal_total_weight:
+ defaultValue: 0.0
+ description: 'The weight of the loss for
+
+ predictions aggregated over the horizon for a single time series.'
+ isOptional: true
+ parameterType: NUMBER_DOUBLE
+ time_column:
+ defaultValue: ''
+ description: 'The column that indicates the time. Used by forecasting
+
+ only.'
+ isOptional: true
+ parameterType: STRING
+ time_series_attribute_columns:
+ defaultValue: []
+ description: 'The column names of the time series
+
+ attributes.'
+ isOptional: true
+ parameterType: LIST
+ time_series_identifier_column:
+ defaultValue: ''
+ description: 'Time series identifier column. Used by
+
+ forecasting only.'
+ isOptional: true
+ parameterType: STRING
+ unavailable_at_forecast_columns:
+ defaultValue: []
+ description: 'The names of the columns that are
+
+ not available at forecast time.'
+ isOptional: true
+ parameterType: LIST
+ weight_column:
+ defaultValue: ''
+ description: Weight column of input data.
+ isOptional: true
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ instance_baseline:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ metadata:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: The tabular example gen metadata.
+ comp-xgboost-hyperparameter-tuning-job:
+ executorLabel: exec-xgboost-hyperparameter-tuning-job
+ inputDefinitions:
+ parameters:
+ encryption_spec_key_name:
+ defaultValue: ''
+ description: The KMS key name.
+ isOptional: true
+ parameterType: STRING
+ location:
+ description: The GCP region that runs the pipeline components.
+ parameterType: STRING
+ max_failed_trial_count:
+ defaultValue: 0.0
+ description: 'The number of failed trials that
+
+ need to be seen before failing the HyperparameterTuningJob. If set to
+ 0,
+
+ Vertex AI decides how many trials must fail before the whole job fails.'
+ isOptional: true
+ parameterType: NUMBER_INTEGER
+ max_trial_count:
+ description: The desired total number of trials.
+ parameterType: NUMBER_INTEGER
+ parallel_trial_count:
+ description: 'The desired number of trials to run
+
+ in parallel.'
+ parameterType: NUMBER_INTEGER
+ project:
+ description: The GCP project that runs the pipeline components.
+ parameterType: STRING
+ study_spec_algorithm:
+ defaultValue: ALGORITHM_UNSPECIFIED
+ description: 'The search algorithm specified for
+
+ the study. One of ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or
+
+ ''RANDOM_SEARCH''.'
+ isOptional: true
+ parameterType: STRING
+ study_spec_measurement_selection_type:
+ defaultValue: BEST_MEASUREMENT
+ description: 'Which measurement
+
+ to use if/when the service automatically selects the final measurement
+
+ from previously reported intermediate measurements. One of
+
+ "BEST_MEASUREMENT" or "LAST_MEASUREMENT".'
+ isOptional: true
+ parameterType: STRING
+ study_spec_metric_goal:
+ description: 'Optimization goal of the metric,
+
+ possible values: "MAXIMIZE", "MINIMIZE".'
+ parameterType: STRING
+ study_spec_metric_id:
+ description: 'Metric to optimize. For options,
+
+ please look under ''eval_metric'' at
+
+ https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.'
+ parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries + + representing parameters to optimize. The dictionary key is the + + parameter_id, which is passed to training job as a command line + + argument, and the dictionary value is the parameter specification of the + + metric.' + parameterType: LIST + worker_pool_specs: + description: The worker pool specs. + parameterType: LIST + outputDefinitions: + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-generate-xgboost-hyperparameter-tuning-worker-pool-specs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _generate_xgboost_hyperparameter_tuning_worker_pool_specs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _generate_xgboost_hyperparameter_tuning_worker_pool_specs(\n\ + \ total_replica_count: int,\n target_column: str,\n objective:\ + \ str,\n materialized_train_split: dsl.InputPath('MaterializedSplit'),\n\ + \ materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n transform_output:\ + \ dsl.InputPath('TransformOutput'),\n training_schema_uri: dsl.InputPath('DatasetSchema'),\n\ + \ instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ + \ job_dir: dsl.OutputPath('JobDir'),\n instance_schema_uri: dsl.OutputPath(str),\n\ + \ prediction_schema_uri: dsl.OutputPath(str),\n trials: dsl.OutputPath(str),\n\ + \ prediction_docker_uri_output: dsl.OutputPath(str),\n machine_type:\ + \ str = 'c2-standard-16',\n accelerator_type: str = '',\n accelerator_count:\ + \ int = 0,\n weight_column: str = '',\n eval_metric: str = '',\n \ + \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('worker_pool_specs',\ + \ list), # pylint:disable=g-bare-generic\n ('instance_schema_path',\ + \ str),\n ('prediction_schema_path', str),\n ('trials_path',\ + \ str),\n ('prediction_docker_uri_artifact_path', str),\n ],\n\ + ):\n \"\"\"Generates worker pool specs for XGBoost hyperparameter tuning.\n\ + \n For single machine XGBoost training, returns one worker pool spec for\ + \ master.\n For distributed XGBoost training, returns two worker pool specs,\ + \ the first one\n for master and the second one for the remaining workers.\n\ + \n Args:\n total_replica_count: Number of workers.\n target_column:\ + \ Required. Target column name.\n objective: Required. Specifies the\ + \ learning task and the learning objective.\n materialized_train_split:\ + \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ + \ Required. The path to the materialized validation\n split.\n transform_output:\ + \ Required. The path to transform output.\n training_schema_uri: Required.\ + \ The path to the training schema.\n instance_baseline: Path to JSON\ + \ file for baseline values.\n job_dir: Job dir path.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ trials: The trials uri.\n prediction_docker_uri_output: The prediction\ + \ docker container uri.\n machine_type: Machine type.\n accelerator_type:\ + \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ + \ Weight column name.\n eval_metric: Evaluation metrics for validation\ + \ data represented as a\n comma-separated string.\n disable_default_eval_metric:\ + \ Flag to disable default metric. Set to >0 to\n disable. 
Default to\ + \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG determnisticly\ + \ via iterator number.\n\n Raises:\n ValueError: If accelerator_count\ + \ <= 0 and accelerator_type is specified.\n\n Returns:\n Output parameters.\n\ + \ \"\"\"\n import copy\n import collections\n import re\n\n def get_gcs_path(path):\n\ + \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\ + \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\ + \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\ + \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ + \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\ + \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\ + \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\ + \ f'--trials_path={get_gcs_path(trials)}',\n f'--prediction_docker_uri_artifact_path={get_gcs_path(prediction_docker_uri_output)}',\n\ + \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ + \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ + \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\ + \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\ + \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\ + \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ + \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ + \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ + \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325',\n\ + \ ],\n },\n }\n\n # Add optional arguments if set\n if\ + \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\ + \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\ + \ <= 0:\n raise ValueError(\n 'Accelerator count must be greator\ + \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\ + \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\ + \ for distributed training.\n if total_replica_count > 1:\n additional_replica\ + \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\ + \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\ + \n return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'worker_pool_specs',\n 'instance_schema_path',\n 'prediction_schema_path',\n\ + \ 'trials_path',\n 'prediction_docker_uri_artifact_path',\n\ + \ ],\n )(\n worker_pool_specs_lst,\n get_gcs_path(instance_schema_uri),\n\ + \ get_gcs_path(prediction_schema_uri),\n get_gcs_path(trials),\n\ + \ get_gcs_path(prediction_docker_uri_output),\n )\n\n" + image: python:3.7 + exec-get-best-hyperparameter-tuning-job-trial: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_best_hyperparameter_tuning_job_trial + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_best_hyperparameter_tuning_job_trial(\n gcp_resources:\ + \ str,\n study_spec_metric_goal: str,\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n trials_dir: str = '',\n instance_schema_uri:\ + \ str = '',\n prediction_schema_uri: str = '',\n prediction_docker_uri:\ + \ str = '',\n read_value_from_file: bool = False,\n):\n \"\"\"Gets best\ + \ HyperparameterTuningJob trial.\n\n Args:\n gcp_resources: Proto tracking\ + \ the hyperparameter tuning job.\n study_spec_metric_goal: Optimization\ + \ goal of the metric, possible values:\n \"MAXIMIZE\", \"MINIMIZE\"\ + .\n unmanaged_container_model: The unmanaged model.\n trials_dir:\ + \ The path to the hyperparameter tuning trials.\n instance_schema_uri:\ + \ The instance schema uri.\n prediction_schema_uri: The prediction schema_uri.\n\ + \ prediction_docker_uri: The prediction docker container uri.\n read_value_from_file:\ + \ If true, read file to get the relevant value.\n\n Raises:\n RuntimeError:\ + \ If there are multiple metrics.\n \"\"\"\n\n import os\n import json\n\ + \ from google.api_core.retry import Retry\n from google.cloud import aiplatform_v1beta1\ + \ as aip\n import tensorflow as tf\n\n # If path to file with value is\ + \ provided, read the file before continuing.\n if read_value_from_file:\n\ + \ with tf.io.gfile.GFile(trials_dir, 'r') as f:\n trials_dir = f.read()\n\ + \ with tf.io.gfile.GFile(instance_schema_uri, 'r') as f:\n instance_schema_uri\ + \ = f.read()\n with tf.io.gfile.GFile(prediction_schema_uri, 'r') as\ + \ f:\n prediction_schema_uri = f.read()\n with tf.io.gfile.GFile(prediction_docker_uri,\ + \ 'r') as f:\n prediction_docker_uri = f.read()\n\n api_endpoint_suffix\ + \ = '-aiplatform.googleapis.com'\n gcp_resources_json = json.loads(gcp_resources)\n\ + \ resource = gcp_resources_json['resources'][0]\n\n uri_key = 'resource_uri'\n\ + \ if uri_key not in resource:\n uri_key = 'resourceUri'\n\n gcp_resources_split\ + \ = resource[uri_key].partition('projects')\n resource_name = gcp_resources_split[1]\ + \ + gcp_resources_split[2]\n prefix_str = gcp_resources_split[0]\n prefix_str\ + \ = prefix_str[: prefix_str.find(api_endpoint_suffix)]\n api_endpoint =\ + \ (\n prefix_str[(prefix_str.rfind('//') + 2) :] + api_endpoint_suffix\n\ + \ )\n\n job_client = aip.JobServiceClient(\n client_options={'api_endpoint':\ + \ api_endpoint}\n )\n response = job_client.get_hyperparameter_tuning_job(\n\ + \ name=resource_name,\n retry=Retry(initial=10.0, maximum=60.0,\ + \ deadline=10.0 * 60.0),\n )\n\n # Get best trial\n trials_list = []\n\ + \ for trial in response.trials:\n if trial.final_measurement:\n \ + \ trials_list.append({\n 'id': trial.id,\n 'objective_value':\ + \ trial.final_measurement.metrics[0].value,\n })\n\n if study_spec_metric_goal\ + \ == 'MAXIMIZE':\n best_fn = max\n elif study_spec_metric_goal == 'MINIMIZE':\n\ + \ best_fn = min\n else:\n raise 
ValueError(\n f'Unexpected\ + \ study spec metric goal: {study_spec_metric_goal}'\n )\n\n best_trial\ + \ = best_fn(trials_list, key=lambda trial: trial['objective_value'])\n\n\ + \ # Build unmanaged_container_model\n unmanaged_container_model.metadata['containerSpec']\ + \ = {\n 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ + \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ + \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" + image: python:3.7-slim + exec-get-prediction-type-for-xgboost: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_type_for_xgboost + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ + \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ + \ objective: The XGBoost training objective\n\n Returns:\n A string.\ + \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ + \ or objective.startswith('multi'):\n return 'classification'\n elif\ + \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ + \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ + \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ + \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ + \ ' multi:softprob].'\n )\n\n" + image: python:3.7 + exec-get-xgboost-study-spec-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_xgboost_study_spec_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_xgboost_study_spec_parameters(\n study_spec_parameters_override:\ + \ list, # Required for KFP validation; pylint:disable=g-bare-generic,unused-argument\n\ + ) -> list: # Required for KFP validation; pylint:disable=g-bare-generic\n\ + \ \"\"\"Get study_spec_parameters for an XGBoost hyperparameter tuning\ + \ job.\n\n Args:\n study_spec_parameters_override: List of dictionaries\ + \ representing parameters\n to optimize. 
The dictionary key is the\ + \ parameter_id, which is passed to\n training job as a command line\ + \ argument, and the dictionary value is the\n parameter specification\ + \ of the metric.\n\n Returns:\n List of final Vizier study_spec_parameters\ + \ of type ParameterSpec.\n \"\"\"\n # pylint:disable=g-import-not-at-top,redefined-outer-name,reimported\n\ + \ import functools\n import math\n from typing import Any, Dict, List,\ + \ Optional\n # pylint:enable=g-import-not-at-top,redefined-outer-name,reimported\n\ + \n # Need to define constants within the component function\n # pylint:disable=invalid-name\n\ + \ _GBTREE_BOOSTER = 'gbtree'\n _GBLINEAR_BOOSTER = 'gblinear'\n _DART_BOOSTER\ + \ = 'dart'\n _XGBOOST_BOOSTER_PARAMETERS_MAP = {\n 'eta': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'gamma': [_GBTREE_BOOSTER, _DART_BOOSTER],\n \ + \ 'max_depth': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'min_child_weight':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_delta_step': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'subsample': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'colsample_bytree': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bylevel':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'colsample_bynode': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'lambda': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n\ + \ 'alpha': [_GBTREE_BOOSTER, _DART_BOOSTER, _GBLINEAR_BOOSTER],\n \ + \ 'tree_method': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'scale_pos_weight':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'updater': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER, _GBLINEAR_BOOSTER],\n 'refresh_leaf': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'process_type': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'grow_policy': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'sampling_method':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'monotone_constraints': [_GBTREE_BOOSTER,\ + \ _DART_BOOSTER],\n 'interaction_constraints': [_GBTREE_BOOSTER, _DART_BOOSTER],\n\ + \ 'sample_type': [_DART_BOOSTER],\n 'normalize_type': [_DART_BOOSTER],\n\ + \ 'rate_drop': [_DART_BOOSTER],\n 'one_drop': [_DART_BOOSTER],\n\ + \ 'skip_drop': [_DART_BOOSTER],\n 'num_parallel_tree': [_GBLINEAR_BOOSTER],\n\ + \ 'feature_selector': [_GBLINEAR_BOOSTER],\n 'top_k': [_GBLINEAR_BOOSTER],\n\ + \ 'max_leaves': [_GBTREE_BOOSTER, _DART_BOOSTER],\n 'max_bin':\ + \ [_GBTREE_BOOSTER, _DART_BOOSTER],\n }\n _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS\ + \ = frozenset(\n ['updater', 'monotone_constraints', 'interaction_constraints']\n\ + \ )\n\n def _validate_float_spec(\n parameter_spec: Dict[str, Any],\ + \ lower_bound: float, upper_bound: float\n ) -> None:\n msg = (\n \ + \ f'Parameter spec for {parameter_spec[\"parameter_id\"]} must contain\ + \ '\n 'double_value_spec or discrete_value_spec with float values\ + \ within '\n f'the range of {lower_bound} and {upper_bound} (inclusive)'\n\ + \ )\n if 'double_value_spec' in parameter_spec:\n float_spec\ + \ = parameter_spec['double_value_spec']\n if float_spec['min_value']\ + \ < lower_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {float_spec[\"min_value\"]} for min_value.'\n )\n if float_spec['max_value']\ + \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {float_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ + \ in parameter_spec:\n float_spec = parameter_spec['discrete_value_spec']\n\ + \ float_values = float_spec['values']\n for val in float_values:\n\ + \ if val < lower_bound or val > upper_bound:\n raise ValueError(f'{msg},\ + \ but got {val} in {float_values}.')\n else:\n raise ValueError(\n\ + \ 
f'Unexpected value spec for {parameter_spec[\"parameter_id\"\ + ]}. {msg}.'\n )\n\n def _validate_int_spec(\n parameter_spec:\ + \ Dict[str, Any],\n lower_bound: Optional[int],\n upper_bound:\ + \ Optional[int],\n ) -> None:\n msg = (\n f'Parameter spec for\ + \ {parameter_spec[\"parameter_id\"]} must contain '\n 'integer_value_spec\ + \ or discrete_value_spec with integer values within '\n f'the range\ + \ of {lower_bound} and {upper_bound} (inclusive)'\n )\n if 'integer_value_spec'\ + \ in parameter_spec:\n int_spec = parameter_spec['integer_value_spec']\n\ + \ if lower_bound is not None and int_spec['min_value'] < lower_bound:\n\ + \ raise ValueError(\n f'{msg}, but got {int_spec[\"min_value\"\ + ]} for min_value.'\n )\n if upper_bound is not None and int_spec['max_value']\ + \ > upper_bound:\n raise ValueError(\n f'{msg}, but got\ + \ {int_spec[\"max_value\"]} for max_value.'\n )\n elif 'discrete_value_spec'\ + \ in parameter_spec:\n int_values = parameter_spec['discrete_value_spec']['values']\n\ + \ for val in int_values:\n if not isinstance(val, int):\n \ + \ raise ValueError(\n f'{msg}, but got non-integer {val}\ + \ with '\n f'type {type(val)} in {int_values}.'\n \ + \ )\n if (lower_bound is not None and val < lower_bound) or (\n \ + \ upper_bound is not None and val > upper_bound\n ):\n\ + \ raise ValueError(f'{msg}, but got {val} in {int_values}.')\n\ + \ else:\n raise ValueError(\n f'Unexpected value spec for\ + \ {parameter_spec[\"parameter_id\"]}. {msg}.'\n )\n\n def _validate_categorical_spec(\n\ + \ parameter_spec: Dict[str, Any], valid_categories: Optional[List[str]]\n\ + \ ) -> None:\n msg = (\n f'Parameter spec for {parameter_spec[\"\ + parameter_id\"]} must contain '\n 'categorical_value_spec with unique\ + \ categories from '\n f'{valid_categories}'\n )\n if 'categorical_value_spec'\ + \ in parameter_spec:\n if valid_categories is None:\n # Any\ + \ category is valid.\n return\n categorical_values = parameter_spec['categorical_value_spec']['values']\n\ + \ valid_categorical_values = set(categorical_values).intersection(\n\ + \ set(valid_categories)\n )\n if len(valid_categorical_values)\ + \ != len(categorical_values):\n raise ValueError(f'{msg}, but got\ + \ {categorical_values}.')\n else:\n raise ValueError(\n \ + \ f'Unexpected value spec for {parameter_spec[\"parameter_id\"]}. 
{msg}.'\n\ + \ )\n\n _XGBOOST_PARAM_VALIDATIONS = {\n 'num_boost_round': functools.partial(\n\ + \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ + \ 'early_stopping_rounds': functools.partial(\n _validate_int_spec,\ + \ lower_bound=1, upper_bound=None\n ),\n 'base_score': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ + \ 'booster': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['gbtree', 'gblinear', 'dart'],\n ),\n\ + \ 'eta': functools.partial(\n _validate_float_spec, lower_bound=0,\ + \ upper_bound=1\n ),\n 'gamma': functools.partial(\n \ + \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n ),\n\ + \ 'max_depth': functools.partial(\n _validate_int_spec, lower_bound=0,\ + \ upper_bound=None\n ),\n 'min_child_weight': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=math.inf\n \ + \ ),\n 'max_delta_step': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=math.inf\n ),\n 'subsample': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ + \ ),\n 'colsample_bytree': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0.0001, upper_bound=1\n ),\n 'colsample_bylevel':\ + \ functools.partial(\n _validate_float_spec, lower_bound=0.0001,\ + \ upper_bound=1\n ),\n 'colsample_bynode': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0.0001, upper_bound=1\n \ + \ ),\n 'lambda': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'alpha': functools.partial(\n\ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n\ + \ 'tree_method': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['auto', 'exact', 'approx', 'hist', 'gpu_hist'],\n\ + \ ),\n 'scale_pos_weight': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=math.inf\n ),\n 'updater': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=None\n ),\n\ + \ 'refresh_leaf': functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'process_type': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=['default', 'updated']\n\ + \ ),\n 'grow_policy': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['depthwise', 'lossguide'],\n ),\n \ + \ 'sampling_method': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['uniform', 'gradient_based'],\n ),\n \ + \ 'monotone_constraints': functools.partial(\n _validate_categorical_spec,\ + \ valid_categories=None\n ),\n 'interaction_constraints': functools.partial(\n\ + \ _validate_categorical_spec, valid_categories=None\n ),\n\ + \ 'sample_type': functools.partial(\n _validate_categorical_spec,\ + \ valid_categories=['uniform', 'weighted']\n ),\n 'normalize_type':\ + \ functools.partial(\n _validate_categorical_spec, valid_categories=['tree',\ + \ 'forest']\n ),\n 'rate_drop': functools.partial(\n \ + \ _validate_float_spec, lower_bound=0, upper_bound=1\n ),\n 'one_drop':\ + \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=1\n\ + \ ),\n 'skip_drop': functools.partial(\n _validate_float_spec,\ + \ lower_bound=0, upper_bound=1\n ),\n 'num_parallel_tree': functools.partial(\n\ + \ _validate_int_spec, lower_bound=1, upper_bound=None\n ),\n\ + \ 'feature_selector': functools.partial(\n _validate_categorical_spec,\n\ + \ valid_categories=['cyclic', 'shuffle', 'random', 'greedy', 'thrifty'],\n\ + \ ),\n 'top_k': 
functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=None\n ),\n 'max_cat_to_onehot':\ + \ functools.partial(\n _validate_int_spec, lower_bound=0, upper_bound=None\n\ + \ ),\n 'max_leaves': functools.partial(\n _validate_int_spec,\ + \ lower_bound=0, upper_bound=None\n ),\n 'max_bin': functools.partial(\n\ + \ _validate_int_spec, lower_bound=0, upper_bound=None\n ),\n\ + \ }\n\n def _add_booster_param(\n override_booster_params: Dict[str,\ + \ Any],\n param: Dict[str, Any],\n override_boosters: List[str],\n\ + \ ) -> None:\n # Validate parameter spec.\n param_id = param['parameter_spec']['parameter_id']\n\ + \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param_id]\n validation_func(param['parameter_spec'])\n\ + \ # Add parameter spec for valid boosters.\n parent_boosters = param['parent_categorical_values']['values']\n\ + \ all_boosters = set(_XGBOOST_BOOSTER_PARAMETERS_MAP[param_id]).intersection(\n\ + \ set(override_boosters)\n )\n valid_parent_boosters = set(parent_boosters).intersection(all_boosters)\n\ + \ if valid_parent_boosters:\n override_booster_params[param_id]\ + \ = {}\n for booster in valid_parent_boosters:\n override_booster_params[param_id][booster]\ + \ = param['parameter_spec']\n\n def _get_booster_param_specs(\n override_booster_params:\ + \ Dict[str, Any],\n param_id: str,\n default_param_spec: Optional[Dict[str,\ + \ Any]],\n ) -> List[Dict[str, Any]]:\n if param_id not in override_booster_params:\n\ + \ if default_param_spec is None:\n return []\n return [default_param_spec]\n\ + \ override_param_specs = override_booster_params[param_id]\n if default_param_spec\ + \ is not None:\n for booster in default_param_spec['parent_categorical_values']['values']:\n\ + \ if booster not in override_param_specs:\n override_param_specs[booster]\ + \ = default_param_spec['parameter_spec']\n param_specs = []\n for\ + \ booster, override_spec in override_param_specs.items():\n included\ + \ = False\n for spec in param_specs:\n if spec['parameter_spec']\ + \ == override_spec:\n spec['parent_categorical_values']['values'].append(booster)\n\ + \ included = True\n break\n if not included:\n \ + \ param_specs.append({\n 'parameter_spec': override_spec,\n\ + \ 'parent_categorical_values': {'values': [booster]},\n \ + \ })\n return param_specs\n\n default_params = [\n {\n \ + \ 'parameter_id': 'num_boost_round',\n 'discrete_value_spec':\ + \ {'values': [1, 5, 10, 15, 20]},\n },\n {\n 'parameter_id':\ + \ 'early_stopping_rounds',\n 'discrete_value_spec': {'values':\ + \ [3, 5, 10]},\n },\n {'parameter_id': 'base_score', 'discrete_value_spec':\ + \ {'values': [0.5]}},\n {\n 'parameter_id': 'booster',\n \ + \ 'categorical_value_spec': {'values': ['gbtree', 'gblinear', 'dart']},\n\ + \ 'conditional_parameter_specs': [\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'eta',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LOG_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'gamma',\n\ + \ 'discrete_value_spec': {\n \ + \ 'values': [0, 10, 50, 100, 500, 1000]\n },\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'max_depth',\n\ + \ 'integer_value_spec': {'min_value': 6, 'max_value':\ + \ 10},\n 'scale_type': 'UNIT_LINEAR_SCALE',\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': 
{\n 'parameter_id': 'min_child_weight',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0,\n 'max_value': 10.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'max_delta_step',\n\ + \ 'discrete_value_spec': {\n \ + \ 'values': [0.0, 1.0, 3.0, 5.0, 7.0, 9.0]\n },\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'subsample',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bytree',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bylevel',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'colsample_bynode',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_LINEAR_SCALE',\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'lambda',\n\ + \ 'double_value_spec': {\n \ + \ 'min_value': 0.0001,\n 'max_value': 1.0,\n \ + \ },\n 'scale_type': 'UNIT_REVERSE_LOG_SCALE',\n\ + \ },\n 'parent_categorical_values': {\n\ + \ 'values': ['gbtree', 'dart', 'gblinear']\n \ + \ },\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'alpha',\n \ + \ 'double_value_spec': {\n 'min_value': 0.0001,\n\ + \ 'max_value': 1.0,\n },\n\ + \ 'scale_type': 'UNIT_LOG_SCALE',\n \ + \ },\n 'parent_categorical_values': {\n \ + \ 'values': ['gbtree', 'dart', 'gblinear']\n },\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'tree_method',\n \ + \ 'categorical_value_spec': {'values': ['auto']},\n \ + \ },\n 'parent_categorical_values': {'values': ['gbtree',\ + \ 'dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'scale_pos_weight',\n \ + \ 'discrete_value_spec': {'values': [1.0]},\n \ + \ },\n 'parent_categorical_values': {'values': ['gbtree',\ + \ 'dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'refresh_leaf',\n \ + \ 'discrete_value_spec': {'values': [1]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'process_type',\n \ + \ 'categorical_value_spec': {'values': ['default']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'grow_policy',\n\ + \ 'categorical_value_spec': {'values': ['depthwise']},\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'sampling_method',\n\ + \ 'categorical_value_spec': {'values': ['uniform']},\n\ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gbtree', 'dart']},\n },\n {\n \ + \ 'parameter_spec': {\n 'parameter_id': 'sample_type',\n\ + \ 'categorical_value_spec': {'values': ['uniform']},\n\ + \ },\n 
'parent_categorical_values': {'values':\ + \ ['dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'normalize_type',\n \ + \ 'categorical_value_spec': {'values': ['tree']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['dart']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'rate_drop',\n \ + \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'one_drop',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'skip_drop',\n \ + \ 'discrete_value_spec': {'values': [0.0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'num_parallel_tree',\n \ + \ 'discrete_value_spec': {'values': [1]},\n \ + \ },\n 'parent_categorical_values': {'values': ['gblinear']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'feature_selector',\n \ + \ 'categorical_value_spec': {'values': ['cyclic']},\n \ + \ },\n 'parent_categorical_values': {'values':\ + \ ['gblinear']},\n },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'top_k',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n \ + \ 'parent_categorical_values': {'values': ['gblinear']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'max_leaves',\n \ + \ 'discrete_value_spec': {'values': [0]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n {\n 'parameter_spec':\ + \ {\n 'parameter_id': 'max_bin',\n \ + \ 'discrete_value_spec': {'values': [256]},\n },\n\ + \ 'parent_categorical_values': {'values': ['gbtree', 'dart']},\n\ + \ },\n ],\n },\n ]\n\n # Construct dictionaries\ + \ so that parameter specs are accessible by id.\n override_params = {}\n\ + \ override_booster_params = {}\n for param in study_spec_parameters_override:\n\ + \ # Validate a study spec before adding to the override_params dictionary.\n\ + \ validation_func = _XGBOOST_PARAM_VALIDATIONS[param['parameter_id']]\n\ + \ validation_func(param)\n override_params[param['parameter_id']]\ + \ = param\n\n # Add any param that does not have a default parameter\ + \ spec.\n if (\n param['parameter_id'] == 'max_cat_to_onehot'\n\ + \ and param['parameter_id'] not in default_params\n ):\n \ + \ default_params.append(param)\n if (\n param['parameter_id']\ + \ == 'booster'\n and 'conditional_parameter_specs' in param\n \ + \ ):\n for booster_param in param['conditional_parameter_specs']:\n\ + \ _add_booster_param(\n override_booster_params,\n \ + \ booster_param,\n override_boosters=param['categorical_value_spec']['values'],\n\ + \ )\n\n # Validate override params according to XGBoost param dependencies.\n\ + \ tree_method = override_booster_params.get('tree_method', None)\n if\ + \ tree_method is not None:\n for booster, tree_method_spec in tree_method.items():\n\ + \ if tree_method_spec['categorical_value_spec']['values'] != ['exact']:\n\ + \ continue\n # TODO(b/277777886): exact requires non-zero max_depth\ + \ value.\n # The below code is no longer necessary after raising min_value\ + \ to 6 in\n # the default spec. In the long run, we need to decide\ + \ the best\n # approach for max_depth. 
Keeping the code for now in\ + \ case the approach\n # involves overriding max_depth for 'exact' tree_method.\n\ + \ max_depth_spec = {\n 'parameter_id': 'max_depth',\n \ + \ 'integer_value_spec': {'min_value': 6, 'max_value': 10},\n \ + \ 'scale_type': 'UNIT_LINEAR_SCALE',\n }\n override_booster_params['max_depth']\ + \ = override_booster_params.get(\n 'max_depth', {booster: max_depth_spec}\n\ + \ )\n override_booster_params['max_depth'][booster] = override_booster_params[\n\ + \ 'max_depth'\n ].get(booster, max_depth_spec)\n try:\n\ + \ _validate_int_spec(\n override_booster_params['max_depth'][booster],\n\ + \ lower_bound=1,\n upper_bound=None,\n )\n\ + \ except ValueError as e:\n raise ValueError(\n 'max_depth\ + \ cannot be 0 (or < 0) when tree method is fixed to be '\n '\"\ + exact\".'\n ) from e\n\n # Construct the modified study specs study_spec_parameters.\n\ + \ study_spec_parameters = []\n for default_param in default_params:\n\ + \ override_param = override_params.get(\n default_param['parameter_id'],\ + \ default_param\n )\n study_spec_parameters.append(override_param)\n\ + \ # Override conditional parameters for booster.\n if default_param['parameter_id']\ + \ == 'booster':\n booster_param_specs = []\n override_booster_vals\ + \ = override_param['categorical_value_spec']['values']\n\n for booster_param\ + \ in default_param['conditional_parameter_specs']:\n override_parent_boosters\ + \ = set(\n booster_param['parent_categorical_values']['values']\n\ + \ ).intersection(override_booster_vals)\n if not override_parent_boosters:\n\ + \ # No need to include a booster param if no relevant booster will\n\ + \ # be used.\n continue\n # Update default booster\ + \ param boosters to exclude irrelevant boosters.\n booster_param['parent_categorical_values']['values']\ + \ = list(\n override_parent_boosters\n )\n booster_param_specs.extend(\n\ + \ _get_booster_param_specs(\n override_booster_params,\n\ + \ param_id=booster_param['parameter_spec']['parameter_id'],\n\ + \ default_param_spec=booster_param,\n )\n \ + \ )\n\n for booster_param_name in _XGBOOST_NO_DEFAULT_BOOSTER_PARAMS:\n\ + \ booster_param_specs.extend(\n _get_booster_param_specs(\n\ + \ override_booster_params,\n param_id=booster_param_name,\n\ + \ default_param_spec=None,\n )\n )\n\n\ + \ # booster_param_specs combines the overriding booster parameter\n\ + \ # specs from user input and the default booster parameter specs.\n\ + \ override_param['conditional_parameter_specs'] = booster_param_specs\n\ + \n return study_spec_parameters\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", 
"{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - 
'{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main 
--component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", 
"{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-xgboost-hyperparameter-tuning-job: + container: + args: + - --type + - HyperparameterTuningJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"xgboost-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"study_spec\": {\"metrics\": [{\"metric_id\": \"", "{{$.inputs.parameters[''study_spec_metric_id'']}}", + "\", \"goal\": \"", "{{$.inputs.parameters[''study_spec_metric_goal'']}}", + "\"}], \"parameters\": ", "{{$.inputs.parameters[''study_spec_parameters_override'']}}", + ", \"algorithm\": \"", 
"{{$.inputs.parameters[''study_spec_algorithm'']}}", + "\", \"measurement_selection_type\": \"", "{{$.inputs.parameters[''study_spec_measurement_selection_type'']}}", + "\"}, \"max_trial_count\": ", "{{$.inputs.parameters[''max_trial_count'']}}", + ", \"parallel_trial_count\": ", "{{$.inputs.parameters[''parallel_trial_count'']}}", + ", \"max_failed_trial_count\": ", "{{$.inputs.parameters[''max_failed_trial_count'']}}", + ", \"trial_job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", + "}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.hyperparameter_tuning_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The XGBoost HyperparameterTuningJob pipeline. + name: automl-tabular-xgboost-hyperparameter-tuning-job +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--disable_default_eval_metric: + componentInputParameter: disable_default_eval_metric + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eval_metric: + componentInputParameter: eval_metric + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--location: + 
componentInputParameter: location + pipelinechannel--max_failed_trial_count: + componentInputParameter: max_failed_trial_count + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--max_trial_count: + componentInputParameter: max_trial_count + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--objective: + componentInputParameter: objective + pipelinechannel--parallel_trial_count: + componentInputParameter: parallel_trial_count + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--seed_per_iteration: + componentInputParameter: seed_per_iteration + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_algorithm: + componentInputParameter: study_spec_algorithm + pipelinechannel--study_spec_measurement_selection_type: + componentInputParameter: study_spec_measurement_selection_type + pipelinechannel--study_spec_metric_goal: + componentInputParameter: study_spec_metric_goal + pipelinechannel--study_spec_metric_id: + componentInputParameter: study_spec_metric_id + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--training_accelerator_count: + componentInputParameter: training_accelerator_count + pipelinechannel--training_accelerator_type: + componentInputParameter: training_accelerator_type + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--training_machine_type: + componentInputParameter: training_machine_type + pipelinechannel--training_total_replica_count: + componentInputParameter: training_total_replica_count + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + 
pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: The BigQuery data source. + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: The CSV data source. + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_failed_trial_count: + defaultValue: 0.0 + description: 'The number of failed trials that need to be seen + + before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides + + how many trials must fail before the whole job fails.' + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + max_trial_count: + description: The desired total number of trials. + parameterType: NUMBER_INTEGER + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + objective: + description: 'Specifies the learning task and the learning objective. Must + be + + one of [reg:squarederror, reg:squaredlogerror, + + reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror, + + binary:logistic, multi:softprob].' + parameterType: STRING + parallel_trial_count: + description: The desired number of trials to run in parallel. + parameterType: NUMBER_INTEGER + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The root GCS directory for the pipeline components. + parameterType: STRING + run_evaluation: + defaultValue: false + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: Whether to enable feature selection. + isOptional: true + parameterType: BOOLEAN + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + study_spec_algorithm: + defaultValue: ALGORITHM_UNSPECIFIED + description: 'The search algorithm specified for the study. One of + + ''ALGORITHM_UNSPECIFIED'', ''GRID_SEARCH'', or ''RANDOM_SEARCH''.' 
+ isOptional: true + parameterType: STRING + study_spec_measurement_selection_type: + defaultValue: BEST_MEASUREMENT + description: ' Which measurement to use if/when the + + service automatically selects the final measurement from previously + + reported intermediate measurements. One of "BEST_MEASUREMENT" or + + "LAST_MEASUREMENT".' + isOptional: true + parameterType: STRING + study_spec_metric_goal: + description: 'Optimization goal of the metric, possible values: + + "MAXIMIZE", "MINIMIZE".' + parameterType: STRING + study_spec_metric_id: + description: 'Metric to optimize. For options, please look under + + ''eval_metrics'' at + + https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters.' + parameterType: STRING + study_spec_parameters_override: + description: 'List of dictionaries representing parameters + + to optimize. The dictionary key is the parameter_id, which is passed to + + training job as a command line argument, and the dictionary value is the + + parameter specification of the metric.' + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: 'List of auto transform features in the + + comma-separated string format.' + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + training_accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + training_machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + training_total_replica_count: + defaultValue: 1.0 + description: Number of workers. + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py new file mode 100644 index 0000000000..e03036c353 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py @@ -0,0 +1,77 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML XGBoost Trainer component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def xgboost_trainer( + project: str, + location: str, + worker_pool_specs: list, + gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Trains an XGBoost model using Vertex CustomJob API. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + worker_pool_specs: The worker pool specs. + encryption_spec_key_name: The KMS key name. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "xgboost-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + '"}, "job_spec": {"worker_pool_specs": ', + worker_pool_specs, + '}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml new file mode 100644 index 0000000000..0fc86f8c67 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -0,0 +1,4396 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-xgboost-trainer +# Description: The XGBoost training pipeline. 
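+# Usage (illustrative sketch, not part of the compiled spec; values are
+# placeholders and the full interface is listed under "Inputs" below):
+#
+#   from google.cloud import aiplatform
+#   aiplatform.init(project='...', location='...')
+#   aiplatform.PipelineJob(
+#       display_name='automl-tabular-xgboost-trainer',
+#       template_path='xgboost_trainer_pipeline.yaml',
+#       parameter_values={'project': '...', 'location': '...', 'root_dir': '...',
+#                         'objective': '...', 'target_column': '...'},
+#   ).submit()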
+# Inputs: +# base_score: float [Default: 0.5] +# bigquery_staging_full_dataset_id: str [Default: ''] +# booster: str [Default: 'gbtree'] +# colsample_bylevel: float [Default: 1.0] +# colsample_bynode: float [Default: 1.0] +# colsample_bytree: float [Default: 1.0] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# disable_default_eval_metric: int [Default: 0.0] +# early_stopping_rounds: int [Default: -1.0] +# encryption_spec_key_name: str [Default: ''] +# eta: float [Default: 0.3] +# eval_metric: str [Default: ''] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# feature_selection_algorithm: str [Default: 'AMI'] +# feature_selector: str [Default: 'cyclic'] +# gamma: float [Default: 0.0] +# grow_policy: str [Default: 'depthwise'] +# huber_slope: float [Default: 1.0] +# interaction_constraints: str [Default: ''] +# location: str +# max_bin: int [Default: 256.0] +# max_cat_to_onehot: int [Default: -1.0] +# max_delta_step: float [Default: 0.0] +# max_depth: int [Default: 6.0] +# max_leaves: int [Default: 0.0] +# max_selected_features: int [Default: -1.0] +# min_child_weight: float [Default: 1.0] +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# monotone_constraints: str [Default: ''] +# normalize_type: str [Default: 'tree'] +# num_boost_round: int [Default: 10.0] +# num_parallel_tree: int [Default: 1.0] +# objective: str +# one_drop: int [Default: 0.0] +# predefined_split_key: str [Default: ''] +# process_type: str [Default: 'default'] +# project: str +# rate_drop: float [Default: 0.0] +# refresh_leaf: int [Default: 1.0] +# reg_alpha: float [Default: 0.0] +# reg_lambda: float [Default: 1.0] +# root_dir: str +# run_evaluation: bool [Default: True] +# run_feature_selection: bool [Default: False] +# sample_type: str [Default: 'uniform'] +# sampling_method: str [Default: 'uniform'] +# scale_pos_weight: float [Default: 1.0] +# seed: int [Default: 0.0] +# seed_per_iteration: bool [Default: False] +# skip_drop: float [Default: 0.0] +# stratified_split_key: str [Default: ''] +# subsample: float [Default: 1.0] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# tf_custom_transformation_definitions: list +# tf_transformations_path: str [Default: ''] +# top_k: int [Default: 0.0] +# training_accelerator_count: int [Default: 0.0] +# training_accelerator_type: str [Default: ''] +# training_fraction: float [Default: -1.0] +# training_machine_type: str [Default: 'c2-standard-16'] +# training_total_replica_count: int [Default: 1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# tree_method: str [Default: 'auto'] +# tweedie_variance_power: float [Default: 1.5] +# updater: str [Default: ''] +# 
validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation + tasks: + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: 
pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--get-prediction-type-for-xgboost-Output + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + inputDefinitions: + artifacts: + pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--get-prediction-type-for-xgboost-Output: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + dependentTasks: + - xgboost-trainer + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bool-identity + - feature-transform-engine + - generate-xgboost-trainer-worker-pool-specs + - get-prediction-type-for-xgboost + inputs: + artifacts: + pipelinechannel--generate-xgboost-trainer-worker-pool-specs-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: 
generate-xgboost-trainer-worker-pool-specs + parameters: + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--get-prediction-type-for-xgboost-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - get-prediction-type-for-xgboost + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: 
pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + location: + componentInputParameter: pipelinechannel--location + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + model_type: + runtimeValue: + constant: boosted_trees + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + generate-xgboost-trainer-worker-pool-specs: + cachingOptions: + enableCache: true + componentRef: + name: comp-generate-xgboost-trainer-worker-pool-specs + dependentTasks: + - feature-transform-engine + - split-materialized-data + - training-configurator-and-validator + inputs: + artifacts: + instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + training_schema_uri: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + parameters: + accelerator_count: + componentInputParameter: pipelinechannel--training_accelerator_count + accelerator_type: + componentInputParameter: pipelinechannel--training_accelerator_type + base_score: + componentInputParameter: pipelinechannel--base_score + booster: + componentInputParameter: pipelinechannel--booster + colsample_bylevel: + componentInputParameter: pipelinechannel--colsample_bylevel + colsample_bynode: + componentInputParameter: pipelinechannel--colsample_bynode + colsample_bytree: + componentInputParameter: pipelinechannel--colsample_bytree + disable_default_eval_metric: + componentInputParameter: 
pipelinechannel--disable_default_eval_metric + early_stopping_rounds: + componentInputParameter: pipelinechannel--early_stopping_rounds + eta: + componentInputParameter: pipelinechannel--eta + eval_metric: + componentInputParameter: pipelinechannel--eval_metric + feature_selector: + componentInputParameter: pipelinechannel--feature_selector + gamma: + componentInputParameter: pipelinechannel--gamma + grow_policy: + componentInputParameter: pipelinechannel--grow_policy + huber_slope: + componentInputParameter: pipelinechannel--huber_slope + interaction_constraints: + componentInputParameter: pipelinechannel--interaction_constraints + machine_type: + componentInputParameter: pipelinechannel--training_machine_type + max_bin: + componentInputParameter: pipelinechannel--max_bin + max_cat_to_onehot: + componentInputParameter: pipelinechannel--max_cat_to_onehot + max_delta_step: + componentInputParameter: pipelinechannel--max_delta_step + max_depth: + componentInputParameter: pipelinechannel--max_depth + max_leaves: + componentInputParameter: pipelinechannel--max_leaves + min_child_weight: + componentInputParameter: pipelinechannel--min_child_weight + monotone_constraints: + componentInputParameter: pipelinechannel--monotone_constraints + normalize_type: + componentInputParameter: pipelinechannel--normalize_type + num_boost_round: + componentInputParameter: pipelinechannel--num_boost_round + num_parallel_tree: + componentInputParameter: pipelinechannel--num_parallel_tree + objective: + componentInputParameter: pipelinechannel--objective + one_drop: + componentInputParameter: pipelinechannel--one_drop + process_type: + componentInputParameter: pipelinechannel--process_type + rate_drop: + componentInputParameter: pipelinechannel--rate_drop + refresh_leaf: + componentInputParameter: pipelinechannel--refresh_leaf + reg_alpha: + componentInputParameter: pipelinechannel--reg_alpha + reg_lambda: + componentInputParameter: pipelinechannel--reg_lambda + sample_type: + componentInputParameter: pipelinechannel--sample_type + sampling_method: + componentInputParameter: pipelinechannel--sampling_method + scale_pos_weight: + componentInputParameter: pipelinechannel--scale_pos_weight + seed: + componentInputParameter: pipelinechannel--seed + seed_per_iteration: + componentInputParameter: pipelinechannel--seed_per_iteration + skip_drop: + componentInputParameter: pipelinechannel--skip_drop + subsample: + componentInputParameter: pipelinechannel--subsample + target_column: + componentInputParameter: pipelinechannel--target_column + top_k: + componentInputParameter: pipelinechannel--top_k + total_replica_count: + componentInputParameter: pipelinechannel--training_total_replica_count + tree_method: + componentInputParameter: pipelinechannel--tree_method + tweedie_variance_power: + componentInputParameter: pipelinechannel--tweedie_variance_power + updater: + componentInputParameter: pipelinechannel--updater + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: generate-xgboost-trainer-worker-pool-specs + get-prediction-type-for-xgboost: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-prediction-type-for-xgboost + inputs: + parameters: + objective: + componentInputParameter: pipelinechannel--objective + taskInfo: + name: get-prediction-type-for-xgboost + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - generate-xgboost-trainer-worker-pool-specs + - xgboost-trainer + inputs: + artifacts: + 
unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: generate-xgboost-trainer-worker-pool-specs + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + - get-prediction-type-for-xgboost + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + prediction_type: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-prediction-type-for-xgboost + run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + target_column: + componentInputParameter: pipelinechannel--target_column + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: training-configurator-and-validator + xgboost-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-xgboost-trainer + dependentTasks: + - generate-xgboost-trainer-worker-pool-specs + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + worker_pool_specs: + taskOutputParameter: + outputParameterKey: worker_pool_specs + producerTask: generate-xgboost-trainer-worker-pool-specs + taskInfo: + name: xgboost-trainer + inputDefinitions: + parameters: + pipelinechannel--base_score: + parameterType: NUMBER_DOUBLE + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--booster: + parameterType: STRING + pipelinechannel--colsample_bylevel: + parameterType: NUMBER_DOUBLE + pipelinechannel--colsample_bynode: + parameterType: NUMBER_DOUBLE + pipelinechannel--colsample_bytree: + parameterType: NUMBER_DOUBLE + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_default_eval_metric: + 
parameterType: NUMBER_INTEGER + pipelinechannel--early_stopping_rounds: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--eta: + parameterType: NUMBER_DOUBLE + pipelinechannel--eval_metric: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--feature_selector: + parameterType: STRING + pipelinechannel--gamma: + parameterType: NUMBER_DOUBLE + pipelinechannel--grow_policy: + parameterType: STRING + pipelinechannel--huber_slope: + parameterType: NUMBER_DOUBLE + pipelinechannel--interaction_constraints: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_bin: + parameterType: NUMBER_INTEGER + pipelinechannel--max_cat_to_onehot: + parameterType: NUMBER_INTEGER + pipelinechannel--max_delta_step: + parameterType: NUMBER_DOUBLE + pipelinechannel--max_depth: + parameterType: NUMBER_INTEGER + pipelinechannel--max_leaves: + parameterType: NUMBER_INTEGER + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--min_child_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--monotone_constraints: + parameterType: STRING + pipelinechannel--normalize_type: + parameterType: STRING + pipelinechannel--num_boost_round: + parameterType: NUMBER_INTEGER + pipelinechannel--num_parallel_tree: + parameterType: NUMBER_INTEGER + pipelinechannel--objective: + parameterType: STRING + pipelinechannel--one_drop: + parameterType: NUMBER_INTEGER + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--process_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--rate_drop: + parameterType: NUMBER_DOUBLE + pipelinechannel--refresh_leaf: + parameterType: NUMBER_INTEGER + pipelinechannel--reg_alpha: + parameterType: NUMBER_DOUBLE + pipelinechannel--reg_lambda: + parameterType: NUMBER_DOUBLE + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--sample_type: + parameterType: STRING + pipelinechannel--sampling_method: + parameterType: STRING + pipelinechannel--scale_pos_weight: + parameterType: NUMBER_DOUBLE + pipelinechannel--seed: + parameterType: NUMBER_INTEGER + pipelinechannel--seed_per_iteration: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--skip_drop: + parameterType: NUMBER_DOUBLE + pipelinechannel--stratified_split_key: + parameterType: STRING + 
pipelinechannel--subsample: + parameterType: NUMBER_DOUBLE + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--top_k: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_count: + parameterType: NUMBER_INTEGER + pipelinechannel--training_accelerator_type: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_machine_type: + parameterType: STRING + pipelinechannel--training_total_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--tree_method: + parameterType: STRING + pipelinechannel--tweedie_variance_power: + parameterType: NUMBER_DOUBLE + pipelinechannel--updater: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' 
+ isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-generate-xgboost-trainer-worker-pool-specs: + executorLabel: exec-generate-xgboost-trainer-worker-pool-specs + inputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path to JSON file for baseline values. + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized validation + + split.' + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Required. The path to the materialized train + + split.' + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to the training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Required. The path to transform output. + parameters: + accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + base_score: + defaultValue: 0.5 + description: The initial prediction score of all instances, global bias. + isOptional: true + parameterType: NUMBER_DOUBLE + booster: + defaultValue: gbtree + description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree + and + + dart use tree based model while gblinear uses linear function.' + isOptional: true + parameterType: STRING + colsample_bylevel: + defaultValue: 1.0 + description: Subsample ratio of columns for each split, in each level. + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bynode: + defaultValue: 1.0 + description: Subsample ratio of columns for each node (split). + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bytree: + defaultValue: 1.0 + description: Subsample ratio of columns when constructing each tree. + isOptional: true + parameterType: NUMBER_DOUBLE + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + early_stopping_rounds: + defaultValue: -1.0 + description: 'Activates early stopping. Validation error needs to + + decrease at least every early_stopping_rounds round(s) to continue + + training.' + isOptional: true + parameterType: NUMBER_INTEGER + eta: + defaultValue: 0.3 + description: Learning rate. + isOptional: true + parameterType: NUMBER_DOUBLE + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' 
+ isOptional: true + parameterType: STRING + feature_selector: + defaultValue: cyclic + description: '[linear booster only] Feature selection and ordering + + method.' + isOptional: true + parameterType: STRING + gamma: + defaultValue: 0.0 + description: 'Minimum loss reduction required to make a further partition + on a leaf + + node of the tree.' + isOptional: true + parameterType: NUMBER_DOUBLE + grow_policy: + defaultValue: depthwise + description: 'Controls a way new nodes are added to the tree. Only supported + + if tree_method is hist. Choices:["depthwise", "lossguide"]' + isOptional: true + parameterType: STRING + huber_slope: + defaultValue: 1.0 + description: 'A parameter used for Pseudo-Huber loss to define the delta + + term.' + isOptional: true + parameterType: NUMBER_DOUBLE + interaction_constraints: + defaultValue: '' + description: 'Constraints for interaction representing permitted + + interactions.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + max_bin: + defaultValue: 256.0 + description: Maximum number of discrete bins to bucket continuous features. + isOptional: true + parameterType: NUMBER_INTEGER + max_cat_to_onehot: + defaultValue: -1.0 + description: 'A threshold for deciding whether XGBoost should use + + one-hot encoding based split for categorical data.' + isOptional: true + parameterType: NUMBER_INTEGER + max_delta_step: + defaultValue: 0.0 + description: 'Maximum delta step we allow each tree''s weight estimation + to + + be.' + isOptional: true + parameterType: NUMBER_DOUBLE + max_depth: + defaultValue: 6.0 + description: Maximum depth of a tree. + isOptional: true + parameterType: NUMBER_INTEGER + max_leaves: + defaultValue: 0.0 + description: Maximum number of nodes to be added. + isOptional: true + parameterType: NUMBER_INTEGER + min_child_weight: + defaultValue: 1.0 + description: Minimum sum of instance weight(hessian) needed in a child. + isOptional: true + parameterType: NUMBER_DOUBLE + monotone_constraints: + defaultValue: '' + description: Constraint of variable monotonicity. + isOptional: true + parameterType: STRING + normalize_type: + defaultValue: tree + description: '[dart booster only] Type of normalization algorithm, + + Choices:["tree", "forest"]' + isOptional: true + parameterType: STRING + num_boost_round: + defaultValue: 10.0 + description: Number of boosting iterations. + isOptional: true + parameterType: NUMBER_INTEGER + num_parallel_tree: + defaultValue: 1.0 + description: 'Number of parallel trees constructed during each + + iteration. This option is used to support boosted random forest.' + isOptional: true + parameterType: NUMBER_INTEGER + objective: + description: Required. Specifies the learning task and the learning objective. + parameterType: STRING + one_drop: + defaultValue: 0.0 + description: '[dart booster only] When this flag is enabled, at least one + tree + + is always dropped during the dropout (allows Binomial-plus-one or + + epsilon-dropout from the original DART paper).' + isOptional: true + parameterType: NUMBER_INTEGER + process_type: + defaultValue: default + description: 'A type of boosting process to run. Choices:["default", + + "update"]' + isOptional: true + parameterType: STRING + rate_drop: + defaultValue: 0.0 + description: '[dart booster only] Dropout rate.''' + isOptional: true + parameterType: NUMBER_DOUBLE + refresh_leaf: + defaultValue: 1.0 + description: 'Refresh updater plugin. 
Update tree leaf and nodes''s stats + if + + True. When it is False, only node stats are updated.' + isOptional: true + parameterType: NUMBER_INTEGER + reg_alpha: + defaultValue: 0.0 + description: L1 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + reg_lambda: + defaultValue: 1.0 + description: L2 regularization term on weights. + isOptional: true + parameterType: NUMBER_DOUBLE + sample_type: + defaultValue: uniform + description: '[dart booster only] Type of sampling algorithm. + + Choices:["uniform", "weighted"]' + isOptional: true + parameterType: STRING + sampling_method: + defaultValue: uniform + description: The method to use to sample the training instances. + isOptional: true + parameterType: STRING + scale_pos_weight: + defaultValue: 1.0 + description: Control the balance of positive and negative weights. + isOptional: true + parameterType: NUMBER_DOUBLE + seed: + defaultValue: 0.0 + description: Random seed. + isOptional: true + parameterType: NUMBER_INTEGER + seed_per_iteration: + defaultValue: false + description: Seed PRNG determnisticly via iterator number. + isOptional: true + parameterType: BOOLEAN + skip_drop: + defaultValue: 0.0 + description: '[dart booster only] Probability of skipping the dropout procedure + + during a boosting iteration.' + isOptional: true + parameterType: NUMBER_DOUBLE + subsample: + defaultValue: 1.0 + description: Subsample ratio of the training instance. + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: Required. Target column name. + parameterType: STRING + top_k: + defaultValue: 0.0 + description: 'The number of top features to select in greedy and thrifty + feature + + selector. The value of 0 means using all the features.' + isOptional: true + parameterType: NUMBER_INTEGER + total_replica_count: + description: Number of workers. + parameterType: NUMBER_INTEGER + tree_method: + defaultValue: auto + description: 'The tree construction algorithm used in XGBoost. Choices: + + ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' + isOptional: true + parameterType: STRING + tweedie_variance_power: + defaultValue: 1.5 + description: 'Parameter that controls the variance of the Tweedie + + distribution.' + isOptional: true + parameterType: NUMBER_DOUBLE + updater: + defaultValue: '' + description: 'A comma separated string defining the sequence of tree updaters + to + + run.' + isOptional: true + parameterType: STRING + weight_column: + defaultValue: '' + description: Weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + job_dir: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + unmanaged_container_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + worker_pool_specs: + parameterType: LIST + comp-get-prediction-type-for-xgboost: + executorLabel: exec-get-prediction-type-for-xgboost + inputDefinitions: + parameters: + objective: + description: The XGBoost training objective + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. 
Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. 
+ + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. 
If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. 
The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'An artifact of a model + + which to upload a new version to. Only specify this field when + + uploading a new version.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: "The unmanaged container model to be uploaded. The model can\n\ + be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ + \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ + \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" + isOptional: true + parameters: + description: + defaultValue: '' + description: The description of the model. + isOptional: true + parameterType: STRING + display_name: + description: 'The display name of the Model. The name + + can be up to 128 characters long and can be consist of any UTF-8 + + characters.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key spec for a Model. If set, this Model and all sub-resources of this + + Model will be secured by this key. 
Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + description: 'Metadata describing the Model''s + + input and output for explanation. Both `explanation_metadata` and + + `explanation_parameters` must be passed together when used. For more + + details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your model. Label keys and values can be no longer than 64 + + characters (Unicode codepoints), can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. See https://goo.gl/xmQnxf for more information and + + examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-split-materialized-data: + executorLabel: exec-split-materialized-data + inputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'Materialized dataset output by the Feature + + Transform Engine.' 
+ outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized eval split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized test split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Path patern to materialized train split. + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. 
If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + defaultValue: '' + description: 'Time series idenfier column. Used by + + forecasting only.' + isOptional: true + parameterType: STRING + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + comp-xgboost-trainer: + executorLabel: exec-xgboost-trainer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + worker_pool_specs: + description: The worker pool specs. + parameterType: LIST + outputDefinitions: + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING +deploymentSpec: + executors: + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": 
"tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", 
"{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-generate-xgboost-trainer-worker-pool-specs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _generate_xgboost_trainer_worker_pool_specs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _generate_xgboost_trainer_worker_pool_specs(\n total_replica_count:\ + \ int,\n target_column: str,\n objective: str,\n materialized_train_split:\ + \ dsl.InputPath('MaterializedSplit'),\n materialized_eval_split: dsl.InputPath('MaterializedSplit'),\n\ + \ transform_output: dsl.InputPath('TransformOutput'),\n training_schema_uri:\ + \ dsl.InputPath('DatasetSchema'),\n instance_baseline: dsl.InputPath('AutoMLTabularInstanceBaseline'),\n\ + \ job_dir: dsl.OutputPath('JobDir'),\n unmanaged_container_model:\ + \ dsl.Output[dsl.Artifact],\n machine_type: str = 'c2-standard-16',\n\ + \ accelerator_type: str = '',\n accelerator_count: int = 0,\n weight_column:\ + \ str = '',\n eval_metric: str = '',\n num_boost_round: int = 10,\n\ + \ early_stopping_rounds: int = -1,\n base_score: float = 0.5,\n \ + \ disable_default_eval_metric: int = 0,\n seed: int = 0,\n seed_per_iteration:\ + \ bool = False,\n booster: str = 'gbtree',\n eta: float = 0.3,\n \ + \ gamma: float = 0.0,\n max_depth: int = 6,\n min_child_weight:\ + \ float = 1.0,\n max_delta_step: float = 0.0,\n subsample: float =\ + \ 1.0,\n colsample_bytree: float = 1.0,\n colsample_bylevel: float\ + \ = 1.0,\n colsample_bynode: float = 1.0,\n reg_lambda: float = 1.0,\n\ + \ reg_alpha: float = 0.0,\n tree_method: str = 'auto',\n scale_pos_weight:\ + \ float = 1.0,\n updater: str = '',\n refresh_leaf: int = 1,\n \ + \ process_type: str = 'default',\n grow_policy: str = 'depthwise',\n\ + \ sampling_method: str = 'uniform',\n monotone_constraints: str =\ + \ '',\n interaction_constraints: str = '',\n sample_type: str = 'uniform',\n\ + \ normalize_type: str = 'tree',\n rate_drop: float = 0.0,\n one_drop:\ + \ int = 0,\n skip_drop: float = 0.0,\n num_parallel_tree: int = 1,\n\ + \ feature_selector: str = 'cyclic',\n top_k: int = 0,\n max_cat_to_onehot:\ + \ int = -1,\n max_leaves: int = 0,\n max_bin: int = 256,\n tweedie_variance_power:\ + \ float = 1.5,\n huber_slope: float = 1.0,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('worker_pool_specs', list), # pylint:disable=g-bare-generic\n\ + \ ],\n):\n \"\"\"Generates worker pool specs for XGBoost training.\n\ + \n For single machine XGBoost training, returns one worker pool spec for\ + \ master.\n For distributed XGBoost training, returns two worker pool specs,\ + \ the first one\n for master and the second one for the remaining workers.\n\ + \n Args:\n total_replica_count: Number of workers.\n target_column:\ + \ Required. Target column name.\n objective: Required. Specifies the\ + \ learning task and the learning objective.\n materialized_train_split:\ + \ Required. The path to the materialized train\n split.\n materialized_eval_split:\ + \ Required. The path to the materialized validation\n split.\n transform_output:\ + \ Required. 
The path to transform output.\n training_schema_uri: Required.\ + \ The path to the training schema.\n instance_baseline: Path to JSON\ + \ file for baseline values.\n job_dir: Job dir path.\n unmanaged_container_model:\ + \ The unmanaged model.\n machine_type: Machine type.\n accelerator_type:\ + \ Accelerator type.\n accelerator_count: Accelerator count.\n weight_column:\ + \ Weight column name.\n eval_metric: Evaluation metrics for validation\ + \ data represented as a\n comma-separated string.\n num_boost_round:\ + \ Number of boosting iterations.\n early_stopping_rounds: Activates early\ + \ stopping. Validation error needs to\n decrease at least every early_stopping_rounds\ + \ round(s) to continue\n training.\n base_score: The initial prediction\ + \ score of all instances, global bias.\n disable_default_eval_metric:\ + \ Flag to disable default metric. Set to >0 to\n disable. Default to\ + \ 0.\n seed: Random seed.\n seed_per_iteration: Seed PRNG determnisticly\ + \ via iterator number.\n booster: Which booster to use, can be gbtree,\ + \ gblinear or dart. gbtree and\n dart use tree based model while gblinear\ + \ uses linear function.\n eta: Learning rate.\n gamma: Minimum loss\ + \ reduction required to make a further partition on a leaf\n node of\ + \ the tree.\n max_depth: Maximum depth of a tree.\n min_child_weight:\ + \ Minimum sum of instance weight(hessian) needed in a child.\n max_delta_step:\ + \ Maximum delta step we allow each tree's weight estimation to\n be.\n\ + \ subsample: Subsample ratio of the training instance.\n colsample_bytree:\ + \ Subsample ratio of columns when constructing each tree.\n colsample_bylevel:\ + \ Subsample ratio of columns for each split, in each level.\n colsample_bynode:\ + \ Subsample ratio of columns for each node (split).\n reg_lambda: L2\ + \ regularization term on weights.\n reg_alpha: L1 regularization term\ + \ on weights.\n tree_method: The tree construction algorithm used in\ + \ XGBoost. Choices:\n [\"auto\", \"exact\", \"approx\", \"hist\", \"\ + gpu_exact\", \"gpu_hist\"].\n scale_pos_weight: Control the balance of\ + \ positive and negative weights.\n updater: A comma separated string\ + \ defining the sequence of tree updaters to\n run.\n refresh_leaf:\ + \ Refresh updater plugin. Update tree leaf and nodes's stats if\n True.\ + \ When it is False, only node stats are updated.\n process_type: A type\ + \ of boosting process to run. Choices:[\"default\",\n \"update\"]\n\ + \ grow_policy: Controls a way new nodes are added to the tree. Only supported\n\ + \ if tree_method is hist. 
Choices:[\"depthwise\", \"lossguide\"]\n\ + \ sampling_method: The method to use to sample the training instances.\n\ + \ monotone_constraints: Constraint of variable monotonicity.\n interaction_constraints:\ + \ Constraints for interaction representing permitted\n interactions.\n\ + \ sample_type: [dart booster only] Type of sampling algorithm.\n \ + \ Choices:[\"uniform\", \"weighted\"]\n normalize_type: [dart booster\ + \ only] Type of normalization algorithm,\n Choices:[\"tree\", \"forest\"\ + ]\n rate_drop: [dart booster only] Dropout rate.'\n one_drop: [dart\ + \ booster only] When this flag is enabled, at least one tree\n is always\ + \ dropped during the dropout (allows Binomial-plus-one or\n epsilon-dropout\ + \ from the original DART paper).\n skip_drop: [dart booster only] Probability\ + \ of skipping the dropout procedure\n during a boosting iteration.\n\ + \ num_parallel_tree: Number of parallel trees constructed during each\n\ + \ iteration. This option is used to support boosted random forest.\n\ + \ feature_selector: [linear booster only] Feature selection and ordering\n\ + \ method.\n top_k: The number of top features to select in greedy\ + \ and thrifty feature\n selector. The value of 0 means using all the\ + \ features.\n max_cat_to_onehot: A threshold for deciding whether XGBoost\ + \ should use\n one-hot encoding based split for categorical data.\n\ + \ max_leaves: Maximum number of nodes to be added.\n max_bin: Maximum\ + \ number of discrete bins to bucket continuous features.\n tweedie_variance_power:\ + \ Parameter that controls the variance of the Tweedie\n distribution.\n\ + \ huber_slope: A parameter used for Pseudo-Huber loss to define the delta\n\ + \ term.\n\n Raises:\n ValueError: If accelerator_count <= 0 and\ + \ accelerator_type is specified.\n\n Returns:\n Outputs containing the\ + \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ + \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ + \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ + \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325'\n\ + \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ + \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ + \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ + \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ + \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ + \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ + \ f'--validation_data_path={get_gcs_path(materialized_eval_split)}',\n\ + \ f'--transform_output_path={get_gcs_path(transform_output)}',\n\ + \ f'--training_schema_path={get_gcs_path(training_schema_uri)}',\n\ + \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ + \ f'--eval_metric={eval_metric}',\n f'--num_boost_round={num_boost_round}',\n\ + \ f'--base_score={base_score}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ + \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ + \ f'--booster={booster}',\n f'--eta={eta}',\n\ + \ f'--gamma={gamma}',\n f'--max_depth={max_depth}',\n\ + \ f'--min_child_weight={min_child_weight}',\n \ + \ f'--max_delta_step={max_delta_step}',\n f'--subsample={subsample}',\n\ + \ f'--colsample_bytree={colsample_bytree}',\n \ + \ f'--colsample_bylevel={colsample_bylevel}',\n f'--colsample_bynode={colsample_bynode}',\n\ + \ 
f'--lambda={reg_lambda}',\n f'--alpha={reg_alpha}',\n\ + \ f'--tree_method={tree_method}',\n f'--scale_pos_weight={scale_pos_weight}',\n\ + \ f'--refresh_leaf={refresh_leaf}',\n f'--process_type={process_type}',\n\ + \ f'--grow_policy={grow_policy}',\n f'--sampling_method={sampling_method}',\n\ + \ f'--sample_type={sample_type}',\n f'--normalize_type={normalize_type}',\n\ + \ f'--rate_drop={rate_drop}',\n f'--one_drop={one_drop}',\n\ + \ f'--skip_drop={skip_drop}',\n f'--num_parallel_tree={num_parallel_tree}',\n\ + \ f'--feature_selector={feature_selector}',\n \ + \ f'--top_k={top_k}',\n f'--max_leaves={max_leaves}',\n \ + \ f'--max_bin={max_bin}',\n f'--tweedie_variance_power={tweedie_variance_power}',\n\ + \ f'--huber_slope={huber_slope}',\n f'--prediction_docker_uri={prediction_docker_uri}',\n\ + \ '--executor_input={{$.json_escape[1]}}',\n ],\n\ + \ },\n }\n\n # Add optional arguments if set\n if weight_column:\n\ + \ master_worker_pool_spec['container_spec']['args'].append(\n \ + \ f'--weight_column={weight_column}'\n )\n if early_stopping_rounds\ + \ >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--early_stopping_rounds={early_stopping_rounds}'\n )\n if\ + \ updater:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--updater={updater}'\n )\n if monotone_constraints:\n \ + \ master_worker_pool_spec['container_spec']['args'].append(\n f'--monotone_constraints={monotone_constraints}'\n\ + \ )\n if interaction_constraints:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--interaction_constraints={interaction_constraints}'\n )\n\ + \ if max_cat_to_onehot >= 0:\n master_worker_pool_spec['container_spec']['args'].append(\n\ + \ f'--max_cat_to_onehot={max_cat_to_onehot}'\n )\n\n # Add accelerator_type\ + \ and accelerator_count if set.\n if accelerator_type:\n if accelerator_count\ + \ <= 0:\n raise ValueError(\n 'Accelerator count must be greator\ + \ than 0 when type is specified.'\n )\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_type'\n ] = accelerator_type\n master_worker_pool_spec['machine_spec'][\n\ + \ 'accelerator_count'\n ] = accelerator_count\n\n worker_pool_specs_lst\ + \ = [master_worker_pool_spec]\n\n # Add an additional worker pool spec\ + \ for distributed training.\n if total_replica_count > 1:\n additional_replica\ + \ = total_replica_count - 1\n additional_worker_spec = copy.deepcopy(master_worker_pool_spec)\n\ + \ additional_worker_spec['replica_count'] = additional_replica\n worker_pool_specs_lst.append(additional_worker_spec)\n\ + \n # Build unmanaged_container_model\n model_dir = os.path.join(formatted_job_dir,\ + \ 'model')\n unmanaged_container_model.metadata['containerSpec'] = {\n\ + \ 'imageUri': prediction_docker_uri,\n 'healthRoute': '/health',\n\ + \ 'predictRoute': '/predict',\n }\n unmanaged_container_model.metadata['predictSchemata']\ + \ = {\n 'instanceSchemaUri': os.path.join(model_dir, 'instance.yaml'),\n\ + \ 'predictionSchemaUri': os.path.join(model_dir, 'prediction_schema.yaml'),\n\ + \ }\n unmanaged_container_model.uri = model_dir\n\n return collections.namedtuple('Outputs',\ + \ ['worker_pool_specs'])(\n worker_pool_specs_lst\n )\n\n" + image: python:3.7 + exec-get-prediction-type-for-xgboost: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_prediction_type_for_xgboost + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_prediction_type_for_xgboost(objective: str) -> str:\n \"\ + \"\"Returns prediction_type given XGBoost training objective..\n\n Args:\n\ + \ objective: The XGBoost training objective\n\n Returns:\n A string.\ + \ One of 'regression' or 'classification'\n \"\"\"\n if objective.startswith('binary')\ + \ or objective.startswith('multi'):\n return 'classification'\n elif\ + \ objective.startswith('reg'):\n return 'regression'\n else:\n raise\ + \ ValueError(\n f'Unsupported XGBoost training objective: {objective}.\ + \ Must be one of'\n ' [reg:squarederror, reg:squaredlogerror, reg:logistic,\ + \ reg:gamma,'\n ' reg:tweedie, reg:pseudohubererror, binary:logistic,'\n\ + \ ' multi:softprob].'\n )\n\n" + image: python:3.7 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", 
"{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", 
"{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ 
','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-split-materialized-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + exec-training-configurator-and-validator: + container: + args: + - 
training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-xgboost-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"xgboost-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": ", "{{$.inputs.parameters[''worker_pool_specs'']}}", + "}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 +pipelineInfo: + description: The XGBoost training pipeline. + name: automl-tabular-xgboost-trainer +root: + dag: + outputs: + artifacts: + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--base_score: + componentInputParameter: base_score + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + pipelinechannel--booster: + componentInputParameter: booster + pipelinechannel--colsample_bylevel: + componentInputParameter: colsample_bylevel + pipelinechannel--colsample_bynode: + componentInputParameter: colsample_bynode + pipelinechannel--colsample_bytree: + componentInputParameter: colsample_bytree + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + pipelinechannel--dataset_level_transformations: + componentInputParameter: dataset_level_transformations + pipelinechannel--disable_default_eval_metric: + componentInputParameter: disable_default_eval_metric + pipelinechannel--early_stopping_rounds: + componentInputParameter: early_stopping_rounds + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--eta: + componentInputParameter: eta + pipelinechannel--eval_metric: + componentInputParameter: eval_metric + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + 
componentInputParameter: evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + pipelinechannel--feature_selector: + componentInputParameter: feature_selector + pipelinechannel--gamma: + componentInputParameter: gamma + pipelinechannel--grow_policy: + componentInputParameter: grow_policy + pipelinechannel--huber_slope: + componentInputParameter: huber_slope + pipelinechannel--interaction_constraints: + componentInputParameter: interaction_constraints + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_bin: + componentInputParameter: max_bin + pipelinechannel--max_cat_to_onehot: + componentInputParameter: max_cat_to_onehot + pipelinechannel--max_delta_step: + componentInputParameter: max_delta_step + pipelinechannel--max_depth: + componentInputParameter: max_depth + pipelinechannel--max_leaves: + componentInputParameter: max_leaves + pipelinechannel--max_selected_features: + componentInputParameter: max_selected_features + pipelinechannel--min_child_weight: + componentInputParameter: min_child_weight + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--monotone_constraints: + componentInputParameter: monotone_constraints + pipelinechannel--normalize_type: + componentInputParameter: normalize_type + pipelinechannel--num_boost_round: + componentInputParameter: num_boost_round + pipelinechannel--num_parallel_tree: + componentInputParameter: num_parallel_tree + pipelinechannel--objective: + componentInputParameter: objective + pipelinechannel--one_drop: + componentInputParameter: one_drop + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--process_type: + componentInputParameter: process_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--rate_drop: + componentInputParameter: rate_drop + pipelinechannel--refresh_leaf: + componentInputParameter: refresh_leaf + pipelinechannel--reg_alpha: + componentInputParameter: reg_alpha + pipelinechannel--reg_lambda: + componentInputParameter: reg_lambda + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--run_feature_selection: + componentInputParameter: run_feature_selection + pipelinechannel--sample_type: + componentInputParameter: sample_type + pipelinechannel--sampling_method: + componentInputParameter: sampling_method + pipelinechannel--scale_pos_weight: + componentInputParameter: scale_pos_weight + pipelinechannel--seed: + componentInputParameter: seed + pipelinechannel--seed_per_iteration: + componentInputParameter: seed_per_iteration + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + 
outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--skip_drop: + componentInputParameter: skip_drop + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--subsample: + componentInputParameter: subsample + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + pipelinechannel--tf_custom_transformation_definitions: + componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transformations_path: + componentInputParameter: tf_transformations_path + pipelinechannel--top_k: + componentInputParameter: top_k + pipelinechannel--training_accelerator_count: + componentInputParameter: training_accelerator_count + pipelinechannel--training_accelerator_type: + componentInputParameter: training_accelerator_type + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--training_machine_type: + componentInputParameter: training_machine_type + pipelinechannel--training_total_replica_count: + componentInputParameter: training_total_replica_count + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: transform_dataflow_max_num_workers + pipelinechannel--tree_method: + componentInputParameter: tree_method + pipelinechannel--tweedie_variance_power: + componentInputParameter: tweedie_variance_power + pipelinechannel--updater: + componentInputParameter: updater + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--weight_column: + componentInputParameter: weight_column + taskInfo: + name: exit-handler-1 + set-optional-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-set-optional-inputs + inputs: + artifacts: + vertex_dataset: + componentInputArtifact: vertex_dataset + parameters: + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + location: + componentInputParameter: location + model_display_name: + componentInputParameter: model_display_name + project: + componentInputParameter: project + taskInfo: + name: set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset artifact. + parameters: + base_score: + defaultValue: 0.5 + description: The initial prediction score of all instances, global bias. + isOptional: true + parameterType: NUMBER_DOUBLE + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'The BigQuery staging full dataset id for + + storing intermediate tables.' + isOptional: true + parameterType: STRING + booster: + defaultValue: gbtree + description: 'Which booster to use, can be gbtree, gblinear or dart. gbtree + and + + dart use tree based model while gblinear uses linear function.' 
+ isOptional: true + parameterType: STRING + colsample_bylevel: + defaultValue: 1.0 + description: Subsample ratio of columns for each split, in each level. + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bynode: + defaultValue: 1.0 + description: Subsample ratio of columns for each node (split). + isOptional: true + parameterType: NUMBER_DOUBLE + colsample_bytree: + defaultValue: 1.0 + description: Subsample ratio of columns when constructing each tree. + isOptional: true + parameterType: NUMBER_DOUBLE + data_source_bigquery_table_path: + defaultValue: '' + description: The BigQuery data source. + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: The CSV data source. + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used. Example: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + description: 'Dataset-level custom + + transformation definitions in string format.' + isOptional: true + parameterType: LIST + dataset_level_transformations: + description: 'Dataset-level transformation configuration in + + string format.' + isOptional: true + parameterType: LIST + disable_default_eval_metric: + defaultValue: 0.0 + description: 'Flag to disable default metric. Set to >0 to + + disable. Default to 0.' + isOptional: true + parameterType: NUMBER_INTEGER + early_stopping_rounds: + defaultValue: -1.0 + description: 'Activates early stopping. Validation error needs to + + decrease at least every early_stopping_rounds round(s) to continue + + training.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + eta: + defaultValue: 0.3 + description: Learning rate. + isOptional: true + parameterType: NUMBER_DOUBLE + eval_metric: + defaultValue: '' + description: 'Evaluation metrics for validation data represented as a + + comma-separated string.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + feature_selection_algorithm: + defaultValue: AMI + description: Feature selection algorithm. + isOptional: true + parameterType: STRING + feature_selector: + defaultValue: cyclic + description: '[linear booster only] Feature selection and ordering + + method.' + isOptional: true + parameterType: STRING + gamma: + defaultValue: 0.0 + description: 'Minimum loss reduction required to make a further partition + on a leaf + + node of the tree.' + isOptional: true + parameterType: NUMBER_DOUBLE + grow_policy: + defaultValue: depthwise + description: 'Controls a way new nodes are added to the tree. Only supported + + if tree_method is hist. Choices:["depthwise", "lossguide"]' + isOptional: true + parameterType: STRING + huber_slope: + defaultValue: 1.0 + description: 'A parameter used for Pseudo-Huber loss to define the delta + + term.' + isOptional: true + parameterType: NUMBER_DOUBLE + interaction_constraints: + defaultValue: '' + description: 'Constraints for interaction representing permitted + + interactions.' + isOptional: true + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + max_bin: + defaultValue: 256.0 + description: Maximum number of discrete bins to bucket continuous features. + isOptional: true + parameterType: NUMBER_INTEGER + max_cat_to_onehot: + defaultValue: -1.0 + description: 'A threshold for deciding whether XGBoost should use + + one-hot encoding based split for categorical data.' + isOptional: true + parameterType: NUMBER_INTEGER + max_delta_step: + defaultValue: 0.0 + description: 'Maximum delta step we allow each tree''s weight estimation to + + be.' + isOptional: true + parameterType: NUMBER_DOUBLE + max_depth: + defaultValue: 6.0 + description: Maximum depth of a tree. + isOptional: true + parameterType: NUMBER_INTEGER + max_leaves: + defaultValue: 0.0 + description: Maximum number of nodes to be added. + isOptional: true + parameterType: NUMBER_INTEGER + max_selected_features: + defaultValue: -1.0 + description: Maximum number of features to select. + isOptional: true + parameterType: NUMBER_INTEGER + min_child_weight: + defaultValue: 1.0 + description: Minimum sum of instance weight(hessian) needed in a child. + isOptional: true + parameterType: NUMBER_DOUBLE + model_description: + defaultValue: '' + description: The description name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + model_display_name: + defaultValue: '' + description: The display name of the uploaded Vertex model. + isOptional: true + parameterType: STRING + monotone_constraints: + defaultValue: '' + description: Constraint of variable monotonicity. 
+        isOptional: true
+        parameterType: STRING
+      normalize_type:
+        defaultValue: tree
+        description: '[dart booster only] Type of normalization algorithm,
+
+          Choices:["tree", "forest"]'
+        isOptional: true
+        parameterType: STRING
+      num_boost_round:
+        defaultValue: 10.0
+        description: Number of boosting iterations.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      num_parallel_tree:
+        defaultValue: 1.0
+        description: 'Number of parallel trees constructed during each
+
+          iteration. This option is used to support boosted random forest.'
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      objective:
+        description: 'Specifies the learning task and the learning objective. Must be
+
+          one of [reg:squarederror, reg:squaredlogerror,
+
+          reg:logistic, reg:gamma, reg:tweedie, reg:pseudohubererror,
+
+          binary:logistic, multi:softprob].'
+        parameterType: STRING
+      one_drop:
+        defaultValue: 0.0
+        description: '[dart booster only] When this flag is enabled, at least one tree
+
+          is always dropped during the dropout (allows Binomial-plus-one or
+
+          epsilon-dropout from the original DART paper).'
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      predefined_split_key:
+        defaultValue: ''
+        description: Predefined split key.
+        isOptional: true
+        parameterType: STRING
+      process_type:
+        defaultValue: default
+        description: 'A type of boosting process to run. Choices:["default",
+
+          "update"]'
+        isOptional: true
+        parameterType: STRING
+      project:
+        description: The GCP project that runs the pipeline components.
+        parameterType: STRING
+      rate_drop:
+        defaultValue: 0.0
+        description: '[dart booster only] Dropout rate.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      refresh_leaf:
+        defaultValue: 1.0
+        description: 'Refresh updater plugin. Update tree leaf and nodes'' stats if
+
+          True. When it is False, only node stats are updated.'
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      reg_alpha:
+        defaultValue: 0.0
+        description: L1 regularization term on weights.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      reg_lambda:
+        defaultValue: 1.0
+        description: L2 regularization term on weights.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      root_dir:
+        description: The root GCS directory for the pipeline components.
+        parameterType: STRING
+      run_evaluation:
+        defaultValue: true
+        description: Whether to run evaluation steps during training.
+        isOptional: true
+        parameterType: BOOLEAN
+      run_feature_selection:
+        defaultValue: false
+        description: Whether to enable feature selection.
+        isOptional: true
+        parameterType: BOOLEAN
+      sample_type:
+        defaultValue: uniform
+        description: '[dart booster only] Type of sampling algorithm.
+
+          Choices:["uniform", "weighted"]'
+        isOptional: true
+        parameterType: STRING
+      sampling_method:
+        defaultValue: uniform
+        description: The method to use to sample the training instances.
+        isOptional: true
+        parameterType: STRING
+      scale_pos_weight:
+        defaultValue: 1.0
+        description: Control the balance of positive and negative weights.
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      seed:
+        defaultValue: 0.0
+        description: Random seed.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      seed_per_iteration:
+        defaultValue: false
+        description: Seed PRNG deterministically via iterator number.
+        isOptional: true
+        parameterType: BOOLEAN
+      skip_drop:
+        defaultValue: 0.0
+        description: '[dart booster only] Probability of skipping the dropout procedure
+
+          during a boosting iteration.'
+ isOptional: true + parameterType: NUMBER_DOUBLE + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + subsample: + defaultValue: 1.0 + description: Subsample ratio of the training instance. + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: Test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + description: 'List of auto transform features in the + + comma-separated string format.' + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + description: 'TF custom transformation definitions + + in string format.' + isOptional: true + parameterType: LIST + tf_transformations_path: + defaultValue: '' + description: Path to TF transformation configuration. + isOptional: true + parameterType: STRING + top_k: + defaultValue: 0.0 + description: 'The number of top features to select in greedy and thrifty feature + + selector. The value of 0 means using all the features.' + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_count: + defaultValue: 0.0 + description: Accelerator count. + isOptional: true + parameterType: NUMBER_INTEGER + training_accelerator_type: + defaultValue: '' + description: Accelerator type. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + training_machine_type: + defaultValue: c2-standard-16 + description: Machine type. + isOptional: true + parameterType: STRING + training_total_replica_count: + defaultValue: 1.0 + description: Number of workers. + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + tree_method: + defaultValue: auto + description: 'The tree construction algorithm used in XGBoost. Choices: + + ["auto", "exact", "approx", "hist", "gpu_exact", "gpu_hist"].' + isOptional: true + parameterType: STRING + tweedie_variance_power: + defaultValue: 1.5 + description: 'Parameter that controls the variance of the Tweedie + + distribution.' + isOptional: true + parameterType: NUMBER_DOUBLE + updater: + defaultValue: '' + description: 'A comma separated string defining the sequence of tree updaters + to + + run.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: Validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. 
+ isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py new file mode 100644 index 0000000000..6dbcd85caf --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py @@ -0,0 +1,21 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GA AutoML forecasting components.""" + +from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp + +__all__ = [ + 'ProphetTrainerOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml new file mode 100644 index 0000000000..14c7dd13b2 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml @@ -0,0 +1,1159 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-bqml-arima-prediction +# Description: Forecasts using a BQML ARIMA_PLUS model. 
+# Inputs:
+#    bigquery_destination_uri: str [Default: '']
+#    data_source_bigquery_table_path: str [Default: '']
+#    data_source_csv_filenames: str [Default: '']
+#    encryption_spec_key_name: str [Default: '']
+#    generate_explanation: bool [Default: False]
+#    location: str
+#    model_name: str
+#    project: str
+components:
+  comp-bigquery-create-dataset:
+    executorLabel: exec-bigquery-create-dataset
+    inputDefinitions:
+      parameters:
+        dataset:
+          parameterType: STRING
+        exists_ok:
+          defaultValue: false
+          isOptional: true
+          parameterType: BOOLEAN
+        location:
+          parameterType: STRING
+        project:
+          parameterType: STRING
+    outputDefinitions:
+      parameters:
+        dataset_id:
+          parameterType: STRING
+        project_id:
+          parameterType: STRING
+  comp-bigquery-create-dataset-2:
+    executorLabel: exec-bigquery-create-dataset-2
+    inputDefinitions:
+      parameters:
+        dataset:
+          parameterType: STRING
+        exists_ok:
+          defaultValue: false
+          isOptional: true
+          parameterType: BOOLEAN
+        location:
+          parameterType: STRING
+        project:
+          parameterType: STRING
+    outputDefinitions:
+      parameters:
+        dataset_id:
+          parameterType: STRING
+        project_id:
+          parameterType: STRING
+  comp-bigquery-delete-dataset-with-prefix:
+    executorLabel: exec-bigquery-delete-dataset-with-prefix
+    inputDefinitions:
+      parameters:
+        dataset_prefix:
+          parameterType: STRING
+        delete_contents:
+          defaultValue: false
+          isOptional: true
+          parameterType: BOOLEAN
+        project:
+          parameterType: STRING
+  comp-bigquery-query-job:
+    executorLabel: exec-bigquery-query-job
+    inputDefinitions:
+      parameters:
+        encryption_spec_key_name:
+          defaultValue: ''
+          description: 'Describes the Cloud
+
+            KMS encryption key that will be used to protect destination
+
+            BigQuery table. The BigQuery Service Account associated with your
+
+            project requires access to this encryption key. If
+
+            encryption_spec_key_name are both specified in here and in
+
+            job_configuration_query, the value in here will override the other
+
+            one.'
+          isOptional: true
+          parameterType: STRING
+        job_configuration_query:
+          defaultValue: {}
+          description: 'A json formatted string
+
+            describing the rest of the job configuration. For more details, see
+
+            https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery'
+          isOptional: true
+          parameterType: STRUCT
+        labels:
+          defaultValue: {}
+          description: 'The labels associated with this job. You can
+
+            use these to organize and group your jobs. Label keys and values can
+
+            be no longer than 63 characters, can only contain lowercase letters,
+
+            numeric characters, underscores and dashes. International characters
+
+            are allowed. Label values are optional. Label keys must start with a
+
+            letter and each label in the list must have a different key.
+
+            Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.'
+          isOptional: true
+          parameterType: STRUCT
+        location:
+          defaultValue: us-central1
+          description: 'Location for creating the BigQuery job. If not
+
+            set, default to `US` multi-region. For more details, see
+
+            https://cloud.google.com/bigquery/docs/locations#specifying_your_location'
+          isOptional: true
+          parameterType: STRING
+        project:
+          description: Project to run the BigQuery query job.
+          parameterType: STRING
+        query:
+          defaultValue: ''
+          description: 'SQL query text to execute. Only standard SQL is
+
+            supported. If query are both specified in here and in
+
+            job_configuration_query, the value in here will override the other
+
+            one.'
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-create-dataset-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-dataset-2 + dependentTasks: + - get-table-location + - maybe-replace-with-default + - validate-inputs + inputs: + parameters: + dataset: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + exists_ok: + runtimeValue: + constant: 1.0 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-prediction-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - get-first-valid + - get-model-metadata + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--get-first-valid-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-first-valid + pipelinechannel--get-model-metadata-forecast_horizon: + taskOutputParameter: + outputParameterKey: forecast_horizon + producerTask: get-model-metadata + 
pipelinechannel--get-model-metadata-target_column: + taskOutputParameter: + outputParameterKey: target_column + producerTask: get-model-metadata + pipelinechannel--get-model-metadata-time_column: + taskOutputParameter: + outputParameterKey: time_column + producerTask: get-model-metadata + pipelinechannel--get-model-metadata-time_series_identifier_column: + taskOutputParameter: + outputParameterKey: time_series_identifier_column + producerTask: get-model-metadata + pipelinechannel--model_name: + componentInputParameter: pipelinechannel--model_name + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n target.*,\n STRUCT(prediction.time_series_adjusted_data\ + \ AS value)\n AS predicted_{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}},\n\ + \ prediction.* EXCEPT (\n {{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}},\n\ + \ time_series_timestamp,\n time_series_adjusted_data\n\ + \ ),\n FROM\n ML.EXPLAIN_FORECAST(\n \ + \ MODEL `{{$.inputs.parameters['pipelinechannel--model_name']}}`,\n\ + \ STRUCT({{$.inputs.parameters['pipelinechannel--get-model-metadata-forecast_horizon']}}\ + \ AS horizon)) AS prediction\n RIGHT JOIN `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\ + \ AS target\n ON\n CAST(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ + \ AS STRING)\n = CAST(prediction.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_series_identifier_column']}}\ + \ AS STRING)\n AND TIMESTAMP(target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-time_column']}})\ + \ = prediction.time_series_timestamp\n WHERE target.{{$.inputs.parameters['pipelinechannel--get-model-metadata-target_column']}}\ + \ IS NULL\n " + taskInfo: + name: predictions-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset-2 + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' + table_id: + runtimeValue: + constant: predictions_{{$.pipeline_job_uuid}} + taskInfo: + name: build-job-configuration-query + get-first-valid: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-first-valid + dependentTasks: + - load-table-from-uri + inputs: + parameters: + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + pipelinechannel--load-table-from-uri-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: load-table-from-uri + values: + runtimeValue: + constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", + "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' + taskInfo: + name: get-first-valid + get-model-metadata: + cachingOptions: + enableCache: true + componentRef: + name: 
comp-get-model-metadata + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + model: + componentInputParameter: pipelinechannel--model_name + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-model-metadata + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + load-table-from-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-load-table-from-uri + dependentTasks: + - bigquery-create-dataset + - get-table-location + inputs: + parameters: + destination: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + source_format: + runtimeValue: + constant: CSV + source_uris: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: load-table-from-uri + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + runtimeValue: + constant: prediction_{{$.pipeline_job_uuid}} + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + source_model_uri: + componentInputParameter: pipelinechannel--model_name + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + comp-get-first-valid: + executorLabel: exec-get-first-valid + inputDefinitions: + parameters: + values: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-model-metadata: + executorLabel: exec-get-model-metadata + inputDefinitions: + parameters: + location: + parameterType: STRING + model: + 
parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + forecast_horizon: + parameterType: NUMBER_INTEGER + target_column: + parameterType: STRING + time_column: + parameterType: STRING + time_series_identifier_column: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-load-table-from-uri: + executorLabel: exec-load-table-from-uri + inputDefinitions: + parameters: + destination: + description: Table into which data is to be loaded. + parameterType: STRING + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + source_format: + defaultValue: CSV + description: 'The file format for the files being imported. Only CSV is + + supported.' + isOptional: true + parameterType: STRING + source_uris: + description: 'URIs of data files to be loaded; in format + + gs:///.' + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-dataset-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-get-first-valid: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_first_valid + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ + \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n for value in json.loads(values):\n if value:\n return value\n\ + \ raise ValueError('No valid values.')\n\n" + image: python:3.7-slim + exec-get-model-metadata: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_model_metadata + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_model_metadata(\n project: str,\n location: str,\n\ + \ model: str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('time_column',\ + \ str),\n ('time_series_identifier_column', str),\n ('target_column',\ + \ str),\n ('forecast_horizon', int),\n ],\n):\n \"\"\"Retrieves\ + \ training options for a BQML model.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n options\ + \ = client.get_model(model).training_runs[0].training_options\n return\ + \ collections.namedtuple(\n 'Outputs', [\n 'time_column',\n\ + \ 'time_series_identifier_column',\n 'target_column',\n\ + \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ + \ options.time_series_id_column,\n options.time_series_data_column,\n\ + \ options.horizon,\n )\n\n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-load-table-from-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_table_from_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ + \ source_uris: str,\n destination: str,\n source_format: str =\ + \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ + \ project: The GCP project.\n location: The GCP region.\n source_uris:\ + \ URIs of data files to be loaded; in format\n gs:///.\n\ + \ destination: Table into which data is to be loaded.\n source_format:\ + \ The file format for the files being imported. 
Only CSV is\n supported.\n\ + \n Returns:\n The destination table containing imported data.\n \"\"\ + \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ + \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ + \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ + \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ + \ destination=destination,\n project=project,\n location=location,\n\ + \ job_config=job_config).result()\n return destination\n\n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Forecasts using a BQML ARIMA_PLUS model. 
+ name: automl-tabular-bqml-arima-prediction +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_name: + componentInputParameter: model_name + pipelinechannel--project: + componentInputParameter: project + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, a resource will be created under a new dataset in the project.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + generate_explanation: + defaultValue: false + description: 'Generate explanation along with the batch prediction + + results. This will cause the batch prediction output to include + + explanations.' + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region for Vertex AI. + parameterType: STRING + model_name: + description: ARIMA_PLUS BQML model URI. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml new file mode 100644 index 0000000000..1d23bd2993 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -0,0 +1,5085 @@ +# PIPELINE DEFINITION +# Name: automl-tabular-bqml-arima-train +# Description: Trains a BQML ARIMA_PLUS model. 
+# Inputs: +# bigquery_destination_uri: str [Default: ''] +# data_granularity_unit: str +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# encryption_spec_key_name: str [Default: ''] +# forecast_horizon: int +# location: str +# max_order: int [Default: 5.0] +# override_destination: bool [Default: False] +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: True] +# target_column: str +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# window_column: str [Default: ''] +# window_max_count: int [Default: -1.0] +# window_stride_length: int [Default: -1.0] +# Outputs: +# create-metrics-artifact-evaluation_metrics: system.Metrics +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-create-dataset-2: + executorLabel: exec-bigquery-create-dataset-2 + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-create-model-job: + executorLabel: exec-bigquery-create-model-job + inputDefinitions: + parameters: + job_configuration_query: + defaultValue: {} + description: 'A json formatted string describing the rest of the job configuration. + + For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: "The labels associated with this job. You can\nuse these to\ + \ organize and group your jobs. Label keys and values can\nbe no longer\ + \ than 63 characters, can only containlowercase letters,\nnumeric characters,\ + \ underscores and dashes. International characters\nare allowed. Label\ + \ values are optional. Label keys must start with a\nletter and each label\ + \ in the list must have a different key.\n Example: { \"name\": \"wrench\"\ + , \"mass\": \"1.3kg\", \"count\": \"3\" }." + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location of the job to create the BigQuery model. If not set, + default to + + `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run BigQuery model creation job. + parameterType: STRING + query: + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'Query parameters for standard SQL queries. 
+ + If query_parameters are both specified in here and in + + job_configuration_query, the value in here will override the other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.BQMLModel + schemaVersion: 0.0.1 + description: Describes the model which is created. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-list-rows: + executorLabel: exec-bigquery-list-rows + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A google.BQTable artifact. + parameters: + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-bigquery-list-rows-2: + executorLabel: exec-bigquery-list-rows-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: A google.BQTable artifact. + parameters: + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. 
For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-2: + executorLabel: exec-bigquery-query-job-2 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' 
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-3: + executorLabel: exec-bigquery-query-job-3 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. 
+ + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-4: + executorLabel: exec-bigquery-query-job-4 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-bigquery-query-job-5: + executorLabel: exec-bigquery-query-job-5 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-2: + executorLabel: exec-build-job-configuration-query-2 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-3: + executorLabel: exec-build-job-configuration-query-3 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-4: + executorLabel: exec-build-job-configuration-query-4 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-5: + executorLabel: exec-build-job-configuration-query-5 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-6: + executorLabel: exec-build-job-configuration-query-6 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT 
+ comp-build-serialized-query-parameters: + executorLabel: exec-build-serialized-query-parameters + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-build-serialized-query-parameters-2: + executorLabel: exec-build-serialized-query-parameters-2 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-build-serialized-query-parameters-3: + executorLabel: exec-build-serialized-query-parameters-3 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon_off_by_one: + defaultValue: false + description: 'If True, subtract 1 from the forecast horizon + + in the query parameters.' + isOptional: true + parameterType: BOOLEAN + max_order: + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + splits: + description: Dataset splits to be used to train the model. + isOptional: true + parameterType: LIST + window: + description: 'Dict containing information about the forecast window the + model + + should have. If no window is provided, the window will start after the + + latest period in the available data.' + isOptional: true + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-cond: + executorLabel: exec-cond + inputDefinitions: + parameters: + false_str: + parameterType: STRING + predicate: + parameterType: BOOLEAN + true_str: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-condition-2: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: create-metrics-artifact + tasks: + bigquery-list-rows: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-list-rows + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + parameters: + location: + componentInputParameter: pipelinechannel--get-table-location-Output + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: bigquery-list-rows + bigquery-list-rows-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-list-rows-2 + dependentTasks: + - bigquery-query-job-4 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-4 + parameters: + location: + componentInputParameter: pipelinechannel--get-table-location-Output + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: bigquery-list-rows-2 + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - build-serialized-query-parameters + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: 
pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n time_series_windows AS (\n \ + \ SELECT\n FIRST_VALUE({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ OVER (horizon) AS start_time,\n COUNT(*) OVER (horizon)\ + \ AS count,\n FIRST_VALUE(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ OVER (horizon) AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n WINDOW horizon AS (\n \ + \ PARTITION BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}}\n\ + \ ROWS BETWEEN 0 PRECEDING AND @forecast_horizon FOLLOWING)\n\ + \ )\n SELECT\n start_time,\n TIMESTAMP(DATETIME_ADD(\n\ + \ DATETIME(start_time),\n INTERVAL @forecast_horizon\ + \ {{$.inputs.parameters['pipelinechannel--data_granularity_unit']}}\n\ + \ )) AS end_time,\n SUM(count) AS count,\n \ + \ ROW_NUMBER() OVER () AS window_number,\n FROM time_series_windows\n\ + \ WHERE window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\n\ + \ GROUP BY start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters + taskInfo: + name: create-eval-windows-table + bigquery-query-job-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.metrics`\ + \ (\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n MAE FLOAT64,\n MSE\ + \ FLOAT64,\n MAPE FLOAT64,\n prediction_count\ + \ INT64\n )\n " + taskInfo: + name: create-tmp-metrics-table + bigquery-query-job-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-3 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + 
pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE TABLE `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.evaluated_examples`\ + \ (\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ STRING,\n {{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\ + \ TIMESTAMP,\n {{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ FLOAT64,\n predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ STRUCT\n )\n " + taskInfo: + name: create-evaluated-examples-table + bigquery-query-job-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-4 + dependentTasks: + - build-job-configuration-query-5 + - for-loop-3 + - table-to-uri + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-5 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n SUM(MAE * prediction_count) /\ + \ SUM(prediction_count) AS MAE,\n SQRT(SUM(MSE * prediction_count)\ + \ / SUM(prediction_count)) AS RMSE,\n SUM(MAPE * prediction_count)\ + \ / SUM(prediction_count) AS MAPE,\n FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}}`\n\ + \ " + taskInfo: + name: create-backtest-table + bigquery-query-job-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-5 + dependentTasks: + - build-job-configuration-query-6 + - for-loop-3 + - table-to-uri-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-6 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--table-to-uri-2-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: SELECT * FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}` + taskInfo: + name: export-evaluated-examples-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: 
pipelinechannel--bigquery-create-dataset-project_id + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: windows + taskInfo: + name: build-job-configuration-query + build-job-configuration-query-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-5 + dependentTasks: + - cond + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--cond-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: cond + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: final_metrics + write_disposition: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' + taskInfo: + name: build-job-configuration-query-5 + build-job-configuration-query-6: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-6 + dependentTasks: + - cond + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--cond-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: cond + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}' + table_id: + runtimeValue: + constant: evaluated_examples + write_disposition: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--cond-Output'']}}' + taskInfo: + name: build-job-configuration-query-6 + build-serialized-query-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters + inputs: + parameters: + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecast_horizon_off_by_one: + runtimeValue: + constant: 1.0 + splits: + runtimeValue: + constant: + - TEST + taskInfo: + name: build-serialized-query-parameters + cond: + cachingOptions: + enableCache: true + componentRef: + name: comp-cond + inputs: + parameters: + false_str: + runtimeValue: + constant: WRITE_EMPTY + predicate: + componentInputParameter: pipelinechannel--override_destination + true_str: + runtimeValue: + constant: WRITE_TRUNCATE + taskInfo: + name: cond + create-metrics-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-metrics-artifact + dependentTasks: + - bigquery-list-rows-2 + inputs: + parameters: + metrics_rows: + taskOutputParameter: + outputParameterKey: Output + producerTask: bigquery-list-rows-2 + taskInfo: + name: create-metrics-artifact + for-loop-3: + componentRef: + name: comp-for-loop-3 + dependentTasks: + - bigquery-list-rows + - table-to-uri + - table-to-uri-2 + inputs: + parameters: + 
pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--bigquery-list-rows-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bigquery-list-rows + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--get-table-location-Output: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--max_order: + componentInputParameter: pipelinechannel--max_order + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--table-to-uri-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri + pipelinechannel--table-to-uri-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri + pipelinechannel--table-to-uri-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + iteratorPolicy: + parallelismLimit: 50 + parameterIterator: + itemInput: pipelinechannel--bigquery-list-rows-Output-loop-item + items: + inputParameter: pipelinechannel--bigquery-list-rows-Output + taskInfo: + name: for-loop-3 + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job-2 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-2 + taskInfo: + name: table-to-uri + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - bigquery-query-job-3 + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job-3 + taskInfo: + name: table-to-uri-2 + inputDefinitions: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-2-project_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-dataset_id: + parameterType: 
STRING + pipelinechannel--bigquery-create-dataset-project_id: + parameterType: STRING + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--get-fte-suffix-Output: + parameterType: STRING + pipelinechannel--get-table-location-Output: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--override_destination: + parameterType: BOOLEAN + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-create-metrics-artifact: + executorLabel: exec-create-metrics-artifact + inputDefinitions: + parameters: + metrics_rows: + parameterType: LIST + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: create-metrics-artifact-evaluation_metrics + producerSubtask: condition-2 + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-create-dataset-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-dataset-2 + dependentTasks: + - get-table-location + - maybe-replace-with-default + - validate-inputs + inputs: + parameters: + dataset: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + exists_ok: + runtimeValue: + constant: 1.0 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-export-dataset + bigquery-create-model-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-create-model-job + dependentTasks: + - bigquery-create-dataset-2 + - build-serialized-query-parameters-3 + - get-fte-suffix + - get-table-location + inputs: + parameters: + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + 
pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE MODEL `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.model_{{$.pipeline_job_uuid}}`\n\ + \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ + \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ + \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ + \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ + \ horizon = @forecast_horizon,\n auto_arima\ + \ = True,\n auto_arima_max_order = @max_order,\n \ + \ data_frequency = @data_granularity_unit,\n holiday_region\ + \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ + \ adjust_step_changes = True,\n decompose_time_series\ + \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ < @start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-3 + taskInfo: + name: create-serving-model + build-serialized-query-parameters-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters-3 + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + max_order: + componentInputParameter: pipelinechannel--max_order + splits: + runtimeValue: + constant: + - TRAIN + - VALIDATE + - TEST + taskInfo: + name: build-serialized-query-parameters-3 + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - bigquery-create-dataset + - bigquery-create-dataset-2 + - get-fte-suffix + - get-table-location + inputs: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + pipelinechannel--data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--forecast_horizon: + componentInputParameter: 
pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--get-table-location-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--max_order: + componentInputParameter: pipelinechannel--max_order + pipelinechannel--override_destination: + componentInputParameter: pipelinechannel--override_destination + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - bigquery-create-dataset-2 + inputs: + parameters: + autodetect_csv_schema: + runtimeValue: + constant: 1.0 + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + forecasting_apply_windowing: + runtimeValue: + constant: 0.0 + forecasting_context_window: + runtimeValue: + constant: 0.0 + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + runtimeValue: + constant: {} + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + 
componentInputParameter: pipelinechannel--validation_fraction + taskInfo: + name: feature-transform-engine + get-fte-suffix: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-fte-suffix + dependentTasks: + - bigquery-create-dataset-2 + - feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' + fte_table: + runtimeValue: + constant: fte_time_series_output + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset-2 + pipelinechannel--bigquery-create-dataset-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset-2 + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-fte-suffix + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + runtimeValue: + constant: export_{{$.pipeline_job_uuid}} + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + window_column: + componentInputParameter: pipelinechannel--window_column + window_max_count: + componentInputParameter: pipelinechannel--window_max_count + window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + 
pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--override_destination: + parameterType: BOOLEAN + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--window_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_stride_length: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. 
+ isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. 
+ parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. 
code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-for-loop-3: + dag: + tasks: + build-job-configuration-query-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-2 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + taskInfo: + name: build-job-configuration-query-2 + build-job-configuration-query-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-3 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-dataset_id'']}}' + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + pipelinechannel--table-to-uri-dataset_id: + componentInputParameter: pipelinechannel--table-to-uri-dataset_id + pipelinechannel--table-to-uri-project_id: + componentInputParameter: pipelinechannel--table-to-uri-project_id + pipelinechannel--table-to-uri-table_id: + componentInputParameter: pipelinechannel--table-to-uri-table_id + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_APPEND + taskInfo: + name: build-job-configuration-query-3 + build-job-configuration-query-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-4 + dependentTasks: + - get-window-query-priority + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' + pipelinechannel--get-window-query-priority-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-window-query-priority + pipelinechannel--table-to-uri-2-dataset_id: + componentInputParameter: pipelinechannel--table-to-uri-2-dataset_id + pipelinechannel--table-to-uri-2-project_id: + componentInputParameter: pipelinechannel--table-to-uri-2-project_id + pipelinechannel--table-to-uri-2-table_id: + componentInputParameter: pipelinechannel--table-to-uri-2-table_id + priority: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--get-window-query-priority-Output'']}}' + project_id: + 
runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_APPEND + taskInfo: + name: build-job-configuration-query-4 + build-serialized-query-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-serialized-query-parameters-2 + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + max_order: + componentInputParameter: pipelinechannel--max_order + splits: + runtimeValue: + constant: + - TRAIN + - VALIDATE + - TEST + window: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + taskInfo: + name: build-serialized-query-parameters-2 + get-value: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-value + inputs: + parameters: + d: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + key: + runtimeValue: + constant: window_number + taskInfo: + name: get_window_number + get-window-query-priority: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-window-query-priority + inputs: + parameters: + max_interactive: + runtimeValue: + constant: 50.0 + window: + componentInputParameter: pipelinechannel--bigquery-list-rows-Output-loop-item + taskInfo: + name: get-window-query-priority + query-with-retry: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry + dependentTasks: + - build-job-configuration-query-2 + - build-serialized-query-parameters-2 + - get-value + inputs: + parameters: + destination_uri: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.model_{{$.inputs.parameters[''pipelinechannel--get-value-Output'']}}' + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-2 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--bigquery-create-dataset-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-dataset_id + pipelinechannel--bigquery-create-dataset-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-project_id + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--get-value-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-value + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n CREATE MODEL 
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.model_{{$.inputs.parameters['pipelinechannel--get-value-Output']}}`\n\ + \ OPTIONS (\n model_type = 'ARIMA_PLUS',\n \ + \ time_series_timestamp_col = '{{$.inputs.parameters['pipelinechannel--time_column']}}',\n\ + \ time_series_id_col = '{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}',\n\ + \ time_series_data_col = '{{$.inputs.parameters['pipelinechannel--target_column']}}',\n\ + \ horizon = @forecast_horizon,\n auto_arima\ + \ = True,\n auto_arima_max_order = @max_order,\n \ + \ data_frequency = @data_granularity_unit,\n holiday_region\ + \ = 'GLOBAL',\n clean_spikes_and_dips = True,\n \ + \ adjust_step_changes = True,\n decompose_time_series\ + \ = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ WHERE\n UPPER(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}})\ + \ IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ < @start_time\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: create-eval-model + query-with-retry-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry-2 + dependentTasks: + - build-job-configuration-query-3 + - build-serialized-query-parameters-2 + - query-with-retry + inputs: + parameters: + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-3 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--query-with-retry-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: query-with-retry + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ AVG(mean_absolute_error) AS MAE,\n AVG(mean_squared_error)\ + \ AS MSE,\n AVG(mean_absolute_percentage_error) AS MAPE,\n\ + \ @prediction_count AS prediction_count,\n FROM ML.EVALUATE(\n\ + \ MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ + \ TABLE 
`{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`,\n\ + \ STRUCT(True AS perform_aggregation, {{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ + \ as horizon))\n " + query_parameters: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: append-evaluation-metrics + query-with-retry-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-query-with-retry-3 + dependentTasks: + - build-job-configuration-query-4 + - build-serialized-query-parameters-2 + - query-with-retry + inputs: + parameters: + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-4 + location: + componentInputParameter: pipelinechannel--get-table-location-Output + pipelinechannel--bigquery-create-dataset-2-dataset_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-dataset_id + pipelinechannel--bigquery-create-dataset-2-project_id: + componentInputParameter: pipelinechannel--bigquery-create-dataset-2-project_id + pipelinechannel--forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + pipelinechannel--get-fte-suffix-Output: + componentInputParameter: pipelinechannel--get-fte-suffix-Output + pipelinechannel--query-with-retry-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: query-with-retry + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n SELECT\n CAST(actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING)\n AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ @start_time AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ CAST(actual.{{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ AS FLOAT64) AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ STRUCT(pred.forecast_value AS value) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM\n ML.FORECAST(\n MODEL `{{$.inputs.parameters['pipelinechannel--query-with-retry-Output']}}`,\n\ + \ STRUCT({{$.inputs.parameters['pipelinechannel--forecast_horizon']}}\ + \ AS horizon)) pred\n JOIN `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-2-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\ + \ actual\n ON\n pred.forecast_timestamp = TIMESTAMP(actual.{{$.inputs.parameters['pipelinechannel--time_column']}})\n\ + \ AND pred.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ = actual.{{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + query_parameters: + taskOutputParameter: + 
outputParameterKey: Output + producerTask: build-serialized-query-parameters-2 + taskInfo: + name: append-evaluated-examples + inputDefinitions: + parameters: + pipelinechannel--bigquery-create-dataset-2-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-2-project_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-dataset_id: + parameterType: STRING + pipelinechannel--bigquery-create-dataset-project_id: + parameterType: STRING + pipelinechannel--bigquery-list-rows-Output: + parameterType: LIST + pipelinechannel--bigquery-list-rows-Output-loop-item: + parameterType: STRUCT + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--get-fte-suffix-Output: + parameterType: STRING + pipelinechannel--get-table-location-Output: + parameterType: STRING + pipelinechannel--max_order: + parameterType: NUMBER_INTEGER + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--table-to-uri-2-dataset_id: + parameterType: STRING + pipelinechannel--table-to-uri-2-project_id: + parameterType: STRING + pipelinechannel--table-to-uri-2-table_id: + parameterType: STRING + pipelinechannel--table-to-uri-dataset_id: + parameterType: STRING + pipelinechannel--table-to-uri-project_id: + parameterType: STRING + pipelinechannel--table-to-uri-table_id: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + comp-get-fte-suffix: + executorLabel: exec-get-fte-suffix + inputDefinitions: + parameters: + bigquery_staging_full_dataset_id: + parameterType: STRING + fte_table: + parameterType: STRING + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-value: + executorLabel: exec-get-value + inputDefinitions: + parameters: + d: + parameterType: STRUCT + key: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-window-query-priority: + executorLabel: exec-get-window-query-priority + inputDefinitions: + parameters: + max_interactive: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + window: + parameterType: STRUCT + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry: + executorLabel: exec-query-with-retry + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. 
+ isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry-2: + executorLabel: exec-query-with-retry-2 + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. + isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-query-with-retry-3: + executorLabel: exec-query-with-retry-3 + inputDefinitions: + parameters: + destination_uri: + defaultValue: '' + description: Optional BigQuery URI to output if the query succeeds. + isOptional: true + parameterType: STRING + job_configuration_query: + description: Additional query job configurations. + isOptional: true + parameterType: STRUCT + location: + description: The GCP region. + parameterType: STRING + max_retry_count: + defaultValue: 5.0 + description: Maximum number of times to retry the query. + isOptional: true + parameterType: NUMBER_INTEGER + project: + description: The GCP project. + parameterType: STRING + query: + description: The query to run. + parameterType: STRING + query_parameters: + description: A list of query parameters. + isOptional: true + parameterType: LIST + retry_wait_seconds: + defaultValue: 10.0 + description: 'Approximate initial number of seconds to wait before + + making another query attempt with exponential backoff.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-dataset-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-create-model-job: + container: + args: + - --type + - BigqueryCreateModelJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.create_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-list-rows: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_list_rows + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ + \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ + \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ + \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ + \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ + \ Rows are keyed by column, and\n all values are stored as strings.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n metadata\ + \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ + \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ + \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ + \ return result\n\n" + image: python:3.7-slim + exec-bigquery-list-rows-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_list_rows + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n\ + \ table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\ + \"Lists the rows of the given BigQuery table.\n\n Args:\n project: The\ + \ GCP project.\n location: The GCP region.\n table: A google.BQTable\ + \ artifact.\n\n Returns:\n A list of dicts representing BigQuery rows.\ + \ Rows are keyed by column, and\n all values are stored as strings.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n metadata\ + \ = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'],\ + \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ + \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ + \ return result\n\n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-2: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - 
--executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-3: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-4: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-5: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-4: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-5: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-6: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. 
If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. '\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-build-serialized-query-parameters-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_serialized_query_parameters + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int]\ + \ = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit:\ + \ Optional[str] = None,\n splits: Optional[List[str]] = None,\n window:\ + \ Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n\ + ) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration\ + \ JSON objects for BQML queries.\n\n All query parameters will be stored\ + \ in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\ + \n Args:\n forecast_horizon: The number of time periods into the future\ + \ for which\n forecasts will be created. Future periods start after\ + \ the latest timestamp\n for each time series.\n forecast_horizon_off_by_one:\ + \ If True, subtract 1 from the forecast horizon\n in the query parameters.\n\ + \ data_granularity_unit: The data granularity unit. Accepted values are:\n\ + \ minute, hour, day, week, month, year.\n splits: Dataset splits\ + \ to be used to train the model.\n window: Dict containing information\ + \ about the forecast window the model\n should have. If no window is\ + \ provided, the window will start after the\n latest period in the\ + \ available data.\n max_order: Integer between 1 and 5 representing the\ + \ size of the parameter\n search space for ARIMA_PLUS. 5 would result\ + \ in the highest accuracy model,\n but also the longest training runtime.\n\ + \n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n # Maps Vertex Forecasting time units to BQML time units.\n unit_map\ + \ = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day':\ + \ 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year':\ + \ 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is\ + \ not None:\n if data_granularity_unit.lower() not in unit_map:\n \ + \ raise ValueError(\n f'{data_granularity_unit} is not a valid\ + \ time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n\ + \ query_parameters.append({\n 'name': 'data_granularity_unit',\n\ + \ 'parameterType': {\n 'type': 'STRING'\n },\n\ + \ 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n\ + \ },\n })\n if max_order is not None:\n query_parameters.append({\n\ + \ 'name': 'max_order',\n 'parameterType': {\n 'type':\ + \ 'INTEGER'\n },\n 'parameterValue': {\n 'value':\ + \ str(max_order)\n },\n })\n if forecast_horizon is not None:\n\ + \ if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n\ + \ 'name': 'forecast_horizon',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': str(forecast_horizon)\n },\n })\n if splits\ + \ is not None:\n query_parameters.append({\n 'name': 'splits',\n\ + \ 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType':\ + \ {\n 'type': 'STRING'\n },\n },\n \ + \ 'parameterValue': {\n 'arrayValues': [{\n \ + \ 'value': split\n } for split in splits],\n },\n \ + \ })\n\n if window is not None:\n query_parameters.append({\n \ + \ 'name': 'prediction_count',\n 'parameterType': {\n \ + \ 'type': 'INTEGER'\n },\n 'parameterValue': {\n \ + \ 'value': window['count']\n },\n })\n\n start_time = window['start_time']\ + \ if window else str(datetime.datetime.max)\n query_parameters.append({\n\ + \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ + \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ + \ },\n })\n return query_parameters\n\n" + image: python:3.7-slim + exec-cond: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - cond + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ + \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ + \ return true_str if predicate else false_str\n\n" + image: python:3.7-slim + exec-create-metrics-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - create_metrics_artifact + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_metrics_artifact(\n metrics_rows: List[Dict[str, str]],\n\ + \ evaluation_metrics: dsl.Output[dsl.Metrics],\n) -> None:\n \"\"\"\ + Converts the rows of a metrics table into an Artifact.\"\"\"\n # Use the\ + \ Vertex Eval component's Metrics metadata naming from\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/metadata/schema/google/artifact_schema.py?cl=467006447&l=344\n\ + \ metric_name_map = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE':\ + \ 'rootMeanSquaredError',\n 'MAPE': 'meanAbsolutePercentageError',\n\ + \ }\n metrics = {metric_name_map[k]: v for k, v in dict(metrics_rows[0]).items()}\n\ + \ evaluation_metrics.metadata = metrics\n\n" + image: python:3.7-slim + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", 
"{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": 
["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-get-fte-suffix: + container: + args: + - 
--executor_input + - '{{$}}' + - --function_to_execute + - get_fte_suffix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ + \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ + \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n for\ + \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ + \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ + \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ + \n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-get-value: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_value + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ + \n" + image: python:3.7-slim + exec-get-window-query-priority: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_window_query_priority + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_window_query_priority(\n window: Dict[str, str],\n \ + \ max_interactive: int = 100,\n) -> str:\n \"\"\"Returns a query priority\ + \ depending on the window number.\"\"\"\n if int(window['window_number'])\ + \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ + \n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-query-with-retry: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-query-with-retry-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-query-with-retry-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - query_with_retry + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef query_with_retry(\n project: str,\n location: str,\n \ + \ query: str,\n query_parameters: Optional[list] = None, # pylint:\ + \ disable=g-bare-generic\n job_configuration_query: Optional[dict] =\ + \ None, # pylint: disable=g-bare-generic\n max_retry_count: int = 5,\n\ + \ retry_wait_seconds: int = 10, # Waits up to 4 minutes before 5th retry.\n\ + \ destination_uri: str = '',\n) -> str:\n \"\"\"Runs a query and retries\ + \ on failure.\n\n Args:\n project: The GCP project.\n location: The\ + \ GCP region.\n query: The query to run.\n query_parameters: A list\ + \ of query parameters.\n job_configuration_query: Additional query job\ + \ configurations.\n max_retry_count: Maximum number of times to retry\ + \ the query.\n retry_wait_seconds: Approximate initial number of seconds\ + \ to wait before\n making another query attempt with exponential backoff.\n\ + \ destination_uri: Optional BigQuery URI to output if the query succeeds.\n\ + \n Returns:\n The given destination URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import logging\n import random\n import time\n\n from google.api_core\ + \ import exceptions\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n query_parameters = query_parameters or []\n job_configuration_query\ + \ = job_configuration_query or {}\n client = bigquery.Client(project=project,\ + \ location=location)\n\n job_configuration_query['queryParameters'] = query_parameters\n\ + \ job_config = bigquery.QueryJobConfig.from_api_repr(\n {'query':\ + \ job_configuration_query})\n retry_count = 0\n while True:\n try:\n\ + \ client.query(query, job_config=job_config).result()\n break\n\ + \ except (exceptions.BadRequest, exceptions.Forbidden) as e:\n if\ + \ retry_count >= max_retry_count:\n logging.info('Maximum retries\ + \ reached.')\n raise\n wait_time = (\n retry_wait_seconds\ + \ * (2 ** retry_count) * random.uniform(1, 1.5))\n logging.info(\n\ + \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ + \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ + \n" + image: python:3.7-slim + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Trains a BQML ARIMA_PLUS model. 
+ name: automl-tabular-bqml-arima-train +root: + dag: + outputs: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: create-metrics-artifact-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_granularity_unit: + componentInputParameter: data_granularity_unit + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_order: + componentInputParameter: max_order + pipelinechannel--override_destination: + componentInputParameter: override_destination + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--window_column: + componentInputParameter: window_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, resources will be created under a new dataset in the project. + + Unlike in Vertex Forecasting, all resources will be given hardcoded names + + under this dataset, and the model artifact will also be exported here.' + isOptional: true + parameterType: STRING + data_granularity_unit: + description: 'The data granularity unit. Accepted values are: + + minute, hour, day, week, month, year.' 
+ parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_order: + defaultValue: 5.0 + description: 'Integer between 1 and 5 representing the size of the parameter + + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + + but also the longest training runtime.' + isOptional: true + parameterType: NUMBER_INTEGER + override_destination: + defaultValue: false + description: 'Whether to overwrite the metrics and evaluated + + examples tables if they already exist. If this is False and the tables + + exist, this pipeline will fail.' + isOptional: true + parameterType: BOOLEAN + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_evaluation: + defaultValue: true + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + defaultValue: '' + description: 'Name of the column that should be used to filter input rows. + + The column should contain either booleans or string booleans; if the value + + of the row is True, generate a sliding window from that row.' + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: -1.0 + description: 'Number of rows that should be used to generate input + + examples. If the total row count is larger than this number, the input + + data will be randomly sampled to hit the count.' + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + defaultValue: -1.0 + description: 'Step length used to generate input examples. Every + + window_stride_length rows will be used to generate a sliding window.'
+ isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + create-metrics-artifact-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml new file mode 100644 index 0000000000..6cdb273900 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -0,0 +1,2150 @@ +# PIPELINE DEFINITION +# Name: prophet-predict +# Description: Creates a batch prediction using a Prophet model. +# Inputs: +# bigquery_destination_uri: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# encryption_spec_key_name: str [Default: ''] +# location: str +# machine_type: str [Default: 'n1-standard-2'] +# max_num_workers: int [Default: 10.0] +# model_name: str +# project: str +# target_column: str +# time_column: str +# time_series_identifier_column: str +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region.
For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bigquery-query-job-2: + executorLabel: exec-bigquery-query-job-2 + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only contain lowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.'
+ isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-build-job-configuration-query-2: + executorLabel: exec-build-job-configuration-query-2 + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - build-job-configuration-query + - get-first-valid + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--get-first-valid-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-first-valid + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: 
pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n base_data AS (\n SELECT\ + \ * FROM `{{$.inputs.parameters['pipelinechannel--get-first-valid-Output']}}`\n\ + \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ \n \n \n FROM base_data\n GROUP\ + \ BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + taskInfo: + name: remove-feature-columns + bigquery-query-job-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job-2 + dependentTasks: + - build-job-configuration-query-2 + - get-table-location-2 + - table-to-uri-2 + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query-2 + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location-2 + pipelinechannel--table-to-uri-2-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n predictions AS (\n SELECT\n\ + \ {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ JSON_QUERY_ARRAY(prediction, '$.{{$.inputs.parameters['pipelinechannel--time_column']}}')\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ JSON_EXTRACT(\n prediction,\n \ + \ '$.predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}'\n\ + \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ JSON_QUERY_ARRAY(\n prediction,\n \ + \ '$.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}'\n\ + \ ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ FROM `{{$.inputs.parameters['pipelinechannel--table-to-uri-2-uri']}}`\n\ + \ )\n SELECT\n {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ + \"',\n predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}}\n\ + \ ) AS predicted_on_{{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ PARSE_TIMESTAMP(\n '\\\"%Y-%m-%dT%H:%M:%SZ\\\ + \"',\n {{$.inputs.parameters['pipelinechannel--time_column']}}[SAFE_OFFSET(index)]\n\ + \ ) AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ STRUCT(\n CAST(predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}[SAFE_OFFSET(index)]\ + \ AS FLOAT64)\n AS value\n ) AS predicted_{{$.inputs.parameters['pipelinechannel--target_column']}}\n\ + \ FROM predictions\n CROSS JOIN\n UNNEST(GENERATE_ARRAY(0,\ + \ ARRAY_LENGTH({{$.inputs.parameters['pipelinechannel--time_column']}})\ 
+ \ - 1)) AS index\n " + taskInfo: + name: create-predictions-table + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: data + write_disposition: + runtimeValue: + constant: WRITE_EMPTY + taskInfo: + name: build-job-configuration-query + build-job-configuration-query-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query-2 + dependentTasks: + - table-to-uri-2 + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-dataset_id'']}}' + pipelinechannel--table-to-uri-2-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: table-to-uri-2 + pipelinechannel--table-to-uri-2-table_id: + taskOutputParameter: + outputParameterKey: table_id + producerTask: table-to-uri-2 + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-project_id'']}}' + table_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--table-to-uri-2-table_id'']}}' + write_disposition: + runtimeValue: + constant: WRITE_TRUNCATE + taskInfo: + name: build-job-configuration-query-2 + get-first-valid: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-first-valid + dependentTasks: + - load-table-from-uri + inputs: + parameters: + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + pipelinechannel--load-table-from-uri-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: load-table-from-uri + values: + runtimeValue: + constant: '["{{$.inputs.parameters[''pipelinechannel--data_source_bigquery_table_path'']}}", + "{{$.inputs.parameters[''pipelinechannel--load-table-from-uri-Output'']}}"]' + taskInfo: + name: get-first-valid + get-table-location: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + get-table-location-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-table-location-2 + dependentTasks: + - table-to-uri-2 + inputs: + parameters: + project: + componentInputParameter: pipelinechannel--project + table: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri-2 + taskInfo: + name: get-table-location-2 + load-table-from-uri: + cachingOptions: + enableCache: true + componentRef: + name: 
comp-load-table-from-uri + dependentTasks: + - bigquery-create-dataset + - get-table-location + inputs: + parameters: + destination: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}.csv_export' + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + source_format: + runtimeValue: + constant: CSV + source_uris: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: load-table-from-uri + make-vertex-model-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-make-vertex-model-artifact + inputs: + parameters: + location: + componentInputParameter: pipelinechannel--location + model_resource_name: + componentInputParameter: pipelinechannel--model_name + taskInfo: + name: make-vertex-model-artifact + maybe-replace-with-default: + cachingOptions: + enableCache: true + componentRef: + name: comp-maybe-replace-with-default + inputs: + parameters: + default: + componentInputParameter: pipelinechannel--project + value: + componentInputParameter: pipelinechannel--bigquery_destination_uri + taskInfo: + name: maybe-replace-with-default + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + dependentTasks: + - make-vertex-model-artifact + - maybe-replace-with-default + - table-to-uri + inputs: + artifacts: + model: + taskOutputArtifact: + outputArtifactKey: vertex_model + producerTask: make-vertex-model-artifact + parameters: + bigquery_destination_output_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--maybe-replace-with-default-Output']}} + bigquery_source_input_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--machine_type + max_replica_count: + componentInputParameter: pipelinechannel--max_num_workers + pipelinechannel--maybe-replace-with-default-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: maybe-replace-with-default + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-batch-predict + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + taskInfo: + name: table-to-uri + table-to-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri-2 + dependentTasks: + - model-batch-predict + inputs: + artifacts: + table: 
+ taskOutputArtifact: + outputArtifactKey: bigquery_output_table + producerTask: model-batch-predict + taskInfo: + name: table-to-uri-2 + validate-inputs: + cachingOptions: + enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + bigquery_destination_uri: + componentInputParameter: pipelinechannel--bigquery_destination_uri + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--bigquery_destination_uri: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--machine_type: + parameterType: STRING + pipelinechannel--max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--model_name: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + comp-get-first-valid: + executorLabel: exec-get-first-valid + inputDefinitions: + parameters: + values: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location-2: + executorLabel: exec-get-table-location-2 + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-load-table-from-uri: + executorLabel: exec-load-table-from-uri + inputDefinitions: + parameters: + destination: + description: Table into which data is to be loaded. + parameterType: STRING + location: + description: The GCP region. + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + source_format: + defaultValue: CSV + description: 'The file format for the files being imported. Only CSV is + + supported.' + isOptional: true + parameterType: STRING + source_uris: + description: 'URIs of data files to be loaded; in format + + gs:///.' 
+ parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-make-vertex-model-artifact: + executorLabel: exec-make-vertex-model-artifact + inputDefinitions: + parameters: + location: + parameterType: STRING + model_resource_name: + parameterType: STRING + outputDefinitions: + artifacts: + vertex_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-maybe-replace-with-default: + executorLabel: exec-maybe-replace-with-default + inputDefinitions: + parameters: + default: + defaultValue: '' + isOptional: true + parameterType: STRING + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
+ isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. 
If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. 
The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. 
If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
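comp-model-batch-predict writes its results either to Cloud Storage (gcs_output_directory) or to BigQuery (bigquery_output_table); the table-to-uri components defined next exist to turn the BigQuery artifact's metadata back into a usable table URI. A plain-Python sketch of that conversion, with placeholder projectId/datasetId/tableId values, looks like:

def table_metadata_to_uri(metadata: dict, use_bq_prefix: bool = False) -> str:
    """Builds 'project.dataset.table' (optionally 'bq://'-prefixed) from google.BQTable metadata."""
    uri = '.'.join([metadata['projectId'], metadata['datasetId'], metadata['tableId']])
    return 'bq://' + uri if use_bq_prefix else uri

# Example with placeholder values:
# table_metadata_to_uri({'projectId': 'my-project', 'datasetId': 'predictions', 'tableId': 'errors'}, True)
# -> 'bq://my-project.predictions.errors'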
+ parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-table-to-uri-2: + executorLabel: exec-table-to-uri-2 + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-bigquery-query-job-2: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", "{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-build-job-configuration-query-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-get-first-valid: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_first_valid + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first\ + \ truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n for value in json.loads(values):\n if value:\n return value\n\ + \ raise ValueError('No valid values.')\n\n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-get-table-location-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-load-table-from-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - load_table_from_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n\ + \ source_uris: str,\n destination: str,\n source_format: str =\ + \ 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n\ + \ project: The GCP project.\n location: The GCP region.\n source_uris:\ + \ URIs of data files to be loaded; in format\n gs:///.\n\ + \ destination: Table into which data is to be loaded.\n source_format:\ + \ The file format for the files being imported. Only CSV is\n supported.\n\ + \n Returns:\n The destination table containing imported data.\n \"\"\ + \"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not source_uris:\n return ''\n\n csv_list = [filename.strip()\ + \ for filename in source_uris.split(',')]\n client = bigquery.Client(project=project,\ + \ location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True,\ + \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ + \ destination=destination,\n project=project,\n location=location,\n\ + \ job_config=job_config).result()\n return destination\n\n" + image: python:3.7-slim + exec-make-vertex-model-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - make_vertex_model_artifact + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef make_vertex_model_artifact(\n location: str,\n model_resource_name:\ + \ str,\n vertex_model: dsl.Output[dsl.Artifact],\n) -> None:\n \"\"\"\ + Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ + \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ + \ f'/v1/{model_resource_name}')\n\n" + image: python:3.7-slim + exec-maybe-replace-with-default: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - maybe_replace_with_default + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ + \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ + \n return default if not value else value\n\n" + image: python:3.7-slim + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - 
'{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-table-to-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n 
f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. 
Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Creates a batch prediction using a Prophet model. + name: prophet-predict +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--bigquery_destination_uri: + componentInputParameter: bigquery_destination_uri + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--machine_type: + componentInputParameter: machine_type + pipelinechannel--max_num_workers: + componentInputParameter: max_num_workers + pipelinechannel--model_name: + componentInputParameter: model_name + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + bigquery_destination_uri: + defaultValue: '' + description: 'URI of the desired destination dataset. If not + + specified, resources will be created under a new dataset in the project. + + Unlike in Vertex Forecasting, all resources will be given hardcoded names + + under this dataset, and the model artifact will also be exported here.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' 
+ isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + location: + description: The GCP region for Vertex AI. + parameterType: STRING + machine_type: + defaultValue: n1-standard-2 + description: The machine type used for batch prediction. + isOptional: true + parameterType: STRING + max_num_workers: + defaultValue: 10.0 + description: The max number of workers used for batch prediction. + isOptional: true + parameterType: NUMBER_INTEGER + model_name: + description: 'The name of the Model resource, in a form of + + projects/{project}/locations/{location}/models/{model}.' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py new file mode 100644 index 0000000000..7c3bb6111b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -0,0 +1,211 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Prophet trainer component spec.""" + +from typing import Optional +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Output + + +# pylint: disable=g-doc-args,unused-argument +@dsl.container_component +def prophet_trainer( + project: str, + location: str, + root_dir: str, + target_column: str, + time_column: str, + time_series_identifier_column: str, + forecast_horizon: int, + window_column: str, + data_granularity_unit: str, + predefined_split_column: str, + source_bigquery_uri: str, + gcp_resources: dsl.OutputPath(str), + unmanaged_container_model: Output[UnmanagedContainerModel], + evaluated_examples_directory: Output[Artifact], + optimization_objective: Optional[str] = 'rmse', + max_num_trials: Optional[int] = 6, + encryption_spec_key_name: Optional[str] = '', + dataflow_max_num_workers: Optional[int] = 10, + dataflow_machine_type: Optional[str] = 'n1-standard-1', + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_service_account: Optional[str] = '', + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, +): + # fmt: off + """Trains and tunes one Prophet model per time series using Dataflow. 
+ + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + root_dir: The Cloud Storage location to store the output. + time_column: Name of the column that identifies time order in the + time series. + time_series_identifier_column: Name of the column that identifies + the time series. + target_column: Name of the column that the model is to predict + values for. + forecast_horizon: The number of time periods into the future for + which forecasts will be created. Future periods start after the latest + timestamp for each time series. + optimization_objective: Optimization objective for tuning. Supported + metrics come from Prophet's performance_metrics function. These are mse, + rmse, mae, mape, mdape, smape, and coverage. + data_granularity_unit: String representing the units of time for the + time column. + predefined_split_column: The predefined_split column name. A string + that represents a list of comma separated CSV filenames. + source_bigquery_uri: The BigQuery table path of format + bq (str)://bq_project.bq_dataset.bq_table + window_column: Name of the column that should be used to filter + input rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding window + from that row. + max_num_trials: Maximum number of tuning trials to perform + per time series. There are up to 100 possible combinations to explore + for each time series. Recommended values to try are 3, 6, and 24. + encryption_spec_key_name: Customer-managed encryption key. + dataflow_machine_type: The dataflow machine type used for + training. + dataflow_max_num_workers: The max number of Dataflow + workers used for training. + dataflow_disk_size_gb: Dataflow worker's disk size in GB + during training. + dataflow_service_account: Custom service account to run + dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + + Returns: + gcp_resources: Serialized gcp_resources proto tracking the custom training + job. + unmanaged_container_model: The UnmanagedContainerModel artifact. 
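For orientation, a minimal wrapper pipeline that wires this container component might look like the sketch below; the column names, horizon, and compiled file name are placeholder values, and gcp_resources, unmanaged_container_model, and evaluated_examples_directory are outputs produced by the component rather than inputs passed to it.

from kfp import compiler, dsl

@dsl.pipeline(name='prophet-trainer-demo')
def demo_pipeline(project: str, location: str, root_dir: str, source_bigquery_uri: str):
    # prophet_trainer is the container component defined above; every column
    # name and setting here is an illustrative placeholder.
    prophet_trainer(
        project=project,
        location=location,
        root_dir=root_dir,
        target_column='sales',
        time_column='date',
        time_series_identifier_column='store_id',
        forecast_horizon=30,
        window_column='window',
        data_granularity_unit='day',
        predefined_split_column='split',
        source_bigquery_uri=source_bigquery_uri,
    )

compiler.Compiler().compile(demo_pipeline, package_path='prophet_trainer_demo.yaml')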
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + '{"display_name": ' + + f'"prophet-trainer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}", ', + '"encryption_spec": {"kms_key_name":"', + encryption_spec_key_name, + '"}, ', + '"job_spec": {"worker_pool_specs": [{"replica_count":"1", ', + '"machine_spec": {"machine_type": "n1-standard-4"}, ', + ( + '"container_spec":' + ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", ' + ), + '"args": ["prophet_trainer", "', + f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "', + ( + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", "' + ), + ( + '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325", "' + ), + '--artifacts_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/model/", "', + '--evaluated_examples_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/eval/", "', + '--region=', + location, + '", "', + '--source_bigquery_uri=', + source_bigquery_uri, + '", "', + '--target_column=', + target_column, + '", "', + '--time_column=', + time_column, + '", "', + '--time_series_identifier_column=', + time_series_identifier_column, + '", "', + '--forecast_horizon=', + forecast_horizon, + '", "', + '--window_column=', + window_column, + '", "', + '--optimization_objective=', + optimization_objective, + '", "', + '--data_granularity_unit=', + data_granularity_unit, + '", "', + '--predefined_split_column=', + predefined_split_column, + '", "', + '--max_num_trials=', + max_num_trials, + '", "', + '--dataflow_project=', + project, + '", "', + '--dataflow_max_num_workers=', + dataflow_max_num_workers, + '", "', + '--dataflow_machine_type=', + dataflow_machine_type, + '", "', + '--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "', + '--dataflow_service_account=', + dataflow_service_account, + '", "', + '--dataflow_subnetwork=', + dataflow_subnetwork, + '", "', + '--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "', + '--gcp_resources_path=', + gcp_resources, + '", "', + '--executor_input={{$.json_escape[1]}}"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml new file mode 100644 index 0000000000..2fadb6830e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -0,0 +1,2958 @@ +# PIPELINE DEFINITION +# Name: prophet-train +# Description: Trains one Prophet model per time series. 
+# Inputs: +# data_granularity_unit: str +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# encryption_spec_key_name: str [Default: ''] +# evaluation_dataflow_disk_size_gb: int [Default: 40.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-1'] +# evaluation_dataflow_max_num_workers: int [Default: 10.0] +# forecast_horizon: int +# location: str +# max_num_trials: int [Default: 6.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# project: str +# root_dir: str +# run_evaluation: bool [Default: True] +# target_column: str +# test_fraction: float [Default: -1.0] +# time_column: str +# time_series_identifier_column: str +# timestamp_split_key: str [Default: ''] +# trainer_dataflow_disk_size_gb: int [Default: 40.0] +# trainer_dataflow_machine_type: str [Default: 'n1-standard-1'] +# trainer_dataflow_max_num_workers: int [Default: 10.0] +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# window_column: str [Default: ''] +# window_max_count: int [Default: -1.0] +# window_stride_length: int [Default: -1.0] +components: + comp-bigquery-create-dataset: + executorLabel: exec-bigquery-create-dataset + inputDefinitions: + parameters: + dataset: + parameterType: STRING + exists_ok: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + comp-bigquery-delete-dataset-with-prefix: + executorLabel: exec-bigquery-delete-dataset-with-prefix + inputDefinitions: + parameters: + dataset_prefix: + parameterType: STRING + delete_contents: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + project: + parameterType: STRING + comp-bigquery-query-job: + executorLabel: exec-bigquery-query-job + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: 'Describes the Cloud + + KMS encryption key that will be used to protect destination + + BigQuery table. The BigQuery Service Account associated with your + + project requires access to this encryption key. If + + encryption_spec_key_name are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + job_configuration_query: + defaultValue: {} + description: 'A json formatted string + + describing the rest of the job configuration. For more details, see + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery' + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + description: 'The labels associated with this job. You can + + use these to organize and group your jobs. Label keys and values can + + be no longer than 63 characters, can only containlowercase letters, + + numeric characters, underscores and dashes. International characters + + are allowed. Label values are optional. Label keys must start with a + + letter and each label in the list must have a different key. + + Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BigQuery job. 
If not + + set, default to `US` multi-region. For more details, see + + https://cloud.google.com/bigquery/docs/locations#specifying_your_location' + isOptional: true + parameterType: STRING + project: + description: Project to run the BigQuery query job. + parameterType: STRING + query: + defaultValue: '' + description: 'SQL query text to execute. Only standard SQL is + + supported. If query are both specified in here and in + + job_configuration_query, the value in here will override the other + + one.' + isOptional: true + parameterType: STRING + query_parameters: + defaultValue: [] + description: 'jobs.query parameters for + + standard SQL queries. If query_parameters are both specified in here + + and in job_configuration_query, the value in here will override the + + other one.' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + destination_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Describes the table where the query results should be stored. + + This property must be set for large results that exceed the maximum + + response size. + + For queries that produce anonymous (cached) results, this field will + + be populated by BigQuery.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the BigQuery job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-build-job-configuration-query: + executorLabel: exec-build-job-configuration-query + inputDefinitions: + parameters: + dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + priority: + defaultValue: INTERACTIVE + isOptional: true + parameterType: STRING + project_id: + defaultValue: '' + isOptional: true + parameterType: STRING + table_id: + defaultValue: '' + isOptional: true + parameterType: STRING + write_disposition: + defaultValue: '' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRUCT + comp-condition-2: + dag: + tasks: + model-evaluation-regression: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-regression + inputs: + artifacts: + predictions_gcs_source: + componentInputArtifact: pipelinechannel--prophet-trainer-evaluated_examples_directory + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_gcs_source: + runtimeValue: + constant: [] + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + prediction_score_column: + runtimeValue: + constant: prediction.predicted_{{$.inputs.parameters['pipelinechannel--target_column']}} + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: 
pipelinechannel--project + target_field_name: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: model-evaluation-regression + inputDefinitions: + artifacts: + pipelinechannel--prophet-trainer-evaluated_examples_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + comp-exit-handler-1: + dag: + tasks: + bigquery-create-dataset: + cachingOptions: {} + componentRef: + name: comp-bigquery-create-dataset + dependentTasks: + - get-table-location + - validate-inputs + inputs: + parameters: + dataset: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: create-tmp-dataset + bigquery-query-job: + cachingOptions: + enableCache: true + componentRef: + name: comp-bigquery-query-job + dependentTasks: + - bigquery-create-dataset + - build-job-configuration-query + - get-fte-suffix + - get-table-location + inputs: + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + job_configuration_query: + taskOutputParameter: + outputParameterKey: Output + producerTask: build-job-configuration-query + location: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-table-location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--time_column: + componentInputParameter: pipelinechannel--time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + project: + componentInputParameter: pipelinechannel--project + query: + runtimeValue: + constant: "\n WITH\n base_data AS (\n SELECT\ + \ * FROM `{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelinechannel--bigquery-create-dataset-dataset_id']}}.fte_time_series_output_{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}`\n\ + \ )\n SELECT\n CAST({{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\ + \ AS STRING) AS {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}},\n\ + \ ARRAY_AGG(TIMESTAMP({{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ 
ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--time_column']}},\n\ + \ ARRAY_AGG({{$.inputs.parameters['pipelinechannel--target_column']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS {{$.inputs.parameters['pipelinechannel--target_column']}},\n\ + \ ARRAY_AGG(split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ ARRAY_AGG(window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}}\ + \ ORDER BY {{$.inputs.parameters['pipelinechannel--time_column']}})\ + \ AS window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}},\n\ + \ FROM base_data\n GROUP BY {{$.inputs.parameters['pipelinechannel--time_series_identifier_column']}}\n\ + \ " + taskInfo: + name: aggregate-by-time-series-id + build-job-configuration-query: + cachingOptions: + enableCache: true + componentRef: + name: comp-build-job-configuration-query + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}' + table_id: + runtimeValue: + constant: data + write_disposition: + runtimeValue: + constant: WRITE_EMPTY + taskInfo: + name: build-job-configuration-query + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - prophet-trainer + inputs: + artifacts: + pipelinechannel--prophet-trainer-evaluated_examples_directory: + taskOutputArtifact: + outputArtifactKey: evaluated_examples_directory + producerTask: prophet-trainer + parameters: + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: run-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--run_evaluation'] + == true + feature-transform-engine: 
+ cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + dependentTasks: + - bigquery-create-dataset + inputs: + parameters: + autodetect_csv_schema: + runtimeValue: + constant: 1.0 + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + forecasting_apply_windowing: + runtimeValue: + constant: 0.0 + forecasting_context_window: + runtimeValue: + constant: 0.0 + forecasting_forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + forecasting_predefined_window_column: + componentInputParameter: pipelinechannel--window_column + forecasting_time_column: + componentInputParameter: pipelinechannel--time_column + forecasting_time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_window_max_count: + componentInputParameter: pipelinechannel--window_max_count + forecasting_window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + runtimeValue: + constant: time_series + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + runtimeValue: + constant: {} + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + taskInfo: + name: feature-transform-engine + get-fte-suffix: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-fte-suffix + dependentTasks: + - bigquery-create-dataset + - feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + runtimeValue: + constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' + fte_table: + runtimeValue: + constant: fte_time_series_output + location: + componentInputParameter: pipelinechannel--location + pipelinechannel--bigquery-create-dataset-dataset_id: + taskOutputParameter: + outputParameterKey: dataset_id + producerTask: bigquery-create-dataset + pipelinechannel--bigquery-create-dataset-project_id: + taskOutputParameter: + outputParameterKey: project_id + producerTask: bigquery-create-dataset + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: get-fte-suffix + get-table-location: + cachingOptions: + enableCache: true + 
componentRef: + name: comp-get-table-location + inputs: + parameters: + default_location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + table: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + taskInfo: + name: get-table-location + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - prophet-trainer + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: prophet-trainer + parameters: + description: + runtimeValue: + constant: Prophet model. + display_name: + runtimeValue: + constant: prophet_{{$.pipeline_job_uuid}} + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + prophet-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-prophet-trainer + dependentTasks: + - get-fte-suffix + - table-to-uri + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--trainer_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--trainer_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--trainer_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + forecast_horizon: + componentInputParameter: pipelinechannel--forecast_horizon + location: + componentInputParameter: pipelinechannel--location + max_num_trials: + componentInputParameter: pipelinechannel--max_num_trials + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--get-fte-suffix-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: get-fte-suffix + pipelinechannel--table-to-uri-uri: + taskOutputParameter: + outputParameterKey: uri + producerTask: table-to-uri + predefined_split_column: + runtimeValue: + constant: split__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + source_bigquery_uri: + runtimeValue: + constant: bq://{{$.inputs.parameters['pipelinechannel--table-to-uri-uri']}} + target_column: + componentInputParameter: pipelinechannel--target_column + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + window_column: + runtimeValue: + constant: window__{{$.inputs.parameters['pipelinechannel--get-fte-suffix-Output']}} + taskInfo: + name: prophet-trainer + table-to-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-table-to-uri + dependentTasks: + - bigquery-query-job + inputs: + artifacts: + table: + taskOutputArtifact: + outputArtifactKey: destination_table + producerTask: bigquery-query-job + taskInfo: + name: table-to-uri + validate-inputs: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-validate-inputs + inputs: + parameters: + data_granularity_unit: + componentInputParameter: pipelinechannel--data_granularity_unit + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--data_source_csv_filenames + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + time_column: + componentInputParameter: pipelinechannel--time_column + time_series_identifier_column: + componentInputParameter: pipelinechannel--time_series_identifier_column + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + window_column: + componentInputParameter: pipelinechannel--window_column + window_max_count: + componentInputParameter: pipelinechannel--window_max_count + window_stride_length: + componentInputParameter: pipelinechannel--window_stride_length + taskInfo: + name: validate-inputs + inputDefinitions: + parameters: + pipelinechannel--data_granularity_unit: + parameterType: STRING + pipelinechannel--data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--data_source_csv_filenames: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--forecast_horizon: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--max_num_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--time_column: + parameterType: STRING + pipelinechannel--time_series_identifier_column: + parameterType: STRING + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--trainer_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--trainer_dataflow_machine_type: + parameterType: STRING + pipelinechannel--trainer_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--window_column: + parameterType: STRING + pipelinechannel--window_max_count: + parameterType: NUMBER_INTEGER + pipelinechannel--window_stride_length: 
+ parameterType: NUMBER_INTEGER + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. 
Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + defaultValue: '' + description: 'Forecasting + + time series identifier column.' + isOptional: true + parameterType: STRING + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' 
+ isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-get-fte-suffix: + executorLabel: exec-get-fte-suffix + inputDefinitions: + parameters: + bigquery_staging_full_dataset_id: + parameterType: STRING + fte_table: + parameterType: STRING + location: + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-get-table-location: + executorLabel: exec-get-table-location + inputDefinitions: + parameters: + default_location: + defaultValue: '' + description: Location to return if no table was given. + isOptional: true + parameterType: STRING + project: + description: The GCP project. + parameterType: STRING + table: + description: The BigQuery table to get a location for. + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-model-evaluation-regression: + executorLabel: exec-model-evaluation-regression + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The managed Vertex Model used for + + predictions job, if using Vertex batch prediction. Must share the same + + location as the provided input argument `location`.' + isOptional: true + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*". For explanation results, the files + + should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + ground_truth_bigquery_source: + defaultValue: '' + description: 'Required for custom tabular. + + The BigQuery table uri representing where the ground truth is located. + + Used to provide ground truth for each prediction instance when they are + + not part of the batch prediction jobs prediction instance.' + isOptional: true + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + description: 'Required for custom tabular and non + + tabular data. The file format for the ground truth files. `jsonl`, + + `csv`, and `bigquery` are the allowed formats. If not set, defaulted to + + `jsonl`.' + isOptional: true + parameterType: STRING + ground_truth_gcs_source: + defaultValue: [] + description: 'Required for custom + + tabular and non tabular data. The GCS uris representing where the ground + + truth is located. Used to provide ground truth for each prediction + + instance when they are not part of the batch prediction jobs prediction + + instance.' + isOptional: true + parameterType: LIST + location: + defaultValue: us-central1 + description: 'Location for running the evaluation. If not set, + + defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + description: 'The column name of the field + + containing batch prediction scores. Formatted to be able to find nested + + columns, delimited by `.`. If not set, defaulted to `prediction.scores` + + for classification.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run evaluation container. + parameterType: STRING + target_field_name: + description: 'The full name path of the features target field + + in the predictions file. Formatted to be able to find nested columns, + + delimited by `.`. Alternatively referred to as the ground truth (or + + ground_truth_column) field.' 
+ parameterType: STRING
+ outputDefinitions:
+ artifacts:
+ evaluation_metrics:
+ artifactType:
+ schemaTitle: google.RegressionMetrics
+ schemaVersion: 0.0.1
+ description: 'google.RegressionMetrics representing the regression
+
+ evaluation metrics in GCS.'
+ parameters:
+ gcp_resources:
+ description: 'Serialized gcp_resources proto tracking the dataflow
+
+ job. For more details, see
+
+ https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
+ parameterType: STRING
+ comp-model-upload:
+ executorLabel: exec-model-upload
+ inputDefinitions:
+ artifacts:
+ parent_model:
+ artifactType:
+ schemaTitle: google.VertexModel
+ schemaVersion: 0.0.1
+ description: 'An artifact of a model
+
+ which to upload a new version to. Only specify this field when
+
+ uploading a new version.'
+ isOptional: true
+ unmanaged_container_model:
+ artifactType:
+ schemaTitle: google.UnmanagedContainerModel
+ schemaVersion: 0.0.1
+ description: "The unmanaged container model to be uploaded. The model can\n\
+ be passed from an upstream step, or imported via an importer.\n\nExamples::\n\
+ \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\
+ \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\
+ \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\
+ \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\
+ \ }\n })"
+ isOptional: true
+ parameters:
+ description:
+ defaultValue: ''
+ description: The description of the model.
+ isOptional: true
+ parameterType: STRING
+ display_name:
+ description: 'The display name of the Model. The name
+
+ can be up to 128 characters long and can consist of any UTF-8
+
+ characters.'
+ parameterType: STRING
+ encryption_spec_key_name:
+ defaultValue: ''
+ description: 'Customer-managed encryption
+
+ key spec for a Model. If set, this Model and all sub-resources of this
+
+ Model will be secured by this key. Has the form:
+
+ ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
+
+ The key needs to be in the same region as where the compute resource
+
+ is created.'
+ isOptional: true
+ parameterType: STRING
+ explanation_metadata:
+ defaultValue: {}
+ description: 'Metadata describing the Model''s
+
+ input and output for explanation. Both `explanation_metadata` and
+
+ `explanation_parameters` must be passed together when used. For more
+
+ details, see
+
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
+ isOptional: true
+ parameterType: STRUCT
+ explanation_parameters:
+ defaultValue: {}
+ description: 'Parameters to configure
+
+ explaining for Model''s predictions. For more details, see
+
+ https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
+ isOptional: true
+ parameterType: STRUCT
+ labels:
+ defaultValue: {}
+ description: 'The labels with user-defined metadata to
+
+ organize your model. Label keys and values can be no longer than 64
+
+ characters (Unicode codepoints), can only contain lowercase letters,
+
+ numeric characters, underscores and dashes. International characters
+
+ are allowed. See https://goo.gl/xmQnxf for more information and
+
+ examples of labels.'
+ isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Optional location to upload this model to. If + + not set, default to us-central1.' + isOptional: true + parameterType: STRING + project: + description: Project to upload this model to. + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: Artifact tracking the created model. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the upload model''s + long + + running operation. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-prophet-trainer: + executorLabel: exec-prophet-trainer + inputDefinitions: + parameters: + data_granularity_unit: + description: 'String representing the units of time for the + + time column.' + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB + + during training.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-1 + description: 'The dataflow machine type used for + + training.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow + + workers used for training.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used.' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + forecast_horizon: + description: 'The number of time periods into the future for + + which forecasts will be created. Future periods start after the latest + + timestamp for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_num_trials: + defaultValue: 6.0 + description: 'Maximum number of tuning trials to perform + + per time series. There are up to 100 possible combinations to explore + + for each time series. Recommended values to try are 3, 6, and 24.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + defaultValue: rmse + description: 'Optimization objective for tuning. Supported + + metrics come from Prophet''s performance_metrics function. These are mse, + + rmse, mae, mape, mdape, smape, and coverage.' + isOptional: true + parameterType: STRING + predefined_split_column: + description: 'The predefined_split column name. A string + + that represents a list of comma separated CSV filenames.' + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + source_bigquery_uri: + description: 'The BigQuery table path of format + + bq (str)://bq_project.bq_dataset.bq_table' + parameterType: STRING + target_column: + description: 'Name of the column that the model is to predict + + values for.' + parameterType: STRING + time_column: + description: 'Name of the column that identifies time order in the + + time series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies + + the time series.' + parameterType: STRING + window_column: + description: 'Name of the column that should be used to filter + + input rows. The column should contain either booleans or string + + booleans; if the value of the row is True, generate a sliding window + + from that row.' + parameterType: STRING + outputDefinitions: + artifacts: + evaluated_examples_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: The UnmanagedContainerModel artifact. + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the custom training + + job.' + parameterType: STRING + comp-table-to-uri: + executorLabel: exec-table-to-uri + inputDefinitions: + artifacts: + table: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + use_bq_prefix: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + outputDefinitions: + parameters: + dataset_id: + parameterType: STRING + project_id: + parameterType: STRING + table_id: + parameterType: STRING + uri: + parameterType: STRING + comp-validate-inputs: + executorLabel: exec-validate-inputs + inputDefinitions: + parameters: + bigquery_destination_uri: + isOptional: true + parameterType: STRING + data_granularity_unit: + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + isOptional: true + parameterType: STRING + data_source_csv_filenames: + isOptional: true + parameterType: STRING + optimization_objective: + isOptional: true + parameterType: STRING + predefined_split_key: + isOptional: true + parameterType: STRING + source_model_uri: + isOptional: true + parameterType: STRING + target_column: + isOptional: true + parameterType: STRING + test_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + isOptional: true + parameterType: STRING + time_series_identifier_column: + isOptional: true + parameterType: STRING + timestamp_split_key: + isOptional: true + parameterType: STRING + training_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + isOptional: true + parameterType: STRING + window_max_count: + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + isOptional: true + parameterType: NUMBER_INTEGER +deploymentSpec: + executors: + exec-bigquery-create-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_create_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n\ + \ dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs',\ + \ [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery\ + \ dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n\n from google.cloud import bigquery\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n ref\ + \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ + \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ + \ ref.project, ref.dataset_id)\n\n" + image: python:3.7-slim + exec-bigquery-delete-dataset-with-prefix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - bigquery_delete_dataset_with_prefix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n \ + \ dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n\ + \ \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n\ + \ # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project)\n for dataset in client.list_datasets(project=project):\n\ + \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ + \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ + \n" + image: python:3.7-slim + exec-bigquery-query-job: + container: + args: + - --type + - BigqueryQueryJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --payload + - '{"Concat": ["{", "\"configuration\": {", "\"query\": ", "{{$.inputs.parameters[''job_configuration_query'']}}", + ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}", "}"]}' + - --job_configuration_query_override + - '{"Concat": ["{", "\"query\": \"", "{{$.inputs.parameters[''query'']}}", + "\"", ", \"query_parameters\": ", 
"{{$.inputs.parameters[''query_parameters'']}}", + ", \"destination_encryption_configuration\": {", "\"kmsKeyName\": \"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-build-job-configuration-query: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - build_job_configuration_query + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef build_job_configuration_query(\n project_id: str = '',\n \ + \ dataset_id: str = '',\n table_id: str = '',\n write_disposition:\ + \ str = '',\n priority: str = 'INTERACTIVE',\n) -> dict: # pylint: disable=g-bare-generic\n\ + \ \"\"\"Creates a JobConfigurationQuery object.\"\"\"\n config = {\n \ + \ 'priority': priority,\n }\n if all([project_id, dataset_id, table_id]):\n\ + \ config['destinationTable'] = {\n 'projectId': project_id,\n\ + \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ + \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ + \ return config\n\n" + image: python:3.7-slim + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", 
"{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": 
["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + 
"{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + exec-get-fte-suffix: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_fte_suffix + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_fte_suffix(\n project: str,\n location: str,\n bigquery_staging_full_dataset_id:\ + \ str,\n fte_table: str,\n) -> str:\n \"\"\"Infers the FTE suffix from\ + \ the intermediate FTE table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n client = bigquery.Client(project=project, location=location)\n for\ + \ table in client.list_tables(bigquery_staging_full_dataset_id):\n if\ + \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ + \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ + \n" + image: python:3.7-slim + exec-get-table-location: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_table_location + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_table_location(\n project: str,\n table: Optional[str],\n\ + \ default_location: str = '',\n) -> str:\n \"\"\"Returns the region\ + \ the given table belongs to.\n\n Args:\n project: The GCP project.\n\ + \ table: The BigQuery table to get a location for.\n default_location:\ + \ Location to return if no table was given.\n\n Returns:\n A GCP region\ + \ or multi-region.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n if not table:\n return default_location\n\n client = bigquery.Client(project=project)\n\ + \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ + \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ + \ return client.get_table(table).location\n\n" + image: python:3.7-slim + exec-model-evaluation-regression: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - regression + - --target_field_name + - '{"Concat": ["instance.", "{{$.inputs.parameters[''target_field_name'']}}"]}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - '{"IfPresent": {"InputName": "model", "Then": ["--model_name", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}"]}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --ground_truth_gcs_source + - '{{$.inputs.parameters[''ground_truth_gcs_source'']}}' + - --ground_truth_bigquery_source + - '{{$.inputs.parameters[''ground_truth_bigquery_source'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --dataflow_job_prefix + - evaluation-regression-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + 
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name + ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.model.upload_model.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-prophet-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"prophet-trainer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", + ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": + {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325\", + ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", + \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325\", + \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325\", + \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", + \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", + "\", \"", "--source_bigquery_uri=", "{{$.inputs.parameters[''source_bigquery_uri'']}}", + "\", \"", "--target_column=", "{{$.inputs.parameters[''target_column'']}}", + "\", \"", "--time_column=", 
"{{$.inputs.parameters[''time_column'']}}", + "\", \"", "--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}", + "\", \"", "--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}", + "\", \"", "--window_column=", "{{$.inputs.parameters[''window_column'']}}", + "\", \"", "--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}", + "\", \"", "--data_granularity_unit=", "{{$.inputs.parameters[''data_granularity_unit'']}}", + "\", \"", "--predefined_split_column=", "{{$.inputs.parameters[''predefined_split_column'']}}", + "\", \"", "--max_num_trials=", "{{$.inputs.parameters[''max_num_trials'']}}", + "\", \"", "--dataflow_project=", "{{$.inputs.parameters[''project'']}}", + "\", \"", "--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"", "--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"", "--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"", "--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"", "--dataflow_subnetwork=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"", "--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"", "--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"", "--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-table-to-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - table_to_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n use_bq_prefix:\ + \ bool = False,\n) -> NamedTuple(\n 'Outputs',\n [\n ('project_id',\ + \ str),\n ('dataset_id', str),\n ('table_id', str),\n \ + \ ('uri', str),\n ],\n):\n \"\"\"Converts a google.BQTable to a URI.\"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n\ + \ table.metadata['tableId'],\n ]\n bq_uri = '.'.join(outputs)\n \ + \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ + \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ + \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + image: python:3.7-slim + exec-validate-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - validate_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n \ + \ time_series_identifier_column: Optional[str] = None,\n target_column:\ + \ Optional[str] = None,\n data_source_bigquery_table_path: Optional[str]\ + \ = None,\n training_fraction: Optional[float] = None,\n validation_fraction:\ + \ Optional[float] = None,\n test_fraction: Optional[float] = None,\n\ + \ predefined_split_key: Optional[str] = None,\n timestamp_split_key:\ + \ Optional[str] = None,\n data_source_csv_filenames: Optional[str] =\ + \ None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri:\ + \ Optional[str] = None,\n window_column: Optional[str] = None,\n window_stride_length:\ + \ Optional[int] = None,\n window_max_count: Optional[int] = None,\n \ + \ optimization_objective: Optional[str] = None,\n data_granularity_unit:\ + \ Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input\ + \ parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n\ + \ import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\ + \n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n\ + \ dataset_pattern = r'[a-zA-Z0-9_]+'\n table_pattern = r'[^\\.\\:`]+'\n\ + \ dataset_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}')\n\ + \ table_uri_pattern = re.compile(\n f'(bq://)?{project_pattern}[.:]{dataset_pattern}[.:]{table_pattern}')\n\ + \n # Validate BigQuery column and dataset names.\n bigquery_column_parameters\ + \ = [\n time_column,\n time_series_identifier_column,\n target_column,\n\ + \ ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n \ + \ for column in bigquery_column_parameters:\n if column and not column_pattern.fullmatch(column):\n\ + \ raise ValueError(f'Invalid column name: {column}.')\n if (bigquery_destination_uri\ + \ and\n not dataset_uri_pattern.fullmatch(bigquery_destination_uri)):\n\ + \ raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n\ + \ if (source_model_uri and not table_uri_pattern.fullmatch(source_model_uri)):\n\ + \ raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\ + \n # Validate data source.\n data_source_count = sum([bool(source) for\ + \ source in [\n data_source_bigquery_table_path, data_source_csv_filenames]])\n\ + \ if data_source_count > 1:\n raise ValueError(f'Expected 1 data source,\ + \ found {data_source_count}.')\n if (data_source_bigquery_table_path\n\ + \ and not table_uri_pattern.fullmatch(data_source_bigquery_table_path)):\n\ + \ raise ValueError(\n f'Invalid BigQuery table URI: {data_source_bigquery_table_path}.')\n\ + \ gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if data_source_csv_filenames:\n\ + \ csv_list = [filename.strip()\n for filename in data_source_csv_filenames.split(',')]\n\ + \ for gcs_path in csv_list:\n if not gcs_path_pattern.fullmatch(gcs_path):\n\ + \ raise 
ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\ + \n # Validate split spec.\n fraction_splits = [\n training_fraction,\n\ + \ validation_fraction,\n test_fraction,\n ]\n fraction_splits\ + \ = [None if fraction == -1 else fraction\n for fraction\ + \ in fraction_splits]\n split_count = sum([\n bool(source)\n \ + \ for source in [predefined_split_key,\n any(fraction_splits)]\n\ + \ ])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type,\ + \ found {split_count}.')\n if (predefined_split_key and\n not column_pattern.fullmatch(predefined_split_key)):\n\ + \ raise ValueError(f'Invalid column name: {predefined_split_key}.')\n\ + \ if any(fraction_splits):\n if not all(fraction_splits):\n raise\ + \ ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n\ + \ if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction\ + \ splits must sum to 1. Got: {sum(fraction_splits)}.')\n if (timestamp_split_key\ + \ and\n not column_pattern.fullmatch(timestamp_split_key)):\n raise\ + \ ValueError(f'Invalid column name: {timestamp_split_key}.')\n if timestamp_split_key\ + \ and not all(fraction_splits):\n raise ValueError('All fractions must\ + \ be non-zero for timestamp split.')\n\n # Validate window config.\n if\ + \ window_stride_length == -1:\n window_stride_length = None\n if window_max_count\ + \ == -1:\n window_max_count = None\n window_configs = [window_column,\ + \ window_stride_length, window_max_count]\n window_config_count = sum([bool(config)\ + \ for config in window_configs])\n if window_config_count > 1:\n raise\ + \ ValueError(f'Expected 1 window config, found {window_config_count}.')\n\ + \ if window_column and not column_pattern.fullmatch(window_column):\n \ + \ raise ValueError(f'Invalid column name: {window_column}.')\n if window_stride_length\ + \ and (window_stride_length < 1 or\n window_stride_length\ + \ > 1000):\n raise ValueError('Stride must be between 1 and 1000. Got:\ + \ '\n f'{window_stride_length}.')\n if window_max_count\ + \ and (window_max_count < 1000 or\n window_max_count\ + \ > int(1e8)):\n raise ValueError('Max count must be between 1000 and\ + \ 100000000. Got: '\n f'{window_max_count}.')\n\n #\ + \ Validate eval metric.\n valid_optimization_objectives = ['rmse', 'mae',\ + \ 'rmsle']\n if optimization_objective:\n if optimization_objective\ + \ not in valid_optimization_objectives:\n raise ValueError(\n \ + \ 'Optimization objective should be one of the following: '\n \ + \ f'{valid_optimization_objectives}, got: {optimization_objective}.')\n\ + \n # Validate data granularity unit.\n valid_data_granularity_units =\ + \ [\n 'minute', 'hour', 'day', 'week', 'month', 'year']\n if data_granularity_unit:\n\ + \ if data_granularity_unit not in valid_data_granularity_units:\n \ + \ raise ValueError(\n 'Granularity unit should be one of the\ + \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ + \n" + image: python:3.7-slim +pipelineInfo: + description: Trains one Prophet model per time series. 
+ name: prophet-train +root: + dag: + tasks: + bigquery-delete-dataset-with-prefix: + cachingOptions: {} + componentRef: + name: comp-bigquery-delete-dataset-with-prefix + dependentTasks: + - exit-handler-1 + inputs: + parameters: + dataset_prefix: + runtimeValue: + constant: tmp_{{$.pipeline_job_uuid}} + delete_contents: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: project + taskInfo: + name: delete-tmp-dataset + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--data_granularity_unit: + componentInputParameter: data_granularity_unit + pipelinechannel--data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + pipelinechannel--data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--forecast_horizon: + componentInputParameter: forecast_horizon + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--max_num_trials: + componentInputParameter: max_num_trials + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: test_fraction + pipelinechannel--time_column: + componentInputParameter: time_column + pipelinechannel--time_series_identifier_column: + componentInputParameter: time_series_identifier_column + pipelinechannel--timestamp_split_key: + componentInputParameter: timestamp_split_key + pipelinechannel--trainer_dataflow_disk_size_gb: + componentInputParameter: trainer_dataflow_disk_size_gb + pipelinechannel--trainer_dataflow_machine_type: + componentInputParameter: trainer_dataflow_machine_type + pipelinechannel--trainer_dataflow_max_num_workers: + componentInputParameter: trainer_dataflow_max_num_workers + pipelinechannel--training_fraction: + componentInputParameter: training_fraction + pipelinechannel--validation_fraction: + componentInputParameter: validation_fraction + pipelinechannel--window_column: + componentInputParameter: window_column + pipelinechannel--window_max_count: + componentInputParameter: window_max_count + pipelinechannel--window_stride_length: + componentInputParameter: window_stride_length + taskInfo: + name: exit-handler-1 + inputDefinitions: + parameters: + data_granularity_unit: + description: 'String representing the units of time for the time + + column.' 
+ parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'The BigQuery table path of format + + bq://bq_project.bq_dataset.bq_table' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'A string that represents a list of comma + + separated CSV filenames.' + isOptional: true + parameterType: STRING + dataflow_service_account: + defaultValue: '' + description: Custom service account to run dataflow jobs. + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork name, when empty + + the default subnetwork will be used.' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow workers use public IP + + addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB during + + evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-1 + description: 'The dataflow machine type used for + + evaluation.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow workers used + + for evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + forecast_horizon: + description: 'The number of time periods into the future for which + + forecasts will be created. Future periods start after the latest timestamp + + for each time series.' + parameterType: NUMBER_INTEGER + location: + description: The GCP region for Vertex AI. + parameterType: STRING + max_num_trials: + defaultValue: 6.0 + description: 'Maximum number of tuning trials to perform per time series. + + There are up to 100 possible combinations to explore for each time series. + + Recommended values to try are 3, 6, and 24.' + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + description: Optimization objective for the model. + parameterType: STRING + predefined_split_key: + defaultValue: '' + description: The predefined_split column name. + isOptional: true + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_evaluation: + defaultValue: true + description: Whether to run evaluation steps during training. + isOptional: true + parameterType: BOOLEAN + target_column: + description: Name of the column that the model is to predict values for. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + description: 'Name of the column that identifies time order in the time + + series.' + parameterType: STRING + time_series_identifier_column: + description: 'Name of the column that identifies the time + + series.' + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + trainer_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB during + + training.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + trainer_dataflow_machine_type: + defaultValue: n1-standard-1 + description: The dataflow machine type used for training. + isOptional: true + parameterType: STRING + trainer_dataflow_max_num_workers: + defaultValue: 10.0 + description: 'The max number of Dataflow workers used + + for training.' + isOptional: true + parameterType: NUMBER_INTEGER + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + window_column: + defaultValue: '' + description: 'Name of the column that should be used to filter input rows. + + The column should contain either booleans or string booleans; if the value + + of the row is True, generate a sliding window from that row.' + isOptional: true + parameterType: STRING + window_max_count: + defaultValue: -1.0 + description: 'Number of rows that should be used to generate input + + examples. If the total row count is larger than this number, the input + + data will be randomly sampled to hit the count.' + isOptional: true + parameterType: NUMBER_INTEGER + window_stride_length: + defaultValue: -1.0 + description: 'Step length used to generate input examples. Every + + window_stride_length rows will be used to generate a sliding window.' + isOptional: true + parameterType: NUMBER_INTEGER +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py new file mode 100644 index 0000000000..b69d5430a5 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py @@ -0,0 +1,341 @@ +"""Util functions for Vertex Forecasting pipelines.""" + +import os +import pathlib +from typing import Any, Dict, Tuple + +_GCPC_FORECASTING_PATH = pathlib.Path(__file__).parent.resolve() + + +def get_bqml_arima_train_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + time_column: str, + time_series_identifier_column: str, + target_column: str, + forecast_horizon: int, + data_granularity_unit: str, + predefined_split_key: str = '', + timestamp_split_key: str = '', + training_fraction: float = -1.0, + validation_fraction: float = -1.0, + test_fraction: float = -1.0, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + window_column: str = '', + window_stride_length: int = -1, + window_max_count: int = -1, + bigquery_destination_uri: str = '', + override_destination: bool = False, + max_order: int = 5, + run_evaluation: bool = True, +) -> Tuple[str, Dict[str, Any]]: + """Get the BQML ARIMA_PLUS training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + root_dir: The Cloud Storage location to store the output. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column: Name of the column that the model is to predict values for. + forecast_horizon: The number of time periods into the future for which + forecasts will be created. Future periods start after the latest timestamp + for each time series. + data_granularity_unit: The data granularity unit. 
Accepted values are:
+      minute, hour, day, week, month, year.
+    predefined_split_key: The predefined_split column name.
+    timestamp_split_key: The timestamp_split column name.
+    training_fraction: The training fraction.
+    validation_fraction: The validation fraction.
+    test_fraction: The test fraction.
+    data_source_csv_filenames: A string that represents a list of comma
+      separated CSV filenames.
+    data_source_bigquery_table_path: The BigQuery table path of format
+      bq://bq_project.bq_dataset.bq_table
+    window_column: Name of the column that should be used to filter input rows.
+      The column should contain either booleans or string booleans; if the value
+      of the row is True, generate a sliding window from that row.
+    window_stride_length: Step length used to generate input examples. Every
+      window_stride_length rows will be used to generate a sliding window.
+    window_max_count: Number of rows that should be used to generate input
+      examples. If the total row count is larger than this number, the input
+      data will be randomly sampled to hit the count.
+    bigquery_destination_uri: URI of the desired destination dataset. If not
+      specified, resources will be created under a new dataset in the project.
+      Unlike in Vertex Forecasting, all resources will be given hardcoded names
+      under this dataset, and the model artifact will also be exported here.
+    override_destination: Whether to overwrite the metrics and evaluated
+      examples tables if they already exist. If this is False and the tables
+      exist, this pipeline will fail.
+    max_order: Integer between 1 and 5 representing the size of the parameter
+      search space for ARIMA_PLUS. 5 would result in the highest accuracy model,
+      but also the longest training runtime.
+    run_evaluation: Whether to run evaluation steps during training.
+
+  Returns:
+    Tuple of pipeline_definition_path and parameter_values.
+  """
+  parameter_values = {
+      'project': project,
+      'location': location,
+      'root_dir': root_dir,
+      'time_column': time_column,
+      'time_series_identifier_column': time_series_identifier_column,
+      'target_column': target_column,
+      'forecast_horizon': forecast_horizon,
+      'data_granularity_unit': data_granularity_unit,
+      'predefined_split_key': predefined_split_key,
+      'timestamp_split_key': timestamp_split_key,
+      'training_fraction': training_fraction,
+      'validation_fraction': validation_fraction,
+      'test_fraction': test_fraction,
+      'data_source_csv_filenames': data_source_csv_filenames,
+      'data_source_bigquery_table_path': data_source_bigquery_table_path,
+      'window_column': window_column,
+      'window_stride_length': window_stride_length,
+      'window_max_count': window_max_count,
+      'bigquery_destination_uri': bigquery_destination_uri,
+      'override_destination': override_destination,
+      'max_order': max_order,
+      'run_evaluation': run_evaluation,
+  }
+  pipeline_definition_path = os.path.join(
+      _GCPC_FORECASTING_PATH, 'bqml_arima_train_pipeline.yaml'
+  )
+  return pipeline_definition_path, parameter_values
+
+
+def get_bqml_arima_predict_pipeline_and_parameters(
+    project: str,
+    location: str,
+    model_name: str,
+    data_source_csv_filenames: str = '',
+    data_source_bigquery_table_path: str = '',
+    bigquery_destination_uri: str = '',
+    generate_explanation: bool = False,
+) -> Tuple[str, Dict[str, Any]]:
+  """Get the BQML ARIMA_PLUS prediction pipeline.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region for Vertex AI.
+    model_name: ARIMA_PLUS BQML model URI.
+    data_source_csv_filenames: A string that represents a list of comma
+      separated CSV filenames.
+    data_source_bigquery_table_path: The BigQuery table path of format
+      bq://bq_project.bq_dataset.bq_table
+    bigquery_destination_uri: URI of the desired destination dataset. If not
+      specified, a resource will be created under a new dataset in the project.
+    generate_explanation: Generate explanation along with the batch prediction
+      results. This will cause the batch prediction output to include
+      explanations.
+
+  Returns:
+    Tuple of pipeline_definition_path and parameter_values.
+  """
+  parameter_values = {
+      'project': project,
+      'location': location,
+      'model_name': model_name,
+      'data_source_csv_filenames': data_source_csv_filenames,
+      'data_source_bigquery_table_path': data_source_bigquery_table_path,
+      'bigquery_destination_uri': bigquery_destination_uri,
+      'generate_explanation': generate_explanation,
+  }
+  pipeline_definition_path = os.path.join(
+      _GCPC_FORECASTING_PATH, 'bqml_arima_predict_pipeline.yaml'
+  )
+  return pipeline_definition_path, parameter_values
+
+
+def get_prophet_train_pipeline_and_parameters(
+    project: str,
+    location: str,
+    root_dir: str,
+    time_column: str,
+    time_series_identifier_column: str,
+    target_column: str,
+    forecast_horizon: int,
+    optimization_objective: str,
+    data_granularity_unit: str,
+    predefined_split_key: str = '',
+    timestamp_split_key: str = '',
+    training_fraction: float = -1.0,
+    validation_fraction: float = -1.0,
+    test_fraction: float = -1.0,
+    data_source_csv_filenames: str = '',
+    data_source_bigquery_table_path: str = '',
+    window_column: str = '',
+    window_stride_length: int = -1,
+    window_max_count: int = -1,
+    max_num_trials: int = 6,
+    trainer_dataflow_machine_type: str = 'n1-standard-1',
+    trainer_dataflow_max_num_workers: int = 10,
+    trainer_dataflow_disk_size_gb: int = 40,
+    evaluation_dataflow_machine_type: str = 'n1-standard-1',
+    evaluation_dataflow_max_num_workers: int = 10,
+    evaluation_dataflow_disk_size_gb: int = 40,
+    dataflow_service_account: str = '',
+    dataflow_subnetwork: str = '',
+    dataflow_use_public_ips: bool = True,
+    run_evaluation: bool = True,
+) -> Tuple[str, Dict[str, Any]]:
+  """Returns Prophet train pipeline and formatted parameters.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region for Vertex AI.
+    root_dir: The Cloud Storage location to store the output.
+    time_column: Name of the column that identifies time order in the time
+      series.
+    time_series_identifier_column: Name of the column that identifies the time
+      series.
+    target_column: Name of the column that the model is to predict values for.
+    forecast_horizon: The number of time periods into the future for which
+      forecasts will be created. Future periods start after the latest timestamp
+      for each time series.
+    optimization_objective: Optimization objective for the model.
+    data_granularity_unit: String representing the units of time for the time
+      column.
+    predefined_split_key: The predefined_split column name.
+    timestamp_split_key: The timestamp_split column name.
+    training_fraction: The training fraction.
+    validation_fraction: The validation fraction.
+    test_fraction: The test fraction.
+    data_source_csv_filenames: A string that represents a list of comma
+      separated CSV filenames.
+    data_source_bigquery_table_path: The BigQuery table path of format
+      bq://bq_project.bq_dataset.bq_table
+    window_column: Name of the column that should be used to filter input rows.
+ The column should contain either booleans or string booleans; if the value + of the row is True, generate a sliding window from that row. + window_stride_length: Step length used to generate input examples. Every + window_stride_length rows will be used to generate a sliding window. + window_max_count: Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the input + data will be randomly sampled to hit the count. + max_num_trials: Maximum number of tuning trials to perform per time series. + trainer_dataflow_machine_type: The dataflow machine type used for training. + trainer_dataflow_max_num_workers: The max number of Dataflow workers used + for training. + trainer_dataflow_disk_size_gb: Dataflow worker's disk size in GB during + training. + evaluation_dataflow_machine_type: The dataflow machine type used for + evaluation. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers used + for evaluation. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB during + evaluation. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + run_evaluation: Whether to run evaluation steps during training. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column': target_column, + 'forecast_horizon': forecast_horizon, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'window_column': window_column, + 'window_stride_length': window_stride_length, + 'window_max_count': window_max_count, + 'max_num_trials': max_num_trials, + 'optimization_objective': optimization_objective, + 'data_granularity_unit': data_granularity_unit, + 'trainer_dataflow_machine_type': trainer_dataflow_machine_type, + 'trainer_dataflow_max_num_workers': trainer_dataflow_max_num_workers, + 'trainer_dataflow_disk_size_gb': trainer_dataflow_disk_size_gb, + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'dataflow_service_account': dataflow_service_account, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'run_evaluation': run_evaluation, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'prophet_trainer_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values + + +def get_prophet_prediction_pipeline_and_parameters( + project: str, + location: str, + model_name: str, + time_column: str, + time_series_identifier_column: str, + target_column: str, + data_source_csv_filenames: str = '', + data_source_bigquery_table_path: str = '', + bigquery_destination_uri: str = '', + machine_type: str = 'n1-standard-2', + max_num_workers: int = 10, +) -> 
Tuple[str, Dict[str, Any]]: + """Returns Prophet prediction pipeline and formatted parameters. + + Unlike the prediction server for Vertex Forecasting, the Prophet prediction + server returns predictions batched by time series id. This pipeline shows how + these predictions can be disaggregated to get results similar to what Vertex + Forecasting provides. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region for Vertex AI. + model_name: The name of the Model resource, in a form of + projects/{project}/locations/{location}/models/{model}. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column: Name of the column that the model is to predict values for. + data_source_csv_filenames: A string that represents a list of comma + separated CSV filenames. + data_source_bigquery_table_path: The BigQuery table path of format + bq://bq_project.bq_dataset.bq_table + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, resources will be created under a new dataset in the project. + machine_type: The machine type used for batch prediction. + max_num_workers: The max number of workers used for batch prediction. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'model_name': model_name, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column': target_column, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'bigquery_destination_uri': bigquery_destination_uri, + 'machine_type': machine_type, + 'max_num_workers': max_num_workers, + } + pipeline_definition_path = os.path.join( + _GCPC_FORECASTING_PATH, 'prophet_predict_pipeline.yaml' + ) + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py new file mode 100644 index 0000000000..2522350d36 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py @@ -0,0 +1,37 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
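All four helpers above follow the same pattern: they return the path of a precompiled pipeline YAML together with a matching parameter_values dict, which can then be submitted as a Vertex AI pipeline run. The following is a minimal sketch of wiring the ARIMA_PLUS training helper into a run; it assumes the google-cloud-aiplatform SDK is installed, that the module is importable as shown, and uses placeholder project, bucket, and table names.

# Illustrative usage sketch only; project/bucket/table values are hypothetical.
from google.cloud import aiplatform

from google_cloud_pipeline_components.v1.automl.forecasting import utils

# Each helper returns (template_path, parameter_values) ready for a PipelineJob.
template_path, parameter_values = utils.get_bqml_arima_train_pipeline_and_parameters(
    project='my-project',                      # hypothetical GCP project ID
    location='us-central1',
    root_dir='gs://my-bucket/pipeline_root',   # hypothetical Cloud Storage root
    time_column='ds',
    time_series_identifier_column='series_id',
    target_column='y',
    forecast_horizon=30,
    data_granularity_unit='day',
    data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
)

aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='bqml-arima-train',
    template_path=template_path,
    parameter_values=parameter_values,
    pipeline_root='gs://my-bucket/pipeline_root',
)
job.run()

The prediction and Prophet helpers are used the same way, only with their respective argument lists and compiled YAML templates.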
+ +"""GA AutoML tabular components.""" + +from google_cloud_pipeline_components.v1.automl.tabular.cv_trainer import automl_tabular_cv_trainer as CvTrainerOp +from google_cloud_pipeline_components.v1.automl.tabular.ensemble import automl_tabular_ensemble as EnsembleOp +from google_cloud_pipeline_components.v1.automl.tabular.finalizer import automl_tabular_finalizer as FinalizerOp +from google_cloud_pipeline_components.v1.automl.tabular.infra_validator import automl_tabular_infra_validator as InfraValidatorOp +from google_cloud_pipeline_components.v1.automl.tabular.split_materialized_data import split_materialized_data as SplitMaterializedDataOp +from google_cloud_pipeline_components.v1.automl.tabular.stage_1_tuner import automl_tabular_stage_1_tuner as Stage1TunerOp +from google_cloud_pipeline_components.v1.automl.tabular.stats_and_example_gen import tabular_stats_and_example_gen as StatsAndExampleGenOp +from google_cloud_pipeline_components.v1.automl.tabular.training_configurator_and_validator import training_configurator_and_validator as TrainingConfiguratorAndValidatorOp +from google_cloud_pipeline_components.v1.automl.tabular.transform import automl_tabular_transform as TransformOp + +__all__ = [ + 'CvTrainerOp', + 'InfraValidatorOp', + 'Stage1TunerOp', + 'EnsembleOp', + 'StatsAndExampleGenOp', + 'TransformOp', + 'FinalizerOp', + 'SplitMaterializedDataOp', + 'TrainingConfiguratorAndValidatorOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml new file mode 100644 index 0000000000..3c4fbb6d46 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -0,0 +1,11149 @@ +# PIPELINE DEFINITION +# Name: automl-tabular +# Description: The AutoML Tabular pipeline v1. 
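+# Usage note (illustrative, not part of the compiled spec): pipeline authors
+# typically reference these components through the aliases exported by the
+# package __init__.py above, e.g.
+#   from google_cloud_pipeline_components.v1.automl.tabular import CvTrainerOp, EnsembleOp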
+# Inputs: +# additional_experiments: dict +# cv_trainer_worker_pool_specs_override: list +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# disable_early_stopping: bool [Default: False] +# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# distill_batch_predict_max_replica_count: int [Default: 25.0] +# distill_batch_predict_starting_replica_count: int [Default: 25.0] +# enable_probabilistic_inference: bool [Default: False] +# encryption_spec_key_name: str [Default: ''] +# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_explain_max_replica_count: int [Default: 10.0] +# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] +# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] +# evaluation_batch_predict_max_replica_count: int [Default: 20.0] +# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] +# evaluation_dataflow_disk_size_gb: int [Default: 50.0] +# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] +# evaluation_dataflow_max_num_workers: int [Default: 100.0] +# evaluation_dataflow_starting_num_workers: int [Default: 10.0] +# export_additional_model_without_custom_ops: bool [Default: False] +# fast_testing: bool [Default: False] +# location: str +# model_description: str [Default: ''] +# model_display_name: str [Default: ''] +# optimization_objective: str +# optimization_objective_precision_value: float [Default: -1.0] +# optimization_objective_recall_value: float [Default: -1.0] +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# quantiles: list +# root_dir: str +# run_distillation: bool [Default: False] +# run_evaluation: bool [Default: False] +# stage_1_num_parallel_trials: int [Default: 35.0] +# stage_1_tuner_worker_pool_specs_override: list +# stage_1_tuning_result_artifact_uri: str [Default: ''] +# stage_2_num_parallel_trials: int [Default: 35.0] +# stage_2_num_selected_trials: int [Default: 5.0] +# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] +# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] +# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] +# stratified_split_key: str [Default: ''] +# study_spec_parameters_override: list +# target_column: str +# test_fraction: float [Default: -1.0] +# timestamp_split_key: str [Default: ''] +# train_budget_milli_node_hours: float +# training_fraction: float [Default: -1.0] +# transform_dataflow_disk_size_gb: int [Default: 40.0] +# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] +# transform_dataflow_max_num_workers: int [Default: 25.0] +# transformations: str +# validation_fraction: float [Default: -1.0] +# vertex_dataset: system.Artifact +# weight_column: str [Default: ''] +# Outputs: +# feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-3-feature_attributions: system.Metrics +# feature-attribution-feature_attributions: system.Metrics +# model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-3-evaluation_metrics: system.Metrics +# model-evaluation-evaluation_metrics: system.Metrics +components: + comp-automl-tabular-cv-trainer: + executorLabel: exec-automl-tabular-cv-trainer + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: 
system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-cv-trainer-2: + executorLabel: exec-automl-tabular-cv-trainer-2 + inputDefinitions: + artifacts: + materialized_cv_splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized cross-validation splits. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: AutoML Tabular tuning result. + parameters: + deadline_hours: + description: Number of hours the cross-validation trainer should run. + parameterType: NUMBER_DOUBLE + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble: + executorLabel: exec-automl-tabular-ensemble + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. 
+ parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-2: + executorLabel: exec-automl-tabular-ensemble-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. 
+ model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-3: + executorLabel: exec-automl-tabular-ensemble-3 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. 
+ parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-finalizer: + executorLabel: exec-automl-tabular-finalizer + inputDefinitions: + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-infra-validator: + executorLabel: exec-automl-tabular-infra-validator + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-2: + executorLabel: exec-automl-tabular-infra-validator-2 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-infra-validator-3: + executorLabel: exec-automl-tabular-infra-validator-3 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' + comp-automl-tabular-stage-1-tuner: + executorLabel: exec-automl-tabular-stage-1-tuner + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. 
+ parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-stage-1-tuner-2: + executorLabel: exec-automl-tabular-stage-1-tuner-2 + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. 
+ parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform: + executorLabel: exec-automl-tabular-transform + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' 
+ isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-transform-2: + executorLabel: exec-automl-tabular-transform-2 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. 
If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized test split. + materialized_test_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized train split. + training_schema_uri: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The training schema. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-bool-identity: + executorLabel: exec-bool-identity + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-2: + executorLabel: exec-bool-identity-2 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-bool-identity-3: + executorLabel: exec-bool-identity-3 + inputDefinitions: + parameters: + value: + description: Boolean value to return + parameterType: BOOLEAN + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-calculate-training-parameters: + executorLabel: exec-calculate-training-parameters + inputDefinitions: + parameters: + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + is_skip_architecture_search: + defaultValue: false + description: 'If component is being called in the + + skip_architecture_search pipeline.' + isOptional: true + parameterType: BOOLEAN + run_distillation: + description: Whether to run distill in the training pipeline. + parameterType: BOOLEAN + stage_1_num_parallel_trials: + description: Number of parallel trails for stage 1. 
+          parameterType: NUMBER_INTEGER
+        stage_2_num_parallel_trials:
+          description: Number of parallel trials for stage 2.
+          parameterType: NUMBER_INTEGER
+        train_budget_milli_node_hours:
+          description: 'The train budget of creating this model,
+
+            expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+            hour.'
+          parameterType: NUMBER_DOUBLE
+    outputDefinitions:
+      parameters:
+        distill_stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        reduce_search_space_mode:
+          parameterType: STRING
+        stage_1_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_1_num_selected_trials:
+          parameterType: NUMBER_INTEGER
+        stage_1_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+        stage_2_deadline_hours:
+          parameterType: NUMBER_DOUBLE
+        stage_2_single_run_max_secs:
+          parameterType: NUMBER_INTEGER
+  comp-calculate-training-parameters-2:
+    executorLabel: exec-calculate-training-parameters-2
+    inputDefinitions:
+      parameters:
+        fast_testing:
+          defaultValue: false
+          description: Internal flag used for presubmit tests.
+          isOptional: true
+          parameterType: BOOLEAN
+        is_skip_architecture_search:
+          defaultValue: false
+          description: 'If component is being called in the
+
+            skip_architecture_search pipeline.'
+          isOptional: true
+          parameterType: BOOLEAN
+        run_distillation:
+          description: Whether to run distill in the training pipeline.
+          parameterType: BOOLEAN
+        stage_1_num_parallel_trials:
+          description: Number of parallel trials for stage 1.
+          parameterType: NUMBER_INTEGER
+        stage_2_num_parallel_trials:
+          description: Number of parallel trials for stage 2.
+          parameterType: NUMBER_INTEGER
+        train_budget_milli_node_hours:
+          description: 'The train budget of creating this model,
+
+            expressed in milli node hours i.e. 1,000 value in this field means 1 node
+
+            hour.'
+ parameterType: NUMBER_DOUBLE + outputDefinitions: + parameters: + distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + reduce_search_space_mode: + parameterType: STRING + stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_1_num_selected_trials: + parameterType: NUMBER_INTEGER + stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + stage_2_deadline_hours: + parameterType: NUMBER_DOUBLE + stage_2_single_run_max_secs: + parameterType: NUMBER_INTEGER + comp-condition-2: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-3 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-3 + tasks: + automl-tabular-cv-trainer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer + dependentTasks: + - calculate-training-parameters + - importer + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer + automl-tabular-ensemble: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble + dependentTasks: + - automl-tabular-cv-trainer + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + 
componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble + automl-tabular-infra-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + taskInfo: + name: automl-tabular-infra-validator + bool-identity: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity + calculate-training-parameters: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 1.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - automl-tabular-ensemble + - bool-identity + - model-upload + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + pipelinechannel--model-upload-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + pipelinechannel--bool-identity-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + 
pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] + == 'true' + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: importer + model-upload: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload + dependentTasks: + - automl-tabular-ensemble + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: 
evaluation_metrics + producerSubtask: model-evaluation + tasks: + feature-attribution: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution + dependentTasks: + - model-batch-explanation + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution + model-batch-explanation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation + model-batch-predict: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: 
pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict + model-evaluation: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation + dependentTasks: + - model-batch-predict + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation + model-evaluation-import: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import + dependentTasks: + - feature-attribution + - model-evaluation + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation + model: + componentInputArtifact: pipelinechannel--model-upload-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-4: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-5 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-7 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-5 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-7 + tasks: + automl-tabular-cv-trainer-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-cv-trainer-2 + dependentTasks: + - automl-tabular-stage-1-tuner + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_cv_splits: + componentInputArtifact: pipelinechannel--merge-materialized-splits-splits + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + 
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner + parameters: + deadline_hours: + taskOutputParameter: + outputParameterKey: stage_2_deadline_hours + producerTask: calculate-training-parameters-2 + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_2_single_run_max_secs + producerTask: calculate-training-parameters-2 + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + taskInfo: + name: automl-tabular-cv-trainer-2 + automl-tabular-ensemble-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-2 + dependentTasks: + - automl-tabular-cv-trainer-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-cv-trainer-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-2 + automl-tabular-infra-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-2 + dependentTasks: + - automl-tabular-ensemble-2 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + taskInfo: + name: automl-tabular-infra-validator-2 + automl-tabular-stage-1-tuner: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + materialized_eval_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split + materialized_train_split: + componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output + parameters: + deadline_hours: + 
taskOutputParameter: + outputParameterKey: stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + taskOutputParameter: + outputParameterKey: stage_1_num_selected_trials + producerTask: calculate-training-parameters-2 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + root_dir: + componentInputParameter: pipelinechannel--root_dir + single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner + bool-identity-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-2 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_evaluation + taskInfo: + name: bool-identity-2 + bool-identity-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-bool-identity-3 + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--run_distillation + taskInfo: + name: bool-identity-3 + calculate-training-parameters-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-calculate-training-parameters-2 + inputs: + parameters: + fast_testing: + componentInputParameter: pipelinechannel--fast_testing + is_skip_architecture_search: + runtimeValue: + constant: 0.0 + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: calculate-training-parameters-2 + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-2 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-2 + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--dataflow_service_account: + 
componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: no-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'false' + condition-7: + componentRef: + name: comp-condition-7 + dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 + - calculate-training-parameters-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + pipelinechannel--tabular-stats-and-example-gen-eval_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + pipelinechannel--tabular-stats-and-example-gen-metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + pipelinechannel--tabular-stats-and-example-gen-test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + pipelinechannel--tabular-stats-and-example-gen-train_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + parameters: + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: distill_stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: is-distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'true' + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-transform-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--merge-materialized-splits-splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + 
pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + 
artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-5: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-6 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-6 + tasks: + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - model-upload-2 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + pipelinechannel--model-upload-2-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-2 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers 
+ pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-upload-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + description: + componentInputParameter: pipelinechannel--model_description + display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + 
pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-6: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-2 + tasks: + feature-attribution-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-2 + dependentTasks: + - model-batch-explanation-2 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-2 + model-batch-explanation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-2 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: 
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-2 + model-batch-predict-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-2 + model-evaluation-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-2 + dependentTasks: + - model-batch-predict-2 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-2 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: 
pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-2 + model-evaluation-import-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-2 + dependentTasks: + - feature-attribution-2 + - model-evaluation-2 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-2 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-2 + model: + componentInputArtifact: pipelinechannel--model-upload-2-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-2-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + 
pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-7: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-8 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-8 + tasks: + automl-tabular-ensemble-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-ensemble-3 + dependentTasks: + - automl-tabular-stage-1-tuner-2 + - automl-tabular-transform-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + instance_baseline: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + tuning_result_input: + taskOutputArtifact: + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner-2 + warmup_data: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-ensemble-3 + automl-tabular-infra-validator-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-infra-validator-3 + dependentTasks: + - automl-tabular-ensemble-3 + inputs: + artifacts: + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + taskInfo: + name: automl-tabular-infra-validator-3 + automl-tabular-stage-1-tuner-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner-2 + dependentTasks: + - automl-tabular-transform-2 + inputs: + artifacts: + materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform-2 + materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform-2 + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform-2 + parameters: + deadline_hours: + componentInputParameter: 
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + runtimeValue: + constant: 1.0 + single_run_max_secs: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner-2 + automl-tabular-transform-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform-2 + dependentTasks: + - write-bp-result-path + - write-bp-result-path-2 + inputs: + artifacts: + dataset_schema: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema + eval_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path-2 + metadata: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata + test_split: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split + train_split: + taskOutputArtifact: + outputArtifactKey: result + producerTask: write-bp-result-path + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform-2 + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - automl-tabular-ensemble-3 + - model-upload-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + pipelinechannel--model-upload-3-model: + taskOutputArtifact: + outputArtifactKey: model + producerTask: model-upload-3 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + pipelinechannel--bool-identity-2-Output: + 
componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + taskInfo: + name: is-evaluation + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + model-batch-predict-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-3 + dependentTasks: + - read-input-uri + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + 
componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-train-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-3 + model-batch-predict-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-4 + dependentTasks: + - read-input-uri-2 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + taskOutputParameter: + outputParameterKey: Output + producerTask: read-input-uri-2 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-eval-split + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: tf-record + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-4 + model-upload-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-3 + dependentTasks: + - automl-tabular-ensemble-3 + - automl-tabular-infra-validator-3 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + parameters: + display_name: + runtimeValue: + constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-3 + read-input-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri + inputs: + artifacts: + split_uri: + componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split + taskInfo: + name: read-input-uri + read-input-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-read-input-uri-2 + inputs: + artifacts: + split_uri: + 
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split + taskInfo: + name: read-input-uri-2 + write-bp-result-path: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path + dependentTasks: + - model-batch-predict-3 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-3 + taskInfo: + name: write-bp-result-path + write-bp-result-path-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-write-bp-result-path-2 + dependentTasks: + - model-batch-predict-4 + inputs: + artifacts: + bp_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-4 + taskInfo: + name: write-bp-result-path-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + pipelinechannel--tabular-stats-and-example-gen-train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + parameters: + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + 
parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-condition-8: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-3 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-3 + tasks: + feature-attribution-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-3 + dependentTasks: + - model-batch-explanation-3 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-3 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-3 + model-batch-explanation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-explanation-3 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: 
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-3 + model-batch-predict-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-5 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + gcs_source_uris: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + instances_format: + runtimeValue: + constant: tf-record + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-5 + model-evaluation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-3 + dependentTasks: + - model-batch-predict-5 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-5 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: 
pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-3 + model-evaluation-import-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-3 + dependentTasks: + - feature-attribution-3 + - model-evaluation-3 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-3 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-3 + model: + componentInputArtifact: pipelinechannel--model-upload-3-model + parameters: + dataset_paths: + componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json + dataset_type: + runtimeValue: + constant: tf-record + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-3 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-3-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--location: + parameterType: 
STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + parameterType: LIST + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + automl-tabular-transform: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-transform + dependentTasks: + - tabular-stats-and-example-gen + inputs: + artifacts: + dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: automl-tabular-transform + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - 
automl-tabular-transform + - merge-materialized-splits + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: 
pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - automl-tabular-transform + - merge-materialized-splits + - string-not-empty + - tabular-stats-and-example-gen + inputs: + artifacts: + pipelinechannel--automl-tabular-transform-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + pipelinechannel--automl-tabular-transform-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: automl-tabular-transform + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--tabular-stats-and-example-gen-dataset_schema: + taskOutputArtifact: + outputArtifactKey: dataset_schema + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-eval_split: + taskOutputArtifact: + outputArtifactKey: eval_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: tabular-stats-and-example-gen + 
pipelinechannel--tabular-stats-and-example-gen-test_split: + taskOutputArtifact: + outputArtifactKey: test_split + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-train_split: + taskOutputArtifact: + outputArtifactKey: train_split + producerTask: tabular-stats-and-example-gen + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + 
componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: + taskOutputParameter: + outputParameterKey: downsampled_test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--tabular-stats-and-example-gen-test_split_json: + taskOutputParameter: + outputParameterKey: test_split_json + producerTask: tabular-stats-and-example-gen + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--transform_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb + pipelinechannel--transform_dataflow_machine_type: + componentInputParameter: pipelinechannel--transform_dataflow_machine_type + pipelinechannel--transform_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - automl-tabular-transform + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: automl-tabular-transform + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: automl-tabular-transform + taskInfo: + name: merge-materialized-splits + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: string-not-empty + tabular-stats-and-example-gen: + cachingOptions: + enableCache: true + componentRef: + name: comp-tabular-stats-and-example-gen + inputs: + parameters: + additional_experiments_json: + componentInputParameter: pipelinechannel--additional_experiments + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + 
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + quantiles: + componentInputParameter: pipelinechannel--quantiles + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + componentInputParameter: pipelinechannel--run_distillation + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column_name: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + timestamp_split_key: + componentInputParameter: pipelinechannel--timestamp_split_key + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + transformations: + runtimeValue: + constant: '[]' + transformations_path: + componentInputParameter: pipelinechannel--transformations + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column_name: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: tabular-stats-and-example-gen + inputDefinitions: + parameters: + pipelinechannel--additional_experiments: + parameterType: STRUCT + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: 
NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--location: + parameterType: STRING + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + parameterType: STRING + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--timestamp_split_key: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--transform_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--transform_dataflow_machine_type: + parameterType: STRING + pipelinechannel--transform_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--transformations: + parameterType: STRING + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 
0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. 
If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' 
+ isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-3: + executorLabel: exec-feature-attribution-3 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. 
+ outputDefinitions: + artifacts: + splits: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-3: + executorLabel: exec-model-batch-explanation-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + 
parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-3: + executorLabel: exec-model-batch-predict-3 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. 
In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. 
If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' 
+ isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. 
Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' 
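For orientation, the batch-predict interface defined above is normally driven from the KFP DSL rather than by editing this compiled spec. A minimal sketch, assuming the component is exposed as ModelBatchPredictOp under google_cloud_pipeline_components.v1.batch_predict_job; the import path, bucket paths, and the summarize_predictions consumer are illustrative, not taken from this patch:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp


    @dsl.component
    def summarize_predictions(predictions_dir: dsl.Input[dsl.Artifact]) -> str:
        # Placeholder consumer: a real component would read the predictions_*
        # files written under this output directory.
        return predictions_dir.uri


    @dsl.pipeline(name='batch-predict-consumer')
    def batch_predict_consumer(project: str, location: str = 'us-central1'):
        bp = ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name='example-batch-predict',
            gcs_source_uris=['gs://example-bucket/instances.jsonl'],
            instances_format='jsonl',
            gcs_destination_output_uri_prefix='gs://example-bucket/predictions',
            # A real call must also pass model= or unmanaged_container_model=,
            # per the input definitions above.
        )
        # The deprecated batchpredictionjob artifact is intentionally unused.
        summarize_predictions(predictions_dir=bp.outputs['gcs_output_directory'])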
+ gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-4: + executorLabel: exec-model-batch-predict-4 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. 
For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). 
These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-batch-predict-5: + executorLabel: exec-model-batch-predict-5 + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' 
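Because each comp-* block above is an ordinary KFP component spec, an equivalent YAML file can also be loaded and reused directly from the SDK. A short sketch, assuming a single batch-predict component had been compiled to a hypothetical model_batch_predict.yaml:

    from kfp import components, dsl

    # load_component_from_file is part of the kfp SDK; the file name is hypothetical.
    batch_predict = components.load_component_from_file('model_batch_predict.yaml')


    @dsl.pipeline(name='reuse-compiled-component')
    def reuse(project: str):
        # Only the two required inputs (job_display_name, project) are passed;
        # everything marked isOptional above keeps its defaultValue.
        batch_predict(project=project, job_display_name='reused-batch-predict')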
+ isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + bigquery_source_input_uri: + defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' + isOptional: true + parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. 
+ + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' + isOptional: true + parameterType: STRING + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. 
This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." 
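The object/array conversion described for instance_type above is easiest to see on a concrete, made-up BigQuery row; the column names and values below are illustrative only:

    # BigQuery row with columns (age, city) = (42, 'Paris'), in both formats.
    row_as_object = {'age': 42, 'city': 'Paris'}  # instance_type == 'object'
    row_as_array = [42, 'Paris']                  # instance_type == 'array'
    # With 'array', included_fields fixes the field order; for example
    # included_fields == ['city', 'age'] would instead yield ['Paris', 42].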
+ isOptional: true + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. 
If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + isOptional: true + parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-model-evaluation: + executorLabel: exec-model-evaluation + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + 
schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-2: + executorLabel: exec-model-evaluation-2 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-3: + executorLabel: exec-model-evaluation-3 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + 
parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import: + executorLabel: exec-model-evaluation-import + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-2: + executorLabel: exec-model-evaluation-import-2 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' 
+ isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-3: + executorLabel: exec-model-evaluation-import-3 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. 
+ isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload: + executorLabel: exec-model-upload + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-2: + executorLabel: exec-model-upload-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + 
parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-upload-3: + executorLabel: exec-model-upload-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + description: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + project: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-read-input-uri: + executorLabel: exec-read-input-uri + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-read-input-uri-2: + executorLabel: exec-read-input-uri-2 + inputDefinitions: + artifacts: + split_uri: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: Tbe path to the file that contains Dataset data. + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-set-optional-inputs: + executorLabel: exec-set-optional-inputs + inputDefinitions: + artifacts: + vertex_dataset: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The Vertex dataset when data source is Vertex dataset. + parameters: + data_source_bigquery_table_path: + description: The BigQuery table when data source is BQ. + parameterType: STRING + data_source_csv_filenames: + description: The CSV GCS path when data source is CSV. + parameterType: STRING + location: + description: The GCP region that runs the pipeline components. + parameterType: STRING + model_display_name: + description: The uploaded model's display name. + parameterType: STRING + project: + description: The GCP project that runs the pipeline components. + parameterType: STRING + outputDefinitions: + parameters: + data_source_bigquery_table_path: + parameterType: STRING + data_source_csv_filenames: + parameterType: STRING + model_display_name: + parameterType: STRING + comp-string-not-empty: + executorLabel: exec-string-not-empty + inputDefinitions: + parameters: + value: + description: String value to be checked. 
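comp-string-not-empty above has the shape of a small lightweight Python component. A sketch of how such a check is typically authored and used to gate a branch; the function body, the component names, and the Condition wiring are assumptions for illustration, not taken from this pipeline's source:

    from kfp import dsl


    @dsl.component
    def string_not_empty(value: str) -> str:
        # Mirrors the STRING output declared above: 'true' if a value was given.
        return 'true' if value else 'false'


    @dsl.component
    def use_csv_source(data_source_csv_filenames: str):
        print(f'Using CSV source: {data_source_csv_filenames}')


    @dsl.pipeline(name='string-not-empty-example')
    def string_not_empty_example(data_source_csv_filenames: str = ''):
        check = string_not_empty(value=data_source_csv_filenames)
        with dsl.Condition(check.output == 'true'):
            use_csv_source(data_source_csv_filenames=data_source_csv_filenames)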
+ parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-tabular-stats-and-example-gen: + executorLabel: exec-tabular-stats-and-example-gen + inputDefinitions: + parameters: + additional_experiments: + defaultValue: '' + isOptional: true + parameterType: STRING + additional_experiments_json: + defaultValue: {} + isOptional: true + parameterType: STRUCT + data_source_bigquery_table_path: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More + + details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + enable_probabilistic_inference: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: 'Location for running dataset statistics and example + + generation.' + parameterType: STRING + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." 
+ isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + predefined_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_type: + description: 'The prediction type. Supported values: + + "classification", "regression".' + parameterType: STRING + project: + description: 'Project to run dataset statistics and example + + generation.' + parameterType: STRING + quantiles: + defaultValue: [] + isOptional: true + parameterType: LIST + request_type: + defaultValue: COLUMN_STATS_ONLY + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + stratified_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + target_column_name: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + transformations: + description: 'Quote escaped JSON string for transformations. Each + + transformation will apply transform function to given input column. And + + the result will be used for training. When creating transformation for + + BigQuery Struct column, the column should be flattened using "." as the + + delimiter.' + parameterType: STRING + transformations_path: + defaultValue: '' + description: 'Path to a GCS file containing JSON + + string for transformations.' + isOptional: true + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column_name: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + eval_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The eval split. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The instance baseline used to calculate explanations. + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + test_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The test split. + train_split: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The train split. + parameters: + downsampled_test_split_json: + description: The downsampled test split JSON object. 
+ parameterType: LIST + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + test_split_json: + description: The test split JSON object. + parameterType: LIST + comp-write-bp-result-path: + executorLabel: exec-write-bp-result-path + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-write-bp-result-path-2: + executorLabel: exec-write-bp-result-path-2 + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + artifacts: + result: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-automl-tabular-cv-trainer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + 
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-cv-trainer-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", + "{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", + "{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", + "{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", + "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", + \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": 
\"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-3: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-finalizer: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-infra-validator: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-2: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-3: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-stage-1-tuner: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-stage-1-tuner-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", \"--tune_feature_selection_rate=", 
"{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + 
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-transform-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", + "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", + \"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", + "{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", + "{{$.inputs.artifacts[''train_split''].uri}}", "\", 
\"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", + "\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", + \"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", + "\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", + "\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", + "\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", + "\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-bool-identity: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-bool-identity-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _bool_identity + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ + \ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ + \ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-calculate-training-parameters: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-calculate-training-parameters-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _calculate_training_parameters + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ + \ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ + \ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ + \ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ + \ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ + \ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ + \ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ + \ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ + \"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ + \ Number of parallel trails for stage 1.\n train_budget_milli_node_hours:\ + \ The train budget of creating this model,\n expressed in milli node\ + \ hours i.e. 
1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ + \ Number of parallel trails for stage 2.\n run_distillation: Whether\ + \ to run distill in the training pipeline.\n is_skip_architecture_search:\ + \ If component is being called in the\n skip_architecture_search pipeline.\n\ + \ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ + \ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ + \ stage_1_num_selected_trials: Number of selected trails for stage\ + \ 1.\n stage_1_single_run_max_secs: Maximum number seconds to for a\ + \ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ + \ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ + \ Maximum number seconds to for a single stage\n 2\n training\ + \ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ + \ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ + \ The reduce search space mode. Possible values:\n minimal, regular,\ + \ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ + \ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ + \ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ + \ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ + \ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ + \ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ + \ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ + \ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ + \ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ + \ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ + \ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ + \ # All of magic number \"1.3\" above is because the trial doesn't\n\ + \ # always finish in time_per_trial. 1.3 is an empirical safety margin\ + \ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ + \ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ + \ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ + \ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ + \ case. Phase 2\n # can't finish in time after the deadline is cut,\ + \ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ + \ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ + \ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ + \ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ + \ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ + \ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ + \ to phase_2_rounds *\n # stage_2_num_parallel_trials. 
Use this information\ + \ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ + \ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ + \ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ + \ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ + \ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ + \ of magic number \"1.3\" above is because the trial doesn't always\n \ + \ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ + \ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ + \ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ + \ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ + \ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ + \ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ + \ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ + \ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ + \ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ + \ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ + \ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ + \ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ + \ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ + \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ + \ reduce_search_space_mode,\n )\n\n" + image: python:3.7 + exec-feature-attribution: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - 
/main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-2: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-3: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - 
'{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-importer: + importer: + artifactUri: + runtimeParameter: uri + typeSchema: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + exec-merge-materialized-splits: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _merge_materialized_splits + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ + \ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ + ):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ + \ first materialized split.\n split_1: The second materialized split.\n\ + \ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ + \ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ + \ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ + \ f.write(','.join([split_0_content, split_1_content]))\n\n" + image: python:3.7 + exec-model-batch-explanation: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", 
"{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + 
- '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", 
"{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", 
"{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", 
"\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-4: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", 
\"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-5: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", 
\"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-evaluation: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-2: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - 
'{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-3: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - --ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - 
--kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-import: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-2: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": 
"regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-3: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + - 
'{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-upload: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-2: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-upload-3: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - 
--gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-read-input-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-read-input-uri-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _read_input_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ + ) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ + \ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ + \ Tbe path to the file that contains Dataset data.\n\n Returns:\n The\ + \ list of string that represents the batch prediction input files.\n \"\ + \"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ + \ return data_source['tf_record_data_source']['file_patterns']\n\n" + image: python:3.7 + exec-set-optional-inputs: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _set_optional_inputs + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ + \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ + \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ + \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ + \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ + \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ + \ The GCP project that runs the pipeline components.\n location: The\ + \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ + \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ + \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ + \ dataset when data source is Vertex dataset.\n model_display_name: The\ + \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ + \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ import collections\n from google.cloud import aiplatform\n from google.cloud\ + \ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ + \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ + \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ + \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ + \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ + \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ + \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ + \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ + \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ + \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ + \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ + \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ + \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ + \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ + \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ + \n" + image: python:3.7-slim + exec-string-not-empty: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _string_not_empty + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ + \ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ + \n Returns:\n Boolean value. -> 'true' if empty, 'false' if not empty.\ + \ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ + \ \"\"\"\n return 'true' if value else 'false'\n\n" + image: python:3.7 + exec-tabular-stats-and-example-gen: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": + \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": + \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": + \\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": + \\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": + ", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": + ", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": + ", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", + \"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", + "\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", + "\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", + "\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", + "\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", + "\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", + "\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", + "\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", + "\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", + "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", + "\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", + \"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", + "\", 
\"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", + "\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", + \"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", + \"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", + "\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", + "\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", + \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", + \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", + "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", + "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", + "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", + "\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", + "\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", + "\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", + "\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", + \"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", + \"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", + "{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", + "{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", + "\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", + "\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", + \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-write-bp-result-path: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 + exec-write-bp-result-path-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _write_bp_result_path + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ + \ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ + \ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ + \ job artifact.\n result: Tbe path to the file that contains Dataset\ + \ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ + \ 'tf_record_data_source': {\n 'file_patterns': [\n \ + \ f'{directory}/prediction.results-*',\n ],\n 'coder':\ + \ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ + \n" + image: python:3.7 +pipelineInfo: + description: The AutoML Tabular pipeline v1. 
+ name: automl-tabular +root: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: exit-handler-1 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: exit-handler-1 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: exit-handler-1 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: exit-handler-1 + tasks: + automl-tabular-finalizer: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-finalizer + dependentTasks: + - exit-handler-1 + inputs: + parameters: + location: + componentInputParameter: location + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + taskInfo: + name: automl-tabular-finalizer + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + dependentTasks: + - set-optional-inputs + inputs: + parameters: + pipelinechannel--additional_experiments: + componentInputParameter: additional_experiments + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: distill_batch_predict_starting_replica_count + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: 
evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: fast_testing + pipelinechannel--location: + componentInputParameter: location + pipelinechannel--model_description: + componentInputParameter: model_description + pipelinechannel--optimization_objective: + componentInputParameter: optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: optimization_objective_recall_value + pipelinechannel--predefined_split_key: + componentInputParameter: predefined_split_key + pipelinechannel--prediction_type: + componentInputParameter: prediction_type + pipelinechannel--project: + componentInputParameter: project + pipelinechannel--quantiles: + componentInputParameter: quantiles + pipelinechannel--root_dir: + componentInputParameter: root_dir + pipelinechannel--run_distillation: + componentInputParameter: run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: run_evaluation + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + taskOutputParameter: + outputParameterKey: data_source_bigquery_table_path + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + taskOutputParameter: + outputParameterKey: data_source_csv_filenames + producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-model_display_name: + taskOutputParameter: + outputParameterKey: model_display_name + producerTask: set-optional-inputs + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: stage_2_num_selected_trials + pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: + componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb + pipelinechannel--stats_and_example_gen_dataflow_machine_type: + componentInputParameter: stats_and_example_gen_dataflow_machine_type + pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: + componentInputParameter: stats_and_example_gen_dataflow_max_num_workers + pipelinechannel--stratified_split_key: + componentInputParameter: stratified_split_key + pipelinechannel--study_spec_parameters_override: + componentInputParameter: study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: target_column + pipelinechannel--test_fraction: + componentInputParameter: 
test_fraction
+            pipelinechannel--timestamp_split_key:
+              componentInputParameter: timestamp_split_key
+            pipelinechannel--train_budget_milli_node_hours:
+              componentInputParameter: train_budget_milli_node_hours
+            pipelinechannel--training_fraction:
+              componentInputParameter: training_fraction
+            pipelinechannel--transform_dataflow_disk_size_gb:
+              componentInputParameter: transform_dataflow_disk_size_gb
+            pipelinechannel--transform_dataflow_machine_type:
+              componentInputParameter: transform_dataflow_machine_type
+            pipelinechannel--transform_dataflow_max_num_workers:
+              componentInputParameter: transform_dataflow_max_num_workers
+            pipelinechannel--transformations:
+              componentInputParameter: transformations
+            pipelinechannel--validation_fraction:
+              componentInputParameter: validation_fraction
+            pipelinechannel--weight_column:
+              componentInputParameter: weight_column
+        taskInfo:
+          name: exit-handler-1
+      set-optional-inputs:
+        cachingOptions:
+          enableCache: true
+        componentRef:
+          name: comp-set-optional-inputs
+        inputs:
+          artifacts:
+            vertex_dataset:
+              componentInputArtifact: vertex_dataset
+          parameters:
+            data_source_bigquery_table_path:
+              componentInputParameter: data_source_bigquery_table_path
+            data_source_csv_filenames:
+              componentInputParameter: data_source_csv_filenames
+            location:
+              componentInputParameter: location
+            model_display_name:
+              componentInputParameter: model_display_name
+            project:
+              componentInputParameter: project
+        taskInfo:
+          name: set-optional-inputs
+  inputDefinitions:
+    artifacts:
+      vertex_dataset:
+        artifactType:
+          schemaTitle: system.Artifact
+          schemaVersion: 0.0.1
+        description: The Vertex dataset artifact.
+    parameters:
+      additional_experiments:
+        description: Use this field to config private preview features.
+        isOptional: true
+        parameterType: STRUCT
+      cv_trainer_worker_pool_specs_override:
+        description: 'The dictionary for overriding stage
+
+          cv trainer worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      data_source_bigquery_table_path:
+        defaultValue: ''
+        description: 'The BigQuery table path of format
+
+          bq://bq_project.bq_dataset.bq_table'
+        isOptional: true
+        parameterType: STRING
+      data_source_csv_filenames:
+        defaultValue: ''
+        description: 'A string that represents a list of comma
+
+          separated CSV filenames.'
+        isOptional: true
+        parameterType: STRING
+      dataflow_service_account:
+        defaultValue: ''
+        description: Custom service account to run dataflow jobs.
+        isOptional: true
+        parameterType: STRING
+      dataflow_subnetwork:
+        defaultValue: ''
+        description: 'Dataflow''s fully qualified subnetwork name, when empty
+
+          the default subnetwork will be used. Example:
+
+          https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
+        isOptional: true
+        parameterType: STRING
+      dataflow_use_public_ips:
+        defaultValue: true
+        description: 'Specifies whether Dataflow workers use public IP
+
+          addresses.'
+        isOptional: true
+        parameterType: BOOLEAN
+      disable_early_stopping:
+        defaultValue: false
+        description: Whether to disable early stopping.
+        isOptional: true
+        parameterType: BOOLEAN
+      distill_batch_predict_machine_type:
+        defaultValue: n1-standard-16
+        description: 'The prediction server machine type for
+
+          batch predict component in the model distillation.'
+        isOptional: true
+        parameterType: STRING
+      distill_batch_predict_max_replica_count:
+        defaultValue: 25.0
+        description: 'The max number of prediction server
+
+          for batch predict component in the model distillation.'
+ isOptional: true + parameterType: NUMBER_INTEGER + distill_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'The initial number of + + prediction server for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is enabled, the + + model will fit a distribution that captures the uncertainty of a + + prediction. At inference time, the predictive distribution is used to make + + a point prediction that minimizes the optimization objective. For example, + + the mean of a predictive distribution is the point prediction that + + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + + distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: The KMS key name. + isOptional: true + parameterType: STRING + evaluation_batch_explain_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch explain components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_explain_max_replica_count: + defaultValue: 10.0 + description: 'The max number of prediction + + server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_explain_starting_replica_count: + defaultValue: 10.0 + description: 'The initial number of + + prediction server for batch explain components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_machine_type: + defaultValue: n1-highmem-8 + description: 'The prediction server machine type + + for batch predict components during evaluation.' + isOptional: true + parameterType: STRING + evaluation_batch_predict_max_replica_count: + defaultValue: 20.0 + description: 'The max number of prediction + + server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_batch_predict_starting_replica_count: + defaultValue: 20.0 + description: 'The initial number of + + prediction server for batch predict components during evaluation.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_disk_size_gb: + defaultValue: 50.0 + description: 'Dataflow worker''s disk size in GB for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The dataflow machine type for evaluation + + components.' + isOptional: true + parameterType: STRING + evaluation_dataflow_max_num_workers: + defaultValue: 100.0 + description: 'The max number of Dataflow workers for + + evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + evaluation_dataflow_starting_num_workers: + defaultValue: 10.0 + description: 'The initial number of Dataflow + + workers for evaluation components.' + isOptional: true + parameterType: NUMBER_INTEGER + export_additional_model_without_custom_ops: + defaultValue: false + description: 'Whether to export additional + + model without custom TensorFlow operators.' + isOptional: true + parameterType: BOOLEAN + fast_testing: + defaultValue: false + description: Internal flag used for presubmit tests. + isOptional: true + parameterType: BOOLEAN + location: + description: The GCP region that runs the pipeline components. 
+        parameterType: STRING
+      model_description:
+        defaultValue: ''
+        description: The description of the uploaded Vertex model.
+        isOptional: true
+        parameterType: STRING
+      model_display_name:
+        defaultValue: ''
+        description: The display name of the uploaded Vertex model.
+        isOptional: true
+        parameterType: STRING
+      optimization_objective:
+        description: 'For binary classification, "maximize-au-roc",
+
+          "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall",
+          or
+
+          "maximize-recall-at-precision". For multi class classification,
+
+          "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or
+
+          "minimize-rmsle".'
+        parameterType: STRING
+      optimization_objective_precision_value:
+        defaultValue: -1.0
+        description: 'Required when optimization_objective
+
+          is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      optimization_objective_recall_value:
+        defaultValue: -1.0
+        description: 'Required when optimization_objective is
+
+          ''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.'
+        isOptional: true
+        parameterType: NUMBER_DOUBLE
+      predefined_split_key:
+        defaultValue: ''
+        description: The predefined_split column name.
+        isOptional: true
+        parameterType: STRING
+      prediction_type:
+        description: 'The type of prediction the model is to produce.
+
+          "classification" or "regression".'
+        parameterType: STRING
+      project:
+        description: The GCP project that runs the pipeline components.
+        parameterType: STRING
+      quantiles:
+        description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles
+
+          are allowed of values between 0 and 1, exclusive. Represents the quantiles
+
+          to use for that objective. Quantiles must be unique.'
+        isOptional: true
+        parameterType: LIST
+      root_dir:
+        description: The root GCS directory for the pipeline components.
+        parameterType: STRING
+      run_distillation:
+        defaultValue: false
+        description: 'Whether the distillation should be applied to the
+
+          training.'
+        isOptional: true
+        parameterType: BOOLEAN
+      run_evaluation:
+        defaultValue: false
+        description: Whether to run evaluation steps during training.
+        isOptional: true
+        parameterType: BOOLEAN
+      stage_1_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 1.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_1_tuner_worker_pool_specs_override:
+        description: 'The dictionary for overriding
+
+          stage 1 tuner worker pool spec.'
+        isOptional: true
+        parameterType: LIST
+      stage_1_tuning_result_artifact_uri:
+        defaultValue: ''
+        description: 'The stage 1 tuning result artifact GCS
+
+          URI.'
+        isOptional: true
+        parameterType: STRING
+      stage_2_num_parallel_trials:
+        defaultValue: 35.0
+        description: Number of parallel trials for stage 2.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stage_2_num_selected_trials:
+        defaultValue: 5.0
+        description: Number of selected trials for stage 2.
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stats_and_example_gen_dataflow_disk_size_gb:
+        defaultValue: 40.0
+        description: 'Dataflow worker''s disk size in
+
+          GB for stats_and_example_gen component.'
+        isOptional: true
+        parameterType: NUMBER_INTEGER
+      stats_and_example_gen_dataflow_machine_type:
+        defaultValue: n1-standard-16
+        description: 'The dataflow machine type for
+
+          stats_and_example_gen component.'
+ isOptional: true + parameterType: STRING + stats_and_example_gen_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow + + workers for stats_and_example_gen component.' + isOptional: true + parameterType: NUMBER_INTEGER + stratified_split_key: + defaultValue: '' + description: The stratified_split column name. + isOptional: true + parameterType: STRING + study_spec_parameters_override: + description: The list for overriding study spec. + isOptional: true + parameterType: LIST + target_column: + description: The target column name. + parameterType: STRING + test_fraction: + defaultValue: -1.0 + description: float = The test fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + timestamp_split_key: + defaultValue: '' + description: The timestamp_split column name. + isOptional: true + parameterType: STRING + train_budget_milli_node_hours: + description: 'The train budget of creating this model, + + expressed in milli node hours i.e. 1,000 value in this field means 1 node + + hour.' + parameterType: NUMBER_DOUBLE + training_fraction: + defaultValue: -1.0 + description: The training fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + transform_dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'Dataflow worker''s disk size in GB for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transform_dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The dataflow machine type for transform + + component.' + isOptional: true + parameterType: STRING + transform_dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The max number of Dataflow workers for + + transform component.' + isOptional: true + parameterType: NUMBER_INTEGER + transformations: + description: 'The path to a GCS file containing the transformations to + + apply.' + parameterType: STRING + validation_fraction: + defaultValue: -1.0 + description: The validation fraction. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: The weight column name. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py new file mode 100644 index 0000000000..716d6f1ba4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -0,0 +1,166 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabular Cross Validation Trainer component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_cv_trainer( + project: str, + location: str, + root_dir: str, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + num_selected_trials: int, + transform_output: Input[Artifact], + metadata: Input[Artifact], + materialized_cv_splits: Input[Artifact], + tuning_result_input: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + execution_metrics: dsl.OutputPath(dict), + worker_pool_specs_override_json: Optional[list] = [], + num_selected_features: Optional[int] = 0, + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Tunes AutoML Tabular models and selects top trials using cross-validation. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + deadline_hours: Number of hours the cross-validation trainer should run. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. The number of + features to learn in the NN models. + transform_output: The transform output artifact. + metadata: The tabular example gen metadata. + materialized_cv_splits: The materialized cross-validation splits. + tuning_result_input: AutoML Tabular tuning result. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + tuning_result_output: The trained model and architectures. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + execution_metrics: Core metrics in dictionary of component execution. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-cv-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["l2l_cv_tuner", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + ( + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "--training_base_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--num_parallel_trial=' + ), + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + ( + '", "--valid_trials_completed_threshold=0.7",' + ' "--num_selected_trials=' + ), + num_selected_trials, + '", "--num_selected_features=', + num_selected_features, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--materialized_cv_splits=', + materialized_cv_splits.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--execution_metrics_path=', + execution_metrics, + ( + '", "--use_custom_job=true", "--use_json=true",' + ' "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json new file mode 100644 index 0000000000..5133d9cf2e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/deprecated/default_pipeline.json @@ -0,0 +1,7974 @@ +{ + "pipelineSpec": { + "components": { + "comp-automl-tabular-cv-trainer": { + "executorLabel": "exec-automl-tabular-cv-trainer", + "inputDefinitions": { + "artifacts": { + "materialized_cv_splits": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + 
"location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-ensemble": { + "executorLabel": "exec-automl-tabular-ensemble", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "warmup_data": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_architecture": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_without_custom_ops": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-ensemble-2": { + "executorLabel": "exec-automl-tabular-ensemble-2", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "tuning_result_input": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "warmup_data": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + 
"type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_architecture": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "model_without_custom_ops": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-finalizer": { + "executorLabel": "exec-automl-tabular-finalizer", + "inputDefinitions": { + "parameters": { + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-infra-validator": { + "executorLabel": "exec-automl-tabular-infra-validator", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-automl-tabular-infra-validator-2": { + "executorLabel": "exec-automl-tabular-infra-validator-2", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-automl-tabular-stage-1-tuner": { + "executorLabel": "exec-automl-tabular-stage-1-tuner", + "inputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "study_spec_parameters_override": { + "type": "STRING" + }, + "study_spec_parameters_override_json": { + "type": "STRING" + }, + "tune_feature_selection_rate": { + "type": "STRING" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + 
"artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-stage-1-tuner-2": { + "executorLabel": "exec-automl-tabular-stage-1-tuner-2", + "inputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "deadline_hours": { + "type": "DOUBLE" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "num_parallel_trials": { + "type": "INT" + }, + "num_selected_trials": { + "type": "INT" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "single_run_max_secs": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "study_spec_parameters_override": { + "type": "STRING" + }, + "study_spec_parameters_override_json": { + "type": "STRING" + }, + "tune_feature_selection_rate": { + "type": "STRING" + }, + "worker_pool_specs_override": { + "type": "STRING" + }, + "worker_pool_specs_override_json": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "tuning_result_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-transform": { + "executorLabel": "exec-automl-tabular-transform", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_test_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + 
"training_schema_uri": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-automl-tabular-transform-2": { + "executorLabel": "exec-automl-tabular-transform-2", + "inputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "materialized_eval_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_test_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "materialized_train_split": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "training_schema_uri": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "transform_output": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bool-identity": { + "executorLabel": "exec-bool-identity", + "inputDefinitions": { + "parameters": { + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-bool-identity-2": { + "executorLabel": "exec-bool-identity-2", + "inputDefinitions": { + "parameters": { + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-condition-is-distill-4": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-5" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-5" + } + ] + } + } + }, + "tasks": { + "automl-tabular-ensemble-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-ensemble-2" + }, + "dependentTasks": [ + "automl-tabular-stage-1-tuner-2", + "automl-tabular-transform-2" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + 
"componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" + }, + "instance_baseline": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-instance_baseline" + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform-2" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-stage-1-tuner-2" + } + }, + "warmup_data": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" + } + }, + "parameters": { + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-ensemble-2" + } + }, + "automl-tabular-infra-validator-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-infra-validator-2" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2" + ], + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-infra-validator-2" + } + }, + "automl-tabular-stage-1-tuner-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-stage-1-tuner-2" + }, + "dependentTasks": [ + "automl-tabular-transform-2" + ], + "inputs": { + "artifacts": { + "materialized_eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform-2" + } + }, + "materialized_train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform-2" + } + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform-2" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" + }, + "disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "num_selected_trials": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + 
"runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "study_spec_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "study_spec_parameters_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "study_spec_parameters_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "tune_feature_selection_rate": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-stage-1-tuner-2" + } + }, + "automl-tabular-transform-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-transform-2" + }, + "dependentTasks": [ + "write-bp-result-path", + "write-bp-result-path-2" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-dataset_schema" + }, + "eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "result", + "producerTask": "write-bp-result-path-2" + } + }, + "metadata": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-metadata" + }, + "test_split": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-test_split" + }, + "train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "result", + "producerTask": "write-bp-result-path" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_use_public_ips": { + "runtimeValue": { + "constantValue": { + "stringValue": "true" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-transform-2" + } + }, + "condition-is-evaluation-5": { + "componentRef": { + "name": "comp-condition-is-evaluation-5" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2", + "model-upload-3" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--model-upload-3-model": { + 
"taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-3" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "pipelineparam--bool-identity-2-Output": { + "componentInputParameter": "pipelineparam--bool-identity-2-Output" + }, + "pipelineparam--bool-identity-Output": { + "componentInputParameter": "pipelineparam--bool-identity-Output" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-is-evaluation-5" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" + } + }, + "model-batch-predict-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-2" + }, + "dependentTasks": [ + "model-upload-2", + "read-input-uri" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-2" + } + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + 
"bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "read-input-uri" + } + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-train-split" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-2" + } + }, + "model-batch-predict-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-3" + }, + "dependentTasks": [ + "model-upload-2", + "read-input-uri-2" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload-2" + } + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "read-input-uri-2" + } + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + 
"runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-eval-split" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-3" + } + }, + "model-upload-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload-2" + }, + "dependentTasks": [ + "set-model-can-skip-validation" + ], + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload-2" + } + }, + "model-upload-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload-3" + }, + "dependentTasks": [ + "automl-tabular-ensemble-2", + "automl-tabular-infra-validator-2" + ], + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble-2" + } + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": 
"pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble-2" + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload-3" + } + }, + "read-input-uri": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-read-input-uri" + }, + "inputs": { + "artifacts": { + "split_uri": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-train_split" + } + } + }, + "taskInfo": { + "name": "read-input-uri" + } + }, + "read-input-uri-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-read-input-uri-2" + }, + "inputs": { + "artifacts": { + "split_uri": { + "componentInputArtifact": "pipelineparam--tabular-stats-and-example-gen-eval_split" + } + } + }, + "taskInfo": { + "name": "read-input-uri-2" + } + }, + "set-model-can-skip-validation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-set-model-can-skip-validation" + }, + "inputs": { + "artifacts": { + "model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + } + }, + "taskInfo": { + "name": "set-model-can-skip-validation" + } + }, + "write-bp-result-path": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-write-bp-result-path" + }, + "dependentTasks": [ + "model-batch-predict-2" + ], + "inputs": { + "artifacts": { + "bp_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict-2" + } + } + } + }, + "taskInfo": { + "name": "write-bp-result-path" + } + }, + "write-bp-result-path-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-write-bp-result-path-2" + }, + "dependentTasks": [ + "model-batch-predict-3" + ], + "inputs": { + "artifacts": { + "bp_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict-3" + } + } + } + }, + "taskInfo": { + "name": "write-bp-result-path-2" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + 
"pipelineparam--tabular-stats-and-example-gen-test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--tabular-stats-and-example-gen-train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--disable_early_stopping": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--reduce_search_space_mode": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--transform_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-is-evaluation-3": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-2" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + 
"producerSubtask": "model-evaluation" + } + ] + } + } + }, + "tasks": { + "model-batch-explanation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-explanation" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-explanation" + } + }, + "model-batch-predict": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict" + }, + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + 
"bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict" + } + }, + "model-evaluation": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation" + }, + "dependentTasks": [ + "model-batch-predict" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-predict" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, 
+ "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation" + } + }, + "model-evaluation-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-2" + }, + "dependentTasks": [ + "model-batch-explanation" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-explanation" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + 
"positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-2" + } + }, + "model-evaluation-import": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-import" + }, + "dependentTasks": [ + "model-evaluation", + "model-evaluation-2" + ], + "inputs": { + "artifacts": { + "explanation": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-2" + } + }, + "metrics": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation" + } + }, + "model": { + "componentInputArtifact": "pipelineparam--model-upload-model" + } + }, + "parameters": { + "dataset_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataset_paths": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "dataset_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + } + } + }, + "taskInfo": { + "name": "model-evaluation-import" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--model-upload-model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + 
"pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-is-evaluation-5": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-3" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "evaluation_metrics", + "producerSubtask": "model-evaluation-4" + } + ] + } + } + }, + "tasks": { + "model-batch-explanation-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-explanation-2" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-2-explanation_parameters" + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + 
"componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-explanation-2" + } + }, + "model-batch-predict-4": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-batch-predict-4" + }, + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model" + } + }, + "parameters": { + "accelerator_count": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "accelerator_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_destination_output_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "bigquery_source_input_uri": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "gcs_destination_output_uri_prefix": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "gcs_source_uris": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "generate_explanation": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "instances_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "tf-record" + } + } + }, + "job_display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "manual_batch_tuning_parameters_batch_size": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "model_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + } + } + }, + "taskInfo": { + "name": "model-batch-predict-4" + } + }, + "model-evaluation-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-3" + }, + "dependentTasks": [ + "model-batch-predict-4" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + 
"producerTask": "model-batch-predict-4" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "0" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-3" + } + }, + "model-evaluation-4": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-4" + }, + "dependentTasks": [ + "model-batch-explanation-2" + ], + "inputs": { + "artifacts": { + "batch_prediction_job": { + "taskOutputArtifact": { + "outputArtifactKey": "batchpredictionjob", + "producerTask": "model-batch-explanation-2" + } + } + }, + "parameters": { + "class_names": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "classification_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "dataflow_max_workers_num": { + "componentInputParameter": 
"pipelineparam--evaluation_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "dataflow_workers_num": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "example_weight_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "generate_feature_attribution": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "ground_truth_column": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "ground_truth_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "ground_truth_gcs_source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "key_columns": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "positive_classes": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "prediction_id_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_label_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_score_column": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "predictions_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "jsonl" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "model-evaluation-4" + } + }, + "model-evaluation-import-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-evaluation-import-2" + }, + "dependentTasks": [ + "model-evaluation-3", + "model-evaluation-4" + ], + "inputs": { + "artifacts": { + "explanation": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-4" + } + }, + "metrics": { + "taskOutputArtifact": { + "outputArtifactKey": "evaluation_metrics", + "producerTask": "model-evaluation-3" + } + }, + "model": { + "componentInputArtifact": "pipelineparam--model-upload-3-model" + } + }, + "parameters": { + "dataset_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataset_paths": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "dataset_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "problem_type": { + "componentInputParameter": "pipelineparam--prediction_type" + } + } + }, + "taskInfo": { + "name": "model-evaluation-import-2" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-2-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-2-unmanaged_container_model": { + 
"artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--model-upload-3-model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-2-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-condition-no-distill-2": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-3" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "condition-is-evaluation-3" + } + ] + } + } + }, + "tasks": { + "condition-is-evaluation-3": { + "componentRef": { + "name": "comp-condition-is-evaluation-3" + }, + "dependentTasks": [ + "model-upload" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + }, + "pipelineparam--model-upload-model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "model-upload" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "pipelineparam--bool-identity-2-Output": { + "componentInputParameter": 
"pipelineparam--bool-identity-2-Output" + }, + "pipelineparam--bool-identity-Output": { + "componentInputParameter": "pipelineparam--bool-identity-Output" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "componentInputParameter": "pipelineparam--tabular-stats-and-example-gen-test_split_json" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-is-evaluation-3" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-Output'].string_value == 'true'" + } + }, + "model-upload": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-upload" + }, + "inputs": { + "artifacts": { + "explanation_metadata_artifact": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact" + }, + "unmanaged_container_model": { + "componentInputArtifact": "pipelineparam--automl-tabular-ensemble-unmanaged_container_model" + } + }, + "parameters": { + "description": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "display_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "automl-tabular-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" + } + } + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "explanation_metadata": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "explanation_parameters": { + 
"componentInputParameter": "pipelineparam--automl-tabular-ensemble-explanation_parameters" + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "model-upload" + } + } + } + }, + "inputDefinitions": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "type": "STRING" + }, + "pipelineparam--bool-identity-2-Output": { + "type": "STRING" + }, + "pipelineparam--bool-identity-Output": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "type": "STRING" + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-exit-handler-1": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "condition-no-distill-2" + } + ] + }, + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "condition-is-distill-4" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "condition-is-distill-4" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "condition-no-distill-2" + } + ] + } + } + }, + "tasks": { + "automl-tabular-cv-trainer": { + 
"cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-cv-trainer" + }, + "dependentTasks": [ + "automl-tabular-stage-1-tuner", + "automl-tabular-transform", + "merge-materialized-splits", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "materialized_cv_splits": { + "taskOutputArtifact": { + "outputArtifactKey": "splits", + "producerTask": "merge-materialized-splits" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-stage-1-tuner" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--stage_2_deadline_hours" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_2_num_parallel_trials" + }, + "num_selected_trials": { + "componentInputParameter": "pipelineparam--stage_2_num_selected_trials" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_2_single_run_max_secs" + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--cv_trainer_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-cv-trainer" + } + }, + "automl-tabular-ensemble": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-ensemble" + }, + "dependentTasks": [ + "automl-tabular-cv-trainer", + "automl-tabular-transform", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "instance_baseline": { + "taskOutputArtifact": { + "outputArtifactKey": "instance_baseline", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + }, + "tuning_result_input": { + "taskOutputArtifact": { + "outputArtifactKey": "tuning_result_output", + "producerTask": "automl-tabular-cv-trainer" + } + }, + "warmup_data": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": 
"pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-ensemble" + } + }, + "automl-tabular-infra-validator": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-infra-validator" + }, + "dependentTasks": [ + "automl-tabular-ensemble" + ], + "inputs": { + "artifacts": { + "unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-infra-validator" + } + }, + "automl-tabular-stage-1-tuner": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-stage-1-tuner" + }, + "dependentTasks": [ + "automl-tabular-transform", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "materialized_eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform" + } + }, + "materialized_train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "transform_output": { + "taskOutputArtifact": { + "outputArtifactKey": "transform_output", + "producerTask": "automl-tabular-transform" + } + } + }, + "parameters": { + "deadline_hours": { + "componentInputParameter": "pipelineparam--stage_1_deadline_hours" + }, + "disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "num_selected_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_selected_trials" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "study_spec_override": { + "componentInputParameter": "pipelineparam--study_spec_override" + }, + "study_spec_parameters_override": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + }, + "study_spec_parameters_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "tune_feature_selection_rate": { + "runtimeValue": { + "constantValue": { + "stringValue": "false" + } + } + }, + "worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "worker_pool_specs_override_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "automl-tabular-stage-1-tuner" + } + }, + "automl-tabular-transform": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-automl-tabular-transform" + }, + 
"dependentTasks": [ + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "test_split": { + "taskOutputArtifact": { + "outputArtifactKey": "test_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "train_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-transform" + } + }, + "bool-identity": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bool-identity" + }, + "inputs": { + "parameters": { + "value": { + "componentInputParameter": "pipelineparam--run_evaluation" + } + } + }, + "taskInfo": { + "name": "bool-identity" + } + }, + "bool-identity-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bool-identity-2" + }, + "inputs": { + "parameters": { + "value": { + "componentInputParameter": "pipelineparam--run_distillation" + } + } + }, + "taskInfo": { + "name": "bool-identity-2" + } + }, + "condition-is-distill-4": { + "componentRef": { + "name": "comp-condition-is-distill-4" + }, + "dependentTasks": [ + "automl-tabular-ensemble", + "automl-tabular-infra-validator", + "bool-identity", + "bool-identity-2", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--tabular-stats-and-example-gen-dataset_schema": { + "taskOutputArtifact": { + "outputArtifactKey": "dataset_schema", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-eval_split": { + "taskOutputArtifact": { + "outputArtifactKey": "eval_split", + "producerTask": "tabular-stats-and-example-gen" + } + 
}, + "pipelineparam--tabular-stats-and-example-gen-instance_baseline": { + "taskOutputArtifact": { + "outputArtifactKey": "instance_baseline", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-metadata": { + "taskOutputArtifact": { + "outputArtifactKey": "metadata", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split": { + "taskOutputArtifact": { + "outputArtifactKey": "test_split", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-train_split": { + "taskOutputArtifact": { + "outputArtifactKey": "train_split", + "producerTask": "tabular-stats-and-example-gen" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--bool-identity-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity-2" + } + }, + "pipelineparam--bool-identity-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity" + } + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--disable_early_stopping": { + "componentInputParameter": "pipelineparam--disable_early_stopping" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--distill_batch_predict_machine_type" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_max_replica_count" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--distill_batch_predict_starting_replica_count" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "componentInputParameter": "pipelineparam--distill_stage_1_deadline_hours" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "componentInputParameter": "pipelineparam--export_additional_model_without_custom_ops" + }, + "pipelineparam--location": 
{ + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--reduce_search_space_mode": { + "componentInputParameter": "pipelineparam--reduce_search_space_mode" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "componentInputParameter": "pipelineparam--stage_1_num_parallel_trials" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "componentInputParameter": "pipelineparam--stage_1_single_run_max_secs" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "componentInputParameter": "pipelineparam--stage_1_tuner_worker_pool_specs_override" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "downsampled_test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--transform_dataflow_disk_size_gb" + }, + "pipelineparam--transform_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--transform_dataflow_machine_type" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--transform_dataflow_max_num_workers" + } + } + }, + "taskInfo": { + "name": "condition-is-distill-4" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'true'" + } + }, + "condition-no-distill-2": { + "componentRef": { + "name": "comp-condition-no-distill-2" + }, + "dependentTasks": [ + "automl-tabular-ensemble", + "automl-tabular-infra-validator", + "bool-identity", + "bool-identity-2", + "tabular-stats-and-example-gen" + ], + "inputs": { + "artifacts": { + "pipelineparam--automl-tabular-ensemble-explanation_metadata_artifact": { + "taskOutputArtifact": { + "outputArtifactKey": "explanation_metadata_artifact", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--automl-tabular-ensemble-unmanaged_container_model": { + "taskOutputArtifact": { + "outputArtifactKey": "unmanaged_container_model", + "producerTask": "automl-tabular-ensemble" + } + } + }, + "parameters": { + "pipelineparam--automl-tabular-ensemble-explanation_parameters": { + "taskOutputParameter": { + "outputParameterKey": "explanation_parameters", + "producerTask": "automl-tabular-ensemble" + } + }, + "pipelineparam--bool-identity-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity-2" + } + }, + "pipelineparam--bool-identity-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bool-identity" + } + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "pipelineparam--dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + 
"componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "pipelineparam--encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "pipelineparam--evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--evaluation_dataflow_max_num_workers" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "pipelineparam--tabular-stats-and-example-gen-downsampled_test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "downsampled_test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--tabular-stats-and-example-gen-test_split_json": { + "taskOutputParameter": { + "outputParameterKey": "test_split_json", + "producerTask": "tabular-stats-and-example-gen" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + } + } + }, + "taskInfo": { + "name": "condition-no-distill-2" + }, + "triggerPolicy": { + "condition": "inputs.parameters['pipelineparam--bool-identity-2-Output'].string_value == 'false'" + } + }, + "merge-materialized-splits": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-merge-materialized-splits" + }, + "dependentTasks": [ + "automl-tabular-transform" + ], + "inputs": { + "artifacts": { + "split_0": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_train_split", + "producerTask": "automl-tabular-transform" + } + }, + "split_1": { + "taskOutputArtifact": { + "outputArtifactKey": "materialized_eval_split", + "producerTask": "automl-tabular-transform" + } + } + } + }, + "taskInfo": { + "name": "merge-materialized-splits" + } + }, + "tabular-stats-and-example-gen": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-tabular-stats-and-example-gen" + }, + "inputs": { + "parameters": { + "additional_experiments": { + "componentInputParameter": "pipelineparam--additional_experiments" + }, + "additional_experiments_json": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "data_source": { + "componentInputParameter": "pipelineparam--data_source" + }, + "data_source_bigquery_table_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "data_source_csv_filenames": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_disk_size_gb": { + 
"componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb" + }, + "dataflow_machine_type": { + "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_machine_type" + }, + "dataflow_max_num_workers": { + "componentInputParameter": "pipelineparam--stats_and_example_gen_dataflow_max_num_workers" + }, + "dataflow_service_account": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "dataflow_subnetwork": { + "componentInputParameter": "pipelineparam--dataflow_subnetwork" + }, + "dataflow_use_public_ips": { + "componentInputParameter": "pipelineparam--dataflow_use_public_ips" + }, + "encryption_spec_key_name": { + "componentInputParameter": "pipelineparam--encryption_spec_key_name" + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "optimization_objective": { + "componentInputParameter": "pipelineparam--optimization_objective" + }, + "optimization_objective_precision_value": { + "componentInputParameter": "pipelineparam--optimization_objective_precision_value" + }, + "optimization_objective_recall_value": { + "componentInputParameter": "pipelineparam--optimization_objective_recall_value" + }, + "predefined_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "prediction_type": { + "componentInputParameter": "pipelineparam--prediction_type" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "request_type": { + "runtimeValue": { + "constantValue": { + "stringValue": "COLUMN_STATS_ONLY" + } + } + }, + "root_dir": { + "componentInputParameter": "pipelineparam--root_dir" + }, + "run_distillation": { + "componentInputParameter": "pipelineparam--run_distillation" + }, + "split_spec": { + "componentInputParameter": "pipelineparam--split_spec" + }, + "stratified_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "test_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "timestamp_split_key": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "training_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "transformations": { + "componentInputParameter": "pipelineparam--transformations" + }, + "transformations_path": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "validation_fraction": { + "runtimeValue": { + "constantValue": { + "stringValue": "-1" + } + } + }, + "weight_column_name": { + "componentInputParameter": "pipelineparam--weight_column_name" + } + } + }, + "taskInfo": { + "name": "tabular-stats-and-example-gen" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "pipelineparam--additional_experiments": { + "type": "STRING" + }, + "pipelineparam--cv_trainer_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--data_source": { + "type": "STRING" + }, + "pipelineparam--dataflow_service_account": { + "type": "STRING" + }, + "pipelineparam--dataflow_subnetwork": { + "type": "STRING" + }, + "pipelineparam--dataflow_use_public_ips": { + "type": "STRING" + }, + "pipelineparam--disable_early_stopping": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "type": "INT" + }, + 
"pipelineparam--distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--encryption_spec_key_name": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--optimization_objective": { + "type": "STRING" + }, + "pipelineparam--optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "pipelineparam--optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "pipelineparam--prediction_type": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--reduce_search_space_mode": { + "type": "STRING" + }, + "pipelineparam--root_dir": { + "type": "STRING" + }, + "pipelineparam--run_distillation": { + "type": "STRING" + }, + "pipelineparam--run_evaluation": { + "type": "STRING" + }, + "pipelineparam--split_spec": { + "type": "STRING" + }, + "pipelineparam--stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_num_selected_trials": { + "type": "INT" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "pipelineparam--stage_2_deadline_hours": { + "type": "DOUBLE" + }, + "pipelineparam--stage_2_num_parallel_trials": { + "type": "INT" + }, + "pipelineparam--stage_2_num_selected_trials": { + "type": "INT" + }, + "pipelineparam--stage_2_single_run_max_secs": { + "type": "INT" + }, + "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--stats_and_example_gen_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--study_spec_override": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "pipelineparam--transform_dataflow_machine_type": { + "type": "STRING" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "type": "INT" + }, + "pipelineparam--transformations": { + "type": "STRING" + }, + "pipelineparam--weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + 
"comp-merge-materialized-splits": { + "executorLabel": "exec-merge-materialized-splits", + "inputDefinitions": { + "artifacts": { + "split_0": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "split_1": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "splits": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-model-batch-explanation": { + "executorLabel": "exec-model-batch-explanation", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-explanation-2": { + "executorLabel": "exec-model-batch-explanation-2", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, 
+ "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict": { + "executorLabel": "exec-model-batch-predict", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-2": { + "executorLabel": "exec-model-batch-predict-2", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + 
"type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-3": { + "executorLabel": "exec-model-batch-predict-3", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-batch-predict-4": { + "executorLabel": "exec-model-batch-predict-4", + "inputDefinitions": { + "artifacts": { + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "accelerator_count": { + "type": "INT" + }, + "accelerator_type": { + "type": "STRING" + }, + "bigquery_destination_output_uri": { + "type": "STRING" + }, + "bigquery_source_input_uri": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + 
"gcs_destination_output_uri_prefix": { + "type": "STRING" + }, + "gcs_source_uris": { + "type": "STRING" + }, + "generate_explanation": { + "type": "STRING" + }, + "instances_format": { + "type": "STRING" + }, + "job_display_name": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "machine_type": { + "type": "STRING" + }, + "manual_batch_tuning_parameters_batch_size": { + "type": "INT" + }, + "max_replica_count": { + "type": "INT" + }, + "model_parameters": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "starting_replica_count": { + "type": "INT" + } + } + }, + "outputDefinitions": { + "artifacts": { + "batchpredictionjob": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + }, + "bigquery_output_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + }, + "gcs_output_directory": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation": { + "executorLabel": "exec-model-evaluation", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-2": { + "executorLabel": "exec-model-evaluation-2", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + 
"dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-3": { + "executorLabel": "exec-model-evaluation-3", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-4": { + "executorLabel": "exec-model-evaluation-4", + "inputDefinitions": { + "artifacts": { + "batch_prediction_job": { + "artifactType": { + "schemaTitle": "google.VertexBatchPredictionJob", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "class_names": { + "type": "STRING" + }, + "classification_type": { + "type": "STRING" + }, + "dataflow_disk_size": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_workers_num": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + 
"dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "dataflow_workers_num": { + "type": "INT" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "example_weight_column": { + "type": "STRING" + }, + "generate_feature_attribution": { + "type": "STRING" + }, + "ground_truth_column": { + "type": "STRING" + }, + "ground_truth_format": { + "type": "STRING" + }, + "ground_truth_gcs_source": { + "type": "STRING" + }, + "key_columns": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "positive_classes": { + "type": "STRING" + }, + "prediction_id_column": { + "type": "STRING" + }, + "prediction_label_column": { + "type": "STRING" + }, + "prediction_score_column": { + "type": "STRING" + }, + "predictions_format": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-import": { + "executorLabel": "exec-model-evaluation-import", + "inputDefinitions": { + "artifacts": { + "explanation": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataset_path": { + "type": "STRING" + }, + "dataset_paths": { + "type": "STRING" + }, + "dataset_type": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-evaluation-import-2": { + "executorLabel": "exec-model-evaluation-import-2", + "inputDefinitions": { + "artifacts": { + "explanation": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "dataset_path": { + "type": "STRING" + }, + "dataset_paths": { + "type": "STRING" + }, + "dataset_type": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "problem_type": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload": { + "executorLabel": "exec-model-upload", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": 
"STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload-2": { + "executorLabel": "exec-model-upload-2", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-model-upload-3": { + "executorLabel": "exec-model-upload-3", + "inputDefinitions": { + "artifacts": { + "explanation_metadata_artifact": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "unmanaged_container_model": { + "artifactType": { + "schemaTitle": "google.UnmanagedContainerModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "description": { + "type": "STRING" + }, + "display_name": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "explanation_metadata": { + "type": "STRING" + }, + "explanation_parameters": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.VertexModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-read-input-uri": { + "executorLabel": "exec-read-input-uri", + "inputDefinitions": { + "artifacts": { + "split_uri": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-read-input-uri-2": { + "executorLabel": "exec-read-input-uri-2", + "inputDefinitions": { + "artifacts": { + "split_uri": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-set-model-can-skip-validation": { + "executorLabel": "exec-set-model-can-skip-validation", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-tabular-stats-and-example-gen": { + "executorLabel": "exec-tabular-stats-and-example-gen", + "inputDefinitions": { + "parameters": { + "additional_experiments": { + "type": "STRING" + }, + "additional_experiments_json": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "data_source_bigquery_table_path": { + "type": "STRING" + }, + "data_source_csv_filenames": { + 
"type": "STRING" + }, + "dataflow_disk_size_gb": { + "type": "INT" + }, + "dataflow_machine_type": { + "type": "STRING" + }, + "dataflow_max_num_workers": { + "type": "INT" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "optimization_objective": { + "type": "STRING" + }, + "optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "predefined_split_key": { + "type": "STRING" + }, + "prediction_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "request_type": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "split_spec": { + "type": "STRING" + }, + "stratified_split_key": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "test_fraction": { + "type": "DOUBLE" + }, + "timestamp_split_key": { + "type": "STRING" + }, + "training_fraction": { + "type": "DOUBLE" + }, + "transformations": { + "type": "STRING" + }, + "transformations_path": { + "type": "STRING" + }, + "validation_fraction": { + "type": "DOUBLE" + }, + "weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "dataset_schema": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "dataset_stats": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "eval_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "instance_baseline": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "metadata": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + }, + "test_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + }, + "train_split": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "downsampled_test_split_json": { + "type": "STRING" + }, + "gcp_resources": { + "type": "STRING" + }, + "test_split_json": { + "type": "STRING" + } + } + } + }, + "comp-write-bp-result-path": { + "executorLabel": "exec-write-bp-result-path", + "inputDefinitions": { + "artifacts": { + "bp_job": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "result": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "comp-write-bp-result-path-2": { + "executorLabel": "exec-write-bp-result-path-2", + "inputDefinitions": { + "artifacts": { + "bp_job": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "artifacts": { + "result": { + "artifactType": { + "schemaTitle": "system.Dataset", + "schemaVersion": "0.0.1" + } + } + } + } + } + }, + "deploymentSpec": { + "executors": { + "exec-automl-tabular-cv-trainer": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + 
"{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_cv_splits={{$.inputs.artifacts['materialized_cv_splits'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_custom_job=true\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-ensemble": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", 
\"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-ensemble-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"ensemble\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--export_custom_model={{$.inputs.parameters['export_additional_model_without_custom_ops']}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--tuning_result_input_path={{$.inputs.artifacts['tuning_result_input'].uri}}\", \"--instance_baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--warmup_data={{$.inputs.artifacts['warmup_data'].uri}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--model_path={{$.outputs.artifacts['model'].uri}}\", \"--custom_model_path={{$.outputs.artifacts['model_without_custom_ops'].uri}}\", \"--explanation_metadata_path={{$.outputs.parameters['explanation_metadata'].output_file}},{{$.outputs.artifacts['explanation_metadata_artifact'].uri}}\", \"--explanation_parameters_path={{$.outputs.parameters['explanation_parameters'].output_file}}\", \"--model_architecture_path={{$.outputs.artifacts['model_architecture'].uri}}\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + 
"exec-automl-tabular-finalizer": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-infra-validator": { + "container": { + "args": [ + "--executor_input", + "{{$}}" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", + "resources": { + "cpuLimit": 8.0, + "memoryLimit": 52.0 + } + } + }, + "exec-automl-tabular-infra-validator-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod", + "resources": { + "cpuLimit": 8.0, + "memoryLimit": 52.0 + } + } + }, + "exec-automl-tabular-stage-1-tuner": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", 
\"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-stage-1-tuner-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_docker_uri=us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"--disable_early_stopping={{$.inputs.parameters['disable_early_stopping']}}\", \"--tune_feature_selection_rate={{$.inputs.parameters['tune_feature_selection_rate']}}\", \"--reduce_search_space_mode={{$.inputs.parameters['reduce_search_space_mode']}}\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--study_spec_override={{$.inputs.parameters['study_spec_override']}}\", \"--worker_pool_specs_override={{$.inputs.parameters['worker_pool_specs_override']}}\", \"--num_parallel_trial={{$.inputs.parameters['num_parallel_trials']}}\", \"--single_run_max_secs={{$.inputs.parameters['single_run_max_secs']}}\", \"--deadline_hours={{$.inputs.parameters['deadline_hours']}}\", \"--num_selected_trials={{$.inputs.parameters['num_selected_trials']}}\", \"--lro_job_info={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/lro\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--materialized_train_split={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--tuning_result_output_path={{$.outputs.artifacts['tuning_result_output'].uri}}\", \"--kms_key_name={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + 
"google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-transform": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-automl-tabular-transform-2": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + 
"{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"transform\", \"--transform_output_artifact_path={{$.outputs.artifacts['transform_output'].uri}}\", \"--transform_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", \"--materialized_splits_output_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--dataset_schema_path={{$.inputs.artifacts['dataset_schema'].uri}}\", \"--train_split={{$.inputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.inputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.inputs.artifacts['test_split'].uri}}\", \"--materialized_train_split={{$.outputs.artifacts['materialized_train_split'].uri}}\", \"--materialized_eval_split={{$.outputs.artifacts['materialized_eval_split'].uri}}\", \"--materialized_test_split={{$.outputs.artifacts['materialized_test_split'].uri}}\", \"--training_schema_path={{$.outputs.artifacts['training_schema_uri'].uri}}\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-bool-identity": { + "container": { + "args": [ + "--value", + "{{$.inputs.parameters['value']}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef 
_serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bool-identity-2": { + "container": { + "args": [ + "--value", + "{{$.inputs.parameters['value']}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _bool_identity(value):\n \"\"\"Returns boolean value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\ndef _deserialize_bool(s) -> bool:\n from distutils.util import strtobool\n return strtobool(s) == 1\n\ndef _serialize_str(str_value: str) -> str:\n if not isinstance(str_value, str):\n raise TypeError('Value \"{}\" has type \"{}\" instead of str.'.format(\n str(str_value), str(type(str_value))))\n return str_value\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Bool identity', description='Returns boolean value.')\n_parser.add_argument(\"--value\", dest=\"value\", type=_deserialize_bool, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _bool_identity(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_str,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-merge-materialized-splits": { + "container": { + "args": [ + "--split-0", + "{{$.inputs.artifacts['split_0'].path}}", + "--split-1", + "{{$.inputs.artifacts['split_1'].path}}", + "--splits", + "{{$.outputs.artifacts['splits'].path}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef _merge_materialized_splits(\n split_0,\n split_1,\n splits,\n):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The first materialized split.\n split_1: The second materialized split.\n splits: The merged materialized split.\n \"\"\"\n with open(split_0, 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r') as f:\n 
split_1_content = f.read()\n with open(splits, 'w') as f:\n f.write(','.join([split_0_content, split_1_content]))\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Merge materialized splits', description='Merge two materialized splits.')\n_parser.add_argument(\"--split-0\", dest=\"split_0\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--split-1\", dest=\"split_1\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--splits\", dest=\"splits\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = _merge_materialized_splits(**_parsed_args)\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-model-batch-explanation": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-batch-explanation-2": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", 
\"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-batch-predict": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + 
"python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-2": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-3": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"model\": \"{{$.inputs.artifacts['model'].metadata['resourceName']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": 
{{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-batch-predict-4": { + "container": { + "args": [ + "--type", + "BatchPredictionJob", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['job_display_name']}}\", \"input_config\": {\"instances_format\": \"{{$.inputs.parameters['instances_format']}}\", \"gcs_source\": {\"uris\":{{$.inputs.parameters['gcs_source_uris']}}}, \"bigquery_source\": {\"input_uri\": \"{{$.inputs.parameters['bigquery_source_input_uri']}}\"}}, \"model_parameters\": {{$.inputs.parameters['model_parameters']}}, \"output_config\": {\"predictions_format\": \"{{$.inputs.parameters['predictions_format']}}\", \"gcs_destination\": {\"output_uri_prefix\": \"{{$.inputs.parameters['gcs_destination_output_uri_prefix']}}\"}, \"bigquery_destination\": {\"output_uri\": \"{{$.inputs.parameters['bigquery_destination_output_uri']}}\"}}, \"dedicated_resources\": {\"machine_spec\": {\"machine_type\": \"{{$.inputs.parameters['machine_type']}}\", \"accelerator_type\": \"{{$.inputs.parameters['accelerator_type']}}\", \"accelerator_count\": {{$.inputs.parameters['accelerator_count']}}}, \"starting_replica_count\": {{$.inputs.parameters['starting_replica_count']}}, \"max_replica_count\": {{$.inputs.parameters['max_replica_count']}}}, \"manual_batch_tuning_parameters\": {\"batch_size\": {{$.inputs.parameters['manual_batch_tuning_parameters_batch_size']}}}, \"generate_explanation\": {{$.inputs.parameters['generate_explanation']}}, \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"labels\": {{$.inputs.parameters['labels']}}, \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-evaluation": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + 
"{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-2": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", 
+ "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-3": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + 
"--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-4": { + "container": { + "args": [ + "--setup_file", + "/setup.py", + "--json_mode", + "true", + "--project_id", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--batch_prediction_format", + "{{$.inputs.parameters['predictions_format']}}", + "--batch_prediction_gcs_source", + "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}", + "--ground_truth_format", + "{{$.inputs.parameters['ground_truth_format']}}", + "--ground_truth_gcs_source", + "{{$.inputs.parameters['ground_truth_gcs_source']}}", + "--key_prefix_in_prediction_dataset", + "instance", + "--key_columns", + "{{$.inputs.parameters['key_columns']}}", + "--root_dir", + "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--classification_type", + "{{$.inputs.parameters['classification_type']}}", + "--class_names", + "{{$.inputs.parameters['class_names']}}", + "--ground_truth_column", + "instance.{{$.inputs.parameters['ground_truth_column']}}", + "--prediction_score_column", + "{{$.inputs.parameters['prediction_score_column']}}", + "--prediction_label_column", + "{{$.inputs.parameters['prediction_label_column']}}", + "--prediction_id_column", + "{{$.inputs.parameters['prediction_id_column']}}", + "--example_weight_column", + "{{$.inputs.parameters['example_weight_column']}}", + "--positive_classes", + "{{$.inputs.parameters['positive_classes']}}", + "--generate_feature_attribution", + "{{$.inputs.parameters['generate_feature_attribution']}}", + "--dataflow_job_prefix", + "evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "--dataflow_service_account", + "{{$.inputs.parameters['dataflow_service_account']}}", + "--dataflow_disk_size", + "{{$.inputs.parameters['dataflow_disk_size']}}", + "--dataflow_machine_type", + "{{$.inputs.parameters['dataflow_machine_type']}}", + "--dataflow_workers_num", + "{{$.inputs.parameters['dataflow_workers_num']}}", + "--dataflow_max_workers_num", + "{{$.inputs.parameters['dataflow_max_workers_num']}}", + "--dataflow_subnetwork", + "{{$.inputs.parameters['dataflow_subnetwork']}}", + "--dataflow_use_public_ips", + "{{$.inputs.parameters['dataflow_use_public_ips']}}", + "--kms_key_name", + "{{$.inputs.parameters['encryption_spec_key_name']}}", + "--output_metrics_gcs_path", + "{{$.outputs.artifacts['evaluation_metrics'].uri}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python", + "/main.py" + ], + "image": "gcr.io/ml-pipeline/model-evaluation:v0.4" + } + }, + "exec-model-evaluation-import": { + "container": { + "args": [ + "--metrics", + "{{$.inputs.artifacts['metrics'].uri}}", + 
"--metrics_explanation", + "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", + "--explanation", + "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--display_name", + "{{$.inputs.parameters['display_name']}}", + "--dataset_path", + "{{$.inputs.parameters['dataset_path']}}", + "--dataset_paths", + "{{$.inputs.parameters['dataset_paths']}}", + "--dataset_type", + "{{$.inputs.parameters['dataset_type']}}", + "--pipeline_job_id", + "{{$.pipeline_job_uuid}}", + "--pipeline_job_resource_name", + "{{$.pipeline_job_resource_name}}", + "--model_name", + "{{$.inputs.artifacts['model'].metadata['resourceName']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-evaluation-import-2": { + "container": { + "args": [ + "--metrics", + "{{$.inputs.artifacts['metrics'].uri}}", + "--metrics_explanation", + "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}", + "--explanation", + "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}", + "--problem_type", + "{{$.inputs.parameters['problem_type']}}", + "--display_name", + "{{$.inputs.parameters['display_name']}}", + "--dataset_path", + "{{$.inputs.parameters['dataset_path']}}", + "--dataset_paths", + "{{$.inputs.parameters['dataset_paths']}}", + "--dataset_type", + "{{$.inputs.parameters['dataset_type']}}", + "--pipeline_job_id", + "{{$.pipeline_job_uuid}}", + "--pipeline_job_resource_name", + "{{$.pipeline_job_resource_name}}", + "--model_name", + "{{$.inputs.artifacts['model'].metadata['resourceName']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-model-upload": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-upload-2": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": 
{{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-model-upload-3": { + "container": { + "args": [ + "--type", + "UploadModel", + "--payload", + "{\"display_name\": \"{{$.inputs.parameters['display_name']}}\", \"description\": \"{{$.inputs.parameters['description']}}\", \"explanation_spec\": {\"parameters\": {{$.inputs.parameters['explanation_parameters']}}, \"metadata\": {{$.inputs.parameters['explanation_metadata']}}}, \"explanation_metadata_artifact\": \"{{$.inputs.artifacts['explanation_metadata_artifact'].uri}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"labels\": {{$.inputs.parameters['labels']}}}", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "launcher" + ], + "image": "gcr.io/ml-pipeline/automl-tables-private:1.0.13" + } + }, + "exec-read-input-uri": { + "container": { + "args": [ + "--split-uri", + "{{$.inputs.artifacts['split_uri'].path}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport 
os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-read-input-uri-2": { + "container": { + "args": [ + "--split-uri", + "{{$.inputs.artifacts['split_uri'].path}}", + "----output-paths", + "{{$.outputs.parameters['Output'].output_file}}" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", + "def _read_input_uri(split_uri):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n split_uri: Tbe path to the file that contains Dataset data.\n\n Returns:\n The list of string that represents the batch prediction input files.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n return data_source['tf_record_data_source']['file_patterns']\n\ndef _serialize_json(obj) -> str:\n if isinstance(obj, str):\n return obj\n import json\n\n def default_serializer(obj):\n if hasattr(obj, 'to_struct'):\n return obj.to_struct()\n else:\n raise TypeError(\n \"Object of type '%s' is not JSON serializable and does not have .to_struct() method.\"\n % obj.__class__.__name__)\n\n return json.dumps(obj, default=default_serializer, sort_keys=True)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Read input uri', description='Construct Dataset based on the batch prediction job.')\n_parser.add_argument(\"--split-uri\", dest=\"split_uri\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"----output-paths\", dest=\"_output_paths\", type=str, nargs=1)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = _read_input_uri(**_parsed_args)\n\n_outputs = [_outputs]\n\n_output_serializers = [\n _serialize_json,\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-set-model-can-skip-validation": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_set_model_can_skip_validation" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _set_model_can_skip_validation(model: Input[Artifact]):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n model: The model artifact.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import os\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n # create an empty CAN_SKIP_VALIDATION file\n with tf.io.gfile.GFile(os.path.join(model.uri, 'CAN_SKIP_VALIDATION'),\n 'w') as f:\n f.write('')\n\n" + ], + "image": 
"us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + }, + "exec-tabular-stats-and-example-gen": { + "container": { + "args": [ + "--type", + "CustomJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--payload", + "{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"stats_generator\",\"--train_spec={\\\"prediction_type\\\": \\\"{{$.inputs.parameters['prediction_type']}}\\\", \\\"target_column\\\": \\\"{{$.inputs.parameters['target_column_name']}}\\\", \\\"optimization_objective\\\": \\\"{{$.inputs.parameters['optimization_objective']}}\\\", \\\"weight_column_name\\\": \\\"{{$.inputs.parameters['weight_column_name']}}\\\", \\\"transformations\\\": {{$.inputs.parameters['transformations']}}}\", \"--transformations_override_path={{$.inputs.parameters['transformations_path']}}\", \"--split_spec={{$.inputs.parameters['split_spec']}}\", \"--data_source={{$.inputs.parameters['data_source']}}\", \"--data_source_csv_filenames={{$.inputs.parameters['data_source_csv_filenames']}}\", \"--data_source_bigquery_table_path={{$.inputs.parameters['data_source_bigquery_table_path']}}\", \"--predefined_split_key={{$.inputs.parameters['predefined_split_key']}}\", \"--timestamp_split_key={{$.inputs.parameters['timestamp_split_key']}}\", \"--stratified_split_key={{$.inputs.parameters['stratified_split_key']}}\", \"--training_fraction={{$.inputs.parameters['training_fraction']}}\", \"--validation_fraction={{$.inputs.parameters['validation_fraction']}}\", \"--test_fraction={{$.inputs.parameters['test_fraction']}}\", \"--target_column={{$.inputs.parameters['target_column_name']}}\", \"--request_type={{$.inputs.parameters['request_type']}}\", \"--optimization_objective_recall_value={{$.inputs.parameters['optimization_objective_recall_value']}}\", \"--optimization_objective_precision_value={{$.inputs.parameters['optimization_objective_precision_value']}}\", \"--example_gen_gcs_output_prefix={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", \"--dataset_stats_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", \"--stats_result_path={{$.outputs.artifacts['dataset_stats'].uri}}\", \"--dataset_schema_path={{$.outputs.artifacts['dataset_schema'].uri}}\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", 
\"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--dataflow_service_account={{$.inputs.parameters['dataflow_service_account']}}\", \"--is_distill={{$.inputs.parameters['run_distillation']}}\", \"--additional_experiments={{$.inputs.parameters['additional_experiments']}}\", \"--metadata_path={{$.outputs.artifacts['metadata'].uri}}\", \"--train_split={{$.outputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.outputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.outputs.artifacts['test_split'].uri}}\", \"--test_split_for_batch_prediction_component={{$.outputs.parameters['test_split_json'].output_file}}\", \"--downsampled_test_split_for_batch_prediction_component={{$.outputs.parameters['downsampled_test_split_json'].output_file}}\", \"--instance_baseline_path={{$.outputs.artifacts['instance_baseline'].uri}}\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.21" + } + }, + "exec-write-bp-result-path": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_write_bp_result_path" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + }, + "exec-write-bp-result-path-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "_write_bp_result_path" + ], + "command": [ + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef _write_bp_result_path(\n bp_job: Input[Artifact],\n result: OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based on the batch prediction job.\n\n Args:\n bp_job: The batch prediction job artifact.\n result: 
Tbe path to the file that contains Dataset data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n 'tf_record_data_source': {\n 'file_patterns': [f'{directory}/prediction.results-*',],\n 'coder': 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\n" + ], + "image": "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod" + } + } + } + }, + "pipelineInfo": { + "name": "automl-tabular-deprecated" + }, + "root": { + "dag": { + "outputs": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-2-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-3-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-3-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-4-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-4-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + }, + "model-evaluation-evaluation_metrics": { + "artifactSelectors": [ + { + "outputArtifactKey": "model-evaluation-evaluation_metrics", + "producerSubtask": "exit-handler-1" + } + ] + } + } + }, + "tasks": { + "automl-tabular-finalizer": { + "componentRef": { + "name": "comp-automl-tabular-finalizer" + }, + "dependentTasks": [ + "exit-handler-1" + ], + "inputs": { + "parameters": { + "encryption_spec_key_name": { + "runtimeValue": { + "constantValue": { + "stringValue": "" + } + } + }, + "location": { + "componentInputParameter": "location" + }, + "project": { + "componentInputParameter": "project" + }, + "root_dir": { + "componentInputParameter": "root_dir" + } + } + }, + "taskInfo": { + "name": "automl-tabular-finalizer" + }, + "triggerPolicy": { + "strategy": "ALL_UPSTREAM_TASKS_COMPLETED" + } + }, + "exit-handler-1": { + "componentRef": { + "name": "comp-exit-handler-1" + }, + "inputs": { + "parameters": { + "pipelineparam--additional_experiments": { + "componentInputParameter": "additional_experiments" + }, + "pipelineparam--cv_trainer_worker_pool_specs_override": { + "componentInputParameter": "cv_trainer_worker_pool_specs_override" + }, + "pipelineparam--data_source": { + "componentInputParameter": "data_source" + }, + "pipelineparam--dataflow_service_account": { + "componentInputParameter": "dataflow_service_account" + }, + "pipelineparam--dataflow_subnetwork": { + "componentInputParameter": "dataflow_subnetwork" + }, + "pipelineparam--dataflow_use_public_ips": { + "componentInputParameter": "dataflow_use_public_ips" + }, + "pipelineparam--disable_early_stopping": { + "componentInputParameter": "disable_early_stopping" + }, + "pipelineparam--distill_batch_predict_machine_type": { + "componentInputParameter": "distill_batch_predict_machine_type" + }, + "pipelineparam--distill_batch_predict_max_replica_count": { + "componentInputParameter": "distill_batch_predict_max_replica_count" + }, + "pipelineparam--distill_batch_predict_starting_replica_count": { + "componentInputParameter": "distill_batch_predict_starting_replica_count" + }, + "pipelineparam--distill_stage_1_deadline_hours": { + "componentInputParameter": "distill_stage_1_deadline_hours" + }, + "pipelineparam--encryption_spec_key_name": { + 
"componentInputParameter": "encryption_spec_key_name" + }, + "pipelineparam--evaluation_batch_predict_machine_type": { + "componentInputParameter": "evaluation_batch_predict_machine_type" + }, + "pipelineparam--evaluation_batch_predict_max_replica_count": { + "componentInputParameter": "evaluation_batch_predict_max_replica_count" + }, + "pipelineparam--evaluation_batch_predict_starting_replica_count": { + "componentInputParameter": "evaluation_batch_predict_starting_replica_count" + }, + "pipelineparam--evaluation_dataflow_disk_size_gb": { + "componentInputParameter": "evaluation_dataflow_disk_size_gb" + }, + "pipelineparam--evaluation_dataflow_machine_type": { + "componentInputParameter": "evaluation_dataflow_machine_type" + }, + "pipelineparam--evaluation_dataflow_max_num_workers": { + "componentInputParameter": "evaluation_dataflow_max_num_workers" + }, + "pipelineparam--export_additional_model_without_custom_ops": { + "componentInputParameter": "export_additional_model_without_custom_ops" + }, + "pipelineparam--location": { + "componentInputParameter": "location" + }, + "pipelineparam--optimization_objective": { + "componentInputParameter": "optimization_objective" + }, + "pipelineparam--optimization_objective_precision_value": { + "componentInputParameter": "optimization_objective_precision_value" + }, + "pipelineparam--optimization_objective_recall_value": { + "componentInputParameter": "optimization_objective_recall_value" + }, + "pipelineparam--prediction_type": { + "componentInputParameter": "prediction_type" + }, + "pipelineparam--project": { + "componentInputParameter": "project" + }, + "pipelineparam--reduce_search_space_mode": { + "componentInputParameter": "reduce_search_space_mode" + }, + "pipelineparam--root_dir": { + "componentInputParameter": "root_dir" + }, + "pipelineparam--run_distillation": { + "componentInputParameter": "run_distillation" + }, + "pipelineparam--run_evaluation": { + "componentInputParameter": "run_evaluation" + }, + "pipelineparam--split_spec": { + "componentInputParameter": "split_spec" + }, + "pipelineparam--stage_1_deadline_hours": { + "componentInputParameter": "stage_1_deadline_hours" + }, + "pipelineparam--stage_1_num_parallel_trials": { + "componentInputParameter": "stage_1_num_parallel_trials" + }, + "pipelineparam--stage_1_num_selected_trials": { + "componentInputParameter": "stage_1_num_selected_trials" + }, + "pipelineparam--stage_1_single_run_max_secs": { + "componentInputParameter": "stage_1_single_run_max_secs" + }, + "pipelineparam--stage_1_tuner_worker_pool_specs_override": { + "componentInputParameter": "stage_1_tuner_worker_pool_specs_override" + }, + "pipelineparam--stage_2_deadline_hours": { + "componentInputParameter": "stage_2_deadline_hours" + }, + "pipelineparam--stage_2_num_parallel_trials": { + "componentInputParameter": "stage_2_num_parallel_trials" + }, + "pipelineparam--stage_2_num_selected_trials": { + "componentInputParameter": "stage_2_num_selected_trials" + }, + "pipelineparam--stage_2_single_run_max_secs": { + "componentInputParameter": "stage_2_single_run_max_secs" + }, + "pipelineparam--stats_and_example_gen_dataflow_disk_size_gb": { + "componentInputParameter": "stats_and_example_gen_dataflow_disk_size_gb" + }, + "pipelineparam--stats_and_example_gen_dataflow_machine_type": { + "componentInputParameter": "stats_and_example_gen_dataflow_machine_type" + }, + "pipelineparam--stats_and_example_gen_dataflow_max_num_workers": { + "componentInputParameter": "stats_and_example_gen_dataflow_max_num_workers" + }, + 
"pipelineparam--study_spec_override": { + "componentInputParameter": "study_spec_override" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "target_column_name" + }, + "pipelineparam--transform_dataflow_disk_size_gb": { + "componentInputParameter": "transform_dataflow_disk_size_gb" + }, + "pipelineparam--transform_dataflow_machine_type": { + "componentInputParameter": "transform_dataflow_machine_type" + }, + "pipelineparam--transform_dataflow_max_num_workers": { + "componentInputParameter": "transform_dataflow_max_num_workers" + }, + "pipelineparam--transformations": { + "componentInputParameter": "transformations" + }, + "pipelineparam--weight_column_name": { + "componentInputParameter": "weight_column_name" + } + } + }, + "taskInfo": { + "name": "exit-handler-1" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "additional_experiments": { + "type": "STRING" + }, + "cv_trainer_worker_pool_specs_override": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "dataflow_service_account": { + "type": "STRING" + }, + "dataflow_subnetwork": { + "type": "STRING" + }, + "dataflow_use_public_ips": { + "type": "STRING" + }, + "disable_early_stopping": { + "type": "STRING" + }, + "distill_batch_predict_machine_type": { + "type": "STRING" + }, + "distill_batch_predict_max_replica_count": { + "type": "INT" + }, + "distill_batch_predict_starting_replica_count": { + "type": "INT" + }, + "distill_stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "encryption_spec_key_name": { + "type": "STRING" + }, + "evaluation_batch_predict_machine_type": { + "type": "STRING" + }, + "evaluation_batch_predict_max_replica_count": { + "type": "INT" + }, + "evaluation_batch_predict_starting_replica_count": { + "type": "INT" + }, + "evaluation_dataflow_disk_size_gb": { + "type": "INT" + }, + "evaluation_dataflow_machine_type": { + "type": "STRING" + }, + "evaluation_dataflow_max_num_workers": { + "type": "INT" + }, + "export_additional_model_without_custom_ops": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "optimization_objective": { + "type": "STRING" + }, + "optimization_objective_precision_value": { + "type": "DOUBLE" + }, + "optimization_objective_recall_value": { + "type": "DOUBLE" + }, + "prediction_type": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "reduce_search_space_mode": { + "type": "STRING" + }, + "root_dir": { + "type": "STRING" + }, + "run_distillation": { + "type": "STRING" + }, + "run_evaluation": { + "type": "STRING" + }, + "split_spec": { + "type": "STRING" + }, + "stage_1_deadline_hours": { + "type": "DOUBLE" + }, + "stage_1_num_parallel_trials": { + "type": "INT" + }, + "stage_1_num_selected_trials": { + "type": "INT" + }, + "stage_1_single_run_max_secs": { + "type": "INT" + }, + "stage_1_tuner_worker_pool_specs_override": { + "type": "STRING" + }, + "stage_2_deadline_hours": { + "type": "DOUBLE" + }, + "stage_2_num_parallel_trials": { + "type": "INT" + }, + "stage_2_num_selected_trials": { + "type": "INT" + }, + "stage_2_single_run_max_secs": { + "type": "INT" + }, + "stats_and_example_gen_dataflow_disk_size_gb": { + "type": "INT" + }, + "stats_and_example_gen_dataflow_machine_type": { + "type": "STRING" + }, + "stats_and_example_gen_dataflow_max_num_workers": { + "type": "INT" + }, + "study_spec_override": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "transform_dataflow_disk_size_gb": { + "type": "INT" + }, + "transform_dataflow_machine_type": { + "type": "STRING" + 
}, + "transform_dataflow_max_num_workers": { + "type": "INT" + }, + "transformations": { + "type": "STRING" + }, + "weight_column_name": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model-evaluation-2-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-3-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-4-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + }, + "model-evaluation-evaluation_metrics": { + "artifactType": { + "schemaTitle": "system.Metrics", + "schemaVersion": "0.0.1" + } + } + } + } + }, + "schemaVersion": "2.0.0", + "sdkVersion": "kfp-1.8.11" + }, + "runtimeConfig": { + "parameters": { + "additional_experiments": { + "stringValue": "" + }, + "cv_trainer_worker_pool_specs_override": { + "stringValue": "" + }, + "dataflow_service_account": { + "stringValue": "" + }, + "dataflow_subnetwork": { + "stringValue": "" + }, + "dataflow_use_public_ips": { + "stringValue": "True" + }, + "disable_early_stopping": { + "stringValue": "False" + }, + "distill_batch_predict_machine_type": { + "stringValue": "n1-standard-16" + }, + "distill_batch_predict_max_replica_count": { + "intValue": "25" + }, + "distill_batch_predict_starting_replica_count": { + "intValue": "25" + }, + "distill_stage_1_deadline_hours": { + "doubleValue": 1.0 + }, + "encryption_spec_key_name": { + "stringValue": "" + }, + "evaluation_batch_predict_machine_type": { + "stringValue": "n1-standard-16" + }, + "evaluation_batch_predict_max_replica_count": { + "intValue": "25" + }, + "evaluation_batch_predict_starting_replica_count": { + "intValue": "25" + }, + "evaluation_dataflow_disk_size_gb": { + "intValue": "50" + }, + "evaluation_dataflow_machine_type": { + "stringValue": "n1-standard-4" + }, + "evaluation_dataflow_max_num_workers": { + "intValue": "25" + }, + "export_additional_model_without_custom_ops": { + "stringValue": "False" + }, + "optimization_objective_precision_value": { + "doubleValue": -1.0 + }, + "optimization_objective_recall_value": { + "doubleValue": -1.0 + }, + "reduce_search_space_mode": { + "stringValue": "regular" + }, + "run_distillation": { + "stringValue": "False" + }, + "run_evaluation": { + "stringValue": "False" + }, + "stage_1_tuner_worker_pool_specs_override": { + "stringValue": "" + }, + "stats_and_example_gen_dataflow_disk_size_gb": { + "intValue": "40" + }, + "stats_and_example_gen_dataflow_machine_type": { + "stringValue": "n1-standard-16" + }, + "stats_and_example_gen_dataflow_max_num_workers": { + "intValue": "25" + }, + "study_spec_override": { + "stringValue": "" + }, + "transform_dataflow_disk_size_gb": { + "intValue": "40" + }, + "transform_dataflow_machine_type": { + "stringValue": "n1-standard-16" + }, + "transform_dataflow_max_num_workers": { + "intValue": "25" + }, + "weight_column_name": { + "stringValue": "" + } + } + } +} \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py new file mode 100644 index 0000000000..1afdbfa157 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -0,0 +1,167 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabular Ensemble component spec.""" + +from typing import Optional + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_ensemble( + project: str, + location: str, + root_dir: str, + transform_output: Input[Artifact], + metadata: Input[Artifact], + dataset_schema: Input[Artifact], + tuning_result_input: Input[Artifact], + instance_baseline: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + model_architecture: Output[Artifact], + model: Output[Artifact], + unmanaged_container_model: Output[UnmanagedContainerModel], + model_without_custom_ops: Output[Artifact], + explanation_metadata: dsl.OutputPath(dict), + explanation_metadata_artifact: Output[Artifact], + explanation_parameters: dsl.OutputPath(dict), + warmup_data: Optional[Input[Dataset]] = None, + encryption_spec_key_name: Optional[str] = '', + export_additional_model_without_custom_ops: Optional[bool] = False, +): + # fmt: off + """Ensembles AutoML Tabular models. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + transform_output: The transform output artifact. + metadata: The tabular example gen metadata. + dataset_schema: The schema of the dataset. + tuning_result_input: AutoML Tabular tuning + result. + instance_baseline: The instance baseline + used to calculate explanations. + warmup_data: The warm up data. Ensemble component will save the + warm up data together with the model artifact, used to warm up the model + when prediction server starts. + encryption_spec_key_name: Customer-managed encryption key. + export_additional_model_without_custom_ops: True if export + an additional model without custom TF operators to the + `model_without_custom_ops` output. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + model_architecture: The architecture of the output model. + model: The output model. + model_without_custom_ops: The output model without custom TF operators, this output will be empty unless `export_additional_model_without_custom_ops` is set. + model_uri: The URI of the output model. + instance_schema_uri: The URI of the instance schema. + prediction_schema_uri: The URI of the prediction schema. + explanation_metadata: The explanation metadata used by Vertex online and batch explanations. + explanation_metadata: The explanation parameters used by Vertex online and batch explanations. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["ensemble", "--transform_output_path=', + transform_output.uri, + '", "--model_output_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model",' + ' "--custom_model_output_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/custom_model",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--export_custom_model=' + ), + export_additional_model_without_custom_ops, + '", "--metadata_path=', + metadata.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + '", "--tuning_result_input_path=', + tuning_result_input.uri, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--warmup_data=', + warmup_data.uri, + '", "--prediction_docker_uri=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + '", "--model_path=', + model.uri, + '", "--custom_model_path=', + model_without_custom_ops.uri, + '", "--explanation_metadata_path=', + explanation_metadata, + ',', + explanation_metadata_artifact.uri, + '", "--explanation_parameters_path=', + explanation_parameters, + '", "--model_architecture_path=', + model_architecture.uri, + ( + '", "--use_json=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py new file mode 100644 index 0000000000..ea36d7d297 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -0,0 +1,88 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Pipeline Finalizer component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def automl_tabular_finalizer( + project: str, + location: str, + root_dir: str, + gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Finalizes AutoML Tabular pipelines. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-finalizer-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["cancel_l2l_tuner", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--cleanup_lro_job_infos=' + ), + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro"' + ']}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py new file mode 100644 index 0000000000..8fc6b00ec9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -0,0 +1,39 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Infra Validator component spec.""" + +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from kfp import dsl +from kfp.dsl import Input + + +@dsl.container_component +def automl_tabular_infra_validator( + unmanaged_container_model: Input[UnmanagedContainerModel], # pylint: disable=unused-argument +): + # fmt: off + """Validates the trained AutoML Tabular model is a valid model. + + Args: + unmanaged_container_model: google.UnmanagedContainerModel for model + to be validated. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + command=[], + args=['--executor_input', '{{$}}'], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py new file mode 100644 index 0000000000..29091ded20 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py @@ -0,0 +1,119 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Split Materialized Data component spec.""" + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def split_materialized_data( + materialized_data: Input[Dataset], + materialized_train_split: Output[Artifact], + materialized_eval_split: Output[Artifact], + materialized_test_split: Output[Artifact], +): + # fmt: off + """Splits materialized dataset into train, eval, and test data splits. + + The materialized dataset generated by the Feature Transform Engine consists of + all the splits + that were combined into the input transform dataset (i.e., train, eval, and + test splits). + This components splits the output materialized dataset into corresponding + materialized data splits + so that the splits can be used by down-stream training or evaluation + components. + + Args: + materialized_data: Materialized dataset output by the Feature + Transform Engine. + + Returns: + materialized_train_split: Path patern to materialized train split. + materialized_eval_split: Path patern to materialized eval split. + materialized_test_split: Path patern to materialized test split. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + command=[ + 'sh', + '-ec', + ( + 'program_path=$(mktemp -d)\nprintf "%s" "$0" >' + ' "$program_path/ephemeral_component.py"\npython3 -m' + ' kfp.components.executor_main ' + ' --component_module_path ' + ' "$program_path/ephemeral_component.py" ' + ' "$@"\n' + ), + ( + '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom' + ' typing import *\n\ndef _split_materialized_data(\n ' + ' materialized_data: Input[Dataset],\n ' + " materialized_train_split: OutputPath('MaterializedSplit'),\n " + " materialized_eval_split: OutputPath('MaterializedSplit'),\n " + " materialized_test_split: OutputPath('MaterializedSplit')):\n " + ' """Splits materialized_data into materialized_data test,' + ' train, and eval splits.\n\n Necessary adapter between FTE' + ' pipeline and trainer.\n\n Args:\n materialized_data:' + ' materialized_data dataset output by FTE.\n ' + ' materialized_train_split: Path patern to' + ' materialized_train_split.\n materialized_eval_split: Path' + ' patern to materialized_eval_split.\n ' + ' materialized_test_split: Path patern to' + ' materialized_test_split.\n """\n # pylint:' + ' disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n' + ' import json\n import tensorflow as tf\n # pylint:' + ' enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\n' + " with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n " + ' artifact_path = f.read()\n\n # needed to import tf because' + ' this is a path in gs://\n with' + " tf.io.gfile.GFile(artifact_path, 'r') as f:\n " + ' materialized_data_json = json.load(f)\n\n if' + " 'tf_record_data_source' in materialized_data_json:\n " + ' file_patterns =' + " materialized_data_json['tf_record_data_source'][\n " + " 'file_patterns']\n elif 'avro_data_source' in" + ' materialized_data_json:\n file_patterns =' + " materialized_data_json['avro_data_source'][\n " + " 'file_patterns']\n elif 'parquet_data_source' in" + ' materialized_data_json:\n file_patterns =' + " materialized_data_json['parquet_data_source'][\n " + " 'file_patterns']\n else:\n raise ValueError(f'Unsupported" + " training data source: {materialized_data_json}')\n\n # we map" + ' indices to file patterns based on the ordering of insertion' + ' order\n # in our transform_data (see above in' + ' _generate_analyze_and_transform_data)\n with' + " tf.io.gfile.GFile(materialized_train_split, 'w') as f:\n " + ' f.write(file_patterns[0])\n\n with' + " tf.io.gfile.GFile(materialized_eval_split, 'w') as f:\n " + ' f.write(file_patterns[1])\n\n with' + " tf.io.gfile.GFile(materialized_test_split, 'w') as f:\n " + ' f.write(file_patterns[2])\n\n' + ), + ], + args=[ + '--executor_input', + '{{$}}', + '--function_to_execute', + '_split_materialized_data', + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py new file mode 100644 index 0000000000..095837620d --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -0,0 +1,189 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Tabular Stage 1 Tuner component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_stage_1_tuner( + project: str, + location: str, + root_dir: str, + num_selected_trials: int, + deadline_hours: float, + num_parallel_trials: int, + single_run_max_secs: int, + metadata: Input[Artifact], + transform_output: Input[Artifact], + materialized_train_split: Input[Artifact], + materialized_eval_split: Input[Artifact], + gcp_resources: dsl.OutputPath(str), + tuning_result_output: Output[Artifact], + execution_metrics: dsl.OutputPath(dict), + study_spec_parameters_override: Optional[list] = [], + worker_pool_specs_override_json: Optional[list] = [], + reduce_search_space_mode: Optional[str] = 'regular', + num_selected_features: Optional[int] = 0, + disable_early_stopping: Optional[bool] = False, + feature_ranking: Optional[Input[Artifact]] = None, + tune_feature_selection_rate: Optional[bool] = False, + encryption_spec_key_name: Optional[str] = '', + run_distillation: Optional[bool] = False, +): + # fmt: off + """Searches AutoML Tabular architectures and selects the top trials. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + study_spec_parameters_override: JSON study spec. E.g., + [{"parameter_id": "model_type","categorical_value_spec": {"values": + ["nn"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., + [{"machine_spec": {"machine_type": + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + "n1-standard-16"}}] + reduce_search_space_mode: The reduce search space mode. Possible + values: "regular" (default), "minimal", "full". + num_selected_trials: Number of selected trials. The number of weak + learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. The number of + features to learn in the NN models. + deadline_hours: Number of hours the cross-validation trainer + should run. + disable_early_stopping: True if disable early stopping. Default + value is false. + num_parallel_trials: Number of parallel training trials. + single_run_max_secs: Max number of seconds each training trial runs. + metadata: The tabular example gen metadata. + transform_output: The transform output artifact. + materialized_train_split: The materialized train + split. + materialized_eval_split: The materialized eval split. + encryption_spec_key_name: Customer-managed encryption key. + run_distillation: True if in distillation mode. The default value + is false. + + Returns: + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + tuning_result_output: The trained model and architectures. + execution_metrics: Core metrics in dictionary of component execution. 
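The two override parameters documented above are list-of-dict JSON structures. A short expansion of the docstring examples follows; the values are illustrative placeholders, not recommendations.

# Study spec override: restrict the architecture search to NN models.
study_spec_parameters_override = [
    {
        'parameter_id': 'model_type',
        'categorical_value_spec': {'values': ['nn']},
    },
]

# Worker pool override: one entry per worker pool; empty dicts leave the
# corresponding pools at their defaults.
worker_pool_specs_override_json = [
    {'machine_spec': {'machine_type': 'n1-standard-16'}},
    {},
    {},
    {'machine_spec': {'machine_type': 'n1-standard-16'}},
]

# Both lists are passed through unchanged when calling the component, e.g.
# automl_tabular_stage_1_tuner(
#     ...,
#     study_spec_parameters_override=study_spec_parameters_override,
#     worker_pool_specs_override_json=worker_pool_specs_override_json)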
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-stage-1-tuner-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', + transform_output.uri, + '", "--training_docker_uri=', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "--feature_selection_result_path=', + feature_ranking.uri, + '", "--disable_early_stopping=', + disable_early_stopping, + '", "--tune_feature_selection_rate=', + tune_feature_selection_rate, + '", "--reduce_search_space_mode=', + reduce_search_space_mode, + ( + f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "--training_base_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/train",' + ' "--num_parallel_trial=' + ), + num_parallel_trials, + '", "--single_run_max_secs=', + single_run_max_secs, + '", "--deadline_hours=', + deadline_hours, + '", "--num_selected_trials=', + num_selected_trials, + '", "--num_selected_features=', + num_selected_features, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--error_file_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--is_distill=', + run_distillation, + '", "--tuning_result_output_path=', + tuning_result_output.uri, + '", "--kms_key_name=', + encryption_spec_key_name, + '", "--gcp_resources_path=', + gcp_resources, + '", "--execution_metrics_path=', + execution_metrics, + ( + '", "--use_json=true", "--log_level=ERROR",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py new file mode 100644 index 0000000000..6c7e915dbe --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -0,0 +1,304 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Stats and Example Generation component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Output + + +@dsl.container_component +def tabular_stats_and_example_gen( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + transformations: str, + dataset_schema: Output[Artifact], + dataset_stats: Output[Artifact], + train_split: Output[Dataset], + eval_split: Output[Dataset], + test_split: Output[Dataset], + test_split_json: dsl.OutputPath(list), + downsampled_test_split_json: dsl.OutputPath(list), + instance_baseline: Output[Artifact], + metadata: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + weight_column_name: Optional[str] = '', + optimization_objective: Optional[str] = '', + optimization_objective_recall_value: Optional[float] = -1, + optimization_objective_precision_value: Optional[float] = -1, + transformations_path: Optional[str] = '', + request_type: Optional[str] = 'COLUMN_STATS_ONLY', + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', + run_distillation: Optional[bool] = False, + additional_experiments: Optional[str] = '', + additional_experiments_json: Optional[dict] = {}, + data_source_csv_filenames: Optional[str] = '', + data_source_bigquery_table_path: Optional[str] = '', + predefined_split_key: Optional[str] = '', + timestamp_split_key: Optional[str] = '', + stratified_split_key: Optional[str] = '', + training_fraction: Optional[float] = -1, + validation_fraction: Optional[float] = -1, + test_fraction: Optional[float] = -1, + quantiles: Optional[list] = [], + enable_probabilistic_inference: Optional[bool] = False, +): + # fmt: off + """Generates stats and training instances for tabular data. + + Args: + project: Project to run dataset statistics and example + generation. + location: Location for running dataset statistics and example + generation. + root_dir: The Cloud Storage location to store the output. + target_column_name: The target column name. + weight_column_name: The weight column name. + prediction_type: The prediction type. Supported values: + "classification", "regression". + optimization_objective: Objective function the model is optimizing + towards. The training process creates a model that maximizes/minimizes + the value of the objective function over the validation set. The + supported optimization objectives depend on the prediction type. If the + field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the + area under the receiver operating characteristic (ROC) curve. + "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - + Maximize the area under the precision-recall curve. + "maximize-precision-at-recall" - Maximize precision for a specified + recall value. "maximize-recall-at-precision" - Maximize recall for a + specified precision value. + classification (multi-class): "minimize-log-loss" (default) - Minimize + log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared + error (RMSE). 
"minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when + optimization_objective is "maximize-precision-at-recall". Must be + between 0 and 1, inclusive. + optimization_objective_precision_value: Required when + optimization_objective is "maximize-recall-at-precision". Must be + between 0 and 1, inclusive. + transformations: Quote escaped JSON string for transformations. Each + transformation will apply transform function to given input column. And + the result will be used for training. When creating transformation for + BigQuery Struct column, the column should be flattened using "." as the + delimiter. + transformations_path: Path to a GCS file containing JSON + string for transformations. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More + details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + run_distillation: True if in distillation mode. The default value + is false. + + Returns: + dataset_schema: The schema of the dataset. + dataset_stats: The stats of the dataset. + train_split: The train split. + eval_split: The eval split. + test_split: The test split. + test_split_json: The test split JSON object. + downsampled_test_split_json: The downsampled test split JSON object. + instance_baseline: The instance baseline used to calculate explanations. + metadata: The tabular example gen metadata. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + '", "args": ["stats_generator",', + '"--train_spec={\\"prediction_type\\": \\"', + prediction_type, + '\\", \\"target_column\\": \\"', + target_column_name, + '\\", \\"optimization_objective\\": \\"', + optimization_objective, + '\\", \\"weight_column_name\\": \\"', + weight_column_name, + '\\", \\"transformations\\": ', + transformations, + ', \\"quantiles\\": ', + quantiles, + ', \\"enable_probabilistic_inference\\": ', + enable_probabilistic_inference, + '}", "--transformations_override_path=', + transformations_path, + '", "--data_source_csv_filenames=', + data_source_csv_filenames, + '", "--data_source_bigquery_table_path=', + data_source_bigquery_table_path, + '", "--predefined_split_key=', + predefined_split_key, + '", "--timestamp_split_key=', + timestamp_split_key, + '", "--stratified_split_key=', + stratified_split_key, + '", "--training_fraction=', + training_fraction, + '", "--validation_fraction=', + validation_fraction, + '", "--test_fraction=', + test_fraction, + '", "--target_column=', + target_column_name, + '", "--request_type=', + request_type, + '", "--optimization_objective_recall_value=', + optimization_objective_recall_value, + '", "--optimization_objective_precision_value=', + optimization_objective_precision_value, + '", "--example_gen_gcs_output_prefix=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/example_gen_output",' + ' "--dataset_stats_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/stats/",' + ' "--stats_result_path=' + ), + dataset_stats.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + ( + f'", "--job_name=tabular-stats-and-example-gen-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", 
"--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--is_distill=', + run_distillation, + '", "--additional_experiments=', + additional_experiments, + '", "--metadata_path=', + metadata.uri, + '", "--train_split=', + train_split.uri, + '", "--eval_split=', + eval_split.uri, + '", "--test_split=', + test_split.uri, + '", "--test_split_for_batch_prediction_component=', + test_split_json, + ( + '", "--downsampled_test_split_for_batch_prediction_component=' + ), + downsampled_test_split_json, + '", "--instance_baseline_path=', + instance_baseline.uri, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--gcp_resources_path=' + ), + gcp_resources, + ( + '", "--parse_json=true",' + ' "--generate_additional_downsample_test_split=true",' + ' "--executor_input={{$.json_escape[1]}}"]}}]}}' + ), + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py new file mode 100644 index 0000000000..d4ff9c5473 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -0,0 +1,285 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""AutoML Training Configurator and Validator component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def training_configurator_and_validator( + dataset_stats: Input[Artifact], + split_example_counts: str, + training_schema: Input[Artifact], + instance_schema: Input[Artifact], + metadata: Output[Artifact], + instance_baseline: Output[Artifact], + target_column: Optional[str] = '', + weight_column: Optional[str] = '', + prediction_type: Optional[str] = '', + optimization_objective: Optional[str] = '', + optimization_objective_recall_value: Optional[float] = -1, + optimization_objective_precision_value: Optional[float] = -1, + run_evaluation: Optional[bool] = False, + run_distill: Optional[bool] = False, + enable_probabilistic_inference: Optional[bool] = False, + time_series_identifier_column: Optional[str] = '', + time_column: Optional[str] = '', + time_series_attribute_columns: Optional[list] = [], + available_at_forecast_columns: Optional[list] = [], + unavailable_at_forecast_columns: Optional[list] = [], + quantiles: Optional[list] = [], + context_window: Optional[int] = -1, + forecast_horizon: Optional[int] = -1, + forecasting_model_type: Optional[str] = '', + forecasting_transformations: Optional[dict] = {}, + stage_1_deadline_hours: Optional[float] = None, + stage_2_deadline_hours: Optional[float] = None, + group_columns: Optional[list] = None, + group_total_weight: float = 0.0, + temporal_total_weight: float = 0.0, + group_temporal_total_weight: float = 0.0, +): + # fmt: off + """Configures training and validates data and user-input configurations. + + Args: + dataset_stats: Dataset stats generated by + feature transform engine. + split_example_counts: JSON string of data split example counts for + train, validate, and test splits. + training_schema_path: Schema of input data to the tf_model + at training time. + instance_schema: Schema of input data to the tf_model at + serving time. + target_column: Target column of input data. + weight_column: Weight column of input data. + prediction_type: Model prediction type. One of "classification", + "regression", "time_series". + optimization_objective: Objective function the model is optimizing + towards. The training process creates a model that maximizes/minimizes + the value of the objective function over the validation set. The + supported optimization objectives depend on the prediction type. If the + field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the + area under the receiver operating characteristic (ROC) curve. + "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - + Maximize the area under the precision-recall curve. + "maximize-precision-at-recall" - Maximize precision for a specified + recall value. "maximize-recall-at-precision" - Maximize recall for a + specified precision value. + classification (multi-class): "minimize-log-loss" (default) - Minimize + log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared + error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when + optimization_objective is "maximize-precision-at-recall". Must be + between 0 and 1, inclusive. 
+ optimization_objective_precision_value: Required when + optimization_objective is "maximize-recall-at-precision". Must be + between 0 and 1, inclusive. + run_evaluation: Whether we are running evaluation in the training + pipeline. + run_distill: Whether the distillation should be applied to the + training. + enable_probabilistic_inference: If probabilistic inference is + enabled, the model will fit a distribution that captures the uncertainty + of a prediction. At inference time, the predictive distribution is used + to make a point prediction that minimizes the optimization objective. + For example, the mean of a predictive distribution is the point + prediction that minimizes RMSE loss. If quantiles are specified, then + the quantiles of the distribution are also returned. + time_series_identifier_column: Time series idenfier column. Used by + forecasting only. + time_column: The column that indicates the time. Used by forecasting + only. + time_series_attribute_columns: The column names of the time series + attributes. + available_at_forecast_columns: The names of the columns that are + available at forecast time. + unavailable_at_forecast_columns: The names of the columns that are + not available at forecast time. + quantiles: All quantiles that the model need to predict. + context_window: The length of the context window. + forecast_horizon: The length of the forecast horizon. + forecasting_model_type: The model types, e.g. l2l, seq2seq, tft. + forecasting_transformations: Dict mapping auto and/or type-resolutions to + feature columns. The supported types are auto, categorical, numeric, + text, and timestamp. + stage_1_deadline_hours: Stage 1 training budget in + hours. + stage_2_deadline_hours: Stage 2 training budget in + hours. + group_columns: A list of time series attribute column + names that define the time series hierarchy. + group_total_weight: The weight of the loss for + predictions aggregated over time series in the same group. + temporal_total_weight: The weight of the loss for + predictions aggregated over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for + predictions aggregated over both the horizon and time series in the same + hierarchy group. + + Returns: + metadata: The tabular example gen metadata. 
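A hypothetical sketch of wiring this component for a classification run; the import path, the artifact URIs, and the exact JSON shape of `split_example_counts` are assumptions made for illustration.

# Hypothetical sketch: configure and validate training from previously
# generated statistics and schemas.
from kfp import compiler, dsl
from google_cloud_pipeline_components.v1.automl.tabular.training_configurator_and_validator import (
    training_configurator_and_validator,
)


def _stub(uri: str):
    # Stand in for the corresponding feature-transform-engine output.
    return dsl.importer(
        artifact_uri=uri, artifact_class=dsl.Artifact, reimport=False
    ).output


@dsl.pipeline(name='training-configurator-sketch')
def configurator_sketch():
    training_configurator_and_validator(
        dataset_stats=_stub('gs://example-bucket/dataset_stats'),
        split_example_counts='{"train": 800, "validate": 100, "test": 100}',
        training_schema=_stub('gs://example-bucket/training_schema'),
        instance_schema=_stub('gs://example-bucket/instance_schema'),
        target_column='label',
        prediction_type='classification',
        optimization_objective='maximize-au-prc',
    )


compiler.Compiler().compile(
    pipeline_func=configurator_sketch, package_path='configurator_sketch.yaml'
)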
+ """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + command=[], + args=[ + 'training_configurator_and_validator', + dsl.ConcatPlaceholder( + items=['--instance_schema_path=', instance_schema.uri] + ), + dsl.ConcatPlaceholder( + items=['--training_schema_path=', training_schema.uri] + ), + dsl.ConcatPlaceholder( + items=['--dataset_stats_path=', dataset_stats.uri] + ), + dsl.ConcatPlaceholder( + items=['--split_example_counts=', split_example_counts] + ), + dsl.ConcatPlaceholder(items=['--target_column=', target_column]), + dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), + dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), + dsl.ConcatPlaceholder( + items=['--optimization_objective=', optimization_objective] + ), + dsl.ConcatPlaceholder( + items=[ + '--optimization_objective_recall_value=', + optimization_objective_recall_value, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--optimization_objective_precision_value=', + optimization_objective_precision_value, + ] + ), + dsl.ConcatPlaceholder(items=['--metadata_path=', metadata.uri]), + dsl.ConcatPlaceholder( + items=['--instance_baseline_path=', instance_baseline.uri] + ), + dsl.ConcatPlaceholder(items=['--run_evaluation=', run_evaluation]), + dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), + dsl.ConcatPlaceholder( + items=[ + '--enable_probabilistic_inference=', + enable_probabilistic_inference, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--time_series_identifier_column=', + time_series_identifier_column, + ] + ), + dsl.ConcatPlaceholder(items=['--time_column=', time_column]), + dsl.ConcatPlaceholder( + items=[ + '--time_series_attribute_columns=', + time_series_attribute_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--available_at_forecast_columns=', + available_at_forecast_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--unavailable_at_forecast_columns=', + unavailable_at_forecast_columns, + ] + ), + dsl.IfPresentPlaceholder( + input_name='quantiles', + then=dsl.ConcatPlaceholder( + items=[ + '--quantiles=', + quantiles, + ] + ), + ), + dsl.ConcatPlaceholder(items=['--context_window=', context_window]), + dsl.ConcatPlaceholder( + items=['--forecast_horizon=', forecast_horizon] + ), + dsl.ConcatPlaceholder( + items=['--forecasting_model_type=', forecasting_model_type] + ), + dsl.ConcatPlaceholder( + items=[ + '--forecasting_transformations=', + forecasting_transformations, + ] + ), + dsl.IfPresentPlaceholder( + input_name='stage_1_deadline_hours', + then=dsl.ConcatPlaceholder( + items=[ + '--stage_1_deadline_hours=', + stage_1_deadline_hours, + ] + ), + ), + dsl.IfPresentPlaceholder( + input_name='stage_2_deadline_hours', + then=dsl.ConcatPlaceholder( + items=[ + '--stage_2_deadline_hours=', + stage_2_deadline_hours, + ] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_columns', + then=dsl.ConcatPlaceholder( + items=['--group_columns=', group_columns] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_total_weight', + then=dsl.ConcatPlaceholder( + items=['--group_total_weight=', group_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=['--temporal_total_weight=', temporal_total_weight] + ), + ), + dsl.IfPresentPlaceholder( + input_name='group_temporal_total_weight', + then=dsl.ConcatPlaceholder( + items=[ + '--group_temporal_total_weight=', + group_temporal_total_weight, + ] + ), + ), + 
], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py new file mode 100644 index 0000000000..c9ab7ef401 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py @@ -0,0 +1,200 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""AutoML Transform component spec.""" + +from typing import Optional + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.container_component +def automl_tabular_transform( + project: str, + location: str, + root_dir: str, + metadata: Input[Artifact], + dataset_schema: Input[Artifact], + train_split: Input[Dataset], + eval_split: Input[Dataset], + test_split: Input[Dataset], + materialized_train_split: Output[Artifact], + materialized_eval_split: Output[Artifact], + materialized_test_split: Output[Artifact], + training_schema_uri: Output[Artifact], + transform_output: Output[Artifact], + gcp_resources: dsl.OutputPath(str), + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', +): + # fmt: off + """Transforms raw features to engineered features. + + Args: + project: Project to run Cross-validation trainer. + location: Location for running the Cross-validation trainer. + root_dir: The Cloud Storage location to store the output. + metadata: The tabular example gen metadata. + dataset_schema: The schema of the dataset. + train_split: The train split. + eval_split: The eval split. + test_split: The test split. + dataflow_machine_type: The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account: Custom service account to run + dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. + + Returns: + materialized_train_split: The materialized train split. + materialized_eval_split: The materialized eval split. + materialized_eval_split: The materialized test split. + training_schema_uri: The training schema. 
+ transform_output: The transform output artifact. + gcp_resources: GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + # fmt: on + + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ' "encryption_spec": {"kms_key_name":"' + ), + encryption_spec_key_name, + ( + '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + ( + '", "args": ["transform", "--is_mp=true",' + ' "--transform_output_artifact_path=' + ), + transform_output.uri, + '", "--transform_output_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform",' + ' "--materialized_splits_output_path=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform_materialized",' + ' "--metadata_path=' + ), + metadata.uri, + '", "--dataset_schema_path=', + dataset_schema.uri, + '", "--train_split=', + train_split.uri, + '", "--eval_split=', + eval_split.uri, + '", "--test_split=', + test_split.uri, + '", "--materialized_train_split=', + materialized_train_split.uri, + '", "--materialized_eval_split=', + materialized_eval_split.uri, + '", "--materialized_test_split=', + materialized_test_split.uri, + '", "--training_schema_path=', + training_schema_uri.uri, + ( + f'", "--job_name=automl-tabular-transform-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + '", "--dataflow_project=', + project, + '", "--error_file_path=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb",' + ' "--dataflow_staging_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging",' + ' "--dataflow_tmp_dir=' + ), + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp",' + ' "--dataflow_max_num_workers=' + ), + dataflow_max_num_workers, + '", "--dataflow_machine_type=', + dataflow_machine_type, + '", "--dataflow_worker_container_image=', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + '", "--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + '", "--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + '", "--dataflow_use_public_ips=', + dataflow_use_public_ips, + '", "--dataflow_kms_key=', + encryption_spec_key_name, + '", "--dataflow_service_account=', + dataflow_service_account, + '", "--lro_job_info=', + root_dir, + ( + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/lro",' + ' "--gcp_resources_path=' + ), + gcp_resources, + '"]}}]}}', + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py new file mode 100644 index 
0000000000..2c19976e47 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py @@ -0,0 +1,1435 @@ +"""Util functions for AutoML Tabular pipeline.""" + +import json +import math +import os +import pathlib +from typing import Any, Dict, List, Optional, Tuple +import warnings + +_DEFAULT_NUM_PARALLEL_TRAILS = 35 +_DEFAULT_STAGE_2_NUM_SELECTED_TRAILS = 5 +_NUM_FOLDS = 5 +_DISTILL_TOTAL_TRIALS = 100 +_EVALUATION_BATCH_PREDICT_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT = 20 +_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT = 20 +_EVALUATION_BATCH_EXPLAIN_MACHINE_TYPE = 'n1-highmem-8' +_EVALUATION_BATCH_EXPLAIN_STARTING_REPLICA_COUNT = 10 +_EVALUATION_BATCH_EXPLAIN_MAX_REPLICA_COUNT = 10 +_EVALUATION_DATAFLOW_MACHINE_TYPE = 'n1-standard-4' +_EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 +_EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 +_EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 + +# Needed because we reference the AutoML Tabular V2 pipeline. +_GCPC_STAGING_PATH = pathlib.Path( + __file__ +).parent.parent.parent.parent.resolve() +_GCPC_PREVIEW_TABULAR_PATH = ( + _GCPC_STAGING_PATH / 'preview' / 'automl' / 'tabular' +) + + +# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag +# to signify FTE usage instead of the presence of num_selected_features. +def _get_default_pipeline_params( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[float] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + max_selected_features: Optional[int] = None, + apply_feature_selection_tuning: bool = False, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + 
evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Dict[str, Any]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. 
The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + max_selected_features: number of features to select for training, + apply_feature_selection_tuning: tuning feature selection rate if true. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. 
Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. If specified, + enable_probabilistic_inference and run_distillation cannot be enabled. + model_display_name: The display name of the uploaded Vertex model. + model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if not study_spec_parameters_override: + study_spec_parameters_override = [] + if not stage_1_tuner_worker_pool_specs_override: + stage_1_tuner_worker_pool_specs_override = [] + if not cv_trainer_worker_pool_specs_override: + cv_trainer_worker_pool_specs_override = [] + if not quantiles: + quantiles = [] + + parameter_values = {} + parameters = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column': target_column, + 'prediction_type': prediction_type, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': data_source_bigquery_table_path, + 'predefined_split_key': predefined_split_key, + 'timestamp_split_key': timestamp_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'optimization_objective': optimization_objective, + 'train_budget_milli_node_hours': train_budget_milli_node_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'weight_column': weight_column, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_parameters_override': study_spec_parameters_override, + 'stage_1_tuner_worker_pool_specs_override': ( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': ( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'dataflow_service_account': dataflow_service_account, + 'encryption_spec_key_name': encryption_spec_key_name, + 'max_selected_features': max_selected_features, + 'stage_1_tuning_result_artifact_uri': stage_1_tuning_result_artifact_uri, + 'quantiles': quantiles, + 'enable_probabilistic_inference': enable_probabilistic_inference, + 'model_display_name': model_display_name, + 'model_description': model_description, + } + parameter_values.update( + {param: value for param, value in parameters.items() if value is not None} + ) + + if run_evaluation: + eval_parameters = { + 'evaluation_batch_predict_machine_type': ( + 
evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_batch_explain_machine_type': ( + evaluation_batch_explain_machine_type + ), + 'evaluation_batch_explain_starting_replica_count': ( + evaluation_batch_explain_starting_replica_count + ), + 'evaluation_batch_explain_max_replica_count': ( + evaluation_batch_explain_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_starting_num_workers': ( + evaluation_dataflow_starting_num_workers + ), + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + } + parameter_values.update( + { + param: value + for param, value in eval_parameters.items() + if value is not None + } + ) + + # V1 pipeline without FTE + if num_selected_features is None: + if not additional_experiments: + additional_experiments = {} + + parameters = { + 'transformations': transformations, + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'additional_experiments': additional_experiments, + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + if apply_feature_selection_tuning: + parameter_values.update({ + 'apply_feature_selection_tuning': apply_feature_selection_tuning, + }) + + if run_distillation: + distillation_parameters = { + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + } + parameter_values.update( + { + param: value + for param, value in distillation_parameters.items() + if value is not None + } + ) + + # V2 pipeline (with FTE) + else: + if run_distillation: + raise ValueError( + 'Distillation is currently not supported' + ' when num_selected_features is specified.' 
+ ) + + parameters = { + 'num_selected_features': num_selected_features, + 'dataset_level_custom_transformation_definitions': [], + 'dataset_level_transformations': [], + 'tf_auto_transform_features': {}, + 'tf_custom_transformation_definitions': [], + 'legacy_transformations_path': transformations, + 'feature_transform_engine_dataflow_machine_type': ( + transform_dataflow_machine_type + ), + 'feature_transform_engine_dataflow_max_num_workers': ( + transform_dataflow_max_num_workers + ), + 'feature_transform_engine_dataflow_disk_size_gb': ( + transform_dataflow_disk_size_gb + ), + } + parameter_values.update( + { + param: value + for param, value in parameters.items() + if value is not None + } + ) + + return parameter_values + + +def get_automl_tabular_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: Optional[int] = None, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + study_spec_parameters_override: Optional[List[Dict[str, Any]]] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, + run_distillation: bool = False, + distill_batch_predict_machine_type: Optional[str] = None, + distill_batch_predict_starting_replica_count: Optional[int] = None, + distill_batch_predict_max_replica_count: Optional[int] = None, + stage_1_tuning_result_artifact_uri: 
Optional[str] = None, + quantiles: Optional[List[float]] = None, + enable_probabilistic_inference: bool = False, + num_selected_features: Optional[int] = None, + model_display_name: str = '', + model_description: str = '', +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular v1 default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The path to a GCS file containing the transformations to + apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + study_spec_parameters_override: The list for overriding study spec. The list + should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. 
+ transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + quantiles: Quantiles to use for probabilistic inference. Up to 5 quantiles + are allowed of values between 0 and 1, exclusive. Represents the quantiles + to use for that objective. Quantiles must be unique. + enable_probabilistic_inference: If probabilistic inference is enabled, the + model will fit a distribution that captures the uncertainty of a + prediction. At inference time, the predictive distribution is used to make + a point prediction that minimizes the optimization objective. For example, + the mean of a predictive distribution is the point prediction that + minimizes RMSE loss. If quantiles are specified, then the quantiles of the + distribution are also returned. + num_selected_features: Number of selected features for feature selection, + defaults to None, in which case all features are used. + model_display_name: The display name of the uploaded Vertex model. 
+ model_description: The description for the uploaded model. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = _get_default_pipeline_params( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=study_spec_parameters_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=run_distillation, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=quantiles, + 
enable_probabilistic_inference=enable_probabilistic_inference, + num_selected_features=num_selected_features, + model_display_name=model_display_name, + model_description=model_description, + ) + + # V1 pipeline without FTE + if num_selected_features is None: + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'automl_tabular_pipeline.yaml' + ) + + # V2 pipeline with FTE + else: + pipeline_definition_path = os.path.join( + _GCPC_PREVIEW_TABULAR_PATH, + 'automl_tabular_v2_pipeline.yaml', + ) + + return pipeline_definition_path, parameter_values + + +def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: + """Convert json input dict to encoded parameter string. + + This function is required due to the limitation on YAML component definition + that YAML definition does not have a keyword for apply quote escape, so the + JSON argument's quote must be manually escaped using this function. + + Args: + input_dict: The input json dictionary. + + Returns: + The encoded string used for parameter. + """ + if not input_dict: + return '' + out = json.dumps(json.dumps(input_dict)) + return out[1:-1] # remove the outside quotes, e.g., "foo" -> foo + + +def get_skip_evaluation_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that skips evaluation. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. 
+ train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + + Returns: + Tuple of pipeline_definition_path and parameter_values. 
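+
+  Example (illustrative sketch of how this helper can be invoked; the
+  project, bucket, column and data source values below are placeholders,
+  and the transformations, split_spec and data_source dicts must follow
+  the formats expected by the underlying default pipeline):
+
+    pipeline_path, parameter_values = (
+        get_skip_evaluation_pipeline_and_parameters(
+            project='my-project',
+            location='us-central1',
+            root_dir='gs://my-bucket/pipeline_root',
+            target_column_name='label',
+            prediction_type='classification',
+            optimization_objective='maximize-au-roc',
+            transformations={'auto': {'column_name': 'feature_1'}},
+            split_spec={'fraction_split': {'training_fraction': 0.8,
+                                           'validation_fraction': 0.1,
+                                           'test_fraction': 0.1}},
+            data_source={'csv_data_source': {'csv_filenames':
+                                                 ['gs://my-bucket/train.csv']}},
+            train_budget_milli_node_hours=1000,
+        )
+    )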
+ """ + return get_default_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column_name=target_column_name, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + split_spec=split_spec, + data_source=data_source, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + weight_column_name=weight_column_name, + study_spec_override=study_spec_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + run_evaluation=False, + run_distillation=False, + ) + + +def get_default_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: str = '', + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: str = _EVALUATION_BATCH_PREDICT_MACHINE_TYPE, + evaluation_batch_predict_starting_replica_count: int = _EVALUATION_BATCH_PREDICT_STARTING_REPLICA_COUNT, + evaluation_batch_predict_max_replica_count: int = 
_EVALUATION_BATCH_PREDICT_MAX_REPLICA_COUNT, + evaluation_dataflow_machine_type: str = _EVALUATION_DATAFLOW_MACHINE_TYPE, + evaluation_dataflow_max_num_workers: int = _EVALUATION_DATAFLOW_MAX_NUM_WORKERS, + evaluation_dataflow_disk_size_gb: int = _EVALUATION_DATAFLOW_DISK_SIZE_GB, + run_distillation: bool = False, + distill_batch_predict_machine_type: str = 'n1-standard-16', + distill_batch_predict_starting_replica_count: int = 25, + distill_batch_predict_max_replica_count: int = 25, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular default training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_num_parallel_trials: Number of parallel trails for stage 1. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. 
+ transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. + evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + run_distillation: Whether to run distill in the training pipeline. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction server + for batch predict component in the model distillation. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'This method is deprecated,' + ' please use get_automl_tabular_pipeline_and_parameters instead.' + ) + + if stage_1_num_parallel_trials <= 0: + stage_1_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS + + if stage_2_num_parallel_trials <= 0: + stage_2_num_parallel_trials = _DEFAULT_NUM_PARALLEL_TRAILS + + hours = float(train_budget_milli_node_hours) / 1000.0 + multiplier = stage_1_num_parallel_trials * hours / 500.0 + stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0) + phase_2_rounds = int( + math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials + 0.5 + ) + if phase_2_rounds < 1: + phase_2_rounds = 1 + + # All of magic number "1.3" above is because the trial doesn't always finish + # in time_per_trial. 1.3 is an empirical safety margin here. + stage_1_deadline_secs = int( + hours * 3600.0 - 1.3 * stage_1_single_run_max_secs * phase_2_rounds + ) + + if stage_1_deadline_secs < hours * 3600.0 * 0.5: + stage_1_deadline_secs = int(hours * 3600.0 * 0.5) + # Phase 1 deadline is the same as phase 2 deadline in this case. Phase 2 + # can't finish in time after the deadline is cut, so adjust the time per + # trial to meet the deadline. + stage_1_single_run_max_secs = int( + stage_1_deadline_secs / (1.3 * phase_2_rounds) + ) + + reduce_search_space_mode = 'minimal' + if multiplier > 2: + reduce_search_space_mode = 'regular' + if multiplier > 4: + reduce_search_space_mode = 'full' + + # Stage 2 number of trials is stage_1_num_selected_trials * + # _NUM_FOLDS, which should be equal to phase_2_rounds * + # stage_2_num_parallel_trials. 
Use this information to calculate + # stage_1_num_selected_trials: + stage_1_num_selected_trials = int( + phase_2_rounds * stage_2_num_parallel_trials / _NUM_FOLDS + ) + stage_1_deadline_hours = stage_1_deadline_secs / 3600.0 + + stage_2_deadline_hours = hours - stage_1_deadline_hours + stage_2_single_run_max_secs = stage_1_single_run_max_secs + + parameter_values = { + 'project': project, + 'location': location, + 'root_dir': root_dir, + 'target_column_name': target_column_name, + 'prediction_type': prediction_type, + 'optimization_objective': optimization_objective, + 'transformations': input_dictionary_to_parameter(transformations), + 'split_spec': input_dictionary_to_parameter(split_spec), + 'data_source': input_dictionary_to_parameter(data_source), + 'stage_1_deadline_hours': stage_1_deadline_hours, + 'stage_1_num_parallel_trials': stage_1_num_parallel_trials, + 'stage_1_num_selected_trials': stage_1_num_selected_trials, + 'stage_1_single_run_max_secs': stage_1_single_run_max_secs, + 'reduce_search_space_mode': reduce_search_space_mode, + 'stage_2_deadline_hours': stage_2_deadline_hours, + 'stage_2_num_parallel_trials': stage_2_num_parallel_trials, + 'stage_2_num_selected_trials': stage_2_num_selected_trials, + 'stage_2_single_run_max_secs': stage_2_single_run_max_secs, + 'weight_column_name': weight_column_name, + 'optimization_objective_recall_value': ( + optimization_objective_recall_value + ), + 'optimization_objective_precision_value': ( + optimization_objective_precision_value + ), + 'study_spec_override': input_dictionary_to_parameter(study_spec_override), + 'stage_1_tuner_worker_pool_specs_override': input_dictionary_to_parameter( + stage_1_tuner_worker_pool_specs_override + ), + 'cv_trainer_worker_pool_specs_override': input_dictionary_to_parameter( + cv_trainer_worker_pool_specs_override + ), + 'export_additional_model_without_custom_ops': ( + export_additional_model_without_custom_ops + ), + 'stats_and_example_gen_dataflow_machine_type': ( + stats_and_example_gen_dataflow_machine_type + ), + 'stats_and_example_gen_dataflow_max_num_workers': ( + stats_and_example_gen_dataflow_max_num_workers + ), + 'stats_and_example_gen_dataflow_disk_size_gb': ( + stats_and_example_gen_dataflow_disk_size_gb + ), + 'transform_dataflow_machine_type': transform_dataflow_machine_type, + 'transform_dataflow_max_num_workers': transform_dataflow_max_num_workers, + 'transform_dataflow_disk_size_gb': transform_dataflow_disk_size_gb, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + } + if additional_experiments: + parameter_values.update( + { + 'additional_experiments': input_dictionary_to_parameter( + additional_experiments + ) + } + ) + if run_evaluation: + parameter_values.update({ + 'dataflow_service_account': dataflow_service_account, + 'evaluation_batch_predict_machine_type': ( + evaluation_batch_predict_machine_type + ), + 'evaluation_batch_predict_starting_replica_count': ( + evaluation_batch_predict_starting_replica_count + ), + 'evaluation_batch_predict_max_replica_count': ( + evaluation_batch_predict_max_replica_count + ), + 'evaluation_dataflow_machine_type': evaluation_dataflow_machine_type, + 'evaluation_dataflow_max_num_workers': ( + evaluation_dataflow_max_num_workers + ), + 'evaluation_dataflow_disk_size_gb': evaluation_dataflow_disk_size_gb, + 'run_evaluation': run_evaluation, + }) + if run_distillation: + # All of magic number "1.3" above is because the trial doesn't always 
finish + # in time_per_trial. 1.3 is an empirical safety margin here. + distill_stage_1_deadline_hours = ( + math.ceil( + float(_DISTILL_TOTAL_TRIALS) + / parameter_values['stage_1_num_parallel_trials'] + ) + * parameter_values['stage_1_single_run_max_secs'] + * 1.3 + / 3600.0 + ) + + parameter_values.update({ + 'distill_stage_1_deadline_hours': distill_stage_1_deadline_hours, + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + }) + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'deprecated/default_pipeline.json', + ) + return pipeline_definition_path, parameter_values + + +def get_skip_architecture_search_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + transformations: str, + train_budget_milli_node_hours: float, + stage_1_tuning_result_artifact_uri: str, + stage_2_num_parallel_trials: Optional[int] = None, + stage_2_num_selected_trials: Optional[int] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + predefined_split_key: Optional[str] = None, + timestamp_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + weight_column: Optional[str] = None, + optimization_objective_recall_value: Optional[float] = None, + optimization_objective_precision_value: Optional[float] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: Optional[str] = None, + stats_and_example_gen_dataflow_max_num_workers: Optional[int] = None, + stats_and_example_gen_dataflow_disk_size_gb: Optional[int] = None, + transform_dataflow_machine_type: Optional[str] = None, + transform_dataflow_max_num_workers: Optional[int] = None, + transform_dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: Optional[str] = None, + additional_experiments: Optional[Dict[str, Any]] = None, + dataflow_service_account: Optional[str] = None, + run_evaluation: bool = True, + evaluation_batch_predict_machine_type: Optional[str] = None, + evaluation_batch_predict_starting_replica_count: Optional[int] = None, + evaluation_batch_predict_max_replica_count: Optional[int] = None, + evaluation_batch_explain_machine_type: Optional[str] = None, + evaluation_batch_explain_starting_replica_count: Optional[int] = None, + evaluation_batch_explain_max_replica_count: Optional[int] = None, + evaluation_dataflow_machine_type: Optional[str] = None, + evaluation_dataflow_starting_num_workers: Optional[int] = None, + evaluation_dataflow_max_num_workers: Optional[int] = None, + evaluation_dataflow_disk_size_gb: Optional[int] = None, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that skips architecture search. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. 
+ root_dir: The root GCS directory for the pipeline components. + target_column: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. + stage_1_tuning_result_artifact_uri: The stage 1 tuning result artifact GCS + URI. + stage_2_num_parallel_trials: Number of parallel trails for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + data_source_csv_filenames: The CSV data source. + data_source_bigquery_table_path: The BigQuery data source. + predefined_split_key: The predefined_split column name. + timestamp_split_key: The timestamp_split column name. + stratified_split_key: The stratified_split column name. + training_fraction: The training fraction. + validation_fraction: The validation fraction. + test_fraction: float = The test fraction. + weight_column: The weight column name. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + cv_trainer_worker_pool_specs_override: The dictionary for overriding stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to config private preview features. + dataflow_service_account: Custom service account to run dataflow jobs. + run_evaluation: Whether to run evaluation in the training pipeline. + evaluation_batch_predict_machine_type: The prediction server machine type + for batch predict components during evaluation. 
+ evaluation_batch_predict_starting_replica_count: The initial number of + prediction server for batch predict components during evaluation. + evaluation_batch_predict_max_replica_count: The max number of prediction + server for batch predict components during evaluation. + evaluation_batch_explain_machine_type: The prediction server machine type + for batch explain components during evaluation. + evaluation_batch_explain_starting_replica_count: The initial number of + prediction server for batch explain components during evaluation. + evaluation_batch_explain_max_replica_count: The max number of prediction + server for batch explain components during evaluation. + evaluation_dataflow_machine_type: The dataflow machine type for evaluation + components. + evaluation_dataflow_starting_num_workers: The initial number of Dataflow + workers for evaluation components. + evaluation_dataflow_max_num_workers: The max number of Dataflow workers for + evaluation components. + evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + evaluation components. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + + return get_automl_tabular_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column=target_column, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=None, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + data_source_csv_filenames=data_source_csv_filenames, + data_source_bigquery_table_path=data_source_bigquery_table_path, + predefined_split_key=predefined_split_key, + timestamp_split_key=timestamp_split_key, + stratified_split_key=stratified_split_key, + training_fraction=training_fraction, + validation_fraction=validation_fraction, + test_fraction=test_fraction, + weight_column=weight_column, + study_spec_parameters_override=[], + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override={}, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + dataflow_service_account=dataflow_service_account, + run_evaluation=run_evaluation, + evaluation_batch_predict_machine_type=evaluation_batch_predict_machine_type, + evaluation_batch_predict_starting_replica_count=evaluation_batch_predict_starting_replica_count, + evaluation_batch_predict_max_replica_count=evaluation_batch_predict_max_replica_count, + 
evaluation_batch_explain_machine_type=evaluation_batch_explain_machine_type, + evaluation_batch_explain_starting_replica_count=evaluation_batch_explain_starting_replica_count, + evaluation_batch_explain_max_replica_count=evaluation_batch_explain_max_replica_count, + evaluation_dataflow_machine_type=evaluation_dataflow_machine_type, + evaluation_dataflow_starting_num_workers=evaluation_dataflow_starting_num_workers, + evaluation_dataflow_max_num_workers=evaluation_dataflow_max_num_workers, + evaluation_dataflow_disk_size_gb=evaluation_dataflow_disk_size_gb, + run_distillation=None, + distill_batch_predict_machine_type=None, + distill_batch_predict_starting_replica_count=None, + distill_batch_predict_max_replica_count=None, + stage_1_tuning_result_artifact_uri=stage_1_tuning_result_artifact_uri, + quantiles=[], + enable_probabilistic_inference=False, + ) + + +def get_distill_skip_evaluation_pipeline_and_parameters( + project: str, + location: str, + root_dir: str, + target_column_name: str, + prediction_type: str, + optimization_objective: str, + transformations: Dict[str, Any], + split_spec: Dict[str, Any], + data_source: Dict[str, Any], + train_budget_milli_node_hours: float, + stage_1_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_parallel_trials: int = _DEFAULT_NUM_PARALLEL_TRAILS, + stage_2_num_selected_trials: int = _DEFAULT_STAGE_2_NUM_SELECTED_TRAILS, + weight_column_name: str = '', + study_spec_override: Optional[Dict[str, Any]] = None, + optimization_objective_recall_value: float = -1, + optimization_objective_precision_value: float = -1, + stage_1_tuner_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + cv_trainer_worker_pool_specs_override: Optional[Dict[str, Any]] = None, + export_additional_model_without_custom_ops: bool = False, + stats_and_example_gen_dataflow_machine_type: str = 'n1-standard-16', + stats_and_example_gen_dataflow_max_num_workers: int = 25, + stats_and_example_gen_dataflow_disk_size_gb: int = 40, + transform_dataflow_machine_type: str = 'n1-standard-16', + transform_dataflow_max_num_workers: int = 25, + transform_dataflow_disk_size_gb: int = 40, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + additional_experiments: Optional[Dict[str, Any]] = None, + distill_batch_predict_machine_type: str = 'n1-standard-16', + distill_batch_predict_starting_replica_count: int = 25, + distill_batch_predict_max_replica_count: int = 25, +) -> Tuple[str, Dict[str, Any]]: + """Get the AutoML Tabular training pipeline that distill and skips evaluation. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + root_dir: The root GCS directory for the pipeline components. + target_column_name: The target column name. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + optimization_objective: For binary classification, "maximize-au-roc", + "minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", or + "maximize-recall-at-precision". For multi class classification, + "minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or + "minimize-rmsle". + transformations: The transformations to apply. + split_spec: The split spec. + data_source: The data source. + train_budget_milli_node_hours: The train budget of creating this model, + expressed in milli node hours i.e. 1,000 value in this field means 1 node + hour. 
+ stage_1_num_parallel_trials: Number of parallel trials for stage 1. + stage_2_num_parallel_trials: Number of parallel trials for stage 2. + stage_2_num_selected_trials: Number of selected trials for stage 2. + weight_column_name: The weight column name. + study_spec_override: The dictionary for overriding study spec. The + dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/study.proto#L181. + optimization_objective_recall_value: Required when optimization_objective is + "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective + is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + stage_1_tuner_worker_pool_specs_override: The dictionary for overriding the + stage 1 tuner worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + cv_trainer_worker_pool_specs_override: The dictionary for overriding the stage + cv trainer worker pool spec. The dictionary should be of format + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + export_additional_model_without_custom_ops: Whether to export an additional + model without custom TensorFlow operators. + stats_and_example_gen_dataflow_machine_type: The dataflow machine type for + stats_and_example_gen component. + stats_and_example_gen_dataflow_max_num_workers: The max number of Dataflow + workers for stats_and_example_gen component. + stats_and_example_gen_dataflow_disk_size_gb: Dataflow worker's disk size in + GB for stats_and_example_gen component. + transform_dataflow_machine_type: The dataflow machine type for transform + component. + transform_dataflow_max_num_workers: The max number of Dataflow workers for + transform component. + transform_dataflow_disk_size_gb: Dataflow worker's disk size in GB for + transform component. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name; when empty, + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: The KMS key name. + additional_experiments: Use this field to configure private preview features. + distill_batch_predict_machine_type: The prediction server machine type for + batch predict component in the model distillation. + distill_batch_predict_starting_replica_count: The initial number of + prediction servers for batch predict component in the model distillation. + distill_batch_predict_max_replica_count: The max number of prediction servers + for batch predict component in the model distillation. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + warnings.warn( + 'Deprecated. Please use get_automl_tabular_pipeline_and_parameters.'
+ ) + + return get_default_pipeline_and_parameters( + project=project, + location=location, + root_dir=root_dir, + target_column_name=target_column_name, + prediction_type=prediction_type, + optimization_objective=optimization_objective, + transformations=transformations, + split_spec=split_spec, + data_source=data_source, + train_budget_milli_node_hours=train_budget_milli_node_hours, + stage_1_num_parallel_trials=stage_1_num_parallel_trials, + stage_2_num_parallel_trials=stage_2_num_parallel_trials, + stage_2_num_selected_trials=stage_2_num_selected_trials, + weight_column_name=weight_column_name, + study_spec_override=study_spec_override, + optimization_objective_recall_value=optimization_objective_recall_value, + optimization_objective_precision_value=optimization_objective_precision_value, + stage_1_tuner_worker_pool_specs_override=stage_1_tuner_worker_pool_specs_override, + cv_trainer_worker_pool_specs_override=cv_trainer_worker_pool_specs_override, + export_additional_model_without_custom_ops=export_additional_model_without_custom_ops, + stats_and_example_gen_dataflow_machine_type=stats_and_example_gen_dataflow_machine_type, + stats_and_example_gen_dataflow_max_num_workers=stats_and_example_gen_dataflow_max_num_workers, + stats_and_example_gen_dataflow_disk_size_gb=stats_and_example_gen_dataflow_disk_size_gb, + transform_dataflow_machine_type=transform_dataflow_machine_type, + transform_dataflow_max_num_workers=transform_dataflow_max_num_workers, + transform_dataflow_disk_size_gb=transform_dataflow_disk_size_gb, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + additional_experiments=additional_experiments, + distill_batch_predict_machine_type=distill_batch_predict_machine_type, + distill_batch_predict_starting_replica_count=distill_batch_predict_starting_replica_count, + distill_batch_predict_max_replica_count=distill_batch_predict_max_replica_count, + run_evaluation=False, + run_distillation=True, + ) From 4f74148343397297bbb89aa7d24cedf7874fb70f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 22:55:06 +0000 Subject: [PATCH 020/253] chore(deps): bump fast-xml-parser and @aws-sdk/credential-providers in /frontend/server (#9685) Bumps [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) to 4.2.5 and updates ancestor dependency [@aws-sdk/credential-providers](https://github.com/aws/aws-sdk-js-v3/tree/HEAD/packages/credential-providers). These dependencies need to be updated together. Updates `fast-xml-parser` from 4.2.4 to 4.2.5 - [Release notes](https://github.com/NaturalIntelligence/fast-xml-parser/releases) - [Changelog](https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/CHANGELOG.md) - [Commits](https://github.com/NaturalIntelligence/fast-xml-parser/compare/v4.2.4...v4.2.5) Updates `@aws-sdk/credential-providers` from 3.348.0 to 3.359.0 - [Release notes](https://github.com/aws/aws-sdk-js-v3/releases) - [Changelog](https://github.com/aws/aws-sdk-js-v3/blob/main/packages/credential-providers/CHANGELOG.md) - [Commits](https://github.com/aws/aws-sdk-js-v3/commits/v3.359.0/packages/credential-providers) --- updated-dependencies: - dependency-name: fast-xml-parser dependency-type: indirect - dependency-name: "@aws-sdk/credential-providers" dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- frontend/server/package-lock.json | 812 +++++++++++++++--------------- frontend/server/package.json | 2 +- 2 files changed, 419 insertions(+), 395 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index fcddfb4b4d..07472df459 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -106,11 +106,11 @@ } }, "@aws-sdk/abort-controller": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/abort-controller/-/abort-controller-3.347.0.tgz", - "integrity": "sha512-P/2qE6ntYEmYG4Ez535nJWZbXqgbkJx8CMz7ChEuEg3Gp3dvVYEKg+iEUEvlqQ2U5dWP5J3ehw5po9t86IsVPQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/abort-controller/-/abort-controller-3.357.0.tgz", + "integrity": "sha512-nQYDJon87quPwt2JZJwUN2GFKJnvE5kWb6tZP4xb5biSGUKBqDQo06oYed7yokatCuCMouIXV462aN0fWODtOw==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -122,42 +122,42 @@ } }, "@aws-sdk/client-cognito-identity": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-cognito-identity/-/client-cognito-identity-3.348.0.tgz", - "integrity": "sha512-1fcJFUQTsAXjkaAn/kn9ty790uHbCpukkuqJ/0QNPFYaa6vu93xx7FnzOvRK4XvaojwZ/C+yxp0fNQ+GjXG0vg==", + "version": "3.359.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-cognito-identity/-/client-cognito-identity-3.359.0.tgz", + "integrity": "sha512-zb5hSVuyHOXFTjGiqzPhQ/F6Zg4oLffO/NmC3MyvufUzr8yZYmcQzxNU6Jv6WbVmP01OiU4KAozBLMS7URfgzg==", "requires": { "@aws-crypto/sha256-browser": "3.0.0", "@aws-crypto/sha256-js": "3.0.0", - "@aws-sdk/client-sts": "3.348.0", - "@aws-sdk/config-resolver": "3.347.0", - "@aws-sdk/credential-provider-node": "3.348.0", - "@aws-sdk/fetch-http-handler": "3.347.0", - "@aws-sdk/hash-node": "3.347.0", - "@aws-sdk/invalid-dependency": "3.347.0", - "@aws-sdk/middleware-content-length": "3.347.0", - "@aws-sdk/middleware-endpoint": "3.347.0", - "@aws-sdk/middleware-host-header": "3.347.0", - "@aws-sdk/middleware-logger": "3.347.0", - "@aws-sdk/middleware-recursion-detection": "3.347.0", - "@aws-sdk/middleware-retry": "3.347.0", - "@aws-sdk/middleware-serde": "3.347.0", - "@aws-sdk/middleware-signing": "3.347.0", - "@aws-sdk/middleware-stack": "3.347.0", - "@aws-sdk/middleware-user-agent": "3.347.0", - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/node-http-handler": "3.348.0", - "@aws-sdk/smithy-client": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/client-sts": "3.359.0", + "@aws-sdk/config-resolver": "3.357.0", + "@aws-sdk/credential-provider-node": "3.358.0", + "@aws-sdk/fetch-http-handler": "3.357.0", + "@aws-sdk/hash-node": "3.357.0", + "@aws-sdk/invalid-dependency": "3.357.0", + "@aws-sdk/middleware-content-length": "3.357.0", + "@aws-sdk/middleware-endpoint": "3.357.0", + "@aws-sdk/middleware-host-header": "3.357.0", + "@aws-sdk/middleware-logger": "3.357.0", + "@aws-sdk/middleware-recursion-detection": "3.357.0", + "@aws-sdk/middleware-retry": "3.357.0", + "@aws-sdk/middleware-serde": "3.357.0", + "@aws-sdk/middleware-signing": "3.357.0", + "@aws-sdk/middleware-stack": "3.357.0", + "@aws-sdk/middleware-user-agent": "3.357.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/node-http-handler": "3.357.0", + "@aws-sdk/smithy-client": "3.358.0", + "@aws-sdk/types": 
"3.357.0", + "@aws-sdk/url-parser": "3.357.0", "@aws-sdk/util-base64": "3.310.0", "@aws-sdk/util-body-length-browser": "3.310.0", "@aws-sdk/util-body-length-node": "3.310.0", - "@aws-sdk/util-defaults-mode-browser": "3.347.0", - "@aws-sdk/util-defaults-mode-node": "3.347.0", - "@aws-sdk/util-endpoints": "3.347.0", - "@aws-sdk/util-retry": "3.347.0", - "@aws-sdk/util-user-agent-browser": "3.347.0", - "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-defaults-mode-browser": "3.358.0", + "@aws-sdk/util-defaults-mode-node": "3.358.0", + "@aws-sdk/util-endpoints": "3.357.0", + "@aws-sdk/util-retry": "3.357.0", + "@aws-sdk/util-user-agent-browser": "3.357.0", + "@aws-sdk/util-user-agent-node": "3.357.0", "@aws-sdk/util-utf8": "3.310.0", "@smithy/protocol-http": "^1.0.1", "@smithy/types": "^1.0.0", @@ -172,39 +172,39 @@ } }, "@aws-sdk/client-sso": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.348.0.tgz", - "integrity": "sha512-5S23gVKBl0fhZ96RD8LdPhMKeh8E5fmebyZxMNZuWliSXz++Q9ZCrwPwQbkks3duPOTcKKobs3IoqP82HoXMvQ==", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.358.0.tgz", + "integrity": "sha512-Kc9IsoPIHJfkjDuStyItwQAOpnxw/I9xfF3vvukeN9vkXcRiWeMDhEXACN4L1AYFlU9FHQSRdNwpYTIz7OrD2A==", "requires": { "@aws-crypto/sha256-browser": "3.0.0", "@aws-crypto/sha256-js": "3.0.0", - "@aws-sdk/config-resolver": "3.347.0", - "@aws-sdk/fetch-http-handler": "3.347.0", - "@aws-sdk/hash-node": "3.347.0", - "@aws-sdk/invalid-dependency": "3.347.0", - "@aws-sdk/middleware-content-length": "3.347.0", - "@aws-sdk/middleware-endpoint": "3.347.0", - "@aws-sdk/middleware-host-header": "3.347.0", - "@aws-sdk/middleware-logger": "3.347.0", - "@aws-sdk/middleware-recursion-detection": "3.347.0", - "@aws-sdk/middleware-retry": "3.347.0", - "@aws-sdk/middleware-serde": "3.347.0", - "@aws-sdk/middleware-stack": "3.347.0", - "@aws-sdk/middleware-user-agent": "3.347.0", - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/node-http-handler": "3.348.0", - "@aws-sdk/smithy-client": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/config-resolver": "3.357.0", + "@aws-sdk/fetch-http-handler": "3.357.0", + "@aws-sdk/hash-node": "3.357.0", + "@aws-sdk/invalid-dependency": "3.357.0", + "@aws-sdk/middleware-content-length": "3.357.0", + "@aws-sdk/middleware-endpoint": "3.357.0", + "@aws-sdk/middleware-host-header": "3.357.0", + "@aws-sdk/middleware-logger": "3.357.0", + "@aws-sdk/middleware-recursion-detection": "3.357.0", + "@aws-sdk/middleware-retry": "3.357.0", + "@aws-sdk/middleware-serde": "3.357.0", + "@aws-sdk/middleware-stack": "3.357.0", + "@aws-sdk/middleware-user-agent": "3.357.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/node-http-handler": "3.357.0", + "@aws-sdk/smithy-client": "3.358.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/url-parser": "3.357.0", "@aws-sdk/util-base64": "3.310.0", "@aws-sdk/util-body-length-browser": "3.310.0", "@aws-sdk/util-body-length-node": "3.310.0", - "@aws-sdk/util-defaults-mode-browser": "3.347.0", - "@aws-sdk/util-defaults-mode-node": "3.347.0", - "@aws-sdk/util-endpoints": "3.347.0", - "@aws-sdk/util-retry": "3.347.0", - "@aws-sdk/util-user-agent-browser": "3.347.0", - "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-defaults-mode-browser": "3.358.0", + "@aws-sdk/util-defaults-mode-node": "3.358.0", + "@aws-sdk/util-endpoints": "3.357.0", + "@aws-sdk/util-retry": "3.357.0", + 
"@aws-sdk/util-user-agent-browser": "3.357.0", + "@aws-sdk/util-user-agent-node": "3.357.0", "@aws-sdk/util-utf8": "3.310.0", "@smithy/protocol-http": "^1.0.1", "@smithy/types": "^1.0.0", @@ -219,39 +219,39 @@ } }, "@aws-sdk/client-sso-oidc": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.348.0.tgz", - "integrity": "sha512-tvHpcycx4EALvk38I9rAOdPeHvBDezqIB4lrE7AvnOJljlvCcdQ2gXa9GDrwrM7zuYBIZMBRE/njTMrCwoOdAA==", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.358.0.tgz", + "integrity": "sha512-Gy09fSlhJdGbr8rNNR8EdLaUynB1B34nw8kN1aFT4CdAnjFKxTainqG6Aq4vx64TbMDMhvMYWpNAluvq7UHVhw==", "requires": { "@aws-crypto/sha256-browser": "3.0.0", "@aws-crypto/sha256-js": "3.0.0", - "@aws-sdk/config-resolver": "3.347.0", - "@aws-sdk/fetch-http-handler": "3.347.0", - "@aws-sdk/hash-node": "3.347.0", - "@aws-sdk/invalid-dependency": "3.347.0", - "@aws-sdk/middleware-content-length": "3.347.0", - "@aws-sdk/middleware-endpoint": "3.347.0", - "@aws-sdk/middleware-host-header": "3.347.0", - "@aws-sdk/middleware-logger": "3.347.0", - "@aws-sdk/middleware-recursion-detection": "3.347.0", - "@aws-sdk/middleware-retry": "3.347.0", - "@aws-sdk/middleware-serde": "3.347.0", - "@aws-sdk/middleware-stack": "3.347.0", - "@aws-sdk/middleware-user-agent": "3.347.0", - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/node-http-handler": "3.348.0", - "@aws-sdk/smithy-client": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/config-resolver": "3.357.0", + "@aws-sdk/fetch-http-handler": "3.357.0", + "@aws-sdk/hash-node": "3.357.0", + "@aws-sdk/invalid-dependency": "3.357.0", + "@aws-sdk/middleware-content-length": "3.357.0", + "@aws-sdk/middleware-endpoint": "3.357.0", + "@aws-sdk/middleware-host-header": "3.357.0", + "@aws-sdk/middleware-logger": "3.357.0", + "@aws-sdk/middleware-recursion-detection": "3.357.0", + "@aws-sdk/middleware-retry": "3.357.0", + "@aws-sdk/middleware-serde": "3.357.0", + "@aws-sdk/middleware-stack": "3.357.0", + "@aws-sdk/middleware-user-agent": "3.357.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/node-http-handler": "3.357.0", + "@aws-sdk/smithy-client": "3.358.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/url-parser": "3.357.0", "@aws-sdk/util-base64": "3.310.0", "@aws-sdk/util-body-length-browser": "3.310.0", "@aws-sdk/util-body-length-node": "3.310.0", - "@aws-sdk/util-defaults-mode-browser": "3.347.0", - "@aws-sdk/util-defaults-mode-node": "3.347.0", - "@aws-sdk/util-endpoints": "3.347.0", - "@aws-sdk/util-retry": "3.347.0", - "@aws-sdk/util-user-agent-browser": "3.347.0", - "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-defaults-mode-browser": "3.358.0", + "@aws-sdk/util-defaults-mode-node": "3.358.0", + "@aws-sdk/util-endpoints": "3.357.0", + "@aws-sdk/util-retry": "3.357.0", + "@aws-sdk/util-user-agent-browser": "3.357.0", + "@aws-sdk/util-user-agent-node": "3.357.0", "@aws-sdk/util-utf8": "3.310.0", "@smithy/protocol-http": "^1.0.1", "@smithy/types": "^1.0.0", @@ -266,46 +266,46 @@ } }, "@aws-sdk/client-sts": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.348.0.tgz", - "integrity": "sha512-4iaQlWAOHMEF4xjR/FB/ws3aUjXjJHwbsIcqbdYAxsKijDYYTZYCPc/gM0NE1yi28qlNYNhMzHipe5xTYbU2Eg==", + "version": "3.359.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.359.0.tgz", + "integrity": 
"sha512-zpyui8hXvEUvq8MwzZsm51ni0intvPjtV8dgx10nVJnm605nqrLlAMGqQ1S/UxO7CVmhqWbh5dnGHEc//UJlsw==", "requires": { "@aws-crypto/sha256-browser": "3.0.0", "@aws-crypto/sha256-js": "3.0.0", - "@aws-sdk/config-resolver": "3.347.0", - "@aws-sdk/credential-provider-node": "3.348.0", - "@aws-sdk/fetch-http-handler": "3.347.0", - "@aws-sdk/hash-node": "3.347.0", - "@aws-sdk/invalid-dependency": "3.347.0", - "@aws-sdk/middleware-content-length": "3.347.0", - "@aws-sdk/middleware-endpoint": "3.347.0", - "@aws-sdk/middleware-host-header": "3.347.0", - "@aws-sdk/middleware-logger": "3.347.0", - "@aws-sdk/middleware-recursion-detection": "3.347.0", - "@aws-sdk/middleware-retry": "3.347.0", - "@aws-sdk/middleware-sdk-sts": "3.347.0", - "@aws-sdk/middleware-serde": "3.347.0", - "@aws-sdk/middleware-signing": "3.347.0", - "@aws-sdk/middleware-stack": "3.347.0", - "@aws-sdk/middleware-user-agent": "3.347.0", - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/node-http-handler": "3.348.0", - "@aws-sdk/smithy-client": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", + "@aws-sdk/config-resolver": "3.357.0", + "@aws-sdk/credential-provider-node": "3.358.0", + "@aws-sdk/fetch-http-handler": "3.357.0", + "@aws-sdk/hash-node": "3.357.0", + "@aws-sdk/invalid-dependency": "3.357.0", + "@aws-sdk/middleware-content-length": "3.357.0", + "@aws-sdk/middleware-endpoint": "3.357.0", + "@aws-sdk/middleware-host-header": "3.357.0", + "@aws-sdk/middleware-logger": "3.357.0", + "@aws-sdk/middleware-recursion-detection": "3.357.0", + "@aws-sdk/middleware-retry": "3.357.0", + "@aws-sdk/middleware-sdk-sts": "3.357.0", + "@aws-sdk/middleware-serde": "3.357.0", + "@aws-sdk/middleware-signing": "3.357.0", + "@aws-sdk/middleware-stack": "3.357.0", + "@aws-sdk/middleware-user-agent": "3.357.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/node-http-handler": "3.357.0", + "@aws-sdk/smithy-client": "3.358.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/url-parser": "3.357.0", "@aws-sdk/util-base64": "3.310.0", "@aws-sdk/util-body-length-browser": "3.310.0", "@aws-sdk/util-body-length-node": "3.310.0", - "@aws-sdk/util-defaults-mode-browser": "3.347.0", - "@aws-sdk/util-defaults-mode-node": "3.347.0", - "@aws-sdk/util-endpoints": "3.347.0", - "@aws-sdk/util-retry": "3.347.0", - "@aws-sdk/util-user-agent-browser": "3.347.0", - "@aws-sdk/util-user-agent-node": "3.347.0", + "@aws-sdk/util-defaults-mode-browser": "3.358.0", + "@aws-sdk/util-defaults-mode-node": "3.358.0", + "@aws-sdk/util-endpoints": "3.357.0", + "@aws-sdk/util-retry": "3.357.0", + "@aws-sdk/util-user-agent-browser": "3.357.0", + "@aws-sdk/util-user-agent-node": "3.357.0", "@aws-sdk/util-utf8": "3.310.0", "@smithy/protocol-http": "^1.0.1", "@smithy/types": "^1.0.0", - "fast-xml-parser": "4.2.4", + "fast-xml-parser": "4.2.5", "tslib": "^2.5.0" }, "dependencies": { @@ -317,13 +317,13 @@ } }, "@aws-sdk/config-resolver": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/config-resolver/-/config-resolver-3.347.0.tgz", - "integrity": "sha512-2ja+Sf/VnUO7IQ3nKbDQ5aumYKKJUaTm/BuVJ29wNho8wYHfuf7wHZV0pDTkB8RF5SH7IpHap7zpZAj39Iq+EA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/config-resolver/-/config-resolver-3.357.0.tgz", + "integrity": "sha512-cukfg0nX7Tzx/xFyH5F4Eyb8DA1ITCGtSQv4vnEjgUop+bkzckuGLKEeBcBhyZY+aw+2C9CVwIHwIMhRm0ul5w==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-config-provider": "3.310.0", - 
"@aws-sdk/util-middleware": "3.347.0", + "@aws-sdk/util-middleware": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -335,13 +335,13 @@ } }, "@aws-sdk/credential-provider-cognito-identity": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-cognito-identity/-/credential-provider-cognito-identity-3.348.0.tgz", - "integrity": "sha512-VQQVEP844mAwn5iEIzc/hBOuSzMGBL61sqEGqqgxhe6Sjnd8NfGNlOjV6fOxlUHhOelumqBMXgn6liIZbfcqFQ==", + "version": "3.359.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-cognito-identity/-/credential-provider-cognito-identity-3.359.0.tgz", + "integrity": "sha512-dSuHTucXcjIFsjdOq0HeSk0niWJ7V2hWnwyYh7MCwv43dP9u4V+11boLC6zIrw2Epx++JnIqhggKJAi6l/occw==", "requires": { - "@aws-sdk/client-cognito-identity": "3.348.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/client-cognito-identity": "3.359.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -353,12 +353,12 @@ } }, "@aws-sdk/credential-provider-env": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.347.0.tgz", - "integrity": "sha512-UnEM+LKGpXKzw/1WvYEQsC6Wj9PupYZdQOE+e2Dgy2dqk/pVFy4WueRtFXYDT2B41ppv3drdXUuKZRIDVqIgNQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.357.0.tgz", + "integrity": "sha512-UOecwfqvXgJVqhfWSZ2S44v2Nq2oceW0PQVQp0JAa9opc2rxSVIfyOhPr0yMoPmpyNcP22rgeg6ce70KULYwiA==", "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -370,14 +370,14 @@ } }, "@aws-sdk/credential-provider-imds": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-imds/-/credential-provider-imds-3.347.0.tgz", - "integrity": "sha512-7scCy/DCDRLIhlqTxff97LQWDnRwRXji3bxxMg+xWOTTaJe7PWx+etGSbBWaL42vsBHFShQjSLvJryEgoBktpw==", - "requires": { - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-imds/-/credential-provider-imds-3.357.0.tgz", + "integrity": "sha512-upw/bfsl7/WydT6gM0lBuR4Ipp4fzYm/E3ObFr0Mg5OkgVPt5ZJE+eeFTvwCpDdBSTKs4JfrK6/iEK8A23Q1jQ==", + "requires": { + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/url-parser": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -389,18 +389,18 @@ } }, "@aws-sdk/credential-provider-ini": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.348.0.tgz", - "integrity": "sha512-0IEH5mH/cz2iLyr/+pSa3sCsQcGADiLSEn6yivsXdfz1zDqBiv+ffDoL0+Pvnp+TKf8sA6OlX8PgoMoEBvBdKw==", - "requires": { - "@aws-sdk/credential-provider-env": "3.347.0", - "@aws-sdk/credential-provider-imds": "3.347.0", - "@aws-sdk/credential-provider-process": "3.347.0", - "@aws-sdk/credential-provider-sso": "3.348.0", - "@aws-sdk/credential-provider-web-identity": "3.347.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.358.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.358.0.tgz", + "integrity": "sha512-Blmw4bhGxpaYvPmrbRKAltqnNDDSf6ZegNqJasc5OWvAlHJNvB/hYPmyQN0oFy79BXn7PbBip1QaLWaEhJvpAA==", + "requires": { + "@aws-sdk/credential-provider-env": "3.357.0", + "@aws-sdk/credential-provider-imds": "3.357.0", + "@aws-sdk/credential-provider-process": "3.357.0", + "@aws-sdk/credential-provider-sso": "3.358.0", + "@aws-sdk/credential-provider-web-identity": "3.357.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -412,19 +412,19 @@ } }, "@aws-sdk/credential-provider-node": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.348.0.tgz", - "integrity": "sha512-ngRWphm9e36i58KqVi7Z8WOub+k0cSl+JZaAmgfFm0+dsfBG5uheo598OeiwWV0DqlilvaQZFaMVQgG2SX/tHg==", - "requires": { - "@aws-sdk/credential-provider-env": "3.347.0", - "@aws-sdk/credential-provider-imds": "3.347.0", - "@aws-sdk/credential-provider-ini": "3.348.0", - "@aws-sdk/credential-provider-process": "3.347.0", - "@aws-sdk/credential-provider-sso": "3.348.0", - "@aws-sdk/credential-provider-web-identity": "3.347.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.358.0.tgz", + "integrity": "sha512-iLjyRNOT0ycdLqkzXNW+V2zibVljkLjL8j45FpK6mNrAwc/Ynr7EYuRRp5OuRiiYDO3ZoneAxpBJQ5SqmK2Jfg==", + "requires": { + "@aws-sdk/credential-provider-env": "3.357.0", + "@aws-sdk/credential-provider-imds": "3.357.0", + "@aws-sdk/credential-provider-ini": "3.358.0", + "@aws-sdk/credential-provider-process": "3.357.0", + "@aws-sdk/credential-provider-sso": "3.358.0", + "@aws-sdk/credential-provider-web-identity": "3.357.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -436,13 +436,13 @@ } }, "@aws-sdk/credential-provider-process": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.347.0.tgz", - "integrity": "sha512-yl1z4MsaBdXd4GQ2halIvYds23S67kElyOwz7g8kaQ4kHj+UoYWxz3JVW/DGusM6XmQ9/F67utBrUVA0uhQYyw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.357.0.tgz", + "integrity": "sha512-qFWWilFPsc2hR7O0KIhwcE78w+pVIK+uQR6MQMfdRyxUndgiuCorJwVjedc3yZtmnoELHF34j+m8whTBXv9E7Q==", "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -454,15 +454,15 @@ } }, "@aws-sdk/credential-provider-sso": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.348.0.tgz", - "integrity": "sha512-5cQao705376KgGkLv9xgkQ3T5H7KdNddWuyoH2wDcrHd1BA2Lnrell3Yyh7R6jQeV7uCQE/z0ugUOKhDqNKIqQ==", - "requires": { - "@aws-sdk/client-sso": "3.348.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/token-providers": "3.348.0", - 
"@aws-sdk/types": "3.347.0", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.358.0.tgz", + "integrity": "sha512-hKu5NshKohSDoHaXKyeCW88J8dBt4TMljrL+WswTMifuThO9ptyMq4PCdl4z7CNjIq6zo3ftc/uNf8TY7Ga8+w==", + "requires": { + "@aws-sdk/client-sso": "3.358.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/token-providers": "3.358.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -474,12 +474,12 @@ } }, "@aws-sdk/credential-provider-web-identity": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.347.0.tgz", - "integrity": "sha512-DxoTlVK8lXjS1zVphtz/Ab+jkN/IZor9d6pP2GjJHNoAIIzXfRwwj5C8vr4eTayx/5VJ7GRP91J8GJ2cKly8Qw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.357.0.tgz", + "integrity": "sha512-0KRRAFrXy5HJe2vqnCWCoCS+fQw7IoIj3KQsuURJMW4F+ifisxCgEsh3brJ2LQlN4ElWTRJhlrDHNZ/pd61D4w==", "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -491,23 +491,23 @@ } }, "@aws-sdk/credential-providers": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.348.0.tgz", - "integrity": "sha512-lpq1aHjFyExqD/6L8BK0OaROpCJuhnexGrABYljGI6yaLsyHbQpdE2+Y/WaxuRAK9wyP5s+7KNJ1ZK1ktrk5uQ==", - "requires": { - "@aws-sdk/client-cognito-identity": "3.348.0", - "@aws-sdk/client-sso": "3.348.0", - "@aws-sdk/client-sts": "3.348.0", - "@aws-sdk/credential-provider-cognito-identity": "3.348.0", - "@aws-sdk/credential-provider-env": "3.347.0", - "@aws-sdk/credential-provider-imds": "3.347.0", - "@aws-sdk/credential-provider-ini": "3.348.0", - "@aws-sdk/credential-provider-node": "3.348.0", - "@aws-sdk/credential-provider-process": "3.347.0", - "@aws-sdk/credential-provider-sso": "3.348.0", - "@aws-sdk/credential-provider-web-identity": "3.347.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.359.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.359.0.tgz", + "integrity": "sha512-fwfdqoJihRUbk3KEYv8IfWRFI+cNQfXfVHLtDEcW3tCU8lqsL920YSEjqMuWGrWLp8dWESDX5C3wZugur0lnTQ==", + "requires": { + "@aws-sdk/client-cognito-identity": "3.359.0", + "@aws-sdk/client-sso": "3.358.0", + "@aws-sdk/client-sts": "3.359.0", + "@aws-sdk/credential-provider-cognito-identity": "3.359.0", + "@aws-sdk/credential-provider-env": "3.357.0", + "@aws-sdk/credential-provider-imds": "3.357.0", + "@aws-sdk/credential-provider-ini": "3.358.0", + "@aws-sdk/credential-provider-node": "3.358.0", + "@aws-sdk/credential-provider-process": "3.357.0", + "@aws-sdk/credential-provider-sso": "3.358.0", + "@aws-sdk/credential-provider-web-identity": "3.357.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -519,12 +519,12 @@ } }, "@aws-sdk/eventstream-codec": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/eventstream-codec/-/eventstream-codec-3.347.0.tgz", - "integrity": "sha512-61q+SyspjsaQ4sdgjizMyRgVph2CiW4aAtfpoH69EJFJfTxTR/OqnZ9Jx/3YiYi0ksrvDenJddYodfWWJqD8/w==", + "version": "3.357.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/eventstream-codec/-/eventstream-codec-3.357.0.tgz", + "integrity": "sha512-bqenTHG6GH6aCk/Il+ooWXVVAZuc8lOgVEy9bE2hI49oVqT8zSuXxQB+w1WWyZoAOPcelsjayB1wfPub8VDBxQ==", "requires": { "@aws-crypto/crc32": "3.0.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-hex-encoding": "3.310.0", "tslib": "^2.5.0" }, @@ -537,13 +537,13 @@ } }, "@aws-sdk/fetch-http-handler": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/fetch-http-handler/-/fetch-http-handler-3.347.0.tgz", - "integrity": "sha512-sQ5P7ivY8//7wdxfA76LT1sF6V2Tyyz1qF6xXf9sihPN5Q1Y65c+SKpMzXyFSPqWZ82+SQQuDliYZouVyS6kQQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/fetch-http-handler/-/fetch-http-handler-3.357.0.tgz", + "integrity": "sha512-5sPloTO8y8fAnS/6/Sfp/aVoL9zuhzkLdWBORNzMazdynVNEzWKWCPZ27RQpgkaCDHiXjqUY4kfuFXAGkvFfDQ==", "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/querystring-builder": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/querystring-builder": "3.357.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-base64": "3.310.0", "tslib": "^2.5.0" }, @@ -556,11 +556,11 @@ } }, "@aws-sdk/hash-node": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/hash-node/-/hash-node-3.347.0.tgz", - "integrity": "sha512-96+ml/4EaUaVpzBdOLGOxdoXOjkPgkoJp/0i1fxOJEvl8wdAQSwc3IugVK9wZkCxy2DlENtgOe6DfIOhfffm/g==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/hash-node/-/hash-node-3.357.0.tgz", + "integrity": "sha512-fq3LS9AxHKb7dTZkm6iM1TrGk6XOTZz96iEZPME1+vjiSEXGWuebHt87q92n+KozVGRypn9MId3lHOPBBjygNQ==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-buffer-from": "3.310.0", "@aws-sdk/util-utf8": "3.310.0", "tslib": "^2.5.0" @@ -574,11 +574,11 @@ } }, "@aws-sdk/invalid-dependency": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/invalid-dependency/-/invalid-dependency-3.347.0.tgz", - "integrity": "sha512-8imQcwLwqZ/wTJXZqzXT9pGLIksTRckhGLZaXT60tiBOPKuerTsus2L59UstLs5LP8TKaVZKFFSsjRIn9dQdmQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/invalid-dependency/-/invalid-dependency-3.357.0.tgz", + "integrity": "sha512-HnCYZczf0VdyxMVMMxmA3QJAyyPSFbcMtZzgKbxVTWTG7GKpQe0psWZu/7O2Nk31mKg6vEUdiP1FylqLBsgMOA==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -605,12 +605,12 @@ } }, "@aws-sdk/middleware-content-length": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-content-length/-/middleware-content-length-3.347.0.tgz", - "integrity": "sha512-i4qtWTDImMaDUtwKQPbaZpXsReiwiBomM1cWymCU4bhz81HL01oIxOxOBuiM+3NlDoCSPr3KI6txZSz/8cqXCQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-content-length/-/middleware-content-length-3.357.0.tgz", + "integrity": "sha512-zQOFEyzOXAgN4M54tYNWGxKxnyzY0WwYDTFzh9riJRmxN1hTEKHUKmze4nILIf5rkQmOG4kTf1qmfazjkvZAhw==", "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -622,14 +622,14 @@ } }, "@aws-sdk/middleware-endpoint": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-endpoint/-/middleware-endpoint-3.347.0.tgz", - "integrity": 
"sha512-unF0c6dMaUL1ffU+37Ugty43DgMnzPWXr/Jup/8GbK5fzzWT5NQq6dj9KHPubMbWeEjQbmczvhv25JuJdK8gNQ==", - "requires": { - "@aws-sdk/middleware-serde": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/url-parser": "3.347.0", - "@aws-sdk/util-middleware": "3.347.0", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-endpoint/-/middleware-endpoint-3.357.0.tgz", + "integrity": "sha512-ScJi0SL8X/Lyi0Fp5blg0QN/Z6PoRwV/ZJXd8dQkXSznkbSvJHfqPP0xk/w3GcQ1TKsu5YEPfeYy8ejcq+7Pgg==", + "requires": { + "@aws-sdk/middleware-serde": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/url-parser": "3.357.0", + "@aws-sdk/util-middleware": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -641,12 +641,12 @@ } }, "@aws-sdk/middleware-host-header": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.347.0.tgz", - "integrity": "sha512-kpKmR9OvMlnReqp5sKcJkozbj1wmlblbVSbnQAIkzeQj2xD5dnVR3Nn2ogQKxSmU1Fv7dEroBtrruJ1o3fY38A==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.357.0.tgz", + "integrity": "sha512-HuGLcP7JP1qJ5wGT9GSlEknDaTSnOzHY4T6IPFuvFjAy3PvY5siQNm6+VRqdVS+n6/kzpL3JP5sAVM3aoxHT6Q==", "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -658,11 +658,11 @@ } }, "@aws-sdk/middleware-logger": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.347.0.tgz", - "integrity": "sha512-NYC+Id5UCkVn+3P1t/YtmHt75uED06vwaKyxDy0UmB2K66PZLVtwWbLpVWrhbroaw1bvUHYcRyQ9NIfnVcXQjA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.357.0.tgz", + "integrity": "sha512-dncT3tr+lZ9+duZo52rASgO6AKVwRcsc2/T93gmaYVrJqI6WWAwQ7yML5s72l9ZjQ5LZ+4jjrgtlufavAS0eCg==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -674,12 +674,12 @@ } }, "@aws-sdk/middleware-recursion-detection": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.347.0.tgz", - "integrity": "sha512-qfnSvkFKCAMjMHR31NdsT0gv5Sq/ZHTUD4yQsSLpbVQ6iYAS834lrzXt41iyEHt57Y514uG7F/Xfvude3u4icQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.357.0.tgz", + "integrity": "sha512-AXC54IeDS3jC1dbbkYHML4STvBPcKZ4IJTWdjEK1RCOgqXd0Ze1cE1e21wyj1tM6prF03zLyvpBd+3TS++nqfA==", "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -691,15 +691,15 @@ } }, "@aws-sdk/middleware-retry": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-retry/-/middleware-retry-3.347.0.tgz", - "integrity": "sha512-CpdM+8dCSbX96agy4FCzOfzDmhNnGBM/pxrgIVLm5nkYTLuXp/d7ubpFEUHULr+4hCd5wakHotMt7yO29NFaVw==", - "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/service-error-classification": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/util-middleware": "3.347.0", - "@aws-sdk/util-retry": "3.347.0", + "version": "3.357.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/middleware-retry/-/middleware-retry-3.357.0.tgz", + "integrity": "sha512-ZCbXCYv3nglQqwREYxxpclrnR9MYPAnHlLcC8e9PbApqxGnaZdhoywxoqbgqT3hf/RM7kput4vEHDl1fyymcRQ==", + "requires": { + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/service-error-classification": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/util-middleware": "3.357.0", + "@aws-sdk/util-retry": "3.357.0", "tslib": "^2.5.0", "uuid": "^8.3.2" }, @@ -717,12 +717,12 @@ } }, "@aws-sdk/middleware-sdk-sts": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-sts/-/middleware-sdk-sts-3.347.0.tgz", - "integrity": "sha512-38LJ0bkIoVF3W97x6Jyyou72YV9Cfbml4OaDEdnrCOo0EssNZM5d7RhjMvQDwww7/3OBY/BzeOcZKfJlkYUXGw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-sts/-/middleware-sdk-sts-3.357.0.tgz", + "integrity": "sha512-Ng2VjLrPiL02QOcs1qs9jG2boO4Gn+v3VIbOJLG4zXcfbSq55iIWtlmr2ljfw9vP5aLhWtcODfmKHS5Bp+019Q==", "requires": { - "@aws-sdk/middleware-signing": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/middleware-signing": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -734,11 +734,11 @@ } }, "@aws-sdk/middleware-serde": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-serde/-/middleware-serde-3.347.0.tgz", - "integrity": "sha512-x5Foi7jRbVJXDu9bHfyCbhYDH5pKK+31MmsSJ3k8rY8keXLBxm2XEEg/AIoV9/TUF9EeVvZ7F1/RmMpJnWQsEg==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-serde/-/middleware-serde-3.357.0.tgz", + "integrity": "sha512-bGI4kYuuEsFjlANbyJLyy4AovETnyf/SukgLOG7Qjbua+ZGuzvRhMsk21mBKKGrnsTO4PmtieJo6xClThGAN8g==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -750,15 +750,15 @@ } }, "@aws-sdk/middleware-signing": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-signing/-/middleware-signing-3.347.0.tgz", - "integrity": "sha512-zVBF/4MGKnvhAE/J+oAL/VAehiyv+trs2dqSQXwHou9j8eA8Vm8HS2NdOwpkZQchIxTuwFlqSusDuPEdYFbvGw==", - "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/signature-v4": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/util-middleware": "3.347.0", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-signing/-/middleware-signing-3.357.0.tgz", + "integrity": "sha512-yB9ewEqI6Fw1OrmKFrUypbCqN5ijk06UGPochybamMuPxxkwMT3bnrm7eezsCA+TZbJyKhpffpyobwuv+xGNrA==", + "requires": { + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/signature-v4": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/util-middleware": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -770,9 +770,9 @@ } }, "@aws-sdk/middleware-stack": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-stack/-/middleware-stack-3.347.0.tgz", - "integrity": "sha512-Izidg4rqtYMcKuvn2UzgEpPLSmyd8ub9+LQ2oIzG3mpIzCBITq7wp40jN1iNkMg+X6KEnX9vdMJIYZsPYMCYuQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-stack/-/middleware-stack-3.357.0.tgz", + "integrity": "sha512-nNV+jfwGwmbOGZujAY/U8AW3EbVlxa9DJDLz3TPp/39o6Vu5KEzHJyDDNreo2k9V/TMvV+nOzHafufgPdagv7w==", "requires": { "tslib": "^2.5.0" }, @@ -785,13 +785,13 @@ } }, "@aws-sdk/middleware-user-agent": { - "version": "3.347.0", - "resolved": 
"https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.347.0.tgz", - "integrity": "sha512-wJbGN3OE1/daVCrwk49whhIr9E0j1N4gWwN/wi4WuyYIA+5lMUfVp0aGIOvZR+878DxuFz2hQ4XcZVT4K2WvQw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.357.0.tgz", + "integrity": "sha512-M/CsAXjGblZS4rEbMb0Dn9IXbfq4EjVaTHBfvuILU/dKRppWvjnm2lRtqCZ+LIT3ATbAjA3/dY7dWsjxQWwijA==", "requires": { - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/types": "3.347.0", - "@aws-sdk/util-endpoints": "3.347.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/util-endpoints": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -803,13 +803,13 @@ } }, "@aws-sdk/node-config-provider": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/node-config-provider/-/node-config-provider-3.347.0.tgz", - "integrity": "sha512-faU93d3+5uTTUcotGgMXF+sJVFjrKh+ufW+CzYKT4yUHammyaIab/IbTPWy2hIolcEGtuPeVoxXw8TXbkh/tuw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/node-config-provider/-/node-config-provider-3.357.0.tgz", + "integrity": "sha512-kwBIzKCaW3UWqLdELhy7TcN8itNMOjbzga530nalFILMvn2IxrkdKQhNgxGBXy6QK6kCOtH6OmcrG3/oZkLwig==", "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -821,14 +821,14 @@ } }, "@aws-sdk/node-http-handler": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/node-http-handler/-/node-http-handler-3.348.0.tgz", - "integrity": "sha512-wxdgc4tO5F6lN4wHr0CZ4TyIjDW/ORp4SJZdWYNs2L5J7+/SwqgJY2lxRlGi0i7Md+apAdE3sT3ukVQ/9pVfPg==", - "requires": { - "@aws-sdk/abort-controller": "3.347.0", - "@aws-sdk/protocol-http": "3.347.0", - "@aws-sdk/querystring-builder": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/node-http-handler/-/node-http-handler-3.357.0.tgz", + "integrity": "sha512-uoab4xIJux+Q9hQ9A/vWEAjojtBQ0U4K7xEQVa0BXEv7MHH5zv51H+VtrelU1Ed6hsHq4Sx0bxBMFpbbWhNyjA==", + "requires": { + "@aws-sdk/abort-controller": "3.357.0", + "@aws-sdk/protocol-http": "3.357.0", + "@aws-sdk/querystring-builder": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -840,11 +840,11 @@ } }, "@aws-sdk/property-provider": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/property-provider/-/property-provider-3.347.0.tgz", - "integrity": "sha512-t3nJ8CYPLKAF2v9nIHOHOlF0CviQbTvbFc2L4a+A+EVd/rM4PzL3+3n8ZJsr0h7f6uD04+b5YRFgKgnaqLXlEg==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/property-provider/-/property-provider-3.357.0.tgz", + "integrity": "sha512-im4W0u8WaYxG7J7ko4Xl3OEzK3Mrm1Rz6/txTGe6hTIHlyUISu1ekOQJXK6XYPqNMn8v1G3BiQREoRXUEJFbHg==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -856,11 +856,11 @@ } }, "@aws-sdk/protocol-http": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/protocol-http/-/protocol-http-3.347.0.tgz", - "integrity": "sha512-2YdBhc02Wvy03YjhGwUxF0UQgrPWEy8Iq75pfS42N+/0B/+eWX1aQgfjFxIpLg7YSjT5eKtYOQGlYd4MFTgj9g==", + "version": "3.357.0", + "resolved": 
"https://registry.npmjs.org/@aws-sdk/protocol-http/-/protocol-http-3.357.0.tgz", + "integrity": "sha512-w1JHiI50VEea7duDeAspUiKJmmdIQblvRyjVMOqWA6FIQAyDVuEiPX7/MdQr0ScxhtRQxHbP0I4MFyl7ctRQvA==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -872,11 +872,11 @@ } }, "@aws-sdk/querystring-builder": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-builder/-/querystring-builder-3.347.0.tgz", - "integrity": "sha512-phtKTe6FXoV02MoPkIVV6owXI8Mwr5IBN3bPoxhcPvJG2AjEmnetSIrhb8kwc4oNhlwfZwH6Jo5ARW/VEWbZtg==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-builder/-/querystring-builder-3.357.0.tgz", + "integrity": "sha512-aQcicqB6Y2cNaXPPwunz612a01SMiQQPsdz632F/3Lzn0ua82BJKobHOtaiTUlmVJ5Q4/EAeNfwZgL7tTUNtDQ==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-uri-escape": "3.310.0", "tslib": "^2.5.0" }, @@ -889,11 +889,11 @@ } }, "@aws-sdk/querystring-parser": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-parser/-/querystring-parser-3.347.0.tgz", - "integrity": "sha512-5VXOhfZz78T2W7SuXf2avfjKglx1VZgZgp9Zfhrt/Rq+MTu2D+PZc5zmJHhYigD7x83jLSLogpuInQpFMA9LgA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/querystring-parser/-/querystring-parser-3.357.0.tgz", + "integrity": "sha512-Svvq+atRNP9s2VxiklcUNgCzmt3T5kfs7X2C+yjmxHvOQTPjLNaNGbfC/vhjOK7aoXw0h+lBac48r5ymx1PbQA==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -905,16 +905,16 @@ } }, "@aws-sdk/service-error-classification": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/service-error-classification/-/service-error-classification-3.347.0.tgz", - "integrity": "sha512-xZ3MqSY81Oy2gh5g0fCtooAbahqh9VhsF8vcKjVX8+XPbGC8y+kej82+MsMg4gYL8gRFB9u4hgYbNgIS6JTAvg==" + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/service-error-classification/-/service-error-classification-3.357.0.tgz", + "integrity": "sha512-VuXeL4g5vKO9HjgCZlxmH8Uv1FcqUSjmbPpQkbNtYIDck6u0qzM0rG+n0/1EjyQbPSr3MhW/pkWs5nx2Nljlyg==" }, "@aws-sdk/shared-ini-file-loader": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/shared-ini-file-loader/-/shared-ini-file-loader-3.347.0.tgz", - "integrity": "sha512-Xw+zAZQVLb+xMNHChXQ29tzzLqm3AEHsD8JJnlkeFjeMnWQtXdUfOARl5s8NzAppcKQNlVe2gPzjaKjoy2jz1Q==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/shared-ini-file-loader/-/shared-ini-file-loader-3.357.0.tgz", + "integrity": "sha512-ceyqM4XxQe0Plb/oQAD2t1UOV2Iy4PFe1oAGM8dfJzYrRKu7zvMwru7/WaB3NYq+/mIY6RU+jjhRmjQ3GySVqA==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -926,15 +926,15 @@ } }, "@aws-sdk/signature-v4": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4/-/signature-v4-3.347.0.tgz", - "integrity": "sha512-58Uq1do+VsTHYkP11dTK+DF53fguoNNJL9rHRWhzP+OcYv3/mBMLoS2WPz/x9FO5mBg4ESFsug0I6mXbd36tjw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4/-/signature-v4-3.357.0.tgz", + "integrity": "sha512-itt4/Jh9FqnzK30qIjXFBvM4J7zN4S/AAqsRMnaX7U4f/MV+1YxQHmzimpdMnsCXXs2jqFqKVRu6DewxJ3nbxg==", "requires": { - "@aws-sdk/eventstream-codec": "3.347.0", + "@aws-sdk/eventstream-codec": "3.357.0", "@aws-sdk/is-array-buffer": 
"3.310.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "@aws-sdk/util-hex-encoding": "3.310.0", - "@aws-sdk/util-middleware": "3.347.0", + "@aws-sdk/util-middleware": "3.357.0", "@aws-sdk/util-uri-escape": "3.310.0", "@aws-sdk/util-utf8": "3.310.0", "tslib": "^2.5.0" @@ -948,12 +948,14 @@ } }, "@aws-sdk/smithy-client": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/smithy-client/-/smithy-client-3.347.0.tgz", - "integrity": "sha512-PaGTDsJLGK0sTjA6YdYQzILRlPRN3uVFyqeBUkfltXssvUzkm8z2t1lz2H4VyJLAhwnG5ZuZTNEV/2mcWrU7JQ==", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/smithy-client/-/smithy-client-3.358.0.tgz", + "integrity": "sha512-oqctxWb9yAqCh4ENwUkt9MC01l5uKoy+QCiSUUhQ76k7R3lyGOge9ycyRyoKl+oZWvEpnjZevXQFqEfGzkL7bA==", "requires": { - "@aws-sdk/middleware-stack": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/middleware-stack": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/util-stream": "3.358.0", + "@smithy/types": "^1.0.0", "tslib": "^2.5.0" }, "dependencies": { @@ -965,14 +967,14 @@ } }, "@aws-sdk/token-providers": { - "version": "3.348.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.348.0.tgz", - "integrity": "sha512-nTjoJkUsJUrJTZuqaeMD9PW2//Rdg2HgfDjiyC4jmAXtayWYCi11mqauurMaUHJ3p5qJ8f5xzxm6vBTbrftPag==", - "requires": { - "@aws-sdk/client-sso-oidc": "3.348.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/shared-ini-file-loader": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.358.0.tgz", + "integrity": "sha512-vATKNCwNhCSo2LzvtkIzW9Yp2/aKNR032VPtIWlDtWGGFhkzGi4FPS0VTdfefxz4rqPWfBz53mh54d9xylsWVw==", + "requires": { + "@aws-sdk/client-sso-oidc": "3.358.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/shared-ini-file-loader": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -984,9 +986,9 @@ } }, "@aws-sdk/types": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.347.0.tgz", - "integrity": "sha512-GkCMy79mdjU9OTIe5KT58fI/6uqdf8UmMdWqVHmFJ+UpEzOci7L/uw4sOXWo7xpPzLs6cJ7s5ouGZW4GRPmHFA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.357.0.tgz", + "integrity": "sha512-/riCRaXg3p71BeWnShrai0y0QTdXcouPSM0Cn1olZbzTf7s71aLEewrc96qFrL70XhY4XvnxMpqQh+r43XIL3g==", "requires": { "tslib": "^2.5.0" }, @@ -999,12 +1001,12 @@ } }, "@aws-sdk/url-parser": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/url-parser/-/url-parser-3.347.0.tgz", - "integrity": "sha512-lhrnVjxdV7hl+yCnJfDZOaVLSqKjxN20MIOiijRiqaWGLGEAiSqBreMhL89X1WKCifxAs4zZf9YB9SbdziRpAA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/url-parser/-/url-parser-3.357.0.tgz", + "integrity": "sha512-fAaU6cFsaAba01lCRsRJiYR/LfXvX2wudyEyutBVglE4dWSoSeu3QJNxImIzTBULfbiFhz59++NQ1JUVx88IVg==", "requires": { - "@aws-sdk/querystring-parser": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/querystring-parser": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -1093,12 +1095,12 @@ } }, "@aws-sdk/util-defaults-mode-browser": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-browser/-/util-defaults-mode-browser-3.347.0.tgz", - "integrity": "sha512-+JHFA4reWnW/nMWwrLKqL2Lm/biw/Dzi/Ix54DAkRZ08C462jMKVnUlzAI+TfxQE3YLm99EIa0G7jiEA+p81Qw==", + 
"version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-browser/-/util-defaults-mode-browser-3.358.0.tgz", + "integrity": "sha512-KGfw64wRL/gROLD4Gatda8cUsaNKNhSnx+yDDcG2WkFlFfLr6FHvTijpRxvIM2Jau2ZhcdGzbegLjsFxviTJAA==", "requires": { - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "bowser": "^2.11.0", "tslib": "^2.5.0" }, @@ -1111,15 +1113,15 @@ } }, "@aws-sdk/util-defaults-mode-node": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-node/-/util-defaults-mode-node-3.347.0.tgz", - "integrity": "sha512-A8BzIVhAAZE5WEukoAN2kYebzTc99ZgncbwOmgCCbvdaYlk5tzguR/s+uoT4G0JgQGol/4hAMuJEl7elNgU6RQ==", - "requires": { - "@aws-sdk/config-resolver": "3.347.0", - "@aws-sdk/credential-provider-imds": "3.347.0", - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/property-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-defaults-mode-node/-/util-defaults-mode-node-3.358.0.tgz", + "integrity": "sha512-2C5on0yppDS0xGpFkHRqfrG9TeTq6ive1hPX1V8UCkiI/TBQYl88XCKCKct8zTcejyK9klZUDGI8QQTan2UWkw==", + "requires": { + "@aws-sdk/config-resolver": "3.357.0", + "@aws-sdk/credential-provider-imds": "3.357.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/property-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -1131,11 +1133,11 @@ } }, "@aws-sdk/util-endpoints": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.347.0.tgz", - "integrity": "sha512-/WUkirizeNAqwVj0zkcrqdQ9pUm1HY5kU+qy7xTR0OebkuJauglkmSTMD+56L1JPunWqHhlwCMVRaz5eaJdSEQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.357.0.tgz", + "integrity": "sha512-XHKyS5JClT9su9hDif715jpZiWHQF9gKZXER8tW0gOizU3R9cyWc9EsJ2BRhFNhi7nt/JF/CLUEc5qDx3ETbUw==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -1177,9 +1179,9 @@ } }, "@aws-sdk/util-middleware": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-middleware/-/util-middleware-3.347.0.tgz", - "integrity": "sha512-8owqUA3ePufeYTUvlzdJ7Z0miLorTwx+rNol5lourGQZ9JXsVMo23+yGA7nOlFuXSGkoKpMOtn6S0BT2bcfeiw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-middleware/-/util-middleware-3.357.0.tgz", + "integrity": "sha512-pV1krjZs7BdahZBfsCJMatE8kcor7GFsBOWrQgQDm9T0We5b5xPpOO2vxAD0RytBpY8Ky2ELs/+qXMv7l5fWIA==", "requires": { "tslib": "^2.5.0" }, @@ -1192,11 +1194,33 @@ } }, "@aws-sdk/util-retry": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-retry/-/util-retry-3.347.0.tgz", - "integrity": "sha512-NxnQA0/FHFxriQAeEgBonA43Q9/VPFQa8cfJDuT2A1YZruMasgjcltoZszi1dvoIRWSZsFTW42eY2gdOd0nffQ==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-retry/-/util-retry-3.357.0.tgz", + "integrity": "sha512-SUqYJE9msbuOVq+vnUy+t0LH7XuYNFz66dSF8q6tedsbJK4j8tgya0I1Ct3m06ynGrXDJMaj39I7AXCyW9bjtw==", "requires": { - "@aws-sdk/service-error-classification": "3.347.0", + "@aws-sdk/service-error-classification": "3.357.0", + "tslib": "^2.5.0" + }, + "dependencies": { + "tslib": { + "version": "2.5.3", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.3.tgz", + "integrity": 
"sha512-mSxlJJwl3BMEQCUNnxXBU9jP4JBktcEGhURcPR6VQVlnP0FdDEsIaz0C35dXNGLyRfrATNofF0F5p2KPxQgB+w==" + } + } + }, + "@aws-sdk/util-stream": { + "version": "3.358.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-stream/-/util-stream-3.358.0.tgz", + "integrity": "sha512-zUhpjxAXV2+0eALlTU6uXRYMs10XYpcYzl3NtLRe4wWgnrOOOZnF/t5LQDoKXOfaMdzwZ+i90+PYr+6JQ58+7g==", + "requires": { + "@aws-sdk/fetch-http-handler": "3.357.0", + "@aws-sdk/node-http-handler": "3.357.0", + "@aws-sdk/types": "3.357.0", + "@aws-sdk/util-base64": "3.310.0", + "@aws-sdk/util-buffer-from": "3.310.0", + "@aws-sdk/util-hex-encoding": "3.310.0", + "@aws-sdk/util-utf8": "3.310.0", "tslib": "^2.5.0" }, "dependencies": { @@ -1223,11 +1247,11 @@ } }, "@aws-sdk/util-user-agent-browser": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.347.0.tgz", - "integrity": "sha512-ydxtsKVtQefgbk1Dku1q7pMkjDYThauG9/8mQkZUAVik55OUZw71Zzr3XO8J8RKvQG8lmhPXuAQ0FKAyycc0RA==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.357.0.tgz", + "integrity": "sha512-JHaWlNIUkPNvXkqeDOrqFzAlAgdwZK5mZw7FQnCRvf8tdSogpGZSkuyb9Z6rLD9gC40Srbc2nepO1cFpeMsDkA==", "requires": { - "@aws-sdk/types": "3.347.0", + "@aws-sdk/types": "3.357.0", "bowser": "^2.11.0", "tslib": "^2.5.0" }, @@ -1240,12 +1264,12 @@ } }, "@aws-sdk/util-user-agent-node": { - "version": "3.347.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.347.0.tgz", - "integrity": "sha512-6X0b9qGsbD1s80PmbaB6v1/ZtLfSx6fjRX8caM7NN0y/ObuLoX8LhYnW6WlB2f1+xb4EjaCNgpP/zCf98MXosw==", + "version": "3.357.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.357.0.tgz", + "integrity": "sha512-RdpQoaJWQvcS99TVgSbT451iGrlH4qpWUWFA9U1IRhxOSsmC1hz8ME7xc8nci9SREx/ZlfT3ai6LpoAzAtIEMA==", "requires": { - "@aws-sdk/node-config-provider": "3.347.0", - "@aws-sdk/types": "3.347.0", + "@aws-sdk/node-config-provider": "3.357.0", + "@aws-sdk/types": "3.357.0", "tslib": "^2.5.0" }, "dependencies": { @@ -2538,11 +2562,11 @@ } }, "@smithy/protocol-http": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-1.0.1.tgz", - "integrity": "sha512-9OrEn0WfOVtBNYJUjUAn9AOiJ4lzERCJJ/JeZs8E6yajTGxBaFRxUnNBHiNqoDJVg076hY36UmEnPx7xXrvUSg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-1.1.0.tgz", + "integrity": "sha512-H5y/kZOqfJSqRkwtcAoVbqONmhdXwSgYNJ1Glk5Ry8qlhVVy5qUzD9EklaCH8/XLnoCsLO/F/Giee8MIvaBRkg==", "requires": { - "@smithy/types": "^1.0.0", + "@smithy/types": "^1.1.0", "tslib": "^2.5.0" }, "dependencies": { @@ -2554,9 +2578,9 @@ } }, "@smithy/types": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@smithy/types/-/types-1.0.0.tgz", - "integrity": "sha512-kc1m5wPBHQCTixwuaOh9vnak/iJm21DrSf9UK6yDE5S3mQQ4u11pqAUiKWnlrZnYkeLfAI9UEHj9OaMT1v5Umg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-1.1.0.tgz", + "integrity": "sha512-KzmvisMmuwD2jZXuC9e65JrgsZM97y5NpDU7g347oB+Q+xQLU6hQZ5zFNNbEfwwOJHoOvEVTna+dk1h/lW7alw==", "requires": { "tslib": "^2.5.0" }, @@ -4470,9 +4494,9 @@ "integrity": "sha512-R9bHCvweUxxwkDwhjav5vxpFvdPGlVngtqmx4pIZfSUhM/Q4NiIUHB456BAf+Q1Nwu3HEZYONtu+Rya+af4jiQ==" }, "fast-xml-parser": { - "version": "4.2.4", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.2.4.tgz", - 
"integrity": "sha512-fbfMDvgBNIdDJLdLOwacjFAPYt67tr31H9ZhWSm45CDAxvd0I6WTlSOUo7K2P/K5sA5JgMKG64PI3DMcaFdWpQ==", + "version": "4.2.5", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.2.5.tgz", + "integrity": "sha512-B9/wizE4WngqQftFPmdaMYlXoJlJOYxGQOanC77fq9k8+Z0v5dDSVh+3glErdIROP//s/jgb7ZuxKfB8nVyo0g==", "requires": { "strnum": "^1.0.5" } diff --git a/frontend/server/package.json b/frontend/server/package.json index d36e6b2225..8395b2c49c 100644 --- a/frontend/server/package.json +++ b/frontend/server/package.json @@ -2,7 +2,7 @@ "description": "Frontend webserver package for Kubeflow Pipelines", "main": "server.js", "dependencies": { - "@aws-sdk/credential-providers": "^3.348.0", + "@aws-sdk/credential-providers": "^3.359.0", "@google-cloud/storage": "^2.5.0", "@kubernetes/client-node": "^0.8.2", "axios": ">=0.21.1", From 283f74b4ac8afb7e2b018ff2ff8cc8a9247a1370 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Wed, 12 Jul 2023 09:28:08 -0700 Subject: [PATCH 021/253] chore(frontend): Remove unnecessary getVersionTemplate() call in PipelineDetails (#9712) * Remove unnecessary getVersionTemplate() api call in pipeline details router. * Add an unit test. * Update unit tests. * Separate the unit test for 2 cases (undefined and invalid pipeline_spec) * Add log error for undefined pipeline_spec case. --- frontend/src/pages/PipelineDetails.test.tsx | 225 +++++++++++------- frontend/src/pages/PipelineDetails.tsx | 20 +- .../src/pages/PipelineDetailsTest.test.tsx | 82 ++++--- 3 files changed, 190 insertions(+), 137 deletions(-) diff --git a/frontend/src/pages/PipelineDetails.test.tsx b/frontend/src/pages/PipelineDetails.test.tsx index 0dbdf51185..9c29c69faa 100644 --- a/frontend/src/pages/PipelineDetails.test.tsx +++ b/frontend/src/pages/PipelineDetails.test.tsx @@ -53,10 +53,6 @@ describe('PipelineDetails', () => { const getV2RecurringRunSpy = jest.spyOn(Apis.recurringRunServiceApi, 'getRecurringRun'); const getExperimentSpy = jest.spyOn(Apis.experimentServiceApiV2, 'getExperiment'); const deletePipelineVersionSpy = jest.spyOn(Apis.pipelineServiceApiV2, 'deletePipelineVersion'); - const getPipelineVersionTemplateSpy = jest.spyOn( - Apis.pipelineServiceApi, - 'getPipelineVersionTemplate', - ); const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); let tree: ShallowWrapper | ReactWrapper; @@ -198,10 +194,6 @@ describe('PipelineDetails', () => { display_name: 'test experiment', } as V2beta1Experiment), ); - // getTemplateSpy.mockImplementation(() => Promise.resolve({ template: 'test template' })); - getPipelineVersionTemplateSpy.mockImplementation(() => - Promise.resolve({ template: 'test template' }), - ); createGraphSpy.mockImplementation(() => new graphlib.Graph()); }); @@ -214,7 +206,6 @@ describe('PipelineDetails', () => { it('shows pipeline name in page name, and breadcrumb to go back to pipelines', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -232,7 +223,6 @@ describe('PipelineDetails', () => { await getV1RunSpy; await getV2RunSpy; await createGraphSpy; - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -256,7 +246,6 @@ describe('PipelineDetails', () => { tree = shallow(); await getV1RecurringRunSpy; await getV2RecurringRunSpy; - await getPipelineVersionTemplateSpy; await 
TestUtils.flushPromises(); expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -285,7 +274,6 @@ describe('PipelineDetails', () => { await getV1RunSpy; await getV2RunSpy; await getExperimentSpy; - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -318,7 +306,6 @@ describe('PipelineDetails', () => { await getV1RecurringRunSpy; await getV2RecurringRunSpy; await getExperimentSpy; - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -368,7 +355,12 @@ describe('PipelineDetails', () => { it( 'directly use pipeline_manifest dumped from ' + 'pipeline_spec in run as template string (v2)', async () => { - jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); testV2Run.pipeline_spec = { spec: { arguments: { parameters: [{ name: 'output' }] } } }; tree = shallow(); @@ -386,7 +378,12 @@ describe('PipelineDetails', () => { 'directly use pipeline_manifest dumped from pipeline_spec ' + 'in recurring run as template string (v2)', async () => { - jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); testV2RecurringRun.pipeline_spec = { spec: { arguments: { parameters: [{ name: 'output' }] } }, }; @@ -403,7 +400,12 @@ describe('PipelineDetails', () => { ); it('use pipeline_version_id in run to get pipeline template string (v2)', async () => { - jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); testV2Run.pipeline_version_reference.pipeline_id = 'test-pipeline-id'; testV2Run.pipeline_version_reference.pipeline_version_id = 'test-pipeline-version-id'; @@ -411,7 +413,6 @@ describe('PipelineDetails', () => { await getV1RunSpy; await getV2RunSpy; await getV2PipelineVersionSpy; - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(tree.state('templateString')).toBe( @@ -431,7 +432,12 @@ describe('PipelineDetails', () => { }); it('use pipeline_version_id in recurring run to get pipeline template string (v2)', async () => { - jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); testV2RecurringRun.pipeline_version_reference.pipeline_id = 'test-pipeline-id'; testV2RecurringRun.pipeline_version_reference.pipeline_version_id = 'test-pipeline-version-id'; @@ -439,7 +445,6 @@ describe('PipelineDetails', () => { await getV1RecurringRunSpy; await getV2RecurringRunSpy; await getV2PipelineVersionSpy; - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); expect(tree.state('templateString')).toBe( @@ -455,20 +460,22 @@ describe('PipelineDetails', () => { pipeline_id: 'run-pipeline-id', workflow_manifest: 'not valid JSON', }; - render(); + await waitFor(() => { - expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to 
show error - expect(updateBannerSpy).toHaveBeenLastCalledWith( - expect.objectContaining({ - additionalInfo: 'Unexpected token o in JSON at position 1', - message: `Failed to parse pipeline spec from run with ID: ${ - testV1Run.run!.id - }. Click Details for more information.`, - mode: 'error', - }), - ); + expect(getV1RunSpy).toHaveBeenCalled(); }); + + expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error + expect(updateBannerSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + additionalInfo: 'Unexpected token o in JSON at position 1', + message: `Failed to parse pipeline spec from run with ID: ${ + testV1Run.run!.id + }. Click Details for more information.`, + mode: 'error', + }), + ); }, ); @@ -507,20 +514,6 @@ describe('PipelineDetails', () => { }, ); - it('uses an empty string and does not show error when getTemplate response is empty', async () => { - getPipelineVersionTemplateSpy.mockImplementationOnce(() => Promise.resolve({})); - - tree = shallow(); - await getV1PipelineSpy; - await TestUtils.flushPromises(); - - // No errors - expect(updateBannerSpy).toHaveBeenCalledTimes(1); // Once to clear banner - expect(updateBannerSpy).toHaveBeenLastCalledWith(expect.objectContaining({})); - - expect(tree.state('templateString')).toBe(''); - }); - it('shows load error banner when failing to get pipeline', async () => { TestUtils.makeErrorResponseOnce(getV1PipelineSpy, 'woops'); tree = shallow(); @@ -536,34 +529,114 @@ describe('PipelineDetails', () => { ); }); - it('shows load error banner when failing to get pipeline template', async () => { - TestUtils.makeErrorResponseOnce(getPipelineVersionTemplateSpy, 'woops'); - tree = shallow(); - await getV1PipelineSpy; - await TestUtils.flushPromises(); - expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error + it('shows load error banner when failing to get pipeline version', async () => { + TestUtils.makeErrorResponse(getV2PipelineVersionSpy, 'No pipeline version is found'); + render(); + + await waitFor(() => { + // one for selected Version, another for template string + expect(getV2PipelineVersionSpy).toHaveBeenCalledTimes(2); + // get version error will use empty string as template string, which won't call createGraph() + expect(createGraphSpy).toHaveBeenCalledTimes(0); + }); + + expect(updateBannerSpy).toHaveBeenCalledTimes(3); // Clear banner, show error two times expect(updateBannerSpy).toHaveBeenLastCalledWith( expect.objectContaining({ - additionalInfo: 'woops', - message: 'Cannot retrieve pipeline template. Click Details for more information.', + additionalInfo: 'No pipeline version is found', + message: 'Cannot retrieve pipeline version. 
Click Details for more information.', mode: 'error', }), ); }); + it( + 'uses an empty string and does not show error ' + + 'when pipeline_spec in the response of getPipelineVersion() is undefined', + async () => { + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); + getV2PipelineVersionSpy.mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: undefined, // empty pipeline_spec + }); + render(); + + await waitFor(() => { + expect(getV2PipelineVersionSpy).toHaveBeenCalled(); + // empty template string from empty pipeline_spec and it won't call createGraph() + expect(createGraphSpy).toHaveBeenCalledTimes(0); + }); + + // No errors + expect(updateBannerSpy).toHaveBeenCalledTimes(1); // Once to clear banner + expect(updateBannerSpy).toHaveBeenLastCalledWith(expect.objectContaining({})); + }, + ); + + it( + 'shows no graph error banner ' + + 'when pipeline_spec in the response of getPipelineVersion() is invalid format', + async () => { + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); + getV2PipelineVersionSpy.mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: {}, // invalid pipeline_spec + }); + render(); + + await waitFor(() => { + expect(getV2PipelineVersionSpy).toHaveBeenCalled(); + }); + + expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error + expect(updateBannerSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + additionalInfo: 'Important infomation is missing. Pipeline Spec is invalid.', + message: 'Error: failed to generate Pipeline graph. 
Click Details for more information.', + mode: 'error', + }), + ); + }, + ); + it('shows no graph error banner when failing to parse graph', async () => { - getPipelineVersionTemplateSpy.mockResolvedValue({ - template: ` - apiVersion: argoproj.io/v1alpha1 - kind: Workflow - metadata: - generateName: entry-point-test- - `, + jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { + if (featureKey === features.FeatureKey.V2_ALPHA) { + return true; + } + return false; + }); + getV2PipelineVersionSpy.mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: { + apiVersion: 'argoproj.io/v1alpha1', + kind: 'Workflow', + }, }); TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); - tree = shallow(); - await getPipelineVersionTemplateSpy; - await TestUtils.flushPromises(); + render(); + + await waitFor(() => { + expect(getV2PipelineVersionSpy).toHaveBeenCalled(); + expect(createGraphSpy).toHaveBeenCalled(); + }); + expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error expect(updateBannerSpy).toHaveBeenLastCalledWith( expect.objectContaining({ @@ -574,27 +647,8 @@ describe('PipelineDetails', () => { ); }); - it('clears the error banner when refreshing the page', async () => { - TestUtils.makeErrorResponseOnce(getPipelineVersionTemplateSpy, 'woops'); - tree = shallow(); - await TestUtils.flushPromises(); - - expect(updateBannerSpy).toHaveBeenLastCalledWith( - expect.objectContaining({ - additionalInfo: 'woops', - message: 'Cannot retrieve pipeline template. Click Details for more information.', - mode: 'error', - }), - ); - - (tree.instance() as PipelineDetails).refresh(); - - expect(updateBannerSpy).toHaveBeenLastCalledWith({}); - }); - it('has a new experiment button if it has a pipeline reference', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const newExperimentBtn = instance.getInitialToolbarState().actions[ButtonKeys.NEW_EXPERIMENT]; @@ -603,7 +657,6 @@ describe('PipelineDetails', () => { it("has 'clone run' toolbar button if viewing an embedded pipeline", async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run and create pipeline version, so 2 */ @@ -614,7 +667,6 @@ describe('PipelineDetails', () => { it("has 'clone recurring run' toolbar button if viewing an embedded pipeline from recurring run", async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run and create pipeline version, so 2 */ @@ -662,7 +714,6 @@ describe('PipelineDetails', () => { it("has 'create run' toolbar button if not viewing an embedded pipeline", async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run, create pipeline version, create experiment and delete run, so 4 */ @@ -713,7 +764,6 @@ describe('PipelineDetails', () => { it('clicking new experiment button navigates to new experiment page', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const newExperimentBtn = 
instance.getInitialToolbarState().actions[ButtonKeys.NEW_EXPERIMENT]; @@ -742,7 +792,6 @@ describe('PipelineDetails', () => { it('has a delete button and it is enabled for pipeline version deletion', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const deleteBtn = instance.getInitialToolbarState().actions[ButtonKeys.DELETE_RUN]; @@ -757,7 +806,6 @@ describe('PipelineDetails', () => { }; tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const deleteBtn = instance.getInitialToolbarState().actions[ButtonKeys.DELETE_RUN]; @@ -793,7 +841,6 @@ describe('PipelineDetails', () => { it('calls delete API when delete dialog is confirmed', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN @@ -829,7 +876,6 @@ describe('PipelineDetails', () => { it('shows error dialog if deletion fails', async () => { tree = shallow(); TestUtils.makeErrorResponseOnce(deletePipelineVersionSpy, 'woops'); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN @@ -849,7 +895,6 @@ describe('PipelineDetails', () => { it('shows success snackbar if deletion succeeds', async () => { tree = shallow(); - await getPipelineVersionTemplateSpy; await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN diff --git a/frontend/src/pages/PipelineDetails.tsx b/frontend/src/pages/PipelineDetails.tsx index 412f2b1551..a4627bfac0 100644 --- a/frontend/src/pages/PipelineDetails.tsx +++ b/frontend/src/pages/PipelineDetails.tsx @@ -32,7 +32,7 @@ import * as WorkflowUtils from 'src/lib/v2/WorkflowUtils'; import { convertYamlToV2PipelineSpec } from 'src/lib/v2/WorkflowUtils'; import { classes } from 'typestyle'; import { Workflow } from 'src/third_party/mlmd/argo_template'; -import { ApiGetTemplateResponse, ApiPipeline, ApiPipelineVersion } from 'src/apis/pipeline'; +import { ApiPipeline, ApiPipelineVersion } from 'src/apis/pipeline'; import { V2beta1ListPipelineVersionsResponse, V2beta1Pipeline, @@ -627,24 +627,16 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { pipelineVersion = await Apis.pipelineServiceApiV2.getPipelineVersion(pipelineId, versionId); pipelineSpecInVersion = pipelineVersion.pipeline_spec; } - const templateStrFromSpec = pipelineSpecInVersion - ? JsYaml.safeDump(pipelineSpecInVersion) - : ''; - // Get template string from template or pipeline version template (v1 API) - let templateResponse: ApiGetTemplateResponse; - if (versionId) { - templateResponse = await Apis.pipelineServiceApi.getPipelineVersionTemplate(versionId); + if (pipelineSpecInVersion) { + return JsYaml.safeDump(pipelineSpecInVersion); } else { - templateResponse = await Apis.pipelineServiceApi.getTemplate(pipelineId); + logger.error('No template string is found'); + return ''; } - - return WorkflowUtils.isTemplateV2(templateStrFromSpec) - ? 
templateStrFromSpec - : templateResponse.template || ''; } catch (err) { this.setStateSafe({ graphIsLoading: false }); - await this.showPageError('Cannot retrieve pipeline template.', err); + await this.showPageError('Cannot retrieve pipeline version.', err); logger.error('Cannot retrieve pipeline details.', err); } return ''; diff --git a/frontend/src/pages/PipelineDetailsTest.test.tsx b/frontend/src/pages/PipelineDetailsTest.test.tsx index 1d64c53241..cd5602a13f 100644 --- a/frontend/src/pages/PipelineDetailsTest.test.tsx +++ b/frontend/src/pages/PipelineDetailsTest.test.tsx @@ -14,7 +14,7 @@ * limitations under the License. */ -import { render, screen } from '@testing-library/react'; +import { render, screen, waitFor } from '@testing-library/react'; import { graphlib } from 'dagre'; import * as JsYaml from 'js-yaml'; import React from 'react'; @@ -190,12 +190,6 @@ spec: Apis.pipelineServiceApi.listPipelineVersions = jest .fn() .mockResolvedValue({ versions: [testV1PipelineVersion] }); - Apis.pipelineServiceApi.getTemplate = jest - .fn() - .mockResolvedValue({ template: 'test template' }); - Apis.pipelineServiceApi.getPipelineVersionTemplate = jest - .fn() - .mockResolvedValue({ template: 'test template' }); Apis.runServiceApi.getRun = jest.fn().mockResolvedValue(testV1Run); Apis.pipelineServiceApiV2.getPipeline = jest.fn().mockResolvedValue(testV2Pipeline); @@ -221,9 +215,15 @@ spec: jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { return false; }); - Apis.pipelineServiceApi.getPipelineVersionTemplate = jest - .fn() - .mockResolvedValue({ template: 'bad graph' }); + Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: { + apiVersion: 'bad apiversion', + kind: 'bad kind', + }, + }); const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); @@ -247,19 +247,22 @@ spec: jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { return false; }); - Apis.pipelineServiceApi.getPipelineVersionTemplate = jest.fn().mockResolvedValue({ - template: ` - apiVersion: argoproj.io/v1alpha1 - kind: Workflow - metadata: - generateName: entry-point-test- - `, + Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: { + apiVersion: 'argoproj.io/v1alpha1', + kind: 'Workflow', + }, }); const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); - render(); - await TestUtils.flushPromises(); + + await waitFor(() => { + expect(createGraphSpy).toHaveBeenCalled(); + }); screen.getByTestId('pipeline-detail-v1'); expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error @@ -280,16 +283,21 @@ spec: } return false; }); - const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); - TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', pipeline_spec: JsYaml.safeLoad( 'spec:\n arguments:\n parameters:\n - name: output\n', ), }); - + const createGraphSpy = 
jest.spyOn(StaticGraphParser, 'createGraph'); + TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); render(); - await TestUtils.flushPromises(); + + await waitFor(() => { + expect(createGraphSpy).toHaveBeenCalledTimes(0); + }); screen.getByTestId('pipeline-detail-v1'); expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error @@ -313,13 +321,15 @@ spec: const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); createGraphSpy.mockImplementation(() => new graphlib.Graph()); - Apis.pipelineServiceApi.getTemplate = jest - .fn() - .mockResolvedValue({ template: v1PipelineSpecTemplate }); - Apis.pipelineServiceApi.getPipelineVersionTemplate = jest - .fn() - .mockResolvedValue({ template: v1PipelineSpecTemplate }); - Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({}); + Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: { + apiVersion: 'argoproj.io/v1alpha1', + kind: 'Workflow', + }, + }); render(); await TestUtils.flushPromises(); @@ -333,9 +343,15 @@ spec: jest.spyOn(features, 'isFeatureEnabled').mockImplementation(featureKey => { return false; }); - Apis.pipelineServiceApi.getPipelineVersionTemplate = jest - .fn() - .mockResolvedValue({ template: v1PipelineSpecTemplate }); + Apis.pipelineServiceApiV2.getPipelineVersion = jest.fn().mockResolvedValue({ + display_name: 'test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + pipeline_spec: { + apiVersion: 'argoproj.io/v1alpha1', + kind: 'Workflow', + }, + }); const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); createGraphSpy.mockImplementation(() => new graphlib.Graph()); From 213dd5a1afc436ea207a466f69af3f6bd528b058 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Thu, 13 Jul 2023 00:27:24 +0300 Subject: [PATCH 022/253] fix(backend): Fix performance issue within a mysql request (#9680) * fix(backend): Fix performance issue within a mysql request Reprace the existing mysql request that use nested select, with inner join for better performance. The fix levarage 'SQLDialect' interface, because the new request is not supported by sqllite (used for testing) This interface bridges the difference between mysql (production) and sqlite // (test) Issue: https://github.com/kubeflow/pipelines/issues/6845 Signed-off-by: diana * For sqlite use UPDATE FROM to join the target table against another table in the database in order to help compute Try to generalize the method in SQLDialect interface Signed-off-by: diana * Add unit tests Signed-off-by: diana * Replace nested query for Jobs and start using pre-comit Signed-off-by: diana * Fix: Use LEFT JOIN instead of INNER JOIN Signed-off-by: diana --------- Signed-off-by: diana --- backend/src/apiserver/storage/db.go | 13 ++++ backend/src/apiserver/storage/db_test.go | 30 +++++++- .../src/apiserver/storage/experiment_store.go | 70 +++++-------------- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/backend/src/apiserver/storage/db.go b/backend/src/apiserver/storage/db.go index d15d66c72d..9afd59e4b9 100644 --- a/backend/src/apiserver/storage/db.go +++ b/backend/src/apiserver/storage/db.go @@ -59,6 +59,9 @@ type SQLDialect interface { // Inserts new rows and updates duplicates based on the key column. 
Upsert(query string, key string, overwrite bool, columns ...string) string + + // Updates a table using UPDATE with JOIN (mysql/production) or UPDATE FROM (sqlite/test). + UpdateWithJointOrFrom(targetTable, joinTable, setClause, joinClause, whereClause string) string } // MySQLDialect implements SQLDialect with mysql dialect implementation. @@ -88,6 +91,11 @@ func (d MySQLDialect) IsDuplicateError(err error) bool { return ok && sqlError.Number == mysqlerr.ER_DUP_ENTRY } +// UpdateFromOrJoin TODO(gkcalat): deprecate resource_references table once we migration to v2beta1 is available. +func (d MySQLDialect) UpdateWithJointOrFrom(targetTable, joinTable, setClause, joinClause, whereClause string) string { + return fmt.Sprintf("UPDATE %s LEFT JOIN %s ON %s SET %s WHERE %s", targetTable, joinTable, joinClause, setClause, whereClause) +} + // SQLiteDialect implements SQLDialect with sqlite dialect implementation. type SQLiteDialect struct{} @@ -131,6 +139,11 @@ func (d SQLiteDialect) IsDuplicateError(err error) bool { return ok && sqlError.Code == sqlite3.ErrConstraint } +// UpdateFromOrJoin TODO(gkcalat): deprecate resource_references table once we migration to v2beta1 is available. +func (d SQLiteDialect) UpdateWithJointOrFrom(targetTable, joinTable, setClause, joinClause, whereClause string) string { + return fmt.Sprintf("UPDATE %s SET %s FROM %s WHERE %s AND %s", targetTable, setClause, joinTable, joinClause, whereClause) +} + func NewMySQLDialect() MySQLDialect { return MySQLDialect{} } diff --git a/backend/src/apiserver/storage/db_test.go b/backend/src/apiserver/storage/db_test.go index 256ac4d263..a89568b033 100644 --- a/backend/src/apiserver/storage/db_test.go +++ b/backend/src/apiserver/storage/db_test.go @@ -103,11 +103,35 @@ func TestSQLiteDialect_Upsert(t *testing.T) { } func TestMySQLDialect_Upsert(t *testing.T) { - sqliteDialect := NewMySQLDialect() - actualQuery := sqliteDialect.Upsert(`insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow")`, "namespace", true, []string{"uuid", "name"}...) + mysqlDialect := NewMySQLDialect() + actualQuery := mysqlDialect.Upsert(`insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow")`, "namespace", true, []string{"uuid", "name"}...) expectedQuery := `insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow") ON DUPLICATE KEY UPDATE uuid=VALUES(uuid),name=VALUES(name)` assert.Equal(t, expectedQuery, actualQuery) - actualQuery2 := sqliteDialect.Upsert(`insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow")`, "namespace", false, []string{"uuid", "name"}...) + actualQuery2 := mysqlDialect.Upsert(`insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow")`, "namespace", false, []string{"uuid", "name"}...) expectedQuery2 := `insert into table (uuid, name, namespace) values ("a", "item1", "kubeflow"),("b", "item1", "kubeflow") ON DUPLICATE KEY UPDATE uuid=uuid,name=name` assert.Equal(t, expectedQuery2, actualQuery2) } + +func TestMySQLDialect_UpdateWithJointOrFrom(t *testing.T) { + mysqlDialect := NewMySQLDialect() + actualQuery := mysqlDialect.UpdateWithJointOrFrom( + "target_table", + "other_table", + "State = ?", + "target_table.Name = other_table.Name", + "target_table.status = ?") + expectedQuery := `UPDATE target_table LEFT JOIN other_table ON target_table.Name = other_table.Name SET State = ? 
WHERE target_table.status = ?` + assert.Equal(t, expectedQuery, actualQuery) +} + +func TestSQLiteDialect_UpdateWithJointOrFrom(t *testing.T) { + sqliteDialect := NewSQLiteDialect() + actualQuery := sqliteDialect.UpdateWithJointOrFrom( + "target_table", + "other_table", + "State = ?", + "target_table.Name = other_table.Name", + "target_table.status = ?") + expectedQuery := `UPDATE target_table SET State = ? FROM other_table WHERE target_table.Name = other_table.Name AND target_table.status = ?` + assert.Equal(t, expectedQuery, actualQuery) +} diff --git a/backend/src/apiserver/storage/experiment_store.go b/backend/src/apiserver/storage/experiment_store.go index d254537a28..f394e2f903 100644 --- a/backend/src/apiserver/storage/experiment_store.go +++ b/backend/src/apiserver/storage/experiment_store.go @@ -309,31 +309,14 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { "Failed to create query to archive experiment %s. error: '%v'", expId, err.Error()) } - // TODO(gkcalat): deprecate resource_references table once we migration to v2beta1 is available. - // TODO(jingzhang36): use inner join to replace nested query for better performance. - filteredRunsSql, filteredRunsArgs, err := sq.Select("ResourceUUID"). - From("resource_references as rf"). - Where(sq.And{ - sq.Eq{"rf.ResourceType": model.RunResourceType}, - sq.Eq{"rf.ReferenceUUID": expId}, - sq.Eq{"rf.ReferenceType": model.ExperimentResourceType}, - }).ToSql() - if err != nil { - return util.NewInternalServerError(err, - "Failed to create query to filter the runs in an experiment %s. error: '%v'", expId, err.Error()) - } - updateRunsSql, updateRunsArgs, err := sq. - Update("run_details"). - SetMap(sq.Eq{ - "StorageState": model.StorageStateArchived.ToString(), - }). - Where(sq.NotEq{"StorageState": model.StorageStateArchived.ToString()}). - Where(fmt.Sprintf("UUID in (%s) OR ExperimentUUID = '%s'", filteredRunsSql, expId), filteredRunsArgs...). - ToSql() - if err != nil { - return util.NewInternalServerError(err, - "Failed to create query to archive the runs in an experiment %s. error: '%v'", expId, err.Error()) - } + var updateRunsArgs []interface{} + updateRunsArgs = append(updateRunsArgs, model.StorageStateArchived.ToString(), model.RunResourceType, expId, model.ExperimentResourceType) + updateRunsSQL := s.db.UpdateWithJointOrFrom( + "run_details", + "resource_references", + "StorageState = ?", + "run_details.UUID = resource_references.ResourceUUID", + "resource_references.ResourceType = ? AND resource_references.ReferenceUUID = ? AND resource_references.ReferenceType = ?") updateRunsWithExperimentUUIDSql, updateRunsWithExperimentUUIDArgs, err := sq. Update("run_details"). @@ -348,32 +331,15 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { "Failed to create query to archive the runs in an experiment %s. error: '%v'", expId, err.Error()) } - // TODO(jingzhang36): use inner join to replace nested query for better performance. - filteredJobsSql, filteredJobsArgs, err := sq.Select("ResourceUUID"). - From("resource_references as rf"). - Where(sq.And{ - sq.Eq{"rf.ResourceType": model.JobResourceType}, - sq.Eq{"rf.ReferenceUUID": expId}, - sq.Eq{"rf.ReferenceType": model.ExperimentResourceType}, - }).ToSql() - if err != nil { - return util.NewInternalServerError(err, - "Failed to create query to filter the jobs in an experiment %s. error: '%v'", expId, err.Error()) - } + var updateJobsArgs []interface{} now := s.time.Now().Unix() - updateJobsSql, updateJobsArgs, err := sq. - Update("jobs"). 
- SetMap(sq.Eq{ - "Enabled": false, - "UpdatedAtInSec": now, - }). - Where(sq.Eq{"Enabled": true}). - Where(fmt.Sprintf("UUID in (%s) OR ExperimentUUID = '%s'", filteredJobsSql, expId), filteredJobsArgs...). - ToSql() - if err != nil { - return util.NewInternalServerError(err, - "Failed to create query to archive the jobs in an experiment %s. error: '%v'", expId, err.Error()) - } + updateJobsArgs = append(updateJobsArgs, false, now, model.JobResourceType, expId, model.ExperimentResourceType) + updateJobsSQL := s.db.UpdateWithJointOrFrom( + "jobs", + "resource_references", + "Enabled = ?, UpdatedAtInSec = ?", + "jobs.UUID = resource_references.ResourceUUID", + "resource_references.ResourceType = ? AND resource_references.ReferenceUUID = ? AND resource_references.ReferenceType = ?") // In a single transaction, we update experiments, run_details and jobs tables. tx, err := s.db.Begin() @@ -388,7 +354,7 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { "Failed to archive experiment %s. error: '%v'", expId, err.Error()) } - _, err = tx.Exec(updateRunsSql, updateRunsArgs...) + _, err = tx.Exec(updateRunsSQL, updateRunsArgs...) if err != nil { tx.Rollback() return util.NewInternalServerError(err, @@ -402,7 +368,7 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { "Failed to archive runs with ExperimentUUID being %s. error: '%v'", expId, err.Error()) } - _, err = tx.Exec(updateJobsSql, updateJobsArgs...) + _, err = tx.Exec(updateJobsSQL, updateJobsArgs...) if err != nil { tx.Rollback() return util.NewInternalServerError(err, From 49bfda90cadc6437173909dea5b02cffc7cd7e66 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Wed, 12 Jul 2023 16:49:24 -0700 Subject: [PATCH 023/253] feat(manifests): Add a postgresql deployment manifest in third-party folder (#9581) --- .../third-party/postgresql/README.md | 15 +++++++ .../postgresql/base/kustomization.yaml | 8 ++++ .../postgresql/base/pg-deployment.yaml | 42 +++++++++++++++++++ .../third-party/postgresql/base/pg-pvc.yaml | 12 ++++++ .../postgresql/base/pg-secret.yaml | 7 ++++ .../postgresql/base/pg-service.yaml | 12 ++++++ .../postgresql/base/pg-serviceaccount.yaml | 5 +++ 7 files changed, 101 insertions(+) create mode 100644 manifests/kustomize/third-party/postgresql/README.md create mode 100644 manifests/kustomize/third-party/postgresql/base/kustomization.yaml create mode 100644 manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml create mode 100644 manifests/kustomize/third-party/postgresql/base/pg-pvc.yaml create mode 100644 manifests/kustomize/third-party/postgresql/base/pg-secret.yaml create mode 100644 manifests/kustomize/third-party/postgresql/base/pg-service.yaml create mode 100644 manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml diff --git a/manifests/kustomize/third-party/postgresql/README.md b/manifests/kustomize/third-party/postgresql/README.md new file mode 100644 index 0000000000..5096dddf30 --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/README.md @@ -0,0 +1,15 @@ +## Build PostgreSQL yaml + +```bash +# In this folder of manifests/kustomize/third-party/postgresql +rm -rf build +mkdir buidl +kustomize build ./base -o build +``` + +## Deploy PostgreSQL container + +```bash +# In this folder of manifests/kustomize/third-party/postgresql +kubectl apply -f build +``` \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/kustomization.yaml 
b/manifests/kustomize/third-party/postgresql/base/kustomization.yaml new file mode 100644 index 0000000000..cc22ca5f7e --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- pg-deployment.yaml +- pg-pvc.yaml +- pg-service.yaml +- pg-secret.yaml +- pg-serviceaccount.yaml \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml b/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml new file mode 100644 index 0000000000..9979be0238 --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres-deployment + labels: + app: postgres +spec: + replicas: 2 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + serviceAccountName: postgresql + containers: + - image: postgres:14.7-alpine3.17 + name: postgres + env: + - name: POSTGRES_DB + value: postgres + - name: POSTGRES_USER + value: user + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret + key: root_password + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + ports: + - containerPort: 5432 + name: postgres + volumeMounts: + - name: postgres-stateful-data + mountPath: /var/lib/postgresql/data + volumes: + - name: postgres-stateful-data + persistentVolumeClaim: + claimName: postgres-pvc \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/pg-pvc.yaml b/manifests/kustomize/third-party/postgresql/base/pg-pvc.yaml new file mode 100644 index 0000000000..b59616a73c --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/pg-pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: postgres-pvc + labels: + app: postgres +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/pg-secret.yaml b/manifests/kustomize/third-party/postgresql/base/pg-secret.yaml new file mode 100644 index 0000000000..22648465f3 --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/pg-secret.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Secret +metadata: + name: postgres-secret +type: Opaque +data: + root_password: password \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/pg-service.yaml b/manifests/kustomize/third-party/postgresql/base/pg-service.yaml new file mode 100644 index 0000000000..3e365fbdd4 --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/pg-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgres-service + labels: + app: postgres +spec: + ports: + - port: 5432 + type: LoadBalancer + selector: + app: postgres \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml b/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml new file mode 100644 index 0000000000..87dacc7a3d --- /dev/null +++ b/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: postgresql + From e532039d57969e24cbd1603aef6f8198a468edbc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 13 Jul 2023 18:25:25 +0000 
Subject: [PATCH 024/253] chore(deps): bump semver from 5.7.1 to 5.7.2 in /frontend/server (#9729) Bumps [semver](https://github.com/npm/node-semver) from 5.7.1 to 5.7.2. - [Release notes](https://github.com/npm/node-semver/releases) - [Changelog](https://github.com/npm/node-semver/blob/v5.7.2/CHANGELOG.md) - [Commits](https://github.com/npm/node-semver/compare/v5.7.1...v5.7.2) --- updated-dependencies: - dependency-name: semver dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- frontend/server/package-lock.json | 42 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index 07472df459..44be64ff5d 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -5242,9 +5242,9 @@ }, "dependencies": { "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true } } @@ -5276,9 +5276,9 @@ } }, "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true }, "supports-color": { @@ -6064,9 +6064,9 @@ "dev": true }, "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true }, "supports-color": { @@ -7316,9 +7316,9 @@ } }, "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true }, "supports-color": { @@ -7416,9 +7416,9 @@ } }, "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": "sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true }, "supports-color": { @@ -8164,9 +8164,9 @@ }, "dependencies": { "semver": { - "version": "6.3.0", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz", - "integrity": 
"sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==", + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true, "optional": true }, @@ -8834,9 +8834,9 @@ } }, "semver": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", - "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==" + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", + "integrity": "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==" }, "send": { "version": "0.17.2", From 5a0e2bdef086cdcb96c7a33ff6d883cd063cb375 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 13 Jul 2023 15:27:26 -0700 Subject: [PATCH 025/253] fix(components): fix parent_model parameter of ModelUploadOp ignored PiperOrigin-RevId: 547942553 --- .../v1/model/upload_model/component.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py index 71ea87e032..412cd5c94d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py @@ -142,10 +142,10 @@ def model_upload( '{{$}}', IfPresentPlaceholder( input_name='parent_model', - then=ConcatPlaceholder([ - '--parent_model_name ', + then=[ + '--parent_model_name', parent_model.metadata['resourceName'], - ]), + ], ), ], ) From 474cc454d33757d6da5e9e85776cbbfa87ffd94f Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 13 Jul 2023 16:04:22 -0700 Subject: [PATCH 026/253] chore(components): release GCPC v2.1.0 PiperOrigin-RevId: 547954593 --- components/google-cloud/RELEASE.md | 6 ++++++ components/google-cloud/docs/source/conf.py | 1 + .../google_cloud_pipeline_components/container/Dockerfile | 2 +- .../container/cloudbuild.yaml | 2 +- .../google_cloud_pipeline_components/version.py | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 450778b9fe..c02e27f5a7 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,11 @@ ## Upcoming release +## Release 2.1.0 +* Add AutoML tabular and forecasting components to `preview` namespace +* Fix bug where `parent_model` parameter of `ModelUploadOp` ignored +* Fix circular import bug for model evaluation components +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.0.0 Google Cloud Pipeline Components v2 is generally available! 
diff --git a/components/google-cloud/docs/source/conf.py b/components/google-cloud/docs/source/conf.py index a169f2ee86..db2433ef96 100644 --- a/components/google-cloud/docs/source/conf.py +++ b/components/google-cloud/docs/source/conf.py @@ -119,6 +119,7 @@ def __getitem__(self, type_) -> str: '2.0.0b4', '2.0.0b5', '2.0.0', + '2.1.0', ] # The short X.Y version diff --git a/components/google-cloud/google_cloud_pipeline_components/container/Dockerfile b/components/google-cloud/google_cloud_pipeline_components/container/Dockerfile index 015614e3f8..0df9dedf2f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/Dockerfile +++ b/components/google-cloud/google_cloud_pipeline_components/container/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.0.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml b/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml index f4825cc5bf..0456b92833 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml @@ -1,5 +1,5 @@ steps: - name: 'gcr.io/kaniko-project/executor:latest' args: - - --destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:2.0.0 + - --destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:2.1.0 - --cache=false diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 3e9709ecb4..50bf7a59b5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. """Google Cloud Pipeline Components version.""" -__version__ = "2.0.0" +__version__ = "2.1.0" From 66742416e17b7deca8dd8e2b09dacac8ea00ba2c Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 17 Jul 2023 12:18:45 -0700 Subject: [PATCH 027/253] docs(components): point GCPC README at v2 reference documentation PiperOrigin-RevId: 548764838 --- components/google-cloud/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/README.md b/components/google-cloud/README.md index b7297b7ab0..ffe894c026 100644 --- a/components/google-cloud/README.md +++ b/components/google-cloud/README.md @@ -12,7 +12,7 @@ Please see the [Google Cloud Pipeline Components user guide](https://cloud.google.com/vertex-ai/docs/pipelines/components-introduction). ### API documentation -Please see the [Google Cloud Pipeline Components API reference documentation](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-1.0.41/). 
+Please see the [Google Cloud Pipeline Components API reference documentation](https://google-cloud-pipeline-components.readthedocs.io/). ### Release details For details about previous and upcoming releases, please see the [release notes](https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/RELEASE.md). From 1045c9551763d03ba9e638753fc217638b8b5b0b Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 17 Jul 2023 15:54:37 -0400 Subject: [PATCH 028/253] chore(sdk): resolve mypy finding (#9742) --- sdk/python/kfp/compiler/pipeline_spec_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 3d63a78c4d..b276f892c1 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -1770,7 +1770,7 @@ def convert_pipeline_outputs_to_dict( def write_pipeline_spec_to_file( pipeline_spec: pipeline_spec_pb2.PipelineSpec, - pipeline_description: str, + pipeline_description: Union[str, None], platform_spec: pipeline_spec_pb2.PlatformSpec, package_path: str, ) -> None: From d9a3eca05ee62690c14a03e9000f5d4676d2c61a Mon Sep 17 00:00:00 2001 From: gkcalat <35157096+gkcalat@users.noreply.github.com> Date: Mon, 17 Jul 2023 18:42:21 -0700 Subject: [PATCH 029/253] chose(test): Set image-type to cos_containerd and hard-code GKE version to 1.25 in tests (#9743) * Set image-type to cos_containerd in tests Reverting the previous change #9705 which removed the `--image-type`. /hold Check if this resolves [e2e test failures](https://oss.gprow.dev/view/gs/oss-prow/pr-logs/pull/kubeflow_pipelines/9730/kubeflow-pipeline-e2e-test/1680988618279096320#1:build-log.txt%3A2) in #9730 * Update deploy-cluster.sh * Set cluster-version to 1.25.10-gke.1200 --- test/deploy-cluster.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/deploy-cluster.sh b/test/deploy-cluster.sh index 50cbdab53e..0662dd6b04 100755 --- a/test/deploy-cluster.sh +++ b/test/deploy-cluster.sh @@ -88,7 +88,11 @@ else fi # Use regular release channel to keep up with newly created clusters in Google Cloud Marketplace. # TODO(#9706): Switch back to regular channel once we stop building test images via dind. - gcloud container clusters create ${TEST_CLUSTER} --release-channel stable ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} + # Temporarily use cos as image type until docker dependencies gets removed. + # reference: https://github.com/kubeflow/pipelines/issues/6696 + # Hard-coded GKE to 1.25.10-gke.1200 (the latest 1.25 in STABLE channel). 
Reference: + # https://github.com/kubeflow/pipelines/issues/9704#issuecomment-1622310358 + gcloud container clusters create ${TEST_CLUSTER} --image-type cos_containerd --release-channel stable --cluster-version 1.25.10-gke.1200 ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} fi gcloud container clusters get-credentials ${TEST_CLUSTER} From de89b1c6580d6efb69a4234d7d490ac24db9b3c9 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Tue, 18 Jul 2023 17:02:22 +0300 Subject: [PATCH 030/253] fix(backend) Replace LEFT with INNER JOIN when Archive Experiment (#9730) * fix(backend) Replace LEFT with INNER JOIN when Archive Experiment Signed-off-by: diana * Replace FEFT JOIN With INNER JOIN Signed-off-by: diana --------- Signed-off-by: diana --- backend/src/apiserver/storage/db.go | 4 +--- backend/src/apiserver/storage/db_test.go | 2 +- backend/src/apiserver/storage/experiment_store.go | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/src/apiserver/storage/db.go b/backend/src/apiserver/storage/db.go index 9afd59e4b9..f6ff3e7bf1 100644 --- a/backend/src/apiserver/storage/db.go +++ b/backend/src/apiserver/storage/db.go @@ -91,9 +91,8 @@ func (d MySQLDialect) IsDuplicateError(err error) bool { return ok && sqlError.Number == mysqlerr.ER_DUP_ENTRY } -// UpdateFromOrJoin TODO(gkcalat): deprecate resource_references table once we migration to v2beta1 is available. func (d MySQLDialect) UpdateWithJointOrFrom(targetTable, joinTable, setClause, joinClause, whereClause string) string { - return fmt.Sprintf("UPDATE %s LEFT JOIN %s ON %s SET %s WHERE %s", targetTable, joinTable, joinClause, setClause, whereClause) + return fmt.Sprintf("UPDATE %s INNER JOIN %s ON %s SET %s WHERE %s", targetTable, joinTable, joinClause, setClause, whereClause) } // SQLiteDialect implements SQLDialect with sqlite dialect implementation. @@ -139,7 +138,6 @@ func (d SQLiteDialect) IsDuplicateError(err error) bool { return ok && sqlError.Code == sqlite3.ErrConstraint } -// UpdateFromOrJoin TODO(gkcalat): deprecate resource_references table once we migration to v2beta1 is available. func (d SQLiteDialect) UpdateWithJointOrFrom(targetTable, joinTable, setClause, joinClause, whereClause string) string { return fmt.Sprintf("UPDATE %s SET %s FROM %s WHERE %s AND %s", targetTable, setClause, joinTable, joinClause, whereClause) } diff --git a/backend/src/apiserver/storage/db_test.go b/backend/src/apiserver/storage/db_test.go index a89568b033..c68510c2e7 100644 --- a/backend/src/apiserver/storage/db_test.go +++ b/backend/src/apiserver/storage/db_test.go @@ -120,7 +120,7 @@ func TestMySQLDialect_UpdateWithJointOrFrom(t *testing.T) { "State = ?", "target_table.Name = other_table.Name", "target_table.status = ?") - expectedQuery := `UPDATE target_table LEFT JOIN other_table ON target_table.Name = other_table.Name SET State = ? WHERE target_table.status = ?` + expectedQuery := `UPDATE target_table INNER JOIN other_table ON target_table.Name = other_table.Name SET State = ? 
WHERE target_table.status = ?` assert.Equal(t, expectedQuery, actualQuery) } diff --git a/backend/src/apiserver/storage/experiment_store.go b/backend/src/apiserver/storage/experiment_store.go index f394e2f903..febfa8b2d0 100644 --- a/backend/src/apiserver/storage/experiment_store.go +++ b/backend/src/apiserver/storage/experiment_store.go @@ -311,6 +311,7 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { var updateRunsArgs []interface{} updateRunsArgs = append(updateRunsArgs, model.StorageStateArchived.ToString(), model.RunResourceType, expId, model.ExperimentResourceType) + // TODO(gkcalat): deprecate resource_references table once we migrate to v2beta1 and switch to filtering on Run's 'experiment_id' instead. updateRunsSQL := s.db.UpdateWithJointOrFrom( "run_details", "resource_references", @@ -334,6 +335,7 @@ func (s *ExperimentStore) ArchiveExperiment(expId string) error { var updateJobsArgs []interface{} now := s.time.Now().Unix() updateJobsArgs = append(updateJobsArgs, false, now, model.JobResourceType, expId, model.ExperimentResourceType) + // TODO(gkcalat): deprecate resource_references table once we migrate to v2beta1 and switch to filtering on Job's `experiment_id' instead. updateJobsSQL := s.db.UpdateWithJointOrFrom( "jobs", "resource_references", From 81e989a8c7b9cd715a91f855eb85175c054df7a9 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 18 Jul 2023 11:08:01 -0700 Subject: [PATCH 031/253] test(components): INTERNAL PiperOrigin-RevId: 549055602 --- .../component_info.textproto | 17 +++++++++++++++++ .../component_reference_doc.md | 3 +++ .../component_release_note.md | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto new file mode 100644 index 0000000000..c8803fcce2 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto @@ -0,0 +1,17 @@ +# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto +# proto-message: GalleryMetadata + +# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto +upload_template_request { + vertex_template_gallery_metadata { + vertex_gallery_categorization { + type: COMPONENT + integration: BIG_QUERY + } + display_name: "Big Query - Query Job Component" + } + description: "Launch a BigQuery query job and waits for it to finish." +} + +# Which python function defines the pipeline. 
+pipeline_func: "google_cloud_pipeline_components.v1.bigquery.query_job.BigqueryQueryJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md new file mode 100644 index 0000000000..ed16f716a7 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md @@ -0,0 +1,3 @@ +Launch a BigQuery SQL query job and waits for it to finish. + +Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md new file mode 100644 index 0000000000..424f4d4ae6 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md @@ -0,0 +1,3 @@ +2023-07-17 + +Release to Vertex AI Pipelines Template Gallery. \ No newline at end of file From fdb25f6e6d6bce988388979f45b51608878fbe43 Mon Sep 17 00:00:00 2001 From: rd-pong Date: Tue, 18 Jul 2023 11:37:22 -0700 Subject: [PATCH 032/253] test(components): fix k8s_client 401 unauthorized error (#9749) * Initiate a new k8s client when calling _get_resource * Remove k8s_client for methods that use _get_resource * Initiate a new k8s client when calling _delete_resource --- .../component_tests/test_v2_hosting.py | 16 +++++------ ..._v2_monitoring_job_definition_component.py | 5 ++-- .../test_v2_monitoring_schedule_component.py | 19 +++++-------- .../test_v2_train_component.py | 6 ++--- .../integration_tests/utils/ack_utils.py | 27 ++++++++++--------- 5 files changed, 30 insertions(+), 43 deletions(-) diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_hosting.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_hosting.py index 65fd738e48..317a841537 100644 --- a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_hosting.py +++ b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_hosting.py @@ -28,7 +28,6 @@ def test_create_v2_endpoint(kfp_client, experiment_id, boto3_session, test_file_ shallow_canary=True, ) ) - k8s_client = ack_utils.k8s_client() input_model_name = utils.generate_random_string(10) + "-v2-model" input_endpoint_config_name = ( utils.generate_random_string(10) + "-v2-endpoint-config" @@ -63,7 +62,7 @@ def test_create_v2_endpoint(kfp_client, experiment_id, boto3_session, test_file_ ) endpoint_describe = ack_utils._get_resource( - k8s_client, input_endpoint_name, "endpoints" + input_endpoint_name, "endpoints" ) outputs = { @@ -142,11 +141,11 @@ def test_create_v2_endpoint(kfp_client, experiment_id, boto3_session, test_file_ ) utils.remove_dir(download_dir) finally: - ack_utils._delete_resource(k8s_client, input_endpoint_name, "endpoints") + ack_utils._delete_resource(input_endpoint_name, "endpoints") ack_utils._delete_resource( - k8s_client, input_endpoint_config_name, "endpointconfigs" + input_endpoint_config_name, "endpointconfigs" ) - ack_utils._delete_resource(k8s_client, input_model_name, "models") + 
ack_utils._delete_resource(input_model_name, "models") @pytest.mark.v2 @@ -159,7 +158,6 @@ def test_terminate_v2_endpoint(kfp_client, experiment_id): os.path.join(download_dir, "config.yaml"), ) ) - k8s_client = ack_utils.k8s_client() input_model_name = utils.generate_random_string(10) + "-v2-model" input_endpoint_config_name = ( utils.generate_random_string(10) + "-v2-endpoint-config" @@ -181,7 +179,6 @@ def test_terminate_v2_endpoint(kfp_client, experiment_id): "running", ) assert ack_utils.wait_for_condition( - k8s_client, input_endpoint_name, ack_utils.does_endpoint_exist, wait_periods=12, @@ -189,7 +186,6 @@ def test_terminate_v2_endpoint(kfp_client, experiment_id): ) kfp_client_utils.terminate_run(kfp_client, run_id) assert ack_utils.wait_for_condition( - k8s_client, input_endpoint_name, ack_utils.is_endpoint_deleted, wait_periods=20, @@ -197,6 +193,6 @@ def test_terminate_v2_endpoint(kfp_client, experiment_id): ) finally: ack_utils._delete_resource( - k8s_client, input_endpoint_config_name, "endpointconfigs" + input_endpoint_config_name, "endpointconfigs" ) - ack_utils._delete_resource(k8s_client, input_model_name, "models") + ack_utils._delete_resource(input_model_name, "models") diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_job_definition_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_job_definition_component.py index 2e8eed1f42..8a1dcf279a 100644 --- a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_job_definition_component.py +++ b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_job_definition_component.py @@ -33,7 +33,6 @@ def test_job_definitions(kfp_client, experiment_id, test_file_dir, deploy_endpoi os.path.join(download_dir, "config.yaml"), ) ) - k8s_client = ack_utils.k8s_client() job_definition_name = ( utils.generate_random_string(10) + "-v2-" + test_params["TestName"] ) @@ -56,7 +55,7 @@ def test_job_definitions(kfp_client, experiment_id, test_file_dir, deploy_endpoi # Verify if the job definition CR is created job_definition_describe = ack_utils._get_resource( - k8s_client, job_definition_name, test_params["Plural"] + job_definition_name, test_params["Plural"] ) assert ( job_definition_name @@ -88,5 +87,5 @@ def test_job_definitions(kfp_client, experiment_id, test_file_dir, deploy_endpoi finally: ack_utils._delete_resource( - k8s_client, job_definition_name, test_params["Plural"] + job_definition_name, test_params["Plural"] ) diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_schedule_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_schedule_component.py index 7edd7bd853..30f56663fd 100644 --- a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_schedule_component.py +++ b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_monitoring_schedule_component.py @@ -59,7 +59,6 @@ def test_create_v2_monitoring_schedule( os.path.join(download_dir, "config.yaml"), ) ) - k8s_client = ack_utils.k8s_client() # parameters for model bias job definition job_definition_name = ( @@ -92,7 +91,7 @@ def test_create_v2_monitoring_schedule( # Verify if the job definition CR is created properly job_definition_describe = ack_utils._get_resource( - k8s_client, job_definition_name, "modelbiasjobdefinitions" + job_definition_name, "modelbiasjobdefinitions" ) assert ( 
job_definition_name @@ -107,7 +106,7 @@ def test_create_v2_monitoring_schedule( # Verify if monitoring schedule CR is created properly monitoring_schedule_describe = ack_utils._get_resource( - k8s_client, monitoring_schedule_name, "monitoringschedules" + monitoring_schedule_name, "monitoringschedules" ) assert ( monitoring_schedule_name @@ -124,14 +123,12 @@ def test_create_v2_monitoring_schedule( finally: ack_utils._delete_resource( - k8s_client, job_definition_name, "modelbiasjobdefinitions", wait_periods=10, period_length=30, ) ack_utils._delete_resource( - k8s_client, monitoring_schedule_name, "monitoringschedules", wait_periods=10, @@ -164,7 +161,6 @@ def test_update_v2_monitoring_schedule( os.path.join(download_dir, "config.yaml"), ) ) - k8s_client = ack_utils.k8s_client() # parameters for job definition test_params["Arguments"][test_params["JobInputName"]]["endpointInput"][ @@ -202,7 +198,7 @@ def test_update_v2_monitoring_schedule( # Verify if monitoring schedule CR is created properly monitoring_schedule_describe = ack_utils._get_resource( - k8s_client, monitoring_schedule_name, "monitoringschedules" + monitoring_schedule_name, "monitoringschedules" ) assert ( monitoring_schedule_name @@ -221,7 +217,7 @@ def test_update_v2_monitoring_schedule( # Verify if the job definition CR is created properly job_definition_1_describe = ack_utils._get_resource( - k8s_client, job_definition_name_1, "dataqualityjobdefinitions" + job_definition_name_1, "dataqualityjobdefinitions" ) assert ( job_definition_name_1 @@ -262,7 +258,7 @@ def test_update_v2_monitoring_schedule( # Verify if monitoring schedule is updated with correct job definition monitoring_schedule_updated_describe = ack_utils._get_resource( - k8s_client, monitoring_schedule_name, "monitoringschedules" + monitoring_schedule_name, "monitoringschedules" ) assert ( monitoring_schedule_updated_describe["status"]["monitoringScheduleStatus"] @@ -277,7 +273,7 @@ def test_update_v2_monitoring_schedule( # Verify if the new job definition CR is created properly job_definition_2_describe = ack_utils._get_resource( - k8s_client, job_definition_name_2, "dataqualityjobdefinitions" + job_definition_name_2, "dataqualityjobdefinitions" ) assert ( job_definition_name_2 @@ -296,21 +292,18 @@ def test_update_v2_monitoring_schedule( finally: ack_utils._delete_resource( - k8s_client, job_definition_name_1, test_params["Plural"], wait_periods=10, period_length=30, ) ack_utils._delete_resource( - k8s_client, job_definition_name_2, test_params["Plural"], wait_periods=10, period_length=30, ) ack_utils._delete_resource( - k8s_client, monitoring_schedule_name, "monitoringschedules", wait_periods=10, diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_train_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_train_component.py index 34e1dc7e62..19378beafd 100644 --- a/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_train_component.py +++ b/components/aws/sagemaker/tests/integration_tests/component_tests/test_v2_train_component.py @@ -18,7 +18,6 @@ ], ) def test_trainingjobV2(kfp_client, experiment_id, test_file_dir): - k8s_client = ack_utils.k8s_client() test_file_dir = "resources/config/ack-training-job" download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated")) test_params = utils.load_params( @@ -68,7 +67,7 @@ def test_trainingjobV2(kfp_client, experiment_id, test_file_dir): # Verify Training job was successful on SageMaker print(f"training job name: 
{input_job_name}") - train_response = ack_utils._get_resource(k8s_client, input_job_name, "trainingjobs") + train_response = ack_utils._get_resource(input_job_name, "trainingjobs") assert ( train_response["status"]["trainingJobStatus"] == output_training_job_status @@ -87,7 +86,6 @@ def test_trainingjobV2(kfp_client, experiment_id, test_file_dir): @pytest.mark.v2 def test_terminate_trainingjob(kfp_client, experiment_id): - k8s_client = ack_utils.k8s_client() test_file_dir = "resources/config/ack-training-job" download_dir = utils.mkdir(os.path.join(test_file_dir + "/generated_terminate")) @@ -114,7 +112,7 @@ def test_terminate_trainingjob(kfp_client, experiment_id): kfp_client_utils.terminate_run(kfp_client, run_id) desiredStatuses = ["Stopping", "Stopped"] training_status_reached = ack_utils.wait_for_trainingjob_status( - k8s_client, input_job_name, desiredStatuses, 10, 6 + input_job_name, desiredStatuses, 10, 6 ) assert training_status_reached diff --git a/components/aws/sagemaker/tests/integration_tests/utils/ack_utils.py b/components/aws/sagemaker/tests/integration_tests/utils/ack_utils.py index e5751c3672..ae48426dcf 100644 --- a/components/aws/sagemaker/tests/integration_tests/utils/ack_utils.py +++ b/components/aws/sagemaker/tests/integration_tests/utils/ack_utils.py @@ -7,13 +7,14 @@ def k8s_client(): return config.new_client_from_config() -def _get_resource(k8s_client, job_name, plural): +def _get_resource(job_name, plural): """Get the custom resource detail similar to: kubectl describe JOB_NAME -n NAMESPACE. Returns: None or object: None if the resource doesn't exist in server or there is an error, otherwise the custom object. """ - _api = client.CustomObjectsApi(k8s_client) + # Instantiate a new client every time to avoid connection issues. + _api = client.CustomObjectsApi(k8s_client()) namespace = os.environ.get("NAMESPACE") try: job_description = _api.get_namespaced_custom_object( @@ -29,12 +30,12 @@ def _get_resource(k8s_client, job_name, plural): return job_description -def _delete_resource(k8s_client, job_name, plural, wait_periods=10, period_length=20): +def _delete_resource(job_name, plural, wait_periods=10, period_length=20): """Delete the custom resource Returns: True or False: True if the resource is deleted, False if the resource deletion times out """ - _api = client.CustomObjectsApi(k8s_client) + _api = client.CustomObjectsApi(k8s_client()) namespace = os.environ.get("NAMESPACE") try: @@ -50,7 +51,7 @@ def _delete_resource(k8s_client, job_name, plural, wait_periods=10, period_lengt for _ in range(wait_periods): sleep(period_length) - if _get_resource(k8s_client, job_name, plural) is None: + if _get_resource(job_name, plural) is None: print(f"Resource {job_name} deleted successfully.") return True @@ -60,10 +61,10 @@ def _delete_resource(k8s_client, job_name, plural, wait_periods=10, period_lengt # TODO: Make this a generalized function for non-job resources. 
def wait_for_trainingjob_status( - k8s_client, training_job_name, desiredStatuses, wait_periods, period_length + training_job_name, desiredStatuses, wait_periods, period_length ): for _ in range(wait_periods): - response = _get_resource(k8s_client, training_job_name, "trainingjobs") + response = _get_resource(training_job_name, "trainingjobs") if response["status"]["trainingJobStatus"] in desiredStatuses: return True sleep(period_length) @@ -71,19 +72,19 @@ def wait_for_trainingjob_status( def wait_for_condition( - k8s_client, resource_name, validator_function, wait_periods=10, period_length=8 + resource_name, validator_function, wait_periods=10, period_length=8 ): for _ in range(wait_periods): - if not validator_function(k8s_client, resource_name): + if not validator_function(resource_name): sleep(period_length) else: return True return False -def does_endpoint_exist(k8s_client, endpoint_name): +def does_endpoint_exist(endpoint_name): try: - response = _get_resource(k8s_client, endpoint_name, "endpoints") + response = _get_resource(endpoint_name, "endpoints") if response: return True if response is None: # kubernetes module error @@ -92,8 +93,8 @@ def does_endpoint_exist(k8s_client, endpoint_name): return False -def is_endpoint_deleted(k8s_client, endpoint_name): - response = _get_resource(k8s_client, endpoint_name, "endpoints") +def is_endpoint_deleted(endpoint_name): + response = _get_resource(endpoint_name, "endpoints") if response: return False if response is None: From 79d31db90610e1965b702b258805939962b9a773 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 18 Jul 2023 12:19:29 -0700 Subject: [PATCH 033/253] chore(components): remove unused GCPC files PiperOrigin-RevId: 549075960 --- components/google-cloud/docs/make.bat | 35 --------------------------- components/google-cloud/setup.cfg | 2 -- components/google-cloud/tox.ini | 30 ----------------------- 3 files changed, 67 deletions(-) delete mode 100644 components/google-cloud/docs/make.bat delete mode 100644 components/google-cloud/setup.cfg delete mode 100644 components/google-cloud/tox.ini diff --git a/components/google-cloud/docs/make.bat b/components/google-cloud/docs/make.bat deleted file mode 100644 index 6247f7e231..0000000000 --- a/components/google-cloud/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/components/google-cloud/setup.cfg b/components/google-cloud/setup.cfg deleted file mode 100644 index 224a77957f..0000000000 --- a/components/google-cloud/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/components/google-cloud/tox.ini b/components/google-cloud/tox.ini deleted file mode 100644 index 8183b85387..0000000000 --- a/components/google-cloud/tox.ini +++ /dev/null @@ -1,30 +0,0 @@ -[tox] -envlist = clean,py39 -skip_missing_interpreters = true - -[testenv] -usedevelop = True -install_command = pip install -U {opts} {packages} -extras = tests -testpaths = tests -setenv = - TEST_UNDECLARED_OUTPUTS_DIR = . - -deps = - pytest - pytest-cov - absl-py - -depends = - {py39}: clean - report: py39 -commands = - py.test --cov=google_cloud_pipeline_components --cov-append --cov-report=term-missing -vvv -s {posargs} - -[coverage:report] -skip_empty = true - -[testenv:clean] -deps = coverage -skip_install = true -commands = coverage erase From d331ca0204359d67e03fcd9b903ed2eff2b299a6 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Tue, 18 Jul 2023 14:11:55 -0700 Subject: [PATCH 034/253] feat(components): Allow ImportModelEvaluationOp to take LLM metrics from --metrics and --problem_type parameters PiperOrigin-RevId: 549107922 --- .../import_evaluation/component.py | 26 +++++++++++++------ .../import_model_evaluation.py | 5 ++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py index e33c75058b..952f0b3066 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py @@ -58,15 +58,25 @@ def model_evaluation_import( uploaded evaluation. metrics: Path of metrics generated from an evaluation component. problem_type: The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is provided. - classification_metrics: Path of classification metrics generated from the - classification evaluation component. - forecasting_metrics: Path of forecasting metrics generated from the - forecasting evaluation component. - regression_metrics: Path of regression metrics generated from the regression - evaluation component. + classification_metrics: google.ClassificationMetrics artifact generated from + the ModelEvaluationClassificationOp component. + forecasting_metrics: google.ForecastingMetrics artifact generated from + the ModelEvaluationForecastingOp component. + regression_metrics: google.ClassificationMetrics artifact generated from + the ModelEvaluationRegressionOp component. 
+ text_generation_metrics: system.Metrics artifact generated from + the ModelEvaluationTextGenerationOp component. Subject to change to + google.TextGenerationMetrics. + question_answering_metrics: system.Metrics artifact generated from + the ModelEvaluationTextGenerationOp component. Subject to change to + google.QuestionAnsweringMetrics. + summarization_metrics: system.Metrics artifact generated from + the ModelEvaluationTextGenerationOp component. Subject to change to + google.SummarizationMetrics. explanation: Path for model explanation metrics generated from an evaluation component. feature_attributions: The feature attributions metrics artifact generated @@ -138,7 +148,7 @@ def model_evaluation_import( input_name="summarization_metrics", then=[ "--summarization_metrics", - "{{$.inputs.artifacts['summarization_metrics'].uri}}", + summarization_metrics.uri, ], ), dsl.IfPresentPlaceholder( diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py index 1bcf928eb6..2fdffb5f07 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py @@ -163,6 +163,11 @@ def main(argv): else: metrics_file_path = parsed_args.metrics problem_type = parsed_args.problem_type + if problem_type not in PROBLEM_TYPE_TO_SCHEMA_URI: + raise ValueError( + 'Unsupported problem_type: {}. Supported problem types are: {}' + .format(problem_type, list(PROBLEM_TYPE_TO_SCHEMA_URI.keys())) + ) logging.info('metrics_file_path: %s', metrics_file_path) logging.info('problem_type: %s', problem_type) From b1c3dffd81cddabd043256c0177a3765f07fdeb7 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 19 Jul 2023 09:24:24 -0700 Subject: [PATCH 035/253] test(components): INTERNAL PiperOrigin-RevId: 549338123 --- .../component_info.textproto | 17 +++++++++++++++++ .../component_reference_doc.md | 3 +++ .../component_release_note.md | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto new file mode 100644 index 0000000000..38751475a4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto @@ -0,0 +1,17 @@ +# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto +# proto-message: GalleryMetadata + +# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto 
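Back in import_model_evaluation.py above, the new problem_type guard fails fast on unsupported values. A rough standalone sketch of that behavior, with placeholder schema URIs (the real mapping lives in the launcher code):

PROBLEM_TYPE_TO_SCHEMA_URI = {
    "classification": "<classification metrics schema uri>",
    "regression": "<regression metrics schema uri>",
    "forecasting": "<forecasting metrics schema uri>",
    "text-generation": "<text generation metrics schema uri>",
    "question-answering": "<question answering metrics schema uri>",
    "summarization": "<summarization metrics schema uri>",
}

def check_problem_type(problem_type):
    if problem_type not in PROBLEM_TYPE_TO_SCHEMA_URI:
        raise ValueError(
            'Unsupported problem_type: {}. Supported problem types are: {}'.format(
                problem_type, list(PROBLEM_TYPE_TO_SCHEMA_URI.keys())))

check_problem_type("question-answering")  # one of the newly documented LLM problem types
# check_problem_type("foo") would raise ValueError listing the supported keys.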
+upload_template_request { + vertex_template_gallery_metadata { + vertex_gallery_categorization { + type: COMPONENT + integration: BIG_QUERY + } + display_name: "Big Query - Detect Anomalies Model Component" + } + description: "Launch a BigQuery detect anomalies model job and waits for it to finish." +} + +# Which python function defines the pipeline. +pipeline_func: "google_cloud_pipeline_components.v1.bigquery.detect_anomalies_model.BigqueryDetectAnomaliesModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md new file mode 100644 index 0000000000..d9c6dcf868 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md @@ -0,0 +1,3 @@ +Launch a BigQuery detect anomalies model job and waits for it to finish. + +Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md new file mode 100644 index 0000000000..eca0cbd72c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md @@ -0,0 +1,3 @@ +2023-07-18 + +Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file From 3c6d74fb1eb957ccd008116dd8c4b795c5f00430 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 19 Jul 2023 09:24:36 -0700 Subject: [PATCH 036/253] test(components): INTERNAL PiperOrigin-RevId: 549338176 --- .../component_info.textproto | 17 +++++++++++++++++ .../component_reference_doc.md | 3 +++ .../component_release_note.md | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto new file mode 100644 index 0000000000..5798c9207b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto @@ -0,0 +1,17 @@ +# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto +# proto-message: GalleryMetadata + +# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto +upload_template_request { + vertex_template_gallery_metadata { + vertex_gallery_categorization { + type: COMPONENT + integration: BIG_QUERY + } + display_name: "Big Query - Evaluate Model Component" + } + description: "Launch a BigQuery evaluate model job and waits for it to finish." +} + +# Which python function defines the pipeline. +pipeline_func: "google_cloud_pipeline_components.v1.bigquery.evaluate_model.BigqueryEvaluateModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md new file mode 100644 index 0000000000..6403940787 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md @@ -0,0 +1,3 @@ +Launch a BigQuery evaluate model job and waits for it to finish. + +Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md new file mode 100644 index 0000000000..eca0cbd72c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md @@ -0,0 +1,3 @@ +2023-07-18 + +Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file From 998e856579e8303d78dc2fb1fe67155ad07ab5b6 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 19 Jul 2023 10:28:26 -0700 Subject: [PATCH 037/253] test(components): INTERNAL PiperOrigin-RevId: 549355953 --- .../component_info.textproto | 17 +++++++++++++++++ .../component_reference_doc.md | 3 +++ .../component_release_note.md | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto new file mode 100644 index 0000000000..512f3511c8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto @@ -0,0 +1,17 @@ +# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto +# proto-message: GalleryMetadata + +# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto +upload_template_request { + vertex_template_gallery_metadata { + vertex_gallery_categorization { + type: COMPONENT + integration: BIG_QUERY + } + display_name: "BigQuery - Create Model Component" + } + description: "Launches a BigQuery create model job and waits for it to finish." +} + +# Which python function defines the pipeline. +pipeline_func: "google_cloud_pipeline_components.v1.bigquery.create_model.BigqueryCreateModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md new file mode 100644 index 0000000000..7b30d8bc0f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md @@ -0,0 +1,3 @@ +Launches a BigQuery create model job and waits for it to finish. + +Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md new file mode 100644 index 0000000000..eca0cbd72c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md @@ -0,0 +1,3 @@ +2023-07-18 + +Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file From 273c772a9999c8f036443df936bc6647e7ade4d7 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 19 Jul 2023 10:35:00 -0700 Subject: [PATCH 038/253] test(components): INTERNAL PiperOrigin-RevId: 549357956 --- .../component_info.textproto | 17 +++++++++++++++++ .../component_reference_doc.md | 3 +++ .../component_release_note.md | 3 +++ 3 files changed, 23 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md create mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto new file mode 100644 index 0000000000..6833c8bd51 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto @@ -0,0 +1,17 @@ +# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto +# proto-message: GalleryMetadata + +# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto +upload_template_request { + vertex_template_gallery_metadata { + vertex_gallery_categorization { + type: COMPONENT + integration: BIG_QUERY + } + display_name: "BigQuery - Drop Model Component" + } + description: "Launches a BigQuery drop model job and waits for it to finish." +} + +# Which python function defines the pipeline. +pipeline_func: "google_cloud_pipeline_components.v1.bigquery.drop_model.BigqueryDropModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md new file mode 100644 index 0000000000..30a4c93770 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md @@ -0,0 +1,3 @@ +Launches a BigQuery drop model job and waits for it to finish. + +Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md new file mode 100644 index 0000000000..eca0cbd72c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md @@ -0,0 +1,3 @@ +2023-07-18 + +Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file From 79b079c3698cada78bde988e9ef5a4c3bed7ebf2 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 19 Jul 2023 14:43:15 -0700 Subject: [PATCH 039/253] chore(components): internal PiperOrigin-RevId: 549430554 --- .../_image.py | 1 + .../types/artifact_types.py | 39 +++++++++++++++++-- .../google_cloud_pipeline_components/utils.py | 18 +++++++-- components/google-cloud/setup.py | 19 ++++----- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_image.py b/components/google-cloud/google_cloud_pipeline_components/_image.py index 63dbc85814..2e8777527f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_image.py +++ b/components/google-cloud/google_cloud_pipeline_components/_image.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Constants for the GCPC image.""" from google_cloud_pipeline_components import version GCPC_IMAGE_NAME = 'gcr.io/ml-pipeline/google-cloud-pipeline-components' diff --git a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py index c05920043d..1878b50432 100644 --- a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py +++ b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py @@ -38,7 +38,7 @@ ] import textwrap -from typing import Dict, Optional +from typing import Any, Dict, Optional from kfp import dsl _RESOURCE_NAME_KEY = 'resourceName' @@ -74,6 +74,9 @@ def create( projects/{project}/locations/{location}/models/{model}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/get + + Returns: + VertexModel instance. """ return cls( name=name, @@ -112,6 +115,9 @@ def create( projects/{project}/locations/{location}/endpoints/{endpoint}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/get + + Returns: + VertexEndpoint instance. """ return cls( name=name, @@ -172,6 +178,9 @@ def create( created, into which the prediction output is written. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo + + Returns: + VertexBatchPredictionJob instance. """ return cls( name=name, @@ -215,6 +224,9 @@ def create( projects/{project}/locations/{location}/datasets/{datasets_name}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets/get + + Returns: + VertexDataset instance. """ return cls( uri=uri, @@ -254,6 +266,9 @@ def create( dataset_id: The ID of the dataset containing this model. model_id: The ID of the model. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/models#ModelReference + + Returns: + BQMLModel instance. """ return cls( name=name, @@ -299,6 +314,9 @@ def create( dataset_id: The ID of the dataset containing this table. table_id: The ID of the table. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/TableReference + + Returns: + BQTable instance. 
""" return cls( name=name, @@ -365,8 +383,8 @@ class UnmanagedContainerModel(dsl.Artifact): @classmethod def create( cls, - predict_schemata: Dict, - container_spec: Dict, + predict_schemata: Dict[str, str], + container_spec: Dict[str, Any], ) -> 'UnmanagedContainerModel': """Create a UnmanagedContainerModel artifact instance. @@ -379,6 +397,9 @@ def create( fields in this message correspond to fields in the Kubernetes Container v1 core specification. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ModelContainerSpec + + Returns: + UnmanagedContainerModel instance. """ return cls( metadata={ @@ -531,6 +552,7 @@ def create( """Create a ClassificationMetrics artifact instance. Args: + name: The artifact name. recall: Recall (True Positive Rate) for the given confidence threshold. precision: Precision for the given confidence threshold. f1_score: The harmonic mean of recall and precision. @@ -538,6 +560,9 @@ def create( au_prc: The Area Under Precision-Recall Curve metric. au_roc: The Area Under Receiver Operating Characteristic curve metric. log_loss: The Log Loss metric. + + Returns: + ClassificationMetrics instance. """ metadata = {} if recall is not None: @@ -598,12 +623,16 @@ def create( """Create a RegressionMetrics artifact instance. Args: + name: The artifact name. root_mean_squared_error: Root Mean Squared Error (RMSE). mean_absolute_error: Mean Absolute Error (MAE). mean_absolute_percentage_error: Mean absolute percentage error. r_squared: Coefficient of determination as Pearson correlation coefficient. root_mean_squared_log_error: Root mean squared log error. + + Returns: + RegressionMetrics instance. """ metadata = {} if root_mean_squared_error is not None: @@ -686,6 +715,7 @@ def create( """Create a ForecastingMetrics artifact instance. Args: + name: The artifact name. root_mean_squared_error: Root Mean Squared Error (RMSE). mean_absolute_error: Mean Absolute Error (MAE). mean_absolute_percentage_error: Mean absolute percentage error. @@ -700,6 +730,9 @@ def create( Square root of MSPE. Undefined/imaginary when MSPE is negative. symmetric_mean_absolute_percentage_error: Symmetric Mean Absolute Percentage Error. + + Returns: + ForecastingMetrics instance. """ metadata = {} if root_mean_squared_error is not None: diff --git a/components/google-cloud/google_cloud_pipeline_components/utils.py b/components/google-cloud/google_cloud_pipeline_components/utils.py index c8939399a3..3168e9be3d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/utils.py @@ -16,7 +16,7 @@ import copy import json import re -from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional from google_cloud_pipeline_components import _image from kfp import components @@ -143,12 +143,22 @@ def unquote_nonstring_placeholders( def gcpc_output_name_converter( new_name: str, original_name: Optional[str] = None, -): +) -> Callable[ + [components.base_component.BaseComponent], + components.base_component.BaseComponent, +]: """Replace the output with original_name with a new_name in a component decorated with an @dsl.container_component decorator. Enables authoring components that have an input and output with the same key/name. + Args: + new_name: The new name for the output. + original_name: The original name of the output. + + Returns: + A decorator that takes modifies a component in place. 
+ Example usage: @utils.gcpc_output_name_converter('output__gcp_resources', 'gcp_resources') @@ -254,8 +264,8 @@ def replace_output_name_in_dag_outputs( ) def replace_output_name_in_executor( - command: list, - args: list, + command: List[str], + args: List[str], original_name: str, new_name: str, ): diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index ab6134e892..0561139b6a 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -18,8 +18,7 @@ import os import types -from setuptools import find_packages -from setuptools import setup +import setuptools relative_directory = os.path.relpath(os.path.dirname(os.path.abspath(__file__))) GCPC_DIR_NAME = "google_cloud_pipeline_components" @@ -36,7 +35,11 @@ with open("README.md") as fp: _GCPC_LONG_DESCRIPTION = fp.read() -setup( +yaml_data = glob.glob(relative_data_path + "/**/*.yaml", recursive=True) +json_data = glob.glob( + relative_data_path + "/**/automl/**/*.json", recursive=True +) +setuptools.setup( name="google-cloud-pipeline-components", version=version.__version__, description=( @@ -87,7 +90,6 @@ "https://google-cloud-pipeline-components.readthedocs.io/" ), "Source": "https://github.com/kubeflow/pipelines/tree/master/components/google-cloud", - # TODO: update to point to reference documentation release notes once available post GCPC v2 GA "Release Notes": "https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/RELEASE.md", }, dependency_links=[], @@ -114,16 +116,11 @@ package_dir={ GCPC_DIR_NAME: os.path.join(relative_directory, GCPC_DIR_NAME) }, - packages=find_packages(where=relative_directory, include="*"), + packages=setuptools.find_packages(where=relative_directory, include="*"), package_data={ GCPC_DIR_NAME: [ x.replace(relative_data_path + "/", "") - for x in glob.glob( - relative_data_path + "/**/*.yaml", recursive=True - ) - + glob.glob( - relative_data_path + "/**/automl/**/*.json", recursive=True - ) + for x in yaml_data + json_data ] }, ) From c917fb9e4c23e6108a21a3822ea2d40a4890b184 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 19 Jul 2023 14:43:32 -0700 Subject: [PATCH 040/253] chore(components): internal PiperOrigin-RevId: 549430619 --- .../{google_cloud_pipeline_components/container => }/Dockerfile | 0 .../container => }/cloudbuild.yaml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename components/google-cloud/{google_cloud_pipeline_components/container => }/Dockerfile (100%) rename components/google-cloud/{google_cloud_pipeline_components/container => }/cloudbuild.yaml (87%) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/Dockerfile b/components/google-cloud/Dockerfile similarity index 100% rename from components/google-cloud/google_cloud_pipeline_components/container/Dockerfile rename to components/google-cloud/Dockerfile diff --git a/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml b/components/google-cloud/cloudbuild.yaml similarity index 87% rename from components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml rename to components/google-cloud/cloudbuild.yaml index 0456b92833..714c86e1ed 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/cloudbuild.yaml +++ b/components/google-cloud/cloudbuild.yaml @@ -1,5 +1,5 @@ steps: - name: 'gcr.io/kaniko-project/executor:latest' args: - - --destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:2.1.0 + - 
--destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:$_IMAGE_SUFFIX - --cache=false From fde2a3df28e473dad821ecec10b840178dd57c9d Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 20 Jul 2023 07:22:01 -0700 Subject: [PATCH 041/253] chore(components): internal PiperOrigin-RevId: 549622878 --- .../component_info.textproto | 17 ----------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- .../component_info.textproto | 17 ----------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- .../component_info.textproto | 17 ----------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- .../component_info.textproto | 17 ----------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- .../component_info.textproto | 17 ----------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- .../component_info.textproto | 30 ------------------- .../component_reference_doc.md | 3 -- .../component_release_note.md | 3 -- 18 files changed, 151 deletions(-) delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_info.textproto delete mode 100644 
components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_reference_doc.md delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_release_note.md diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto deleted file mode 100644 index 512f3511c8..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,17 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - vertex_gallery_categorization { - type: COMPONENT - integration: BIG_QUERY - } - display_name: "BigQuery - Create Model Component" - } - description: "Launches a BigQuery create model job and waits for it to finish." -} - -# Which python function defines the pipeline. -pipeline_func: "google_cloud_pipeline_components.v1.bigquery.create_model.BigqueryCreateModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md deleted file mode 100644 index 7b30d8bc0f..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_reference_doc.md +++ /dev/null @@ -1,3 +0,0 @@ -Launches a BigQuery create model job and waits for it to finish. - -Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md deleted file mode 100644 index eca0cbd72c..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/template_in_gallery/component_release_note.md +++ /dev/null @@ -1,3 +0,0 @@ -2023-07-18 - -Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto deleted file mode 100644 index 38751475a4..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,17 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - vertex_gallery_categorization { - type: COMPONENT - integration: BIG_QUERY - } - display_name: "Big Query - Detect Anomalies Model Component" - } - description: "Launch a BigQuery detect anomalies model job and waits for it to finish." -} - -# Which python function defines the pipeline. -pipeline_func: "google_cloud_pipeline_components.v1.bigquery.detect_anomalies_model.BigqueryDetectAnomaliesModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md deleted file mode 100644 index d9c6dcf868..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_reference_doc.md +++ /dev/null @@ -1,3 +0,0 @@ -Launch a BigQuery detect anomalies model job and waits for it to finish. - -Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md deleted file mode 100644 index eca0cbd72c..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/template_in_gallery/component_release_note.md +++ /dev/null @@ -1,3 +0,0 @@ -2023-07-18 - -Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto deleted file mode 100644 index 6833c8bd51..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,17 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - vertex_gallery_categorization { - type: COMPONENT - integration: BIG_QUERY - } - display_name: "BigQuery - Drop Model Component" - } - description: "Launches a BigQuery drop model job and waits for it to finish." -} - -# Which python function defines the pipeline. -pipeline_func: "google_cloud_pipeline_components.v1.bigquery.drop_model.BigqueryDropModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md deleted file mode 100644 index 30a4c93770..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_reference_doc.md +++ /dev/null @@ -1,3 +0,0 @@ -Launches a BigQuery drop model job and waits for it to finish. - -Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md deleted file mode 100644 index eca0cbd72c..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/template_in_gallery/component_release_note.md +++ /dev/null @@ -1,3 +0,0 @@ -2023-07-18 - -Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto deleted file mode 100644 index 5798c9207b..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,17 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - vertex_gallery_categorization { - type: COMPONENT - integration: BIG_QUERY - } - display_name: "Big Query - Evaluate Model Component" - } - description: "Launch a BigQuery evaluate model job and waits for it to finish." -} - -# Which python function defines the pipeline. -pipeline_func: "google_cloud_pipeline_components.v1.bigquery.evaluate_model.BigqueryEvaluateModelJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md deleted file mode 100644 index 6403940787..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_reference_doc.md +++ /dev/null @@ -1,3 +0,0 @@ -Launch a BigQuery evaluate model job and waits for it to finish. - -Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md deleted file mode 100644 index eca0cbd72c..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/template_in_gallery/component_release_note.md +++ /dev/null @@ -1,3 +0,0 @@ -2023-07-18 - -Release to Vertex AI Pipelines Template Gallery. 
\ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto deleted file mode 100644 index c8803fcce2..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,17 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - vertex_gallery_categorization { - type: COMPONENT - integration: BIG_QUERY - } - display_name: "Big Query - Query Job Component" - } - description: "Launch a BigQuery query job and waits for it to finish." -} - -# Which python function defines the pipeline. -pipeline_func: "google_cloud_pipeline_components.v1.bigquery.query_job.BigqueryQueryJobOp" \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md deleted file mode 100644 index ed16f716a7..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_reference_doc.md +++ /dev/null @@ -1,3 +0,0 @@ -Launch a BigQuery SQL query job and waits for it to finish. - -Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/bigqueryml-component) \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md deleted file mode 100644 index 424f4d4ae6..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/template_in_gallery/component_release_note.md +++ /dev/null @@ -1,3 +0,0 @@ -2023-07-17 - -Release to Vertex AI Pipelines Template Gallery. \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_info.textproto b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_info.textproto deleted file mode 100644 index 296accb6b6..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_info.textproto +++ /dev/null @@ -1,30 +0,0 @@ -# proto-file: third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata.proto -# proto-message: GalleryMetadata - -# Forked from google3/third_party/py/google_cloud_pipeline_components/templates_in_gallery/protos/gallery_metadata_sample.textproto -upload_template_request { - vertex_template_gallery_metadata { - # Refer to - # https://source.corp.google.com/piper///depot/google3/google/devtools/artifactregistry/main/kfp_artifact.proto;rcl=536882057;l=27 - # for the list of options. 
-    #
-    # And if you would like to propose some new categorization or types, please review
-    # https://docs.google.com/spreadsheets/d/16em2Dp-sHpJW61rP8SiItyty2Dor4DquDORQEIWyo-0/edit?resourcekey=0-pMwjYRX_DwHd1U0lRdEWEA#gid=0
-    # and contact @desmliu
-    vertex_gallery_categorization {
-      type: COMPONENT
-      integration: VERTEX_AI
-    }
-    # This field is a temporary solution. Will remove shortly after b/285601340.
-    # Please add display name in the python file, example of
-    # http://google3/third_party/py/google_cloud_pipeline_components/google_cloud_pipeline_components/google/template_in_gallery_test/python/component.py;rcl=529459116.
-    # TODO(b/285601340) remove this field.
-    display_name: "Vertex AI Custom Job Component"
-  }
-  # This field is a temporary solution. Will remove shortly after b/285601340.
-  # Please add description in the python file, example of
-  # http://google3/third_party/py/google_cloud_pipeline_components/google_cloud_pipeline_components/google/template_in_gallery_test/python/component.py;rcl=529459116.
-  # TODO(b/285601340) remove this field.
-  description: "Launch a Custom training job using Vertex CustomJob API."
-}
-pipeline_func: "google_cloud_pipeline_components.v1.custom_job.CustomTrainingJobOp"
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_reference_doc.md b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_reference_doc.md
deleted file mode 100644
index 61b8cbb971..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_reference_doc.md
+++ /dev/null
@@ -1,3 +0,0 @@
-Custom training jobs let you run your custom machine learning (ML) training code in Vertex AI.
-
-Refer to the [User Doc](https://cloud.google.com/vertex-ai/docs/pipelines/customjob-component)
\ No newline at end of file
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_release_note.md b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_release_note.md
deleted file mode 100644
index 32f5ad1478..0000000000
--- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/template_in_gallery/component_release_note.md
+++ /dev/null
@@ -1,3 +0,0 @@
-2023-05-18
-
-Release to Vertex AI Pipelines Template Gallery.
\ No newline at end of file

From e5b365261664f8e2b4d959f88ccafa2cf845c786 Mon Sep 17 00:00:00 2001
From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com>
Date: Thu, 20 Jul 2023 11:21:24 -0700
Subject: [PATCH 042/253] chore(frontend): Avoid redundant v2 API call in PipelineDetails. (#9728)

* Change PipelineDetails to not put the versionId in the URL, to avoid a redundant API call.

* Remove unnecessary setStateSafe()

* Add a unit test to verify that the latest version is used if more than one version exists in the response of listPipelineVersions().
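For reference, the version-selection behavior this change relies on can be summarized by the following minimal, self-contained TypeScript sketch. The types and the selectVersion helper are hypothetical simplifications for illustration only; the actual component calls getPipelineVersion and listPipelineVersions as shown in the diff below.

    // Hypothetical, simplified shape of a pipeline version (illustration only).
    interface PipelineVersion {
      pipeline_version_id: string;
      display_name: string;
      created_at: string; // ISO-8601 timestamp
    }

    // If a versionId is provided, pick that version; otherwise fall back to the
    // newest version, mirroring the 'created_at desc' ordering requested from
    // listPipelineVersions().
    function selectVersion(
      versions: PipelineVersion[],
      versionId?: string,
    ): PipelineVersion | undefined {
      if (versionId) {
        return versions.find(v => v.pipeline_version_id === versionId);
      }
      return [...versions].sort(
        (a, b) => Date.parse(b.created_at) - Date.parse(a.created_at),
      )[0];
    }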
--- frontend/src/pages/PipelineDetails.test.tsx | 133 ++++++++++++-------- frontend/src/pages/PipelineDetails.tsx | 61 +++------ 2 files changed, 101 insertions(+), 93 deletions(-) diff --git a/frontend/src/pages/PipelineDetails.test.tsx b/frontend/src/pages/PipelineDetails.test.tsx index 9c29c69faa..051c942f57 100644 --- a/frontend/src/pages/PipelineDetails.test.tsx +++ b/frontend/src/pages/PipelineDetails.test.tsx @@ -54,6 +54,7 @@ describe('PipelineDetails', () => { const getExperimentSpy = jest.spyOn(Apis.experimentServiceApiV2, 'getExperiment'); const deletePipelineVersionSpy = jest.spyOn(Apis.pipelineServiceApiV2, 'deletePipelineVersion'); const createGraphSpy = jest.spyOn(StaticGraphParser, 'createGraph'); + const PIPELINE_VERSION_ID = 'test-pipeline-version-id'; let tree: ShallowWrapper | ReactWrapper; let testV1Pipeline: ApiPipeline = {}; @@ -61,19 +62,23 @@ describe('PipelineDetails', () => { let testV1Run: ApiRunDetail = {}; let testV1RecurringRun: ApiJob = {}; let testV2Pipeline: V2beta1Pipeline = {}; - let testV2PipelineVersion: V2beta1PipelineVersion = {}; + let originalTestV2PipelineVersion: V2beta1PipelineVersion = {}; + let newTestV2PipelineVersion: V2beta1PipelineVersion = {}; let testV2Run: V2beta1Run = {}; let testV2RecurringRun: V2beta1RecurringRun = {}; - function generateProps(fromRunSpec = false, fromRecurringRunSpec = false): PageProps { + function generateProps( + versionId?: string, + fromRunSpec = false, + fromRecurringRunSpec = false, + ): PageProps { let params = {}; // If no fromXXX parameter is provided, it means KFP UI expects to // show Pipeline detail with pipeline version ID if (!fromRunSpec && !fromRecurringRunSpec) { params = { - [RouteParams.pipelineId]: testV1Pipeline.id, - [RouteParams.pipelineVersionId]: - (testV1Pipeline.default_version && testV1Pipeline.default_version!.id) || '', + [RouteParams.pipelineId]: testV2Pipeline.pipeline_id, + [RouteParams.pipelineVersionId]: versionId || '', }; } @@ -151,7 +156,7 @@ describe('PipelineDetails', () => { display_name: 'test pipeline', }; - testV2PipelineVersion = { + originalTestV2PipelineVersion = { display_name: 'test-pipeline-version', pipeline_id: 'test-pipeline-id', pipeline_version_id: 'test-pipeline-version-id', @@ -160,6 +165,15 @@ describe('PipelineDetails', () => { ), }; + newTestV2PipelineVersion = { + display_name: 'new-test-pipeline-version', + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'new-test-pipeline-version-id', + pipeline_spec: JsYaml.safeLoad( + 'spec:\n arguments:\n parameters:\n - name: output\n', + ), + }; + testV2Run = { run_id: 'test-run-id', display_name: 'test run', @@ -181,9 +195,11 @@ describe('PipelineDetails', () => { getV1RecurringRunSpy.mockImplementation(() => Promise.resolve(testV1RecurringRun)); getV2PipelineSpy.mockImplementation(() => Promise.resolve(testV2Pipeline)); - getV2PipelineVersionSpy.mockImplementation(() => Promise.resolve(testV2PipelineVersion)); + getV2PipelineVersionSpy.mockImplementation(() => + Promise.resolve(originalTestV2PipelineVersion), + ); listV2PipelineVersionsSpy.mockImplementation(() => - Promise.resolve({ versions: [testV2PipelineVersion] }), + Promise.resolve({ pipeline_versions: [originalTestV2PipelineVersion] }), ); getV2RunSpy.mockImplementation(() => Promise.resolve(testV2Run)); getV2RecurringRunSpy.mockImplementation(() => Promise.resolve(testV2RecurringRun)); @@ -210,7 +226,8 @@ describe('PipelineDetails', () => { expect(updateToolbarSpy).toHaveBeenLastCalledWith( expect.objectContaining({ breadcrumbs: [{ displayName: 
'Pipelines', href: RoutePage.PIPELINES }], - pageTitle: testV1Pipeline.name + ' (' + testV1PipelineVersion.name + ')', + pageTitle: + testV2Pipeline.display_name + ' (' + originalTestV2PipelineVersion.display_name + ')', }), ); }); @@ -219,7 +236,7 @@ describe('PipelineDetails', () => { 'shows all runs breadcrumbs, and "Pipeline details" as page title when the pipeline ' + 'comes from a run spec that does not have an experiment', async () => { - tree = shallow(); + tree = shallow(); await getV1RunSpy; await getV2RunSpy; await createGraphSpy; @@ -243,7 +260,7 @@ describe('PipelineDetails', () => { 'shows all runs breadcrumbs, and "Pipeline details" as page title when the pipeline ' + 'comes from a recurring run spec that does not have an experiment', async () => { - tree = shallow(); + tree = shallow(); await getV1RecurringRunSpy; await getV2RecurringRunSpy; await TestUtils.flushPromises(); @@ -270,7 +287,7 @@ describe('PipelineDetails', () => { 'comes from a run spec that has an experiment', async () => { testV2Run.experiment_id = 'test-experiment-id'; - tree = shallow(); + tree = shallow(); await getV1RunSpy; await getV2RunSpy; await getExperimentSpy; @@ -302,7 +319,7 @@ describe('PipelineDetails', () => { 'comes from a recurring run spec that has an experiment', async () => { testV2RecurringRun.experiment_id = 'test-experiment-id'; - tree = shallow(); + tree = shallow(); await getV1RecurringRunSpy; await getV2RecurringRunSpy; await getExperimentSpy; @@ -341,7 +358,7 @@ describe('PipelineDetails', () => { workflow_manifest: '{"spec": {"arguments": {"parameters": [{"name": "output"}]}}}', }; - tree = shallow(); + tree = shallow(); await getV1RunSpy; await getV2RunSpy; await TestUtils.flushPromises(); @@ -363,7 +380,7 @@ describe('PipelineDetails', () => { }); testV2Run.pipeline_spec = { spec: { arguments: { parameters: [{ name: 'output' }] } } }; - tree = shallow(); + tree = shallow(); await getV1RunSpy; await getV2RunSpy; await TestUtils.flushPromises(); @@ -388,7 +405,7 @@ describe('PipelineDetails', () => { spec: { arguments: { parameters: [{ name: 'output' }] } }, }; - tree = shallow(); + tree = shallow(); await getV1RecurringRunSpy; await getV2RecurringRunSpy; await TestUtils.flushPromises(); @@ -409,7 +426,7 @@ describe('PipelineDetails', () => { testV2Run.pipeline_version_reference.pipeline_id = 'test-pipeline-id'; testV2Run.pipeline_version_reference.pipeline_version_id = 'test-pipeline-version-id'; - tree = shallow(); + tree = shallow(); await getV1RunSpy; await getV2RunSpy; await getV2PipelineVersionSpy; @@ -420,9 +437,29 @@ describe('PipelineDetails', () => { ); }); + it('calls listPipelineVersions() if no pipeline version id', async () => { + listV2PipelineVersionsSpy.mockImplementation(() => + Promise.resolve({ + pipeline_versions: [newTestV2PipelineVersion, originalTestV2PipelineVersion], + }), + ); + render(); + + await waitFor(() => { + expect(listV2PipelineVersionsSpy).toHaveBeenCalled(); + }); + + expect(updateToolbarSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + breadcrumbs: [{ displayName: 'Pipelines', href: RoutePage.PIPELINES }], + pageTitle: testV2Pipeline.display_name + ' (' + newTestV2PipelineVersion.display_name + ')', + }), + ); + }); + it('renders "No graph to show" if it is empty pipeline', async () => { TestUtils.makeErrorResponse(getV2PipelineVersionSpy, 'No pipeline version is found'); - render(); + render(); await waitFor(() => { expect(getV2PipelineVersionSpy).toHaveBeenCalled(); @@ -441,7 +478,7 @@ describe('PipelineDetails', () => { 
testV2RecurringRun.pipeline_version_reference.pipeline_id = 'test-pipeline-id'; testV2RecurringRun.pipeline_version_reference.pipeline_version_id = 'test-pipeline-version-id'; - tree = shallow(); + tree = shallow(); await getV1RecurringRunSpy; await getV2RecurringRunSpy; await getV2PipelineVersionSpy; @@ -460,7 +497,7 @@ describe('PipelineDetails', () => { pipeline_id: 'run-pipeline-id', workflow_manifest: 'not valid JSON', }; - render(); + render(); await waitFor(() => { expect(getV1RunSpy).toHaveBeenCalled(); @@ -481,7 +518,7 @@ describe('PipelineDetails', () => { it('shows load error banner when failing to get run details, when loading from run spec', async () => { TestUtils.makeErrorResponseOnce(getV1RunSpy, 'woops'); - tree = shallow(); + tree = shallow(); await getV1PipelineSpy; await TestUtils.flushPromises(); expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error @@ -500,7 +537,7 @@ describe('PipelineDetails', () => { async () => { testV2Run.experiment_id = 'test-experiment-id'; TestUtils.makeErrorResponse(getExperimentSpy, 'woops'); - tree = shallow(); + tree = shallow(); await getV1PipelineSpy; await TestUtils.flushPromises(); expect(updateBannerSpy).toHaveBeenCalledTimes(2); // Once to clear banner, once to show error @@ -531,16 +568,15 @@ describe('PipelineDetails', () => { it('shows load error banner when failing to get pipeline version', async () => { TestUtils.makeErrorResponse(getV2PipelineVersionSpy, 'No pipeline version is found'); - render(); + render(); await waitFor(() => { - // one for selected Version, another for template string - expect(getV2PipelineVersionSpy).toHaveBeenCalledTimes(2); + expect(getV2PipelineVersionSpy).toHaveBeenCalled(); // get version error will use empty string as template string, which won't call createGraph() expect(createGraphSpy).toHaveBeenCalledTimes(0); }); - expect(updateBannerSpy).toHaveBeenCalledTimes(3); // Clear banner, show error two times + expect(updateBannerSpy).toHaveBeenCalledTimes(2); // // Once to clear banner, once to show error expect(updateBannerSpy).toHaveBeenLastCalledWith( expect.objectContaining({ additionalInfo: 'No pipeline version is found', @@ -566,7 +602,7 @@ describe('PipelineDetails', () => { pipeline_version_id: 'test-pipeline-version-id', pipeline_spec: undefined, // empty pipeline_spec }); - render(); + render(); await waitFor(() => { expect(getV2PipelineVersionSpy).toHaveBeenCalled(); @@ -596,7 +632,7 @@ describe('PipelineDetails', () => { pipeline_version_id: 'test-pipeline-version-id', pipeline_spec: {}, // invalid pipeline_spec }); - render(); + render(); await waitFor(() => { expect(getV2PipelineVersionSpy).toHaveBeenCalled(); @@ -630,7 +666,7 @@ describe('PipelineDetails', () => { }, }); TestUtils.makeErrorResponse(createGraphSpy, 'bad graph'); - render(); + render(); await waitFor(() => { expect(getV2PipelineVersionSpy).toHaveBeenCalled(); @@ -656,7 +692,7 @@ describe('PipelineDetails', () => { }); it("has 'clone run' toolbar button if viewing an embedded pipeline", async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run and create pipeline version, so 2 */ @@ -666,7 +702,7 @@ describe('PipelineDetails', () => { }); it("has 'clone recurring run' toolbar button if viewing an embedded pipeline from recurring run", async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run and 
create pipeline version, so 2 */ @@ -681,7 +717,7 @@ describe('PipelineDetails', () => { 'clicking clone run button when viewing embedded pipeline navigates to ' + 'the new run page (clone a run) with run ID', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const cloneRunBtn = instance.getInitialToolbarState().actions[ButtonKeys.CLONE_RUN]; @@ -697,7 +733,7 @@ describe('PipelineDetails', () => { 'clicking clone recurring run button when viewing embedded pipeline from recurring run' + 'navigates to the new run page (clone a recurring run) with recurring run ID', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const cloneRecurringRunBtn = instance.getInitialToolbarState().actions[ @@ -713,7 +749,7 @@ describe('PipelineDetails', () => { ); it("has 'create run' toolbar button if not viewing an embedded pipeline", async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; /* create run, create pipeline version, create experiment and delete run, so 4 */ @@ -725,7 +761,7 @@ describe('PipelineDetails', () => { }); it('clicking new run button navigates to the new run page', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const newRunFromPipelineVersionBtn = instance.getInitialToolbarState().actions[ @@ -735,9 +771,7 @@ describe('PipelineDetails', () => { expect(historyPushSpy).toHaveBeenCalledTimes(1); expect(historyPushSpy).toHaveBeenLastCalledWith( RoutePage.NEW_RUN + - `?${QUERY_PARAMS.pipelineId}=${testV1Pipeline.id}&${ - QUERY_PARAMS.pipelineVersionId - }=${testV1Pipeline.default_version!.id!}`, + `?${QUERY_PARAMS.pipelineId}=${testV2Pipeline.pipeline_id}&${QUERY_PARAMS.pipelineVersionId}=${PIPELINE_VERSION_ID}`, ); }); @@ -745,7 +779,7 @@ describe('PipelineDetails', () => { 'clicking new run button when viewing half-loaded page navigates to ' + 'the new run page with pipeline ID and version ID', async () => { - tree = shallow(); + tree = shallow(); // Intentionally don't wait until all network requests finish. 
const instance = tree.instance() as PipelineDetails; const newRunFromPipelineVersionBtn = instance.getInitialToolbarState().actions[ @@ -755,9 +789,7 @@ describe('PipelineDetails', () => { expect(historyPushSpy).toHaveBeenCalledTimes(1); expect(historyPushSpy).toHaveBeenLastCalledWith( RoutePage.NEW_RUN + - `?${QUERY_PARAMS.pipelineId}=${testV1Pipeline.id}&${ - QUERY_PARAMS.pipelineVersionId - }=${testV1Pipeline.default_version!.id!}`, + `?${QUERY_PARAMS.pipelineId}=${testV2Pipeline.pipeline_id}&${QUERY_PARAMS.pipelineVersionId}=${PIPELINE_VERSION_ID}`, ); }, ); @@ -791,7 +823,7 @@ describe('PipelineDetails', () => { ); it('has a delete button and it is enabled for pipeline version deletion', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const instance = tree.instance() as PipelineDetails; const deleteBtn = instance.getInitialToolbarState().actions[ButtonKeys.DELETE_RUN]; @@ -801,9 +833,6 @@ describe('PipelineDetails', () => { it('has a delete button, and it is disabled because no version is selected', async () => { let pageProps = generateProps(); - pageProps.match.params = { - [RouteParams.pipelineId]: testV1Pipeline.id, - }; tree = shallow(); await TestUtils.flushPromises(); @@ -814,7 +843,7 @@ describe('PipelineDetails', () => { }); it('shows delete confirmation dialog when delete button is clicked', async () => { - tree = shallow(); + tree = shallow(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN ]; @@ -828,7 +857,7 @@ describe('PipelineDetails', () => { }); it('does not call delete API for selected pipeline when delete dialog is canceled', async () => { - tree = shallow(); + tree = shallow(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN ]; @@ -840,7 +869,7 @@ describe('PipelineDetails', () => { }); it('calls delete API when delete dialog is confirmed', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN @@ -857,7 +886,7 @@ describe('PipelineDetails', () => { }); it('calls delete API when delete dialog is confirmed and page is half-loaded', async () => { - tree = shallow(); + tree = shallow(); // Intentionally don't wait until all network requests finish. 
const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN @@ -874,7 +903,7 @@ describe('PipelineDetails', () => { }); it('shows error dialog if deletion fails', async () => { - tree = shallow(); + tree = shallow(); TestUtils.makeErrorResponseOnce(deletePipelineVersionSpy, 'woops'); await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ @@ -894,7 +923,7 @@ describe('PipelineDetails', () => { }); it('shows success snackbar if deletion succeeds', async () => { - tree = shallow(); + tree = shallow(); await TestUtils.flushPromises(); const deleteBtn = (tree.instance() as PipelineDetails).getInitialToolbarState().actions[ ButtonKeys.DELETE_RUN diff --git a/frontend/src/pages/PipelineDetails.tsx b/frontend/src/pages/PipelineDetails.tsx index a4627bfac0..d415797a99 100644 --- a/frontend/src/pages/PipelineDetails.tsx +++ b/frontend/src/pages/PipelineDetails.tsx @@ -273,20 +273,20 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { // We don't have default version in v2 pipeline proto, choose the latest version instead. private async getSelectedVersion(pipelineId: string, versionId?: string) { + let selectedVersion: V2beta1PipelineVersion; // Get specific version if version id is provided if (versionId) { try { - return await Apis.pipelineServiceApiV2.getPipelineVersion(pipelineId, versionId); + selectedVersion = await Apis.pipelineServiceApiV2.getPipelineVersion(pipelineId, versionId); } catch (err) { this.setStateSafe({ graphIsLoading: false }); await this.showPageError('Cannot retrieve pipeline version.', err); logger.error('Cannot retrieve pipeline version.', err); - return; + return undefined; } } else { // Get the latest version if no version id let listVersionsResponse: V2beta1ListPipelineVersionsResponse; - let latesetVersion: V2beta1PipelineVersion; try { listVersionsResponse = await Apis.pipelineServiceApiV2.listPipelineVersions( pipelineId, @@ -295,22 +295,22 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { 'created_at desc', ); - if (listVersionsResponse.pipeline_versions) { - latesetVersion = listVersionsResponse.pipeline_versions[0]; - // Append version id to URL for create run (new run switcher call getPipelineVersion) - this.props.history.replace({ - pathname: `/pipelines/details/${pipelineId}/version/${latesetVersion.pipeline_version_id}`, - }); - return latesetVersion; + if ( + listVersionsResponse.pipeline_versions && + listVersionsResponse.pipeline_versions.length > 0 + ) { + selectedVersion = listVersionsResponse.pipeline_versions[0]; + } else { + return undefined; } - return undefined; } catch (err) { this.setStateSafe({ graphIsLoading: false }); await this.showPageError('Cannot retrieve pipeline version list.', err); logger.error('Cannot retrieve pipeline version list.', err); - return; + return undefined; } } + return selectedVersion; } public async load(): Promise { @@ -523,10 +523,7 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { logger.error('Cannot retrieve pipeline versions.', err); return; } - templateString = await this._getTemplateString( - pipelineId, - v2SelectedVersion ? v2SelectedVersion.pipeline_version_id! 
: v1SelectedVersion?.id!, - ); + templateString = await this._getTemplateString(v2SelectedVersion); } breadcrumbs = [{ displayName: 'Pipelines', href: RoutePage.PIPELINES }]; @@ -583,10 +580,7 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { ')', ); - const selectedVersionPipelineTemplate = await this._getTemplateString( - this.state.v2Pipeline.pipeline_id!, - versionId, - ); + const selectedVersionPipelineTemplate = await this._getTemplateString(v2SelectedVersion); this.props.history.replace({ pathname: `/pipelines/details/${this.state.v2Pipeline.pipeline_id}/version/${versionId}`, }); @@ -618,28 +612,13 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { } } - private async _getTemplateString(pipelineId: string, versionId: string): Promise { - try { - // Get template string from pipeline_spec in pipeline version (v2 API) - let pipelineVersion; - let pipelineSpecInVersion; - if (pipelineId && versionId) { - pipelineVersion = await Apis.pipelineServiceApiV2.getPipelineVersion(pipelineId, versionId); - pipelineSpecInVersion = pipelineVersion.pipeline_spec; - } - - if (pipelineSpecInVersion) { - return JsYaml.safeDump(pipelineSpecInVersion); - } else { - logger.error('No template string is found'); - return ''; - } - } catch (err) { - this.setStateSafe({ graphIsLoading: false }); - await this.showPageError('Cannot retrieve pipeline version.', err); - logger.error('Cannot retrieve pipeline details.', err); + private async _getTemplateString(pipelineVersion?: V2beta1PipelineVersion): Promise { + if (pipelineVersion?.pipeline_spec) { + return JsYaml.safeDump(pipelineVersion.pipeline_spec); + } else { + logger.error('No template string is found'); + return ''; } - return ''; } private async _createGraph( From b834c8a5eef2ff21ce855d4e74b66673bb05d204 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 20 Jul 2023 14:44:40 -0700 Subject: [PATCH 043/253] chore(components): clean up pipeline yaml PiperOrigin-RevId: 549745244 --- .../v1/automl/forecasting/bqml_arima_train_pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml index 1d23bd2993..5ccd0fc5be 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -4258,7 +4258,7 @@ deploymentSpec: \ *\n\ndef create_metrics_artifact(\n metrics_rows: List[Dict[str, str]],\n\ \ evaluation_metrics: dsl.Output[dsl.Metrics],\n) -> None:\n \"\"\"\ Converts the rows of a metrics table into an Artifact.\"\"\"\n # Use the\ - \ Vertex Eval component's Metrics metadata naming from\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/metadata/schema/google/artifact_schema.py?cl=467006447&l=344\n\ + \ Vertex Eval component's Metrics metadata naming from\n\ \ metric_name_map = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE':\ \ 'rootMeanSquaredError',\n 'MAPE': 'meanAbsolutePercentageError',\n\ \ }\n metrics = {metric_name_map[k]: v for k, v in dict(metrics_rows[0]).items()}\n\ From 1619f2c0b605f202c1cbb8488c64805ccff759c8 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 20 Jul 2023 14:46:09 -0700 Subject: [PATCH 044/253] docs(components): present all v2 GCPC versions on all pages of v2 
GCPC docs PiperOrigin-RevId: 549745616 --- .../google-cloud/docs/add_gcpc_version.sh | 26 ++++++++++++++++++ components/google-cloud/docs/source/conf.py | 26 +----------------- .../google-cloud/docs/source/versions.json | 27 +++++++++++++++++++ 3 files changed, 54 insertions(+), 25 deletions(-) create mode 100644 components/google-cloud/docs/add_gcpc_version.sh create mode 100644 components/google-cloud/docs/source/versions.json diff --git a/components/google-cloud/docs/add_gcpc_version.sh b/components/google-cloud/docs/add_gcpc_version.sh new file mode 100644 index 0000000000..836c9c9a1f --- /dev/null +++ b/components/google-cloud/docs/add_gcpc_version.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# read the current version from environment variable +GCPC_VERSION=$1 +SCRIPT_DIR=$(dirname "$0") + +# check if jq is installed +if ! command -v jq &> /dev/null +then + echo "jq could not be found" + echo "Please install jq using the following command:" + echo "sudo apt-get install jq" + exit +fi + +# create a new JSON object +new_version=$(cat < $SCRIPT_DIR/temp.json && mv $SCRIPT_DIR/temp.json $SCRIPT_DIR/source/versions.json diff --git a/components/google-cloud/docs/source/conf.py b/components/google-cloud/docs/source/conf.py index db2433ef96..dc5f68c3ed 100644 --- a/components/google-cloud/docs/source/conf.py +++ b/components/google-cloud/docs/source/conf.py @@ -112,23 +112,6 @@ def __getitem__(self, type_) -> str: dsl.Output = Output -# order from earliest to latest -# start with 2.0.0b3, which is the first time we're using the new theme -V2_DROPDOWN_VERSIONS = [ - '2.0.0b3', - '2.0.0b4', - '2.0.0b5', - '2.0.0', - '2.1.0', -] - -# The short X.Y version -# update for each release -LATEST_VERSION = V2_DROPDOWN_VERSIONS[-1] - -# The full version, including alpha/beta/rc tags -release = LATEST_VERSION - # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. @@ -209,14 +192,7 @@ def __getitem__(self, type_) -> str: }], 'font': {'text': 'Open Sans'}, 'version_dropdown': True, - 'version_info': [ - { - 'version': f'https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-{version}', - 'title': version, - 'aliases': [], - } - for version in reversed(V2_DROPDOWN_VERSIONS) - ], + 'version_json': 'https://raw.githubusercontent.com/kubeflow/pipelines/test-gcpc-dropdown/versions.json', # "toc_title_is_page_title": True, } # Add any paths that contain templates here, relative to this directory. 
diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json new file mode 100644 index 0000000000..c600b1005b --- /dev/null +++ b/components/google-cloud/docs/source/versions.json @@ -0,0 +1,27 @@ +[ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.0", + "title": "2.1.0", + "aliases": [] + }, + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.0.0", + "title": "2.0.0", + "aliases": [] + }, + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.0.0b5", + "title": "2.0.0b5", + "aliases": [] + }, + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.0.0b4", + "title": "2.0.0b4", + "aliases": [] + }, + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.0.0b3", + "title": "2.0.0b3", + "aliases": [] + } +] From c27d23a79455e18dc6ad362d2ee7353028f32ca6 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Tue, 25 Jul 2023 13:37:13 -0700 Subject: [PATCH 045/253] feat(components): Output imported evaluation resource name in ImportModelEvaluationOp PiperOrigin-RevId: 550982226 --- .../model_evaluation/import_evaluation/component.py | 3 +++ .../model_evaluation/import_model_evaluation.py | 13 ++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py index 952f0b3066..b7be65d5c1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py @@ -29,6 +29,7 @@ def model_evaluation_import( model: Input[VertexModel], gcp_resources: dsl.OutputPath(str), + evaluation_resource_name: dsl.OutputPath(str), metrics: Optional[Input[Metrics]] = None, problem_type: Optional[str] = None, classification_metrics: Optional[Input[ClassificationMetrics]] = None, @@ -181,5 +182,7 @@ def model_evaluation_import( model.metadata["resourceName"], "--gcp_resources", gcp_resources, + "--evaluation_resource_name", + evaluation_resource_name, ], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py index 2fdffb5f07..4ee02b6041 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py @@ -129,13 +129,20 @@ def _make_parent_dirs_and_return_path(file_path: str): required=True, default=argparse.SUPPRESS, ) +parser.add_argument( + '--evaluation_resource_name', + dest='evaluation_resource_name', + type=_make_parent_dirs_and_return_path, + required=True, + default=argparse.SUPPRESS, +) def main(argv): """Calls ModelService.ImportModelEvaluation.""" parsed_args, _ = parser.parse_known_args(argv) - if 
parsed_args.model_name.startswith('publishers'): + if 'publishers/google' in parsed_args.model_name: return _, project_id, _, location, _, model_id = parsed_args.model_name.split('/') @@ -275,6 +282,10 @@ def main(argv): ) model_evaluation_name = import_model_evaluation_response.name + # Write the model evaluation resource to evaluation_resource_name output. + with open(parsed_args.evaluation_resource_name, 'w') as f: + f.write(model_evaluation_name) + resources = GcpResources() # Write the model evaluation resource to GcpResources output. model_eval_resource = resources.resources.add() From 99830e3fe7bcef8ae8fcc05039f15799d2e69914 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 25 Jul 2023 14:51:26 -0700 Subject: [PATCH 046/253] chore(components): move output artifact util to common utils dir PiperOrigin-RevId: 551004173 --- .../_implementation/model/get_model/get_model.py | 4 ++-- .../utils/artifact_util.py => utils/artifact_utils.py} | 9 +++++++-- .../container/v1/batch_prediction_job/remote_runner.py | 4 ++-- .../container/v1/bigquery/create_model/remote_runner.py | 4 ++-- .../v1/bigquery/evaluate_model/remote_runner.py | 4 ++-- .../v1/bigquery/feature_importance/remote_runner.py | 4 ++-- .../v1/bigquery/ml_advanced_weights/remote_runner.py | 4 ++-- .../v1/bigquery/ml_arima_coefficients/remote_runner.py | 4 ++-- .../v1/bigquery/ml_arima_evaluate/remote_runner.py | 4 ++-- .../container/v1/bigquery/ml_centroids/remote_runner.py | 4 ++-- .../v1/bigquery/ml_feature_info/remote_runner.py | 4 ++-- .../v1/bigquery/ml_training_info/remote_runner.py | 4 ++-- .../container/v1/bigquery/ml_trial_info/remote_runner.py | 4 ++-- .../container/v1/bigquery/ml_weights/remote_runner.py | 4 ++-- .../container/v1/bigquery/utils/bigquery_util.py | 4 ++-- .../v1/endpoint/create_endpoint/remote_runner.py | 4 ++-- .../container/v1/model/upload_model/remote_runner.py | 4 ++-- 17 files changed, 39 insertions(+), 34 deletions(-) rename components/google-cloud/google_cloud_pipeline_components/container/{v1/gcp_launcher/utils/artifact_util.py => utils/artifact_utils.py} (95%) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model/get_model/get_model.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model/get_model/get_model.py index e3aaf662f1..b8cc1ced75 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model/get_model/get_model.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model/get_model/get_model.py @@ -20,7 +20,7 @@ from google.api_core import gapic_v1 from google.cloud import aiplatform -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources from google_cloud_pipeline_components.types.artifact_types import VertexModel @@ -120,7 +120,7 @@ def _get_model( 'model', vertex_uri_prefix + model_resource_name, model_resource_name ) # TODO(b/266848949): Output Artifact should use correct MLMD artifact. 
- artifact_util.update_output_artifacts(executor_input, [vertex_model]) + artifact_utils.update_output_artifacts(executor_input, [vertex_model]) resources = GcpResources() model_resource = resources.resources.add() diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/artifact_util.py b/components/google-cloud/google_cloud_pipeline_components/container/utils/artifact_utils.py similarity index 95% rename from components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/artifact_util.py rename to components/google-cloud/google_cloud_pipeline_components/container/utils/artifact_utils.py index 59fccb2458..45dab98864 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/artifact_util.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/utils/artifact_utils.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Utilities for working with artifacts.""" + import json import os @@ -21,7 +23,7 @@ def update_output_artifact( target_artifact_name: str, uri: str, metadata: dict = {}, -): +) -> None: """Updates the output artifact with the new uri and metadata.""" executor_input_json = json.loads(executor_input) executor_output = {'artifacts': {}} @@ -47,7 +49,10 @@ def update_output_artifact( # Writes a list of Artifacts to the executor output file. -def update_output_artifacts(executor_input: str, artifacts: list): +def update_output_artifacts( + executor_input: str, + artifacts: list, +) -> None: """Updates a list of Artifacts to the executor output file.""" executor_input_json = json.loads(executor_input) executor_output = {'artifacts': {}} diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/batch_prediction_job/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/batch_prediction_job/remote_runner.py index cb872700ad..8a0a0f9468 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/batch_prediction_job/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/batch_prediction_job/remote_runner.py @@ -19,8 +19,8 @@ from google.api_core import retry from google.cloud.aiplatform import explain +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.gcp_launcher import job_remote_runner -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import gcp_labels_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util @@ -241,6 +241,6 @@ def create_batch_prediction_job( ) ) - artifact_util.update_output_artifacts(executor_input, output_artifacts) + artifact_utils.update_output_artifacts(executor_input, output_artifacts) except (ConnectionError, RuntimeError) as err: error_util.exit_with_internal_error(err.args[0]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/create_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/create_model/remote_runner.py index 42122d78b0..9fa0e21a48 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/create_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/create_model/remote_runner.py @@ -15,8 +15,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util from google_cloud_pipeline_components.types.artifact_types import BQMLModel @@ -96,4 +96,4 @@ def bigquery_create_model_job( # tableId is the model ID modelId = query_result['ddlTargetTable']['tableId'] bqml_model_artifact = BQMLModel.create('model', projectId, datasetId, modelId) - artifact_util.update_output_artifacts(executor_input, [bqml_model_artifact]) + artifact_utils.update_output_artifacts(executor_input, [bqml_model_artifact]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/evaluate_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/evaluate_model/remote_runner.py index 7eab4354e2..c80666cd92 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/evaluate_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/evaluate_model/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_evaluate_model_job( @@ -126,7 +126,7 @@ def bigquery_evaluate_model_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'evaluation_metrics', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/feature_importance/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/feature_importance/remote_runner.py index 3327f28917..adc1d136e5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/feature_importance/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/feature_importance/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_feature_importance_job( @@ -83,7 +83,7 @@ def bigquery_ml_feature_importance_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'feature_importance', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_advanced_weights/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_advanced_weights/remote_runner.py index 
d5e6d41f18..cb43dbf1f9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_advanced_weights/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_advanced_weights/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_advanced_weights_job( @@ -90,7 +90,7 @@ def bigquery_ml_advanced_weights_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'advanced_weights', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_coefficients/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_coefficients/remote_runner.py index d39c42599d..9d2c877602 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_coefficients/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_coefficients/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_arima_coefficients( @@ -78,7 +78,7 @@ def bigquery_ml_arima_coefficients( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'arima_coefficients', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_evaluate/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_evaluate/remote_runner.py index 43fc52282e..f9ae51d4fe 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_evaluate/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_arima_evaluate/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_arima_evaluate_job( @@ -103,7 +103,7 @@ def bigquery_ml_arima_evaluate_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'arima_evaluation_metrics', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_centroids/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_centroids/remote_runner.py index e7ba48e4ed..ac426c91c0 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_centroids/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_centroids/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_centroids_job( @@ -100,7 +100,7 @@ def bigquery_ml_centroids_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'centroids', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_feature_info/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_feature_info/remote_runner.py index b1e37597e0..cc6a808293 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_feature_info/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_feature_info/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_feature_info_job( @@ -90,7 +90,7 @@ def bigquery_ml_feature_info_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'feature_info', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_training_info/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_training_info/remote_runner.py index 7128287923..a2677989d4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_training_info/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_training_info/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_training_info_job( @@ -90,7 +90,7 @@ def bigquery_ml_training_info_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'ml_training_info', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_trial_info/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_trial_info/remote_runner.py index 37fbf5edcb..56c9587a45 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_trial_info/remote_runner.py +++ 
b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_trial_info/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_trial_info_job( @@ -94,7 +94,7 @@ def bigquery_ml_trial_info_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'trial_info', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_weights/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_weights/remote_runner.py index 7e3b0b1219..85b64b95c1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_weights/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/ml_weights/remote_runner.py @@ -17,8 +17,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.bigquery.utils import bigquery_util -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util def bigquery_ml_weights_job( @@ -90,7 +90,7 @@ def bigquery_ml_weights_job( query_results = bigquery_util.get_query_results( project, job_id, location, creds ) - artifact_util.update_output_artifact( + artifact_utils.update_output_artifact( executor_input, 'weights', '', diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/utils/bigquery_util.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/utils/bigquery_util.py index fe1bc44b5a..cbfce404c9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/utils/bigquery_util.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/bigquery/utils/bigquery_util.py @@ -22,8 +22,8 @@ import google.auth import google.auth.transport.requests +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.utils import execution_context -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import gcp_labels_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util from google_cloud_pipeline_components.proto import gcp_resources_pb2 @@ -334,4 +334,4 @@ def bigquery_query_job( bq_table_artifact = BQTable.create( artifact_name, projectId, datasetId, tableId ) - artifact_util.update_output_artifacts(executor_input, [bq_table_artifact]) + artifact_utils.update_output_artifacts(executor_input, [bq_table_artifact]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/create_endpoint/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/create_endpoint/remote_runner.py index 36edb1711e..f92ebbd4e8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/create_endpoint/remote_runner.py +++ 
b/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/create_endpoint/remote_runner.py @@ -13,8 +13,8 @@ # limitations under the License. import json +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import gcp_labels_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util from google_cloud_pipeline_components.types.artifact_types import VertexEndpoint @@ -56,4 +56,4 @@ def create_endpoint( vertex_uri_prefix + endpoint_resource_name, endpoint_resource_name, ) - artifact_util.update_output_artifacts(executor_input, [vertex_endpoint]) + artifact_utils.update_output_artifacts(executor_input, [vertex_endpoint]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py index ebb6c2c8db..5ea07e9a2b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py @@ -15,8 +15,8 @@ import json from typing import Optional +from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import artifact_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import gcp_labels_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util @@ -108,6 +108,6 @@ def upload_model( vertex_model = VertexModel.create( 'model', vertex_uri_prefix + model_resource_name, model_resource_name ) - artifact_util.update_output_artifacts(executor_input, [vertex_model]) + artifact_utils.update_output_artifacts(executor_input, [vertex_model]) except (ConnectionError, RuntimeError) as err: error_util.exit_with_internal_error(err.args[0]) From 525ff906845e8ee79c5cac1a5a49e99fb6320e02 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 25 Jul 2023 16:34:02 -0700 Subject: [PATCH 047/253] chore(sdk): extract DSL into `kfp-dsl` package (#9738) * move dsl dir * make kfp-dsl a package * make kfp-dsl a package * additional changes * address review feedback --- sdk/python/build.sh | 2 +- sdk/python/install_from_source.sh | 4 + sdk/python/kfp-dsl/README.md | 25 +++ sdk/python/kfp-dsl/build.sh | 30 +++ sdk/python/{ => kfp-dsl}/kfp/dsl/__init__.py | 2 + .../{ => kfp-dsl}/kfp/dsl/base_component.py | 5 +- .../kfp/dsl/component_decorator.py | 0 .../kfp/dsl/component_factory.py | 16 +- sdk/python/{ => kfp-dsl}/kfp/dsl/constants.py | 0 .../container_component_artifact_channel.py | 0 .../kfp/dsl/container_component_class.py | 0 .../kfp/dsl/container_component_decorator.py | 0 sdk/python/{ => kfp-dsl}/kfp/dsl/executor.py | 0 .../{ => kfp-dsl}/kfp/dsl/executor_main.py | 0 sdk/python/{ => kfp-dsl}/kfp/dsl/for_loop.py | 0 .../{ => kfp-dsl}/kfp/dsl/graph_component.py | 10 +- .../kfp/dsl/importer_component.py | 0 .../{ => kfp-dsl}/kfp/dsl/importer_node.py | 0 .../{ => 
kfp-dsl}/kfp/dsl/kfp_config.py | 0 .../{ => kfp-dsl}/kfp/dsl/pipeline_channel.py | 0 .../{ => kfp-dsl}/kfp/dsl/pipeline_context.py | 0 .../{ => kfp-dsl}/kfp/dsl/pipeline_task.py | 5 +- .../{ => kfp-dsl}/kfp/dsl/placeholders.py | 0 .../{ => kfp-dsl}/kfp/dsl/python_component.py | 0 .../{ => kfp-dsl}/kfp/dsl/structures.py | 143 ++++++-------- .../kfp/dsl/task_final_status.py | 0 .../{ => kfp-dsl}/kfp/dsl/tasks_group.py | 0 .../{ => kfp-dsl}/kfp/dsl/types/__init__.py | 0 .../kfp/dsl/types/artifact_types.py | 0 .../kfp/dsl/types/custom_artifact_types.py | 0 .../kfp/dsl/types/type_annotations.py | 0 .../{ => kfp-dsl}/kfp/dsl/types/type_utils.py | 105 ++++++---- sdk/python/{ => kfp-dsl}/kfp/dsl/utils.py | 0 .../{ => kfp-dsl}/kfp/dsl/v1_modelbase.py | 0 .../{ => kfp-dsl}/kfp/dsl/v1_structures.py | 12 -- .../{ => kfp-dsl}/kfp/dsl/yaml_component.py | 14 +- .../execute_commands_args_test.py | 139 +++++++++++++ .../runtime_tests}/executor_test.py | 0 .../runtime_tests/import_objects_test.py | 21 ++ .../pipeline_with_task_final_status.py | 58 ++++++ .../pipeline_with_task_final_status.yaml | 183 ++++++++++++++++++ sdk/python/kfp-dsl/setup.py | 52 +++++ .../kfp/compiler/pipeline_spec_builder.py | 1 + .../kfp/components/load_yaml_utilities.py | 102 +++++++++- .../components/load_yaml_utilities_test.py | 43 ++++ .../{dsl => dsl-test}/base_component_test.py | 0 .../component_decorator_test.py | 5 +- .../component_factory_test.py | 0 ...ntainer_component_artifact_channel_test.py | 0 .../container_component_decorator_test.py | 0 .../kfp/{dsl => dsl-test}/for_loop_test.py | 0 .../{dsl => dsl-test}/importer_node_test.py | 0 .../pipeline_channel_test.py | 0 .../{dsl => dsl-test}/pipeline_task_test.py | 49 ++--- .../{dsl => dsl-test}/placeholders_test.py | 0 .../kfp/{dsl => dsl-test}/structures_test.py | 60 +----- .../kfp/{dsl => dsl-test}/tasks_group_test.py | 0 .../types/artifact_types_test.py | 0 .../types/custom_artifact_types_test.py | 0 ...expected_bulk_loaded_confusion_matrix.json | 2 +- .../test_data/expected_confusion_matrix.json | 2 +- ...ypes_bulk_load_classification_metrics.json | 14 +- ...ected_io_types_classification_metrics.json | 10 +- .../types/type_annotations_test.py | 0 .../types/type_utils_test.py | 2 + .../kfp/{dsl => dsl-test}/utils_test.py | 0 sdk/python/kfp/dsl/v1_components.py | 44 ----- sdk/python/requirements.in | 1 + .../test_data/components/add_numbers.yaml | 2 +- .../component_with_metadata_fields.yaml | 2 +- .../component_with_pip_install.yaml | 2 +- .../component_with_task_final_status.yaml | 2 +- .../test_data/components/concat_message.yaml | 2 +- .../test_data/components/dict_input.yaml | 2 +- sdk/python/test_data/components/identity.yaml | 2 +- .../test_data/components/input_artifact.yaml | 2 +- .../test_data/components/nested_return.yaml | 2 +- .../test_data/components/output_metrics.yaml | 2 +- .../test_data/components/preprocess.yaml | 2 +- .../component_with_optional_inputs.yaml | 2 +- .../component_with_pip_index_urls.yaml | 2 +- .../components_with_optional_artifacts.yaml | 4 +- ...lightweight_python_functions_pipeline.yaml | 4 +- ...tweight_python_functions_with_outputs.yaml | 8 +- .../parallelfor_fan_in/artifacts_complex.yaml | 10 +- .../parallelfor_fan_in/artifacts_simple.yaml | 4 +- .../conditional_producer_and_consumers.yaml | 4 +- .../nested_with_parameters.yaml | 8 +- .../parameters_complex.yaml | 14 +- .../parallelfor_fan_in/parameters_simple.yaml | 4 +- .../pipeline_producer_consumer.yaml | 8 +- .../pipelines/pipeline_as_exit_task.yaml | 8 +- 
.../pipelines/pipeline_in_pipeline.yaml | 4 +- .../pipeline_in_pipeline_complex.yaml | 4 +- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 6 +- .../pipelines/pipeline_with_condition.yaml | 10 +- ...peline_with_dynamic_importer_metadata.yaml | 2 +- .../pipelines/pipeline_with_env.yaml | 2 +- .../pipelines/pipeline_with_exit_handler.yaml | 6 +- .../pipeline_with_google_artifact_type.yaml | 4 +- .../pipelines/pipeline_with_importer.yaml | 4 +- .../pipelines/pipeline_with_loops.yaml | 16 +- .../pipeline_with_loops_and_conditions.yaml | 26 +-- .../pipeline_with_metadata_fields.yaml | 4 +- .../pipeline_with_metrics_outputs.yaml | 4 +- .../pipeline_with_multiple_exit_handlers.yaml | 14 +- .../pipeline_with_nested_conditions.yaml | 16 +- .../pipelines/pipeline_with_nested_loops.yaml | 6 +- .../pipelines/pipeline_with_outputs.yaml | 4 +- ...pipeline_with_parallelfor_parallelism.yaml | 12 +- ...ipeline_with_params_containing_format.yaml | 6 +- .../pipelines/pipeline_with_placeholders.yaml | 10 +- .../pipelines/pipeline_with_retry.yaml | 2 +- .../pipeline_with_task_final_status.yaml | 6 +- ...th_task_using_ignore_upstream_failure.yaml | 4 +- test/presubmit-component-yaml.sh | 4 +- test/presubmit-test-kfp-dsl-runtime-code.sh | 23 +++ test/presubmit-test-kfp-kubernetes-library.sh | 4 +- test/presubmit-tests-sdk.sh | 6 +- test/presubmit-tests-tfx.sh | 4 +- 120 files changed, 1040 insertions(+), 436 deletions(-) create mode 100644 sdk/python/install_from_source.sh create mode 100644 sdk/python/kfp-dsl/README.md create mode 100755 sdk/python/kfp-dsl/build.sh rename sdk/python/{ => kfp-dsl}/kfp/dsl/__init__.py (98%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/base_component.py (97%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/component_decorator.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/component_factory.py (98%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/constants.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/container_component_artifact_channel.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/container_component_class.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/container_component_decorator.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/executor.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/executor_main.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/for_loop.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/graph_component.py (92%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/importer_component.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/importer_node.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/kfp_config.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/pipeline_channel.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/pipeline_context.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/pipeline_task.py (99%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/placeholders.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/python_component.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/structures.py (92%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/task_final_status.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/tasks_group.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/types/__init__.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/types/artifact_types.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/types/custom_artifact_types.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/types/type_annotations.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/types/type_utils.py (86%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/utils.py (100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/v1_modelbase.py 
(100%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/v1_structures.py (98%) rename sdk/python/{ => kfp-dsl}/kfp/dsl/yaml_component.py (80%) create mode 100644 sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py rename sdk/python/{kfp/dsl => kfp-dsl/runtime_tests}/executor_test.py (100%) create mode 100644 sdk/python/kfp-dsl/runtime_tests/import_objects_test.py create mode 100644 sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py create mode 100644 sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml create mode 100644 sdk/python/kfp-dsl/setup.py rename sdk/python/kfp/{dsl => dsl-test}/base_component_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/component_decorator_test.py (97%) rename sdk/python/kfp/{dsl => dsl-test}/component_factory_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/container_component_artifact_channel_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/container_component_decorator_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/for_loop_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/importer_node_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/pipeline_channel_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/pipeline_task_test.py (88%) rename sdk/python/kfp/{dsl => dsl-test}/placeholders_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/structures_test.py (94%) rename sdk/python/kfp/{dsl => dsl-test}/tasks_group_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/types/artifact_types_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/types/custom_artifact_types_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/types/test_data/expected_bulk_loaded_confusion_matrix.json (98%) rename sdk/python/kfp/{dsl => dsl-test}/types/test_data/expected_confusion_matrix.json (98%) rename sdk/python/kfp/{dsl => dsl-test}/types/test_data/expected_io_types_bulk_load_classification_metrics.json (83%) rename sdk/python/kfp/{dsl => dsl-test}/types/test_data/expected_io_types_classification_metrics.json (85%) rename sdk/python/kfp/{dsl => dsl-test}/types/type_annotations_test.py (100%) rename sdk/python/kfp/{dsl => dsl-test}/types/type_utils_test.py (99%) rename sdk/python/kfp/{dsl => dsl-test}/utils_test.py (100%) delete mode 100644 sdk/python/kfp/dsl/v1_components.py create mode 100644 test/presubmit-test-kfp-dsl-runtime-code.sh diff --git a/sdk/python/build.sh b/sdk/python/build.sh index a18d0d3c0e..6ec5cc49c4 100755 --- a/sdk/python/build.sh +++ b/sdk/python/build.sh @@ -21,7 +21,7 @@ # ./build.sh [output_file] -target_archive_file=${1:-kfp.tar.gz} +target_archive_file=$1 pushd "$(dirname "$0")" dist_dir=$(mktemp -d) diff --git a/sdk/python/install_from_source.sh b/sdk/python/install_from_source.sh new file mode 100644 index 0000000000..6fb0bce65e --- /dev/null +++ b/sdk/python/install_from_source.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +pip3 install -e sdk/python/kfp-dsl +pip3 install -e sdk/python diff --git a/sdk/python/kfp-dsl/README.md b/sdk/python/kfp-dsl/README.md new file mode 100644 index 0000000000..bf898f9757 --- /dev/null +++ b/sdk/python/kfp-dsl/README.md @@ -0,0 +1,25 @@ +## kfp-dsl package + +`kfp-dsl` is a subpackage of the KFP SDK that is released separately in order to provide a minimal dependency runtime package for Lightweight Python Components. 
**`kfp-dsl` should not be installed and used directly.**
+
+`kfp-dsl` enables the KFP runtime code and objects to be installed at Lightweight Python Component runtime without needing to install the full KFP SDK package.
+
+### Release
+`kfp-dsl` should be released immediately prior to each full `kfp` release. The version of `kfp-dsl` should match the version of `kfp` that depends on it.
+
+### Development
+To develop on `kfp` with a version of `kfp-dsl` built from source, run the following from the repository root:
+
+```sh
+source sdk/python/install_from_source.sh
+```
+
+**Note:** Modules in the `kfp-dsl` package are only permitted to have *top-level* imports from the Python standard library, the `typing-extensions` package, and the `kfp-dsl` package itself. Imports from other subpackages of the main `kfp` package or its transitive dependencies must be nested within functions to avoid runtime import errors when only `kfp-dsl` is installed.
+
+### Testing
+The `kfp-dsl` code is tested alongside the full KFP SDK in `sdk/python/kfp/dsl-test`. This is because many of the DSL tests require the full KFP SDK to be installed (e.g., they require creating and compiling a component or pipeline).
+
+There are also dedicated `kfp-dsl` tests in `./sdk/python/kfp-dsl/runtime_tests/`, which test the runtime code in `kfp-dsl` and should *not* be run with the full KFP SDK installed. Specifically, these tests ensure:
+* That the KFP runtime logic is correct
+* That `kfp-dsl` specifies all of its dependencies (i.e., no "module not found" errors from missing `kfp-dsl` dependencies)
+* That imports of the main `kfp` package from within `kfp-dsl` are nested inside function calls (i.e., no "module not found" errors from missing `kfp` dependencies)
diff --git a/sdk/python/kfp-dsl/build.sh b/sdk/python/kfp-dsl/build.sh
new file mode 100755
index 0000000000..6ec5cc49c4
--- /dev/null
+++ b/sdk/python/kfp-dsl/build.sh
@@ -0,0 +1,30 @@
+#!/bin/bash -ex
+#
+# Copyright 2018 The Kubeflow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script creates the Kubeflow Pipelines python SDK package.
+#
+# Usage:
+# ./build.sh [output_file]
+
+
+target_archive_file=$1
+
+pushd "$(dirname "$0")"
+dist_dir=$(mktemp -d)
+python3 setup.py sdist --format=gztar --dist-dir "$dist_dir"
+cp "$dist_dir"/*.tar.gz "$target_archive_file"
+popd
diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp-dsl/kfp/dsl/__init__.py
similarity index 98%
rename from sdk/python/kfp/dsl/__init__.py
rename to sdk/python/kfp-dsl/kfp/dsl/__init__.py
index d3502a7287..e8f89b6254 100644
--- a/sdk/python/kfp/dsl/__init__.py
+++ b/sdk/python/kfp-dsl/kfp/dsl/__init__.py
@@ -50,6 +50,8 @@
     'PipelineTask',
 ]
 
+_kfp_dsl_import_error_msg = 'It looks like only `kfp-dsl` is installed. Please install the full KFP SDK using `pip install kfp`.'
+ try: from typing import Annotated except ImportError: diff --git a/sdk/python/kfp/dsl/base_component.py b/sdk/python/kfp-dsl/kfp/dsl/base_component.py similarity index 97% rename from sdk/python/kfp/dsl/base_component.py rename to sdk/python/kfp-dsl/kfp/dsl/base_component.py index 25a10f84df..1e8e561b2c 100644 --- a/sdk/python/kfp/dsl/base_component.py +++ b/sdk/python/kfp-dsl/kfp/dsl/base_component.py @@ -19,7 +19,6 @@ from kfp.dsl import pipeline_task from kfp.dsl import structures from kfp.dsl.types import type_utils -from kfp.pipeline_spec import pipeline_spec_pb2 class BaseComponent(abc.ABC): @@ -103,13 +102,13 @@ def __call__(self, *args, **kwargs) -> pipeline_task.PipelineTask: ) @property - def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': """Returns the pipeline spec of the component.""" with BlockPipelineTaskRegistration(): return self.component_spec.to_pipeline_spec() @property - def platform_spec(self) -> pipeline_spec_pb2.PlatformSpec: + def platform_spec(self) -> 'pipeline_spec_pb2.PlatformSpec': """Returns the PlatformSpec of the component. Useful when the component is a GraphComponent, else will be diff --git a/sdk/python/kfp/dsl/component_decorator.py b/sdk/python/kfp-dsl/kfp/dsl/component_decorator.py similarity index 100% rename from sdk/python/kfp/dsl/component_decorator.py rename to sdk/python/kfp-dsl/kfp/dsl/component_decorator.py diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp-dsl/kfp/dsl/component_factory.py similarity index 98% rename from sdk/python/kfp/dsl/component_factory.py rename to sdk/python/kfp-dsl/kfp/dsl/component_factory.py index 99d34f7828..9c0d1dac51 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp-dsl/kfp/dsl/component_factory.py @@ -20,7 +20,7 @@ from typing import Callable, List, Mapping, Optional, Tuple, Type, Union import warnings -import docstring_parser +from kfp import dsl from kfp.dsl import container_component_artifact_channel from kfp.dsl import container_component_class from kfp.dsl import graph_component @@ -124,9 +124,9 @@ def _get_packages_to_install_command( return ['sh', '-c', install_python_packages_script] -def _get_default_kfp_package_path() -> str: +def _get_kfp_dsl_requirement() -> str: import kfp - return f'kfp=={kfp.__version__}' + return f'kfp-dsl=={kfp.__version__}' def _get_function_source_definition(func: Callable) -> str: @@ -175,6 +175,12 @@ def extract_component_interface( parameters = list(signature.parameters.values()) original_docstring = inspect.getdoc(func) + + try: + import docstring_parser + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + parsed_docstring = docstring_parser.parse(original_docstring) inputs = {} @@ -475,7 +481,7 @@ def create_component_from_func( if install_kfp_package and target_image is None: if kfp_package_path is None: - kfp_package_path = _get_default_kfp_package_path() + kfp_package_path = _get_kfp_dsl_requirement() packages_to_install.append(kfp_package_path) packages_to_install_command = _get_packages_to_install_command( @@ -622,7 +628,7 @@ def create_graph_component_from_func( def get_pipeline_description( decorator_description: Union[str, None], - docstring: docstring_parser.Docstring, + docstring: 'docstring_parser.Docstring', ) -> Union[str, None]: """Obtains the correct pipeline description from the pipeline decorator's description argument and the parsed docstring. 
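The `component_factory.py` hunk above is representative of the import convention this patch applies across `kfp-dsl`: anything that is not needed at Lightweight Python Component runtime (here `docstring_parser`, elsewhere the compiler and the `pipeline_spec` protos) is imported inside the function that uses it and guarded with the shared error message added to `kfp/dsl/__init__.py`. A minimal sketch of that pattern, with a hypothetical helper name used purely for illustration:

```python
# Sketch of the guarded, function-local import convention used in kfp-dsl.
# `_load_docstring_parser` is a hypothetical helper name; the error message
# constant is the one this patch adds to kfp/dsl/__init__.py.
from kfp import dsl


def _load_docstring_parser():
    """Imports a full-KFP dependency only when it is actually needed."""
    try:
        # Not allowed at module level: docstring_parser ships with the full
        # `kfp` package, not with the minimal `kfp-dsl` runtime package.
        import docstring_parser
    except ImportError as e:
        raise ImportError(dsl._kfp_dsl_import_error_msg) from e
    return docstring_parser
```

The same motivation explains `_get_kfp_dsl_requirement()` replacing `_get_default_kfp_package_path()`: Lightweight Python Components now pin `kfp-dsl=={kfp.__version__}` as their runtime install target, which is why the compiled test YAML later in this patch pip-installs `'kfp-dsl==2.0.1'` inside the container command.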
diff --git a/sdk/python/kfp/dsl/constants.py b/sdk/python/kfp-dsl/kfp/dsl/constants.py similarity index 100% rename from sdk/python/kfp/dsl/constants.py rename to sdk/python/kfp-dsl/kfp/dsl/constants.py diff --git a/sdk/python/kfp/dsl/container_component_artifact_channel.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py similarity index 100% rename from sdk/python/kfp/dsl/container_component_artifact_channel.py rename to sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py diff --git a/sdk/python/kfp/dsl/container_component_class.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_class.py similarity index 100% rename from sdk/python/kfp/dsl/container_component_class.py rename to sdk/python/kfp-dsl/kfp/dsl/container_component_class.py diff --git a/sdk/python/kfp/dsl/container_component_decorator.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py similarity index 100% rename from sdk/python/kfp/dsl/container_component_decorator.py rename to sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp-dsl/kfp/dsl/executor.py similarity index 100% rename from sdk/python/kfp/dsl/executor.py rename to sdk/python/kfp-dsl/kfp/dsl/executor.py diff --git a/sdk/python/kfp/dsl/executor_main.py b/sdk/python/kfp-dsl/kfp/dsl/executor_main.py similarity index 100% rename from sdk/python/kfp/dsl/executor_main.py rename to sdk/python/kfp-dsl/kfp/dsl/executor_main.py diff --git a/sdk/python/kfp/dsl/for_loop.py b/sdk/python/kfp-dsl/kfp/dsl/for_loop.py similarity index 100% rename from sdk/python/kfp/dsl/for_loop.py rename to sdk/python/kfp-dsl/kfp/dsl/for_loop.py diff --git a/sdk/python/kfp/dsl/graph_component.py b/sdk/python/kfp-dsl/kfp/dsl/graph_component.py similarity index 92% rename from sdk/python/kfp/dsl/graph_component.py rename to sdk/python/kfp-dsl/kfp/dsl/graph_component.py index 2b09927dfa..d7ddffc65a 100644 --- a/sdk/python/kfp/dsl/graph_component.py +++ b/sdk/python/kfp-dsl/kfp/dsl/graph_component.py @@ -17,12 +17,11 @@ from typing import Callable, Optional import uuid -from kfp.compiler import pipeline_spec_builder as builder +from kfp import dsl from kfp.dsl import base_component from kfp.dsl import pipeline_channel from kfp.dsl import pipeline_context from kfp.dsl import structures -from kfp.pipeline_spec import pipeline_spec_pb2 class GraphComponent(base_component.BaseComponent): @@ -65,6 +64,11 @@ def __init__( pipeline_group = dsl_pipeline.groups[0] pipeline_group.name = uuid.uuid4().hex + try: + from kfp.compiler import pipeline_spec_builder as builder + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + pipeline_spec, platform_spec = builder.create_pipeline_spec( pipeline=dsl_pipeline, component_spec=self.component_spec, @@ -83,7 +87,7 @@ def __init__( self.component_spec.platform_spec = platform_spec @property - def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': """Returns the pipeline spec of the component.""" return self.component_spec.implementation.graph diff --git a/sdk/python/kfp/dsl/importer_component.py b/sdk/python/kfp-dsl/kfp/dsl/importer_component.py similarity index 100% rename from sdk/python/kfp/dsl/importer_component.py rename to sdk/python/kfp-dsl/kfp/dsl/importer_component.py diff --git a/sdk/python/kfp/dsl/importer_node.py b/sdk/python/kfp-dsl/kfp/dsl/importer_node.py similarity index 100% rename from sdk/python/kfp/dsl/importer_node.py rename to 
sdk/python/kfp-dsl/kfp/dsl/importer_node.py diff --git a/sdk/python/kfp/dsl/kfp_config.py b/sdk/python/kfp-dsl/kfp/dsl/kfp_config.py similarity index 100% rename from sdk/python/kfp/dsl/kfp_config.py rename to sdk/python/kfp-dsl/kfp/dsl/kfp_config.py diff --git a/sdk/python/kfp/dsl/pipeline_channel.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py similarity index 100% rename from sdk/python/kfp/dsl/pipeline_channel.py rename to sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py diff --git a/sdk/python/kfp/dsl/pipeline_context.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py similarity index 100% rename from sdk/python/kfp/dsl/pipeline_context.py rename to sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py diff --git a/sdk/python/kfp/dsl/pipeline_task.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py similarity index 99% rename from sdk/python/kfp/dsl/pipeline_task.py rename to sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py index f35cdd752b..26081f75e1 100644 --- a/sdk/python/kfp/dsl/pipeline_task.py +++ b/sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py @@ -20,13 +20,13 @@ from typing import Any, Dict, List, Mapping, Optional, Union import warnings +import kfp from kfp.dsl import constants from kfp.dsl import pipeline_channel from kfp.dsl import placeholders from kfp.dsl import structures from kfp.dsl import utils from kfp.dsl.types import type_utils -from kfp.pipeline_spec import pipeline_spec_pb2 _register_task_handler = lambda task: utils.maybe_rename_for_k8s( task.component_spec.name) @@ -89,6 +89,7 @@ def __init__( error_message_prefix=( f'Incompatible argument passed to the input ' f'{input_name!r} of component {component_spec.name!r}: '), + raise_on_error=kfp.TYPE_CHECK, ) self.component_spec = component_spec @@ -149,7 +150,7 @@ def validate_placeholder_types( ]) @property - def platform_spec(self) -> pipeline_spec_pb2.PlatformSpec: + def platform_spec(self) -> 'pipeline_spec_pb2.PlatformSpec': """PlatformSpec for all tasks in the pipeline as task. Only for use on tasks created from GraphComponents. 
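The `pipeline_task.py` hunk above shows the other half of the convention: return annotations that name protobuf types are now written as string literals, so importing `kfp.dsl` no longer pulls in `kfp.pipeline_spec` at module-load time; the proto package is only imported inside the property body (and the string annotation is only resolved if a caller inspects type hints). A small sketch of the idea, using an illustrative `SpecHolder` class rather than any real KFP class:

```python
# Sketch: a quoted annotation plus a guarded local import keep the protobuf
# dependency out of kfp-dsl's import path. `SpecHolder` is illustrative only.
from kfp import dsl


class SpecHolder:

    @property
    def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec':
        # The string annotation is not evaluated at class-definition time,
        # and the heavy import happens only when the property is used.
        try:
            from kfp.pipeline_spec import pipeline_spec_pb2
        except ImportError as e:
            raise ImportError(dsl._kfp_dsl_import_error_msg) from e
        return pipeline_spec_pb2.PipelineSpec()
```

The explicit `raise_on_error=kfp.TYPE_CHECK` argument serves the same goal: the type-compatibility check in `kfp-dsl` no longer reads the `kfp.TYPE_CHECK` global itself; the caller passes it in.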
diff --git a/sdk/python/kfp/dsl/placeholders.py b/sdk/python/kfp-dsl/kfp/dsl/placeholders.py similarity index 100% rename from sdk/python/kfp/dsl/placeholders.py rename to sdk/python/kfp-dsl/kfp/dsl/placeholders.py diff --git a/sdk/python/kfp/dsl/python_component.py b/sdk/python/kfp-dsl/kfp/dsl/python_component.py similarity index 100% rename from sdk/python/kfp/dsl/python_component.py rename to sdk/python/kfp-dsl/kfp/dsl/python_component.py diff --git a/sdk/python/kfp/dsl/structures.py b/sdk/python/kfp-dsl/kfp/dsl/structures.py similarity index 92% rename from sdk/python/kfp/dsl/structures.py rename to sdk/python/kfp-dsl/kfp/dsl/structures.py index 24486e730d..941bff7a07 100644 --- a/sdk/python/kfp/dsl/structures.py +++ b/sdk/python/kfp-dsl/kfp/dsl/structures.py @@ -18,22 +18,18 @@ import dataclasses import itertools import re -from typing import Any, Dict, List, Mapping, Optional, Tuple, Union +from typing import Any, Dict, List, Mapping, Optional, Union import uuid -from google.protobuf import json_format -import kfp +from kfp import dsl from kfp.dsl import placeholders from kfp.dsl import utils -from kfp.dsl import v1_components from kfp.dsl import v1_structures from kfp.dsl.container_component_artifact_channel import \ ContainerComponentArtifactChannel from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations from kfp.dsl.types import type_utils -from kfp.pipeline_spec import pipeline_spec_pb2 -import yaml @dataclasses.dataclass @@ -370,7 +366,7 @@ class RetryPolicy: backoff_factor: Optional[float] = None backoff_max_duration: Optional[str] = None - def to_proto(self) -> pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy: + def to_proto(self) -> 'pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy': # include defaults so that IR is more reflective of runtime behavior max_retry_count = self.max_retry_count or 0 backoff_duration = self.backoff_duration or '0s' @@ -381,6 +377,12 @@ def to_proto(self) -> pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy: backoff_duration_seconds = f'{convert_duration_to_seconds(backoff_duration)}s' backoff_max_duration_seconds = f'{min(convert_duration_to_seconds(backoff_max_duration), 3600)}s' + try: + from google.protobuf import json_format + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + return json_format.ParseDict( { 'max_retry_count': max_retry_count, @@ -480,6 +482,13 @@ def from_pipeline_spec_dict(cls, pipeline_spec_dict: Dict[str, Any], executor['container']) if executor else None return Implementation(container=container_spec) else: + + try: + from google.protobuf import json_format + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + pipeline_spec = json_format.ParseDict( pipeline_spec_dict, pipeline_spec_pb2.PipelineSpec()) return Implementation(graph=pipeline_spec) @@ -551,6 +560,14 @@ def check_placeholder_references_valid_io_name( raise TypeError(f'Unexpected argument "{arg}" of type {type(arg)}.') +def _import_and_make_platform_spec() -> 'pipeline_spec_pb2.PlatformSpec': + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + return pipeline_spec_pb2.PlatformSpec() + + @dataclasses.dataclass class ComponentSpec: """The definition of a component. 
@@ -568,8 +585,9 @@ class ComponentSpec: description: Optional[str] = None inputs: Optional[Dict[str, InputSpec]] = None outputs: Optional[Dict[str, OutputSpec]] = None - platform_spec: pipeline_spec_pb2.PlatformSpec = dataclasses.field( - default_factory=pipeline_spec_pb2.PlatformSpec) + platform_spec: Optional[ + 'pipeline_spec_pb2.PlatformSpec'] = dataclasses.field( + default_factory=_import_and_make_platform_spec) def __post_init__(self) -> None: self._transform_name() @@ -652,6 +670,13 @@ def from_v1_component_spec( }) inputs = {} + + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + parameter_types_mapping = type_utils.get_parameter_types_mapping() + for spec in component_dict.get('inputs', []): type_ = spec.get('type') optional = spec.get('optional', False) or 'default' in spec @@ -667,9 +692,9 @@ def from_v1_component_spec( type=type_, optional=True) continue - elif isinstance(type_, str) and type_.lower( - ) in type_utils._PARAMETER_TYPES_MAPPING: - type_enum = type_utils._PARAMETER_TYPES_MAPPING[type_.lower()] + elif isinstance(type_, + str) and type_.lower() in parameter_types_mapping: + type_enum = parameter_types_mapping[type_.lower()] ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( type_enum) in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ @@ -720,9 +745,9 @@ def from_v1_component_spec( if isinstance(type_, str): type_ = type_utils.get_canonical_name_for_outer_generic(type_) - if isinstance(type_, str) and type_.lower( - ) in type_utils._PARAMETER_TYPES_MAPPING: - type_enum = type_utils._PARAMETER_TYPES_MAPPING[type_.lower()] + if isinstance(type_, + str) and type_.lower() in parameter_types_mapping: + type_enum = parameter_types_mapping[type_.lower()] ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( type_enum) in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ @@ -824,6 +849,12 @@ def extract_description_from_command( implementation.container.command or []) if implementation.container else None + try: + from google.protobuf import json_format + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + platform_spec = pipeline_spec_pb2.PlatformSpec() json_format.ParseDict(platform_spec_dict, platform_spec) @@ -836,53 +867,6 @@ def extract_description_from_command( platform_spec=platform_spec, ) - @classmethod - def from_yaml_documents(cls, component_yaml: str) -> 'ComponentSpec': - """Loads V1 or V2 component YAML into a ComponentSpec. - - Args: - component_yaml: PipelineSpec and optionally PlatformSpec YAML documents as a single string. - - Returns: - ComponentSpec: The ComponentSpec object. 
- """ - - def extract_description(component_yaml: str) -> Union[str, None]: - heading = '# Description: ' - multi_line_description_prefix = '# ' - index_of_heading = 2 - if heading in component_yaml: - description = component_yaml.splitlines()[index_of_heading] - - # Multi line - comments = component_yaml.splitlines() - index = index_of_heading + 1 - while comments[index][:len(multi_line_description_prefix - )] == multi_line_description_prefix: - description += '\n' + comments[index][ - len(multi_line_description_prefix) + 1:] - index += 1 - - return description[len(heading):] - else: - return None - - pipeline_spec_dict, platform_spec_dict = load_documents_from_yaml( - component_yaml) - - is_v1 = 'implementation' in set(pipeline_spec_dict.keys()) - if is_v1: - v1_component = v1_components._load_component_spec_from_component_text( - component_yaml) - return cls.from_v1_component_spec(v1_component) - else: - component_spec = ComponentSpec.from_ir_dicts( - pipeline_spec_dict, platform_spec_dict) - if not component_spec.description: - component_spec.description = extract_description( - component_yaml=component_yaml) - return component_spec - def save_to_component_yaml(self, output_file: str) -> None: """Saves ComponentSpec into IR YAML file. @@ -892,6 +876,12 @@ def save_to_component_yaml(self, output_file: str) -> None: from kfp.compiler import pipeline_spec_builder as builder pipeline_spec = self.to_pipeline_spec() + + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + builder.write_pipeline_spec_to_file( pipeline_spec, None, @@ -899,7 +889,7 @@ def save_to_component_yaml(self, output_file: str) -> None: output_file, ) - def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + def to_pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': """Creates a pipeline instance and constructs the pipeline spec for a single component. @@ -950,6 +940,12 @@ def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: utils.validate_pipeline_name(pipeline_name) + try: + import kfp + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + pipeline_spec = pipeline_spec_pb2.PipelineSpec() pipeline_spec.pipeline_info.name = pipeline_name pipeline_spec.sdk_version = f'kfp-{kfp.__version__}' @@ -1052,24 +1048,3 @@ def convert_duration_to_seconds(duration: str) -> int: raise ValueError( f"Unsupported duration unit: '{duration[-1]}' for '{duration}'.") return int(duration[:-1]) * seconds_per_unit[duration[-1]] - - -def load_documents_from_yaml(component_yaml: str) -> Tuple[dict, dict]: - """Loads up to two YAML documents from a YAML string. - - First document must always be present. If second document is - present, it is returned as a dict, else an empty dict. - """ - documents = list(yaml.safe_load_all(component_yaml)) - num_docs = len(documents) - if num_docs == 1: - pipeline_spec_dict = documents[0] - platform_spec_dict = {} - elif num_docs == 2: - pipeline_spec_dict = documents[0] - platform_spec_dict = documents[1] - else: - raise ValueError( - f'Expected one or two YAML documents in the IR YAML file. Got: {num_docs}.' 
- ) - return pipeline_spec_dict, platform_spec_dict diff --git a/sdk/python/kfp/dsl/task_final_status.py b/sdk/python/kfp-dsl/kfp/dsl/task_final_status.py similarity index 100% rename from sdk/python/kfp/dsl/task_final_status.py rename to sdk/python/kfp-dsl/kfp/dsl/task_final_status.py diff --git a/sdk/python/kfp/dsl/tasks_group.py b/sdk/python/kfp-dsl/kfp/dsl/tasks_group.py similarity index 100% rename from sdk/python/kfp/dsl/tasks_group.py rename to sdk/python/kfp-dsl/kfp/dsl/tasks_group.py diff --git a/sdk/python/kfp/dsl/types/__init__.py b/sdk/python/kfp-dsl/kfp/dsl/types/__init__.py similarity index 100% rename from sdk/python/kfp/dsl/types/__init__.py rename to sdk/python/kfp-dsl/kfp/dsl/types/__init__.py diff --git a/sdk/python/kfp/dsl/types/artifact_types.py b/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py similarity index 100% rename from sdk/python/kfp/dsl/types/artifact_types.py rename to sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py diff --git a/sdk/python/kfp/dsl/types/custom_artifact_types.py b/sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py similarity index 100% rename from sdk/python/kfp/dsl/types/custom_artifact_types.py rename to sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py diff --git a/sdk/python/kfp/dsl/types/type_annotations.py b/sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py similarity index 100% rename from sdk/python/kfp/dsl/types/type_annotations.py rename to sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py diff --git a/sdk/python/kfp/dsl/types/type_utils.py b/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py similarity index 86% rename from sdk/python/kfp/dsl/types/type_utils.py rename to sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py index bd724742c5..e27fb234f9 100644 --- a/sdk/python/kfp/dsl/types/type_utils.py +++ b/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py @@ -19,12 +19,11 @@ from typing import Any, Callable, Dict, Optional, Type, Union import warnings -import kfp +from kfp import dsl from kfp.dsl import structures from kfp.dsl import task_final_status from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations -from kfp.pipeline_spec import pipeline_spec_pb2 DEFAULT_ARTIFACT_SCHEMA_VERSION = '0.0.1' PARAMETER_TYPES = Union[str, int, float, bool, dict, list] @@ -44,24 +43,32 @@ _GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$' _GOOGLE_TYPES_VERSION = DEFAULT_ARTIFACT_SCHEMA_VERSION + # ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping. # The keys are normalized (lowercased). These are types viewed as Parameters. # The values are the corresponding IR parameter primitive types. 
-_PARAMETER_TYPES_MAPPING = { - 'integer': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, - 'int': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, - 'double': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, - 'float': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, - 'string': pipeline_spec_pb2.ParameterType.STRING, - 'str': pipeline_spec_pb2.ParameterType.STRING, - 'text': pipeline_spec_pb2.ParameterType.STRING, - 'bool': pipeline_spec_pb2.ParameterType.BOOLEAN, - 'boolean': pipeline_spec_pb2.ParameterType.BOOLEAN, - 'dict': pipeline_spec_pb2.ParameterType.STRUCT, - 'list': pipeline_spec_pb2.ParameterType.LIST, - 'jsonobject': pipeline_spec_pb2.ParameterType.STRUCT, - 'jsonarray': pipeline_spec_pb2.ParameterType.LIST, -} +def get_parameter_types_mapping(): + + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + + return { + 'integer': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, + 'int': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, + 'double': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, + 'float': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, + 'string': pipeline_spec_pb2.ParameterType.STRING, + 'str': pipeline_spec_pb2.ParameterType.STRING, + 'text': pipeline_spec_pb2.ParameterType.STRING, + 'bool': pipeline_spec_pb2.ParameterType.BOOLEAN, + 'boolean': pipeline_spec_pb2.ParameterType.BOOLEAN, + 'dict': pipeline_spec_pb2.ParameterType.STRUCT, + 'list': pipeline_spec_pb2.ParameterType.LIST, + 'jsonobject': pipeline_spec_pb2.ParameterType.STRUCT, + 'jsonarray': pipeline_spec_pb2.ParameterType.LIST, + } def bool_cast_fn(default: Union[str, bool]) -> bool: @@ -109,18 +116,6 @@ def deserialize_v1_component_yaml_default(type_: str, default: Any) -> Any: return default -# Mapping primitive types to their IR message field names. -# This is used in constructing condition strings. -_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = { - pipeline_spec_pb2.ParameterType.NUMBER_INTEGER: 'number_value', - pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE: 'number_value', - pipeline_spec_pb2.ParameterType.STRING: 'string_value', - pipeline_spec_pb2.ParameterType.BOOLEAN: 'bool_value', - pipeline_spec_pb2.ParameterType.STRUCT: 'struct_value', - pipeline_spec_pb2.ParameterType.LIST: 'list_value', -} - - def is_task_final_status_type(type_name: Optional[Union[str, dict]]) -> bool: """Check if a ComponentSpec I/O type is PipelineTaskFinalStatus. 
@@ -150,15 +145,21 @@ def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool: else: return False - return type_name.lower( - ) in _PARAMETER_TYPES_MAPPING or is_task_final_status_type(type_name) + return type_name.lower() in get_parameter_types_mapping( + ) or is_task_final_status_type(type_name) def bundled_artifact_to_artifact_proto( - bundled_artifact_str: str) -> pipeline_spec_pb2.ArtifactTypeSchema: + bundled_artifact_str: str) -> 'pipeline_spec_pb2.ArtifactTypeSchema': """Gets the IR ArtifactTypeSchema proto for a bundled artifact in form `.@x.x.x` (e.g., system.Artifact@0.0.1).""" bundled_artifact_str, schema_version = bundled_artifact_str.split('@') + + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + return pipeline_spec_pb2.ArtifactTypeSchema( schema_title=bundled_artifact_str, schema_version=schema_version, @@ -167,7 +168,7 @@ def bundled_artifact_to_artifact_proto( def get_parameter_type( param_type: Optional[Union[Type, str, dict]] -) -> pipeline_spec_pb2.ParameterType: +) -> 'pipeline_spec_pb2.ParameterType': """Get the IR I/O parameter type for the given ComponentSpec I/O type. Args: @@ -189,12 +190,17 @@ def get_parameter_type( type_name = list(param_type.keys())[0] else: type_name = type_annotations.get_short_type_name(str(param_type)) - return _PARAMETER_TYPES_MAPPING.get(type_name.lower()) + return get_parameter_types_mapping().get(type_name.lower()) def get_parameter_type_name( param_type: Optional[Union[Type, str, dict]]) -> str: """Gets the parameter type name.""" + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + return pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( get_parameter_type(param_type)) @@ -213,6 +219,21 @@ def get_parameter_type_field_name(type_name: Optional[str]) -> Optional[str]: Raises: AttributeError: if type_name is not a string type. """ + try: + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + + # Mapping primitive types to their IR message field names. + # This is used in constructing condition strings. + _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = { + pipeline_spec_pb2.ParameterType.NUMBER_INTEGER: 'number_value', + pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE: 'number_value', + pipeline_spec_pb2.ParameterType.STRING: 'string_value', + pipeline_spec_pb2.ParameterType.BOOLEAN: 'bool_value', + pipeline_spec_pb2.ParameterType.STRUCT: 'struct_value', + pipeline_spec_pb2.ParameterType.LIST: 'list_value', + } return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get( get_parameter_type(type_name)) @@ -251,6 +272,7 @@ def verify_type_compatibility( expected_spec: Union[structures.InputSpec, structures.OutputSpec], error_message_prefix: str, checks_input: bool = True, + raise_on_error: bool = True, ) -> bool: """Verifies the given argument type is compatible with the expected type. @@ -259,12 +281,13 @@ def verify_type_compatibility( expected_spec: The InputSpec or OutputSpec that describes the expected type of given_value. error_message_prefix: The prefix for the error message. checks_input: True if checks an argument (given_value) against a component/pipeline input type (expected_spec). False if checks a component output (argument_value) against the pipeline output type (expected_spec). + raise_on_error: Whether to raise on type compatibility error. 
Should be passed kfp.TYPE_CHECK. Returns: True if types are compatible, and False if otherwise. Raises: - InconsistentTypeException if types are incompatible and TYPE_CHECK==True. + InconsistentTypeException if raise_on_error=True. """ # extract and normalize types expected_type = expected_spec.type @@ -307,7 +330,7 @@ def verify_type_compatibility( else: error_message_suffix = f'Output of type {given_type!r} cannot be surfaced as pipeline output type {expected_type!r}' error_text = error_message_prefix + error_message_suffix - if kfp.TYPE_CHECK: + if raise_on_error: raise InconsistentTypeException(error_text) else: warnings.warn(InconsistentTypeWarning(error_text)) @@ -424,11 +447,21 @@ def __enter__(self) -> 'TypeCheckManager': Returns: TypeCheckManager: Returns itself. """ + try: + import kfp + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + self._prev = kfp.TYPE_CHECK kfp.TYPE_CHECK = self._enable return self def __exit__(self, *unused_args) -> None: + + try: + import kfp + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e """Restore type check mode to its previous state.""" kfp.TYPE_CHECK = self._prev diff --git a/sdk/python/kfp/dsl/utils.py b/sdk/python/kfp-dsl/kfp/dsl/utils.py similarity index 100% rename from sdk/python/kfp/dsl/utils.py rename to sdk/python/kfp-dsl/kfp/dsl/utils.py diff --git a/sdk/python/kfp/dsl/v1_modelbase.py b/sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py similarity index 100% rename from sdk/python/kfp/dsl/v1_modelbase.py rename to sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py diff --git a/sdk/python/kfp/dsl/v1_structures.py b/sdk/python/kfp-dsl/kfp/dsl/v1_structures.py similarity index 98% rename from sdk/python/kfp/dsl/v1_structures.py rename to sdk/python/kfp-dsl/kfp/dsl/v1_structures.py index 661cef196f..57cc7c6375 100644 --- a/sdk/python/kfp/dsl/v1_structures.py +++ b/sdk/python/kfp-dsl/kfp/dsl/v1_structures.py @@ -16,7 +16,6 @@ from typing import Any, Dict, List, Mapping, Optional, Union from kfp.dsl.v1_modelbase import ModelBase -import yaml PrimitiveTypes = Union[str, int, float, bool] PrimitiveTypesIncludingNone = Optional[PrimitiveTypes] @@ -437,17 +436,6 @@ def verify_arg(arg): f'Argument "{argument}" references non-existing input.' ) - def save(self, file_path: str): - """Saves the component definition to file. - - It can be shared online and later loaded using the - load_component function. - """ - - component_yaml = yaml.dump(self.to_dict(), sort_keys=True) - with open(file_path, 'w') as f: - f.write(component_yaml) - class ComponentReference(ModelBase): """Component reference. diff --git a/sdk/python/kfp/dsl/yaml_component.py b/sdk/python/kfp-dsl/kfp/dsl/yaml_component.py similarity index 80% rename from sdk/python/kfp/dsl/yaml_component.py rename to sdk/python/kfp-dsl/kfp/dsl/yaml_component.py index 187fa533f2..807ca4e0ce 100644 --- a/sdk/python/kfp/dsl/yaml_component.py +++ b/sdk/python/kfp-dsl/kfp/dsl/yaml_component.py @@ -13,10 +13,9 @@ # limitations under the License. 
"""Component loaded from YAML.""" -from google.protobuf import json_format +from kfp import dsl from kfp.dsl import base_component from kfp.dsl import structures -from kfp.pipeline_spec import pipeline_spec_pb2 class YamlComponent(base_component.BaseComponent): @@ -38,14 +37,21 @@ def __init__( self.component_yaml = component_yaml @property - def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': """Returns the pipeline spec of the component.""" - component_dict = structures.load_documents_from_yaml( + try: + from google.protobuf import json_format + from kfp.components import load_yaml_utilities + from kfp.pipeline_spec import pipeline_spec_pb2 + except ImportError as e: + raise ImportError(dsl._kfp_dsl_import_error_msg) from e + component_dict = load_yaml_utilities._load_documents_from_yaml( self.component_yaml)[0] is_v1 = 'implementation' in set(component_dict.keys()) if is_v1: return self.component_spec.to_pipeline_spec() else: + return json_format.ParseDict(component_dict, pipeline_spec_pb2.PipelineSpec()) diff --git a/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py b/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py new file mode 100644 index 0000000000..c661fb6e00 --- /dev/null +++ b/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py @@ -0,0 +1,139 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import dataclasses +import json +import os +import shutil +import subprocess +import tempfile +from typing import Any, Dict + +from absl.testing import parameterized +import yaml + +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'test_data') +TMP_DIR = tempfile.mkdtemp() + + +@dataclasses.dataclass +class RuntimeTestConfig: + pipeline_file_relpath: str + executor_name: str + executor_input: Dict[str, Any] + + +TEST_CONFIGS = [ + RuntimeTestConfig( + pipeline_file_relpath=os.path.join( + TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), + executor_name='exec-print-op', + executor_input={ + 'inputs': { + 'parameterValues': { + 'message': 'Hello World!' + }, + 'parameters': { + 'message': { + 'stringValue': 'Hello World!' + } + } + }, + 'outputs': { + 'outputFile': + '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-18-50-32/print-op_-9063136771365142528/executor_output.json' + } + }, + ), + RuntimeTestConfig( + pipeline_file_relpath=os.path.join( + TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), + executor_name='exec-exit-op', + executor_input={ + 'inputs': { + 'parameterValues': { + 'status': { + 'error': { + 'code': + 9, + 'message': + 'The DAG failed because some tasks failed. The failed tasks are: [print-op, fail-op].' + }, + 'pipelineJobResourceName': + 'projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11', + 'pipelineTaskName': + 'my-pipeline', + 'state': + 'FAILED' + }, + 'user_input': 'Hello World!' 
+ }, + 'parameters': { + 'status': { + 'stringValue': + "{\"error\":{\"code\":9,\"message\":\"The DAG failed because some tasks failed. The failed tasks are: [print-op, fail-op].\"},\"pipelineJobResourceName\":\"projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11\",\"pipelineTaskName\":\"my-pipeline\",\"state\":\"FAILED\"}" + }, + 'user_input': { + 'stringValue': 'Hello World!' + } + } + }, + 'outputs': { + 'outputFile': + '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-19-07-11/exit-op_-6100894116462198784/executor_output.json' + } + }, + ) +] + + +def run_commands_and_args( + config: RuntimeTestConfig, + temp_dir: str, +) -> subprocess.CompletedProcess: + with open(config.pipeline_file_relpath) as f: + pipline_spec_dict = yaml.safe_load(f) + container = pipline_spec_dict['deploymentSpec']['executors'][ + config.executor_name]['container'] + + command_and_args = container['command'] + container['args'] + executor_input_json = json.dumps(config.executor_input).replace( + '/gcs/', TMP_DIR) + command_and_args = [ + v.replace('{{$}}', executor_input_json) for v in command_and_args + ] + + return subprocess.run( + command_and_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + + +class TestRuntime(parameterized.TestCase): + + @classmethod + def setUp(cls): + cls.temp_dir = tempfile.mkdtemp() + + @classmethod + def tearDown(cls): + shutil.rmtree(cls.temp_dir) + + @parameterized.parameters(TEST_CONFIGS) + def test(self, config: RuntimeTestConfig): + process = run_commands_and_args( + config=config, + temp_dir=self.temp_dir, + ) + self.assertEqual(process.returncode, 0, process.stderr) diff --git a/sdk/python/kfp/dsl/executor_test.py b/sdk/python/kfp-dsl/runtime_tests/executor_test.py similarity index 100% rename from sdk/python/kfp/dsl/executor_test.py rename to sdk/python/kfp-dsl/runtime_tests/executor_test.py diff --git a/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py b/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py new file mode 100644 index 0000000000..a1d432cd31 --- /dev/null +++ b/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py @@ -0,0 +1,21 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+class TestImportObjects: + + def test(self): + # from kfp.dsl import * only allowed at module level, so emulate behavior + from kfp import dsl + for obj_name in dir(dsl): + if not obj_name.startswith('_'): + getattr(dsl, obj_name) diff --git a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py new file mode 100644 index 0000000000..27d418a333 --- /dev/null +++ b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py @@ -0,0 +1,58 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pipeline using ExitHandler with PipelineTaskFinalStatus.""" + +from kfp import compiler +from kfp import dsl +from kfp.dsl import component +from kfp.dsl import PipelineTaskFinalStatus + + +@component +def exit_op(user_input: str, status: PipelineTaskFinalStatus): + """Checks pipeline run status.""" + print('Pipeline status: ', status.state) + print('Job resource name: ', status.pipeline_job_resource_name) + print('Pipeline task name: ', status.pipeline_task_name) + print('Error code: ', status.error_code) + print('Error message: ', status.error_message) + + +@component +def print_op(message: str): + """Prints a message.""" + print(message) + + +@component +def fail_op(message: str): + """Fails.""" + import sys + print(message) + sys.exit(1) + + +@dsl.pipeline(name='pipeline-with-task-final-status') +def my_pipeline(message: str = 'Hello World!'): + exit_task = exit_op(user_input=message) + + with dsl.ExitHandler(exit_task, name='my-pipeline'): + print_op(message=message) + fail_op(message='Task failed.') + + +if __name__ == '__main__': + compiler.Compiler().compile( + pipeline_func=my_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml new file mode 100644 index 0000000000..86ad841a3d --- /dev/null +++ b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml @@ -0,0 +1,183 @@ +# PIPELINE DEFINITION +# Name: pipeline-with-task-final-status +# Inputs: +# message: str [Default: 'Hello World!'] +components: + comp-exit-handler-1: + dag: + tasks: + fail-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-fail-op + inputs: + parameters: + message: + runtimeValue: + constant: Task failed. 
+ taskInfo: + name: fail-op + print-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-op + inputs: + parameters: + message: + componentInputParameter: pipelinechannel--message + taskInfo: + name: print-op + inputDefinitions: + parameters: + pipelinechannel--message: + parameterType: STRING + comp-exit-op: + executorLabel: exec-exit-op + inputDefinitions: + parameters: + status: + isOptional: true + parameterType: TASK_FINAL_STATUS + user_input: + parameterType: STRING + comp-fail-op: + executorLabel: exec-fail-op + inputDefinitions: + parameters: + message: + parameterType: STRING + comp-print-op: + executorLabel: exec-print-op + inputDefinitions: + parameters: + message: + parameterType: STRING +deploymentSpec: + executors: + exec-exit-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - exit_op + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef exit_op(user_input: str, status: PipelineTaskFinalStatus):\n\ + \ \"\"\"Checks pipeline run status.\"\"\"\n print('Pipeline status:\ + \ ', status.state)\n print('Job resource name: ', status.pipeline_job_resource_name)\n\ + \ print('Pipeline task name: ', status.pipeline_task_name)\n print('Error\ + \ code: ', status.error_code)\n print('Error message: ', status.error_message)\n\ + \n" + image: python:3.7 + exec-fail-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - fail_op + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n\ + \ print(message)\n sys.exit(1)\n\n" + image: python:3.7 + exec-print-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_op + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n\ + \ print(message)\n\n" + image: python:3.7 +pipelineInfo: + name: pipeline-with-task-final-status +root: + dag: + tasks: + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--message: + componentInputParameter: message + taskInfo: + name: my-pipeline + exit-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-exit-op + dependentTasks: + - exit-handler-1 + inputs: + parameters: + status: + taskFinalStatus: + producerTask: exit-handler-1 + user_input: + componentInputParameter: message + taskInfo: + name: exit-op + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + inputDefinitions: + parameters: + message: + defaultValue: Hello World! + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.1 diff --git a/sdk/python/kfp-dsl/setup.py b/sdk/python/kfp-dsl/setup.py new file mode 100644 index 0000000000..dbd6ade974 --- /dev/null +++ b/sdk/python/kfp-dsl/setup.py @@ -0,0 +1,52 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import re + +import setuptools + + +def find_version(*file_path_parts: str) -> str: + """Get version from a file that defines a __version__ variable.""" + + file_path = os.path.join(os.path.dirname(__file__), *file_path_parts) + with open(file_path, 'r') as f: + version_file_text = f.read() + + version_match = re.search( + r"^__version__ = ['\"]([^'\"]*)['\"]", + version_file_text, + re.M, + ) + if version_match: + return version_match.group(1) + + raise RuntimeError(f'Unable to find version string in file: {file_path}.') + + +setuptools.setup( + name='kfp-dsl', + version=find_version( + os.path.dirname(os.path.dirname(__file__)), 'kfp', '__init__.py'), + description='A KFP SDK subpackage containing the DSL and runtime code.', + author='google', + author_email='kubeflow-pipelines@google.com', + url='https://github.com/kubeflow/pipelines', + packages=setuptools.find_namespace_packages(include=['kfp.*']), + python_requires='>=3.7.0', + install_requires=['typing-extensions>=3.7.4,<5; python_version<"3.9"'], + include_package_data=True, + license='Apache 2.0', +) diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index b276f892c1..5a7141d5c8 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -1748,6 +1748,7 @@ def _validate_dag_output_types( output_spec, error_message_prefix, checks_input=False, + raise_on_error=kfp.TYPE_CHECK, ) diff --git a/sdk/python/kfp/components/load_yaml_utilities.py b/sdk/python/kfp/components/load_yaml_utilities.py index 34342d3b0b..01af00d338 100644 --- a/sdk/python/kfp/components/load_yaml_utilities.py +++ b/sdk/python/kfp/components/load_yaml_utilities.py @@ -13,11 +13,15 @@ # limitations under the License. """Functions for loading components from compiled YAML.""" -from typing import Optional, Tuple +import hashlib +from typing import Optional, Tuple, Union +import warnings from kfp.dsl import structures +from kfp.dsl import v1_structures from kfp.dsl import yaml_component import requests +import yaml def load_component_from_text(text: str) -> yaml_component.YamlComponent: @@ -30,7 +34,7 @@ def load_component_from_text(text: str) -> yaml_component.YamlComponent: Component loaded from YAML. """ return yaml_component.YamlComponent( - component_spec=structures.ComponentSpec.from_yaml_documents(text), + component_spec=_load_component_spec_from_yaml_documents(text), component_yaml=text) @@ -86,3 +90,97 @@ def load_component_from_url( resp.raise_for_status() return load_component_from_text(resp.content.decode('utf-8')) + + +def _load_documents_from_yaml(component_yaml: str) -> Tuple[dict, dict]: + """Loads up to two YAML documents from a YAML string. + + First document must always be present. If second document is + present, it is returned as a dict, else an empty dict. + """ + documents = list(yaml.safe_load_all(component_yaml)) + num_docs = len(documents) + if num_docs == 1: + pipeline_spec_dict = documents[0] + platform_spec_dict = {} + elif num_docs == 2: + pipeline_spec_dict = documents[0] + platform_spec_dict = documents[1] + else: + raise ValueError( + f'Expected one or two YAML documents in the IR YAML file. Got: {num_docs}.' + ) + return pipeline_spec_dict, platform_spec_dict + + +def _load_component_spec_from_yaml_documents( + component_yaml: str) -> structures.ComponentSpec: + """Loads V1 or V2 component YAML into a ComponentSpec. 
+ + Args: + component_yaml: PipelineSpec and optionally PlatformSpec YAML documents as a single string. + + Returns: + ComponentSpec: The ComponentSpec object. + """ + + def extract_description(component_yaml: str) -> Union[str, None]: + heading = '# Description: ' + multi_line_description_prefix = '# ' + index_of_heading = 2 + if heading in component_yaml: + description = component_yaml.splitlines()[index_of_heading] + + # Multi line + comments = component_yaml.splitlines() + index = index_of_heading + 1 + while comments[index][:len(multi_line_description_prefix + )] == multi_line_description_prefix: + description += '\n' + comments[index][ + len(multi_line_description_prefix) + 1:] + index += 1 + + return description[len(heading):] + else: + return None + + pipeline_spec_dict, platform_spec_dict = _load_documents_from_yaml( + component_yaml) + + is_v1 = 'implementation' in set(pipeline_spec_dict.keys()) + if is_v1: + v1_component = load_v1_component_spec_from_component_text( + component_yaml) + return structures.ComponentSpec.from_v1_component_spec(v1_component) + else: + component_spec = structures.ComponentSpec.from_ir_dicts( + pipeline_spec_dict, platform_spec_dict) + if not component_spec.description: + component_spec.description = extract_description( + component_yaml=component_yaml) + return component_spec + + +def load_v1_component_spec_from_component_text( + text) -> v1_structures.ComponentSpec: + component_dict = yaml.safe_load(text) + component_spec = v1_structures.ComponentSpec.from_dict(component_dict) + + if isinstance(component_spec.implementation, + v1_structures.ContainerImplementation) and ( + component_spec.implementation.container.command is None): + warnings.warn( + 'Container component must specify command to be compatible with KFP ' + 'v2 compatible mode and emissary executor, which will be the default' + ' executor for KFP v2.' + 'https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/', + category=FutureWarning, + ) + + # Calculating hash digest for the component + data = text if isinstance(text, bytes) else text.encode('utf-8') + data = data.replace(b'\r\n', b'\n') # Normalizing line endings + digest = hashlib.sha256(data).hexdigest() + component_spec._digest = digest + + return component_spec diff --git a/sdk/python/kfp/components/load_yaml_utilities_test.py b/sdk/python/kfp/components/load_yaml_utilities_test.py index 55ba29cf57..dff93c2257 100644 --- a/sdk/python/kfp/components/load_yaml_utilities_test.py +++ b/sdk/python/kfp/components/load_yaml_utilities_test.py @@ -19,6 +19,7 @@ import unittest from kfp import components +from kfp.components import load_yaml_utilities from kfp.dsl import structures SAMPLE_YAML = textwrap.dedent("""\ @@ -124,5 +125,47 @@ def test_load_component_from_url(self): 'python:3.7') +class TestLoadDocumentsFromYAML(unittest.TestCase): + + def test_no_documents(self): + with self.assertRaisesRegex( + ValueError, + r'Expected one or two YAML documents in the IR YAML file\. Got\: 0\.' 
+ ): + load_yaml_utilities._load_documents_from_yaml('') + + def test_one_document(self): + doc1, doc2 = load_yaml_utilities._load_documents_from_yaml( + textwrap.dedent("""\ + key1: value1 + """)) + self.assertEqual(doc1, {'key1': 'value1'}) + self.assertEqual(doc2, {}) + + def test_two_documents(self): + doc1, doc2 = load_yaml_utilities._load_documents_from_yaml( + textwrap.dedent("""\ + key1: value1 + --- + key2: value2 + """)) + self.assertEqual(doc1, {'key1': 'value1'}) + self.assertEqual(doc2, {'key2': 'value2'}) + + def test_three_documents(self): + with self.assertRaisesRegex( + ValueError, + r'Expected one or two YAML documents in the IR YAML file\. Got\: 3\.' + ): + load_yaml_utilities._load_documents_from_yaml( + textwrap.dedent("""\ + key3: value3 + --- + key3: value3 + --- + key3: value3 + """)) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/base_component_test.py b/sdk/python/kfp/dsl-test/base_component_test.py similarity index 100% rename from sdk/python/kfp/dsl/base_component_test.py rename to sdk/python/kfp/dsl-test/base_component_test.py diff --git a/sdk/python/kfp/dsl/component_decorator_test.py b/sdk/python/kfp/dsl-test/component_decorator_test.py similarity index 97% rename from sdk/python/kfp/dsl/component_decorator_test.py rename to sdk/python/kfp/dsl-test/component_decorator_test.py index 4b51de638f..1358fa691d 100644 --- a/sdk/python/kfp/dsl/component_decorator_test.py +++ b/sdk/python/kfp/dsl-test/component_decorator_test.py @@ -17,8 +17,8 @@ from typing import Dict, List, NamedTuple import unittest +from kfp.components import load_yaml_utilities from kfp.dsl import python_component -from kfp.dsl import structures from kfp.dsl.component_decorator import component @@ -104,7 +104,8 @@ def comp(text: str) -> str: with open(filepath, 'r') as f: yaml_text = f.read() - component_spec = structures.ComponentSpec.from_yaml_documents(yaml_text) + component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + yaml_text) self.assertEqual(component_spec.name, comp.component_spec.name) def test_output_named_tuple_with_dict(self): diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl-test/component_factory_test.py similarity index 100% rename from sdk/python/kfp/dsl/component_factory_test.py rename to sdk/python/kfp/dsl-test/component_factory_test.py diff --git a/sdk/python/kfp/dsl/container_component_artifact_channel_test.py b/sdk/python/kfp/dsl-test/container_component_artifact_channel_test.py similarity index 100% rename from sdk/python/kfp/dsl/container_component_artifact_channel_test.py rename to sdk/python/kfp/dsl-test/container_component_artifact_channel_test.py diff --git a/sdk/python/kfp/dsl/container_component_decorator_test.py b/sdk/python/kfp/dsl-test/container_component_decorator_test.py similarity index 100% rename from sdk/python/kfp/dsl/container_component_decorator_test.py rename to sdk/python/kfp/dsl-test/container_component_decorator_test.py diff --git a/sdk/python/kfp/dsl/for_loop_test.py b/sdk/python/kfp/dsl-test/for_loop_test.py similarity index 100% rename from sdk/python/kfp/dsl/for_loop_test.py rename to sdk/python/kfp/dsl-test/for_loop_test.py diff --git a/sdk/python/kfp/dsl/importer_node_test.py b/sdk/python/kfp/dsl-test/importer_node_test.py similarity index 100% rename from sdk/python/kfp/dsl/importer_node_test.py rename to sdk/python/kfp/dsl-test/importer_node_test.py diff --git a/sdk/python/kfp/dsl/pipeline_channel_test.py b/sdk/python/kfp/dsl-test/pipeline_channel_test.py 
similarity index 100% rename from sdk/python/kfp/dsl/pipeline_channel_test.py rename to sdk/python/kfp/dsl-test/pipeline_channel_test.py diff --git a/sdk/python/kfp/dsl/pipeline_task_test.py b/sdk/python/kfp/dsl-test/pipeline_task_test.py similarity index 88% rename from sdk/python/kfp/dsl/pipeline_task_test.py rename to sdk/python/kfp/dsl-test/pipeline_task_test.py index 6e7443fc1a..cf71a4150b 100644 --- a/sdk/python/kfp/dsl/pipeline_task_test.py +++ b/sdk/python/kfp/dsl-test/pipeline_task_test.py @@ -18,6 +18,7 @@ from absl.testing import parameterized from kfp import dsl +from kfp.components import load_yaml_utilities from kfp.dsl import pipeline_task from kfp.dsl import placeholders from kfp.dsl import structures @@ -112,8 +113,8 @@ def test_create_pipeline_task_valid(self): ) task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) self.assertEqual(task._task_spec, expected_task_spec) @@ -125,8 +126,8 @@ def test_create_pipeline_task_invalid_wrong_input(self): ValueError, "Component 'component1' got an unexpected input: 'input0'."): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={ 'input1': 'value', 'input0': 'abc', @@ -135,8 +136,8 @@ def test_create_pipeline_task_invalid_wrong_input(self): def test_set_caching_options(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_caching_options(False) @@ -163,8 +164,8 @@ def test_set_caching_options(self): def test_set_valid_cpu_request_limit(self, cpu: str, expected_cpu_number: float): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_cpu_request(cpu) @@ -182,8 +183,8 @@ def test_set_valid_cpu_request_limit(self, cpu: str, def test_set_valid_gpu_limit(self, gpu_limit: str, expected_gpu_number: int): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) with self.assertWarnsRegex( @@ -196,8 +197,8 @@ def test_set_valid_gpu_limit(self, gpu_limit: str, def test_add_valid_node_selector_constraint(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) with self.assertWarnsRegex( @@ -220,8 +221,8 @@ def test_add_valid_node_selector_constraint(self): ) def test_set_accelerator_limit(self, limit, expected): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) @@ -285,8 +286,8 @@ def test_set_accelerator_limit(self, limit, expected): ) def test_set_memory_limit(self, memory: str, expected_memory_number: int): task = 
pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_memory_request(memory) @@ -298,8 +299,8 @@ def test_set_memory_limit(self, memory: str, expected_memory_number: int): def test_set_accelerator_type_with_type_only(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_accelerator_type('NVIDIA_TESLA_K80') @@ -310,8 +311,8 @@ def test_set_accelerator_type_with_type_only(self): def test_set_accelerator_type_with_accelerator_count(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_accelerator_limit('5').set_accelerator_type('TPU_V3') @@ -322,8 +323,8 @@ def test_set_accelerator_type_with_accelerator_count(self): def test_set_env_variable(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_env_variable('env_name', 'env_value') @@ -331,8 +332,8 @@ def test_set_env_variable(self): def test_set_display_name(self): task = pipeline_task.PipelineTask( - component_spec=structures.ComponentSpec.from_yaml_documents( - V2_YAML), + component_spec=load_yaml_utilities + ._load_component_spec_from_yaml_documents(V2_YAML), args={'input1': 'value'}, ) task.set_display_name('test_name') diff --git a/sdk/python/kfp/dsl/placeholders_test.py b/sdk/python/kfp/dsl-test/placeholders_test.py similarity index 100% rename from sdk/python/kfp/dsl/placeholders_test.py rename to sdk/python/kfp/dsl-test/placeholders_test.py diff --git a/sdk/python/kfp/dsl/structures_test.py b/sdk/python/kfp/dsl-test/structures_test.py similarity index 94% rename from sdk/python/kfp/dsl/structures_test.py rename to sdk/python/kfp/dsl-test/structures_test.py index ad6274d931..d36a34e57f 100644 --- a/sdk/python/kfp/dsl/structures_test.py +++ b/sdk/python/kfp/dsl-test/structures_test.py @@ -22,6 +22,7 @@ from kfp import compiler from kfp import components from kfp import dsl +from kfp.components import load_yaml_utilities from kfp.dsl import component_factory from kfp.dsl import placeholders from kfp.dsl import structures @@ -263,7 +264,7 @@ def test_simple_component_spec_save_to_component_yaml(self): # test that it can be read back correctly with open(output_path, 'r') as f: contents = f.read() - new_component_spec = structures.ComponentSpec.from_yaml_documents( + new_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( contents) self.assertEqual(original_component_spec, new_component_spec) @@ -318,7 +319,7 @@ def test_simple_component_spec_load_from_v2_component_yaml(self): sdkVersion: kfp-2.0.0-alpha.2 """) - generated_spec = structures.ComponentSpec.from_yaml_documents( + generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( component_yaml_v2) expected_spec = structures.ComponentSpec( @@ -359,7 +360,8 @@ def test_simple_component_spec_load_from_v2_component_yaml(self): ) def test_component_spec_placeholder_load_from_v2_component_yaml( self, yaml, 
expected_component): - generated_spec = structures.ComponentSpec.from_yaml_documents(yaml) + generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + yaml) self.assertEqual(generated_spec, expected_component) def test_component_spec_load_from_v1_component_yaml(self): @@ -388,7 +390,7 @@ def test_component_spec_load_from_v1_component_yaml(self): - {outputPath: Output 2} """) - generated_spec = structures.ComponentSpec.from_yaml_documents( + generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( component_yaml_v1) expected_spec = structures.ComponentSpec( @@ -639,7 +641,7 @@ def test_from_ir_component_outputs_dict(self): class TestReadInComponent(parameterized.TestCase): def test_read_v1(self): - component_spec = structures.ComponentSpec.from_yaml_documents( + component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( V1_YAML_IF_PLACEHOLDER) self.assertEqual(component_spec.name, 'component-if') self.assertEqual(component_spec.implementation.container.image, @@ -694,7 +696,7 @@ def test_simple_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = structures.ComponentSpec.from_yaml_documents( + loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='component1', @@ -762,7 +764,7 @@ def test_if_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = structures.ComponentSpec.from_yaml_documents( + loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='if', @@ -833,7 +835,7 @@ def test_concat_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = structures.ComponentSpec.from_yaml_documents( + loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='concat', @@ -1113,47 +1115,5 @@ def test_load_noncanonical_v1_generic_types(self): self.assertEqual(outputs['output4'].type, 'Dict') -class TestLoadDocumentsFromYAML(unittest.TestCase): - - def test_no_documents(self): - with self.assertRaisesRegex( - ValueError, - r'Expected one or two YAML documents in the IR YAML file\. Got\: 0\.' - ): - structures.load_documents_from_yaml('') - - def test_one_document(self): - doc1, doc2 = structures.load_documents_from_yaml( - textwrap.dedent("""\ - key1: value1 - """)) - self.assertEqual(doc1, {'key1': 'value1'}) - self.assertEqual(doc2, {}) - - def test_two_documents(self): - doc1, doc2 = structures.load_documents_from_yaml( - textwrap.dedent("""\ - key1: value1 - --- - key2: value2 - """)) - self.assertEqual(doc1, {'key1': 'value1'}) - self.assertEqual(doc2, {'key2': 'value2'}) - - def test_three_documents(self): - with self.assertRaisesRegex( - ValueError, - r'Expected one or two YAML documents in the IR YAML file\. Got\: 3\.' 
- ): - structures.load_documents_from_yaml( - textwrap.dedent("""\ - key3: value3 - --- - key3: value3 - --- - key3: value3 - """)) - - if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/tasks_group_test.py b/sdk/python/kfp/dsl-test/tasks_group_test.py similarity index 100% rename from sdk/python/kfp/dsl/tasks_group_test.py rename to sdk/python/kfp/dsl-test/tasks_group_test.py diff --git a/sdk/python/kfp/dsl/types/artifact_types_test.py b/sdk/python/kfp/dsl-test/types/artifact_types_test.py similarity index 100% rename from sdk/python/kfp/dsl/types/artifact_types_test.py rename to sdk/python/kfp/dsl-test/types/artifact_types_test.py diff --git a/sdk/python/kfp/dsl/types/custom_artifact_types_test.py b/sdk/python/kfp/dsl-test/types/custom_artifact_types_test.py similarity index 100% rename from sdk/python/kfp/dsl/types/custom_artifact_types_test.py rename to sdk/python/kfp/dsl-test/types/custom_artifact_types_test.py diff --git a/sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json b/sdk/python/kfp/dsl-test/types/test_data/expected_bulk_loaded_confusion_matrix.json similarity index 98% rename from sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json rename to sdk/python/kfp/dsl-test/types/test_data/expected_bulk_loaded_confusion_matrix.json index 184233e0ba..1a994dff33 100644 --- a/sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json +++ b/sdk/python/kfp/dsl-test/types/test_data/expected_bulk_loaded_confusion_matrix.json @@ -7,4 +7,4 @@ [2, 6, 0], [3, 5, 6], [5, 7, 8]] -} \ No newline at end of file +} diff --git a/sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json b/sdk/python/kfp/dsl-test/types/test_data/expected_confusion_matrix.json similarity index 98% rename from sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json rename to sdk/python/kfp/dsl-test/types/test_data/expected_confusion_matrix.json index 83312d1daa..560d6cbd16 100644 --- a/sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json +++ b/sdk/python/kfp/dsl-test/types/test_data/expected_confusion_matrix.json @@ -7,4 +7,4 @@ [2, 6, 0], [3, 0, 0], [0, 0, 0]] -} \ No newline at end of file +} diff --git a/sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json b/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_bulk_load_classification_metrics.json similarity index 83% rename from sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json rename to sdk/python/kfp/dsl-test/types/test_data/expected_io_types_bulk_load_classification_metrics.json index 356109ddcf..93092e04cd 100644 --- a/sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json +++ b/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_bulk_load_classification_metrics.json @@ -1,11 +1,11 @@ { - "confidenceMetrics": + "confidenceMetrics": [ { "confidenceThreshold": 53.6, "recall": 52.6, "falsePositiveRate": 85.1 - }, + }, { "confidenceThreshold": 53.6, "recall": 52.6, @@ -16,20 +16,20 @@ "recall": 52.6, "falsePositiveRate": 85.1 } - ], - "confusionMatrix": + ], + "confusionMatrix": { - "annotationSpecs": + "annotationSpecs": [ {"displayName": "dog"}, {"displayName": "cat"}, {"displayName": "horses"} ], - "rows": + "rows": [ {"row": [2, 6, 0]}, {"row": [3, 5, 6]}, {"row" : [5, 7, 8]} ] } -} \ No newline at end of file +} diff --git a/sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json 
b/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_classification_metrics.json similarity index 85% rename from sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json rename to sdk/python/kfp/dsl-test/types/test_data/expected_io_types_classification_metrics.json index 71324c8164..528cf611a7 100644 --- a/sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json +++ b/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_classification_metrics.json @@ -5,22 +5,22 @@ "confidenceThreshold": 0.1, "recall": 98.2, "falsePositiveRate": 96.2 - }, + }, { "confidenceThreshold": 24.3, "recall": 24.5, "falsePositiveRate": 98.4 } ], - "confusionMatrix": + "confusionMatrix": { - "annotationSpecs": + "annotationSpecs": [ {"displayName": "dog"}, {"displayName": "cat"}, {"displayName": "horses"} - ], - "rows": + ], + "rows": [ {"row" : [2, 6, 0]}, {"row" : [3, 0, 0]}, diff --git a/sdk/python/kfp/dsl/types/type_annotations_test.py b/sdk/python/kfp/dsl-test/types/type_annotations_test.py similarity index 100% rename from sdk/python/kfp/dsl/types/type_annotations_test.py rename to sdk/python/kfp/dsl-test/types/type_annotations_test.py diff --git a/sdk/python/kfp/dsl/types/type_utils_test.py b/sdk/python/kfp/dsl-test/types/type_utils_test.py similarity index 99% rename from sdk/python/kfp/dsl/types/type_utils_test.py rename to sdk/python/kfp/dsl-test/types/type_utils_test.py index ee2cf16180..9706fc4ec6 100644 --- a/sdk/python/kfp/dsl/types/type_utils_test.py +++ b/sdk/python/kfp/dsl-test/types/type_utils_test.py @@ -727,6 +727,7 @@ def test_verify_type_compatibility( given_value=argument_value, expected_spec=parameter_input_spec, error_message_prefix='', + raise_on_error=kfp.TYPE_CHECK, )) else: with self.assertRaises(InconsistentTypeException): @@ -734,6 +735,7 @@ def test_verify_type_compatibility( given_value=argument_value, expected_spec=parameter_input_spec, error_message_prefix='', + raise_on_error=kfp.TYPE_CHECK, ) def test_list_of_artifacts_across_compilation_valid(self): diff --git a/sdk/python/kfp/dsl/utils_test.py b/sdk/python/kfp/dsl-test/utils_test.py similarity index 100% rename from sdk/python/kfp/dsl/utils_test.py rename to sdk/python/kfp/dsl-test/utils_test.py diff --git a/sdk/python/kfp/dsl/v1_components.py b/sdk/python/kfp/dsl/v1_components.py deleted file mode 100644 index 9714d56eef..0000000000 --- a/sdk/python/kfp/dsl/v1_components.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2018-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
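Note on the deletion that follows: v1_components.py carried the v1 component loader that warns about container components with no command and fingerprints the component text; that logic now lives in kfp/components/load_yaml_utilities.py (see the load_v1_component_spec_from_component_text hunk earlier in this patch). A minimal sketch of the digest step in isolation, with a hypothetical component_text value, is shown below; normalizing \r\n to \n before hashing keeps the sha256 digest stable across platform line endings.

import hashlib


def component_digest(text) -> str:
    """Return the sha256 hex digest of component text, normalizing line endings."""
    data = text if isinstance(text, bytes) else text.encode('utf-8')
    data = data.replace(b'\r\n', b'\n')  # CRLF and LF inputs hash identically
    return hashlib.sha256(data).hexdigest()


# Hypothetical usage: the same YAML with different line endings yields one digest.
component_text = 'name: my-component\r\nimplementation:\r\n  container:\r\n    image: python:3.7\r\n'
assert component_digest(component_text) == component_digest(component_text.replace('\r\n', '\n'))
print(component_digest(component_text))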
- -import hashlib -import warnings - -from kfp.dsl import v1_structures -import yaml - - -def _load_component_spec_from_component_text( - text) -> v1_structures.ComponentSpec: - component_dict = yaml.safe_load(text) - component_spec = v1_structures.ComponentSpec.from_dict(component_dict) - - if isinstance(component_spec.implementation, - v1_structures.ContainerImplementation) and ( - component_spec.implementation.container.command is None): - warnings.warn( - 'Container component must specify command to be compatible with KFP ' - 'v2 compatible mode and emissary executor, which will be the default' - ' executor for KFP v2.' - 'https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/', - category=FutureWarning, - ) - - # Calculating hash digest for the component - data = text if isinstance(text, bytes) else text.encode('utf-8') - data = data.replace(b'\r\n', b'\n') # Normalizing line endings - digest = hashlib.sha256(data).hexdigest() - component_spec._digest = digest - - return component_spec diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index 975568b8b7..f76ea1ba17 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -10,6 +10,7 @@ google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0 google-auth>=1.6.1,<3 # https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md#221-2022-03-15 google-cloud-storage>=2.2.1,<3 +kfp-dsl==2.0.1 # pin kfp-pipeline-spec to an exact version, since this is the contract between a given KFP SDK version and the BE. we don't want old version of the SDK to write new fields and to have the BE reject the new unsupported field (even if the new field backward compatible from a proto perspective) kfp-pipeline-spec==0.2.2 # Update the upper version whenever a new major version of the diff --git a/sdk/python/test_data/components/add_numbers.yaml b/sdk/python/test_data/components/add_numbers.yaml index 5b5486da36..3b717513fa 100644 --- a/sdk/python/test_data/components/add_numbers.yaml +++ b/sdk/python/test_data/components/add_numbers.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/component_with_metadata_fields.yaml b/sdk/python/test_data/components/component_with_metadata_fields.yaml index 61a41867cf..039b24833f 100644 --- a/sdk/python/test_data/components/component_with_metadata_fields.yaml +++ b/sdk/python/test_data/components/component_with_metadata_fields.yaml @@ -48,7 +48,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/component_with_pip_install.yaml b/sdk/python/test_data/components/component_with_pip_install.yaml index 4e4335a204..24b66764ce 100644 --- a/sdk/python/test_data/components/component_with_pip_install.yaml +++ b/sdk/python/test_data/components/component_with_pip_install.yaml @@ -19,7 +19,7 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/components/component_with_task_final_status.yaml b/sdk/python/test_data/components/component_with_task_final_status.yaml index ac138f7055..aed58ffea2 100644 --- a/sdk/python/test_data/components/component_with_task_final_status.yaml +++ b/sdk/python/test_data/components/component_with_task_final_status.yaml @@ -24,7 +24,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/concat_message.yaml b/sdk/python/test_data/components/concat_message.yaml index 5dc62f9620..8dd970f199 100644 --- a/sdk/python/test_data/components/concat_message.yaml +++ b/sdk/python/test_data/components/concat_message.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/dict_input.yaml b/sdk/python/test_data/components/dict_input.yaml index 977103a338..4a2d083e3d 100644 --- a/sdk/python/test_data/components/dict_input.yaml +++ b/sdk/python/test_data/components/dict_input.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/identity.yaml b/sdk/python/test_data/components/identity.yaml index b8a4551a9f..7ec3ce6ea0 100644 --- a/sdk/python/test_data/components/identity.yaml +++ b/sdk/python/test_data/components/identity.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/input_artifact.yaml b/sdk/python/test_data/components/input_artifact.yaml index e029dd8161..638ad9935b 100644 --- a/sdk/python/test_data/components/input_artifact.yaml +++ b/sdk/python/test_data/components/input_artifact.yaml @@ -25,7 +25,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/nested_return.yaml b/sdk/python/test_data/components/nested_return.yaml index 810215dcf3..705cf55da3 100644 --- a/sdk/python/test_data/components/nested_return.yaml +++ b/sdk/python/test_data/components/nested_return.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/output_metrics.yaml b/sdk/python/test_data/components/output_metrics.yaml index 6a18a32d0b..c093bf9bdc 100644 --- a/sdk/python/test_data/components/output_metrics.yaml +++ b/sdk/python/test_data/components/output_metrics.yaml @@ -27,7 +27,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/preprocess.yaml b/sdk/python/test_data/components/preprocess.yaml index 03c46dbdac..66c6c07f06 100644 --- a/sdk/python/test_data/components/preprocess.yaml +++ b/sdk/python/test_data/components/preprocess.yaml @@ -56,7 +56,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index f53f6ae05d..a359b5cc07 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index 59ebc83433..79ca63434f 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -19,7 +19,7 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index 5bcf95a08e..14c23f61ec 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -155,7 +155,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index abc9a2995d..f12b9d071c 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -78,7 +78,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -130,7 +130,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index b7525f874c..c696d315c5 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -108,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -135,7 +135,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -162,7 +162,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml index ad5e32ce02..c193905629 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml @@ -285,7 +285,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -315,7 +315,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -345,7 +345,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -375,7 +375,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -403,7 +403,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml index 55f5c8ae24..28b8a5bb76 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml @@ -90,7 +90,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -136,7 +136,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index c2d8aae620..2768287ce7 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -158,7 +158,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index af4379d557..5cb736de7a 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -150,7 +150,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -177,7 +177,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -229,7 +229,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index b76f1ad5b6..422002351c 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -251,7 +251,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -277,7 +277,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -303,7 +303,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -330,7 +330,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -357,7 +357,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -383,7 +383,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index 9bc16ff5b2..05cb6b5a25 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -75,7 +75,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -111,7 +111,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index 18fc3aa052..4faf1f4e64 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index 42c88e3a68..1dd3e2eb57 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,7 +129,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -156,7 +156,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -183,7 +183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -210,7 +210,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index 9c8f5e0993..e37138b305 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index 63ce9aceb0..380de8c413 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -188,7 +188,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index ab7d67cac7..70ff1cb7f3 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index 5eed3984a5..9672d77fbc 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -88,7 +88,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -116,7 +116,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -144,7 +144,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 6443b13909..85507fd6f5 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -94,7 +94,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index 789a1e975d..ccdd19d965 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -41,7 +41,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index b1c6091fe2..9cd106e04b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,7 +65,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -92,7 +92,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -119,7 +119,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index 6753ae29a0..11291ec534 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -57,7 +57,7 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.1' 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp-dsl==2.0.1' 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -90,7 +90,7 @@ deploymentSpec: - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index a7678237f6..b6e7bfa5d2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -127,7 +127,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index 13999d852c..68d26df39d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -250,7 +250,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -276,7 +276,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -302,7 +302,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -328,7 +328,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -354,7 +354,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index fbf6dd967b..a63192501f 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,7 +602,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -631,7 +631,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -660,7 +660,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -688,7 +688,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -714,7 +714,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -741,7 +741,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -768,7 +768,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -795,7 +795,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -822,7 +822,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -849,7 +849,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -876,7 +876,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -903,7 +903,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -930,7 +930,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 1aa009e344..24e6cd0c44 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -95,7 +95,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index d2091815bf..3f136829e2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -89,7 +89,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index 3bbec7526c..2777957763 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,7 +125,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -260,7 +260,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -287,7 +287,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index e81a303531..5a21d018f5 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,7 +147,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -175,7 +175,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -313,7 +313,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -340,7 +340,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index 9b601893ed..f92791b406 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -145,7 +145,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -172,7 +172,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -199,7 +199,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index 1cba4dd0a2..b35286eeb9 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,7 +104,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -131,7 +131,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index f1f3a5fa23..cbcb9c86cc 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -205,7 +205,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -257,7 +257,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -283,7 +283,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index 6f31bc7deb..39778a63a2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -128,7 +128,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index 5a313c4ed4..ca1787d667 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,7 +55,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -107,7 +107,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -133,7 +133,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index 34c474435b..d3b92f441f 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,7 +30,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index e53e19ac60..86ad841a3d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -99,7 +99,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index 385cb4a1d4..e688046946 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,7 +35,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -62,7 +62,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/test/presubmit-component-yaml.sh b/test/presubmit-component-yaml.sh index 7bc30f8f63..91aad1ba79 100755 --- a/test/presubmit-component-yaml.sh +++ b/test/presubmit-component-yaml.sh @@ -15,10 +15,8 @@ source_root=$(pwd) -pushd $source_root/sdk/python python3 -m pip install --upgrade pip -python3 -m pip install -r requirements.txt -python3 -m pip install . +source sdk/python/install_from_source.sh # Test loading all component.yaml definitions "$source_root/components/test_load_all_components.sh" diff --git a/test/presubmit-test-kfp-dsl-runtime-code.sh b/test/presubmit-test-kfp-dsl-runtime-code.sh new file mode 100644 index 0000000000..8e4e150553 --- /dev/null +++ b/test/presubmit-test-kfp-dsl-runtime-code.sh @@ -0,0 +1,23 @@ +#!/bin/bash -ex +# Copyright 2023 Kubeflow Pipelines contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +source_root=$(pwd) + +pip install --upgrade pip +pip install -e $source_root/sdk/python/kfp-dsl +python3 -m pip install $(grep 'absl-py==' sdk/python/requirements-dev.txt) +python3 -m pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) + +pytest sdk/python/kfp-dsl diff --git a/test/presubmit-test-kfp-kubernetes-library.sh b/test/presubmit-test-kfp-kubernetes-library.sh index 931b03831e..a8225de5f7 100755 --- a/test/presubmit-test-kfp-kubernetes-library.sh +++ b/test/presubmit-test-kfp-kubernetes-library.sh @@ -18,9 +18,7 @@ source_root=$(pwd) pip install --upgrade pip pip install wheel -## start remove: install from PyPI once platform-specific features are released in the KFP SDK -pip install -e "$source_root/sdk/python" -## end remove +pip install 'kfp>=2.0.0,<3.0.0' # generate Python proto code from source apt-get update -y diff --git a/test/presubmit-tests-sdk.sh b/test/presubmit-tests-sdk.sh index 97a6c26444..215f3e07b0 100755 --- a/test/presubmit-tests-sdk.sh +++ b/test/presubmit-tests-sdk.sh @@ -24,12 +24,10 @@ python3 -m pip install $(grep 'pytest-xdist==' sdk/python/requirements-dev.txt) python3 -m pip install $(grep 'pytest-cov==' sdk/python/requirements-dev.txt) python3 -m pip install --upgrade protobuf -pushd "$source_root/sdk/python" -python3 -m pip install -e . -popd # Changing the current directory to the repo root for correct coverall paths +source $source_root/sdk/python/install_from_source.sh # TODO: remove deprecated dependency; then remove --ignore arg -pytest sdk/python/kfp --ignore=sdk/python/kfp/deprecated --cov=kfp +pytest sdk/python/kfp --ignore=sdk/python/kfp/deprecated --cov=kfp set +x # export COVERALLS_REPO_TOKEN=$(gsutil cat gs://ml-pipeline-test-keys/coveralls_repo_token) diff --git a/test/presubmit-tests-tfx.sh b/test/presubmit-tests-tfx.sh index 0136ded812..ecf6517711 100755 --- a/test/presubmit-tests-tfx.sh +++ b/test/presubmit-tests-tfx.sh @@ -30,9 +30,7 @@ pip3 install junit_xml # Using Argo to lint all compiled workflows "${source_root}/test/install-argo-cli.sh" -pushd $source_root/sdk/python -python3 -m pip install -e . 
-popd # Changing the current directory to the repo root for correct coverall paths +source $source_root/sdk/python/install_from_source.sh # Test against TFX # Compile and setup bazel for compiling the protos From 309c5b9f0abd689ca11a28a010d7f622c9c936cb Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 26 Jul 2023 10:09:02 -0700 Subject: [PATCH 048/253] chore(sdk): release KFP SDK 2.1.0 (#9782) --- docs/conf.py | 9 ++++++++- sdk/RELEASE.md | 12 ++++++++++++ sdk/python/kfp/__init__.py | 2 +- sdk/python/kfp/compiler/compiler_test.py | 3 ++- sdk/python/kfp/compiler/read_write_test.py | 3 ++- sdk/python/requirements.in | 2 +- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 42171b3f67..3505e44b86 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -132,12 +132,19 @@ True, 'version_info': [ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags + { + 'version': + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.0/', + 'title': + '2.1.0', + 'aliases': ['stable'], + }, { 'version': 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.0.1/', 'title': '2.0.1', - 'aliases': ['stable'], + 'aliases': [], }, { 'version': diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 00bda01c27..3d969545e7 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -8,6 +8,18 @@ ## Bug fixes and other changes +## Documentation updates +# 2.1.0 + +## Features +* Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9738](https://github.com/kubeflow/pipelines/pull/9738) + +## Breaking changes + +## Deprecations + +## Bug fixes and other changes + ## Documentation updates # 2.0.1 diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 3c605b82a9..28de27fef1 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.0.1' +__version__ = '2.1.0' TYPE_CHECK = True diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 92b1f6a1b7..597af49cc7 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -1299,7 +1299,8 @@ def ignore_kfp_version_helper(spec: Dict[str, Any]) -> Dict[str, Any]: pipeline_spec['deploymentSpec']['executors'][ executor] = yaml.safe_load( re.sub( - r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", 'kfp', + r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", + 'kfp', yaml.dump( pipeline_spec['deploymentSpec']['executors'] [executor], diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index 29c76db03e..fc4dc7d3e1 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -74,7 +74,8 @@ def ignore_kfp_version_helper(spec: Dict[str, Any]) -> Dict[str, Any]: pipeline_spec['deploymentSpec']['executors'][ executor] = yaml.safe_load( re.sub( - r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", 'kfp', + r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", + 'kfp', yaml.dump( pipeline_spec['deploymentSpec']['executors'] [executor], diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index f76ea1ba17..d2f7b86a54 100644 --- a/sdk/python/requirements.in +++ 
b/sdk/python/requirements.in @@ -10,7 +10,7 @@ google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0 google-auth>=1.6.1,<3 # https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md#221-2022-03-15 google-cloud-storage>=2.2.1,<3 -kfp-dsl==2.0.1 +kfp-dsl==2.1.0 # pin kfp-pipeline-spec to an exact version, since this is the contract between a given KFP SDK version and the BE. we don't want old version of the SDK to write new fields and to have the BE reject the new unsupported field (even if the new field backward compatible from a proto perspective) kfp-pipeline-spec==0.2.2 # Update the upper version whenever a new major version of the From 8ad97167ff9ea589d396728fdec4413fd559ade1 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 26 Jul 2023 15:41:51 -0700 Subject: [PATCH 049/253] fix(sdk): various kfp-dsl fixes (#9785) * specify kfp-dsl version directly * make presubmit test executable * add yaml requirement * assorted test and import fixes --- sdk/python/kfp-dsl/kfp/dsl/structures.py | 13 ++- .../kfp-dsl/kfp/dsl/types/type_utils.py | 83 ++++++------------- .../execute_commands_args_test.py | 9 +- sdk/python/kfp-dsl/setup.py | 25 +----- test/presubmit-test-kfp-dsl-runtime-code.sh | 5 +- 5 files changed, 42 insertions(+), 93 deletions(-) mode change 100644 => 100755 test/presubmit-test-kfp-dsl-runtime-code.sh diff --git a/sdk/python/kfp-dsl/kfp/dsl/structures.py b/sdk/python/kfp-dsl/kfp/dsl/structures.py index 941bff7a07..f3a379ea39 100644 --- a/sdk/python/kfp-dsl/kfp/dsl/structures.py +++ b/sdk/python/kfp-dsl/kfp/dsl/structures.py @@ -675,7 +675,6 @@ def from_v1_component_spec( from kfp.pipeline_spec import pipeline_spec_pb2 except ImportError as e: raise ImportError(dsl._kfp_dsl_import_error_msg) from e - parameter_types_mapping = type_utils.get_parameter_types_mapping() for spec in component_dict.get('inputs', []): type_ = spec.get('type') @@ -692,9 +691,9 @@ def from_v1_component_spec( type=type_, optional=True) continue - elif isinstance(type_, - str) and type_.lower() in parameter_types_mapping: - type_enum = parameter_types_mapping[type_.lower()] + elif isinstance(type_, str) and type_.lower( + ) in type_utils.PARAMETER_TYPES_MAPPING: + type_enum = type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( type_enum) in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ @@ -745,9 +744,9 @@ def from_v1_component_spec( if isinstance(type_, str): type_ = type_utils.get_canonical_name_for_outer_generic(type_) - if isinstance(type_, - str) and type_.lower() in parameter_types_mapping: - type_enum = parameter_types_mapping[type_.lower()] + if isinstance(type_, str) and type_.lower( + ) in type_utils.PARAMETER_TYPES_MAPPING: + type_enum = type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( type_enum) in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py b/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py index e27fb234f9..6f07fbfcb8 100644 --- a/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py +++ b/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py @@ -43,32 +43,32 @@ _GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$' _GOOGLE_TYPES_VERSION = DEFAULT_ARTIFACT_SCHEMA_VERSION - # ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping. # The keys are normalized (lowercased). These are types viewed as Parameters. 
# The values are the corresponding IR parameter primitive types. -def get_parameter_types_mapping(): - - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - return { - 'integer': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, - 'int': pipeline_spec_pb2.ParameterType.NUMBER_INTEGER, - 'double': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, - 'float': pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE, - 'string': pipeline_spec_pb2.ParameterType.STRING, - 'str': pipeline_spec_pb2.ParameterType.STRING, - 'text': pipeline_spec_pb2.ParameterType.STRING, - 'bool': pipeline_spec_pb2.ParameterType.BOOLEAN, - 'boolean': pipeline_spec_pb2.ParameterType.BOOLEAN, - 'dict': pipeline_spec_pb2.ParameterType.STRUCT, - 'list': pipeline_spec_pb2.ParameterType.LIST, - 'jsonobject': pipeline_spec_pb2.ParameterType.STRUCT, - 'jsonarray': pipeline_spec_pb2.ParameterType.LIST, - } +# pipeline_spec_pb2.ParameterType enum values +NUMBER_DOUBLE = 1 +NUMBER_INTEGER = 2 +STRING = 3 +BOOLEAN = 4 +LIST = 5 +STRUCT = 6 +PARAMETER_TYPES_MAPPING = { + 'integer': 2, + 'int': NUMBER_INTEGER, + 'double': NUMBER_DOUBLE, + 'float': NUMBER_DOUBLE, + 'string': STRING, + 'str': STRING, + 'text': STRING, + 'bool': BOOLEAN, + 'boolean': BOOLEAN, + 'dict': STRUCT, + 'list': LIST, + 'jsonobject': STRUCT, + 'jsonarray': LIST, +} def bool_cast_fn(default: Union[str, bool]) -> bool: @@ -145,8 +145,8 @@ def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool: else: return False - return type_name.lower() in get_parameter_types_mapping( - ) or is_task_final_status_type(type_name) + return type_name.lower( + ) in PARAMETER_TYPES_MAPPING or is_task_final_status_type(type_name) def bundled_artifact_to_artifact_proto( @@ -190,7 +190,7 @@ def get_parameter_type( type_name = list(param_type.keys())[0] else: type_name = type_annotations.get_short_type_name(str(param_type)) - return get_parameter_types_mapping().get(type_name.lower()) + return PARAMETER_TYPES_MAPPING.get(type_name.lower()) def get_parameter_type_name( @@ -205,39 +205,6 @@ def get_parameter_type_name( get_parameter_type(param_type)) -def get_parameter_type_field_name(type_name: Optional[str]) -> Optional[str]: - """Get the IR field name for the given primitive type. - - For example: 'str' -> 'string_value', 'double' -> 'double_value', etc. - - Args: - type_name: type name of the ComponentSpec I/O primitive type. - - Returns: - The IR value reference field name. - - Raises: - AttributeError: if type_name is not a string type. - """ - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - # Mapping primitive types to their IR message field names. - # This is used in constructing condition strings. 
- _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = { - pipeline_spec_pb2.ParameterType.NUMBER_INTEGER: 'number_value', - pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE: 'number_value', - pipeline_spec_pb2.ParameterType.STRING: 'string_value', - pipeline_spec_pb2.ParameterType.BOOLEAN: 'bool_value', - pipeline_spec_pb2.ParameterType.STRUCT: 'struct_value', - pipeline_spec_pb2.ParameterType.LIST: 'list_value', - } - return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get( - get_parameter_type(type_name)) - - class InconsistentTypeException(Exception): """InconsistencyTypeException is raised when two types are not consistent.""" diff --git a/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py b/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py index c661fb6e00..084a1f204f 100644 --- a/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py +++ b/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py @@ -14,6 +14,7 @@ import dataclasses import json import os +import re import shutil import subprocess import tempfile @@ -23,7 +24,6 @@ import yaml TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'test_data') -TMP_DIR = tempfile.mkdtemp() @dataclasses.dataclass @@ -107,8 +107,13 @@ def run_commands_and_args( config.executor_name]['container'] command_and_args = container['command'] + container['args'] + command_and_args = [ + re.sub(r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", + 'kfp-dsl', cmd) for cmd in command_and_args + ] + executor_input_json = json.dumps(config.executor_input).replace( - '/gcs/', TMP_DIR) + '/gcs/', temp_dir) command_and_args = [ v.replace('{{$}}', executor_input_json) for v in command_and_args ] diff --git a/sdk/python/kfp-dsl/setup.py b/sdk/python/kfp-dsl/setup.py index dbd6ade974..005908940b 100644 --- a/sdk/python/kfp-dsl/setup.py +++ b/sdk/python/kfp-dsl/setup.py @@ -12,34 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os -import re - import setuptools - -def find_version(*file_path_parts: str) -> str: - """Get version from a file that defines a __version__ variable.""" - - file_path = os.path.join(os.path.dirname(__file__), *file_path_parts) - with open(file_path, 'r') as f: - version_file_text = f.read() - - version_match = re.search( - r"^__version__ = ['\"]([^'\"]*)['\"]", - version_file_text, - re.M, - ) - if version_match: - return version_match.group(1) - - raise RuntimeError(f'Unable to find version string in file: {file_path}.') - - setuptools.setup( name='kfp-dsl', - version=find_version( - os.path.dirname(os.path.dirname(__file__)), 'kfp', '__init__.py'), + version='2.1.0', description='A KFP SDK subpackage containing the DSL and runtime code.', author='google', author_email='kubeflow-pipelines@google.com', diff --git a/test/presubmit-test-kfp-dsl-runtime-code.sh b/test/presubmit-test-kfp-dsl-runtime-code.sh old mode 100644 new mode 100755 index 8e4e150553..d8c94dc06b --- a/test/presubmit-test-kfp-dsl-runtime-code.sh +++ b/test/presubmit-test-kfp-dsl-runtime-code.sh @@ -17,7 +17,8 @@ source_root=$(pwd) pip install --upgrade pip pip install -e $source_root/sdk/python/kfp-dsl -python3 -m pip install $(grep 'absl-py==' sdk/python/requirements-dev.txt) -python3 -m pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) +pip install pyyaml +pip install $(grep 'absl-py==' sdk/python/requirements-dev.txt) +pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) pytest sdk/python/kfp-dsl From 49db9effe4b95ec168590ae37b4e58af76484efb Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 26 Jul 2023 17:26:51 -0700 Subject: [PATCH 050/253] chore(sdk): release KFP SDK 2.1.1 (#9787) --- docs/conf.py | 4 ++-- sdk/RELEASE.md | 2 +- sdk/python/kfp-dsl/setup.py | 2 +- sdk/python/kfp/__init__.py | 2 +- sdk/python/requirements.in | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 3505e44b86..8aa9576e05 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -134,9 +134,9 @@ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags { 'version': - 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.0/', + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.1/', 'title': - '2.1.0', + '2.1.1', 'aliases': ['stable'], }, { diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 3d969545e7..1da8922c62 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -9,7 +9,7 @@ ## Bug fixes and other changes ## Documentation updates -# 2.1.0 +# 2.1.1 ## Features * Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9738](https://github.com/kubeflow/pipelines/pull/9738) diff --git a/sdk/python/kfp-dsl/setup.py b/sdk/python/kfp-dsl/setup.py index 005908940b..809bce3b10 100644 --- a/sdk/python/kfp-dsl/setup.py +++ b/sdk/python/kfp-dsl/setup.py @@ -16,7 +16,7 @@ setuptools.setup( name='kfp-dsl', - version='2.1.0', + version='2.1.1', description='A KFP SDK subpackage containing the DSL and runtime code.', author='google', author_email='kubeflow-pipelines@google.com', diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 28de27fef1..0e9ad528f9 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = 
__import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.1.0' +__version__ = '2.1.1' TYPE_CHECK = True diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index d2f7b86a54..9f351c1e30 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -10,7 +10,7 @@ google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0 google-auth>=1.6.1,<3 # https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md#221-2022-03-15 google-cloud-storage>=2.2.1,<3 -kfp-dsl==2.1.0 +kfp-dsl==2.1.1 # pin kfp-pipeline-spec to an exact version, since this is the contract between a given KFP SDK version and the BE. we don't want old version of the SDK to write new fields and to have the BE reject the new unsupported field (even if the new field backward compatible from a proto perspective) kfp-pipeline-spec==0.2.2 # Update the upper version whenever a new major version of the From 46fafe3798010943c75c9a08bd594f1be8e97aa5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 26 Jul 2023 18:52:53 -0700 Subject: [PATCH 051/253] chore(components): add project_id and location placeholders as GCPC implementation utility PiperOrigin-RevId: 551377458 --- .../_placeholders.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_placeholders.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_placeholders.py b/components/google-cloud/google_cloud_pipeline_components/_placeholders.py new file mode 100644 index 0000000000..44dd3cae8a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_placeholders.py @@ -0,0 +1,18 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Placeholders for use in component authoring.""" + +# prefer not using placeholder suffix like KFP does for reduce verbosity +PROJECT_ID_PLACEHOLDER = "{{$.pipeline_google_cloud_project_id}}" +LOCATION_PLACEHOLDER = "{{$.pipeline_google_cloud_location}}" From 7c55a4f5f5333ed606758c744167df113540586e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 26 Jul 2023 22:22:12 -0700 Subject: [PATCH 052/253] chore(components): release GCPC v2.1.1 PiperOrigin-RevId: 551413553 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 3 +++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 0df9dedf2f..71ad870a43 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index c02e27f5a7..4c5cc61e66 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,8 @@ ## Upcoming release +## Release 2.1.1 +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.1.0 * Add AutoML tabular and forecasting components to `preview` namespace * Fix bug where `parent_model` parameter of `ModelUploadOp` ignored diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index c600b1005b..bbdf86081e 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.1", + "title": "2.1.1", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.0", "title": "2.1.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 50bf7a59b5..d60161b3c4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
"""Google Cloud Pipeline Components version.""" -__version__ = "2.1.0" +__version__ = "2.1.1" From cc829bd7aba3cc44df0fa299a6cae45a7af5a8e9 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 27 Jul 2023 14:12:51 -0700 Subject: [PATCH 053/253] chore(components): release GCPC v2.1.1 PiperOrigin-RevId: 551637082 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 3 --- components/google-cloud/docs/source/versions.json | 5 ----- .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 71ad870a43..0df9dedf2f 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 4c5cc61e66..c02e27f5a7 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,8 +1,5 @@ ## Upcoming release -## Release 2.1.1 -* Apply latest GCPC image vulnerability resolutions (base OS and software updates) - ## Release 2.1.0 * Add AutoML tabular and forecasting components to `preview` namespace * Fix bug where `parent_model` parameter of `ModelUploadOp` ignored diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index bbdf86081e..c600b1005b 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,9 +1,4 @@ [ - { - "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.1", - "title": "2.1.1", - "aliases": [] - }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.0", "title": "2.1.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index d60161b3c4..50bf7a59b5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
"""Google Cloud Pipeline Components version.""" -__version__ = "2.1.1" +__version__ = "2.1.0" From 8dd9bd6e27ef4df8ba5818b177c776a82837b3da Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Thu, 27 Jul 2023 14:16:54 -0700 Subject: [PATCH 054/253] chore(components): Update evaluation component type hints to be Python3.7 compatible PiperOrigin-RevId: 551638206 --- .../data_sampler/component.py | 4 +++- .../dataset_preprocessor/component.py | 6 +++-- .../import_evaluation/component.py | 10 ++++----- .../llm_evaluation/component.py | 5 +++-- .../target_field_data_remover/component.py | 4 +++- .../_implementation/model_evaluation/utils.py | 22 +++++++++---------- .../model_evaluation/data_bias_component.py | 9 +++++--- .../classification_component.py | 10 +++++---- .../error_analysis_pipeline.py | 6 +++-- .../evaluated_annotation_pipeline.py | 4 +++- ...ml_tabular_feature_attribution_pipeline.py | 10 +++++---- .../evaluation_automl_tabular_pipeline.py | 7 +++--- ...uation_automl_unstructure_data_pipeline.py | 6 +++-- ...evaluation_feature_attribution_pipeline.py | 8 ++++--- .../model_evaluation/forecasting_component.py | 6 +++-- .../model_evaluation/regression_component.py | 4 +++- 16 files changed, 74 insertions(+), 47 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py index 7634caa091..d974ff82c5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/data_sampler/component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -28,7 +30,7 @@ def evaluation_data_sampler( gcs_output_directory: OutputPath(list), project: str, location: str = 'us-central1', - gcs_source_uris: list = [], + gcs_source_uris: List[str] = [], bigquery_source_uri: str = '', instances_format: str = 'jsonl', sample_size: int = 10000, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py index 6d15b53003..50fc8a05a5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/dataset_preprocessor/component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp.dsl import container_component @@ -37,8 +39,8 @@ def dataset_preprocessor_error_analysis( test_dataset_annotation_set_name: str = '', training_dataset: Input[VertexDataset] = None, training_dataset_annotation_set_name: str = '', - test_dataset_storage_source_uris: list = [], - training_dataset_storage_source_uris: list = [], + test_dataset_storage_source_uris: List[str] = [], + training_dataset_storage_source_uris: List[str] = [], ): # fmt: off """Preprocesses datasets for Vision Error Analysis pipelines. diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py index b7be65d5c1..5c98c4b950 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py @@ -13,7 +13,7 @@ # limitations under the License. -from typing import Optional +from typing import List, Optional from google_cloud_pipeline_components import _image from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics @@ -40,10 +40,10 @@ def model_evaluation_import( summarization_metrics: Optional[Input[Metrics]] = None, explanation: Optional[Input[Metrics]] = None, feature_attributions: Optional[Input[Metrics]] = None, - display_name: Optional[str] = "", - dataset_path: Optional[str] = "", - dataset_paths: Optional[list] = [], - dataset_type: Optional[str] = "", + display_name: str = "", + dataset_path: str = "", + dataset_paths: List[str] = [], + dataset_type: str = "", ): # fmt: off """Imports a model evaluation artifact to an existing Vertex model with diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py index 203bbf00c4..0711af300b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -11,9 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Text Generation LLM Evaluation component.""" +from typing import List + from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version @@ -39,7 +40,7 @@ def model_evaluation_text_generation( service_account: str = '', enable_web_access: bool = True, network: str = '', - reserved_ip_ranges: list = [], + reserved_ip_ranges: List[str] = [], encryption_spec_key_name: str = '', ): """Computes evaluation metrics of a text generation model. 
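The hunks in this patch all make the same mechanical change: bare `list`/`dict` annotations (and PEP 585 subscripts such as `list[str]`, e.g. in `build_custom_job_payload` later in the patch) become `typing.List[...]`/`typing.Dict[...]`, which remain valid on Python 3.7, where the built-in types cannot be subscripted at runtime. A minimal sketch of the pattern, assuming the KFP v2 SDK is installed; the component and parameter names are illustrative, not part of this patch:

from typing import List

from kfp import dsl


# With `list[str]` instead of `List[str]`, this module would fail to import on
# Python 3.7/3.8 ("TypeError: 'type' object is not subscriptable"), because the
# annotation is evaluated when the function is defined, not when it runs.
@dsl.component
def count_sources(gcs_source_uris: List[str] = []) -> int:
    """Counts input URIs; `typing.List` keeps the annotation 3.7-compatible."""
    return len(gcs_source_uris)

Since KFP inspects these annotations at component-definition time to build the pipeline spec, the `typing` forms are what keep the GCPC evaluation modules importable on older interpreters.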
diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py index 0673671a79..5b61856076 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/target_field_data_remover/component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -28,7 +30,7 @@ def target_field_data_remover( gcs_output_directory: OutputPath(list), project: str, location: str = 'us-central1', - gcs_source_uris: list = [], + gcs_source_uris: List[str] = [], bigquery_source_uri: str = '', instances_format: str = 'jsonl', target_field_name: str = 'ground_truth', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py index d1cbcb32d4..94b13278b7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py @@ -1,6 +1,6 @@ """Utility functions used to create custom Kubeflow components.""" -from typing import Any +from typing import Any, Dict, List from google_cloud_pipeline_components import _image @@ -9,30 +9,26 @@ def build_custom_job_payload( *, display_name: str, image_uri: str, - args: list[str], + args: List[str], machine_type: str = 'n1-standard-4', service_account: str = '', network: str = '', - reserved_ip_ranges: list[str] = [], + reserved_ip_ranges: List[str] = [], enable_web_access: bool = False, - encryption_spec_key_name: str = '', accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED', accelerator_count: int = 0, -) -> dict[str, Any]: + encryption_spec_key_name: str = '', +) -> Dict[str, Any]: """Generates payload for a CustomJob in a Sec4 horizontal compliant way. Args: display_name: CustomJob display name. Can contain up to 128 UTF-8 characters. + image_uri: Docker image URI to use for the CustomJob. + args: Arguments to pass to the Docker image. machine_type: The type of the machine. See the list of machine types supported for custom training: https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types - accelerator_type: The type of accelerator(s) that may be attached to the - machine as per acceleratorCount. - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType - accelerator_count: The number of accelerators to attach to the machine. - image_uri: Docker image URI to use for the CustomJob. - args: Arguments to pass to the Docker image. service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) @@ -54,6 +50,10 @@ def build_custom_job_payload( access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. 
If set to `true`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + accelerator_type: The type of accelerator(s) that may be attached to the + machine as per acceleratorCount. + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType + accelerator_count: The number of accelerators to attach to the machine. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py index 9ddad08757..05a10896c4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from typing import Any, List + from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp.dsl import Artifact @@ -30,12 +33,12 @@ def detect_data_bias( data_bias_metrics: Output[Artifact], project: str, target_field_name: str, - bias_configs: list, + bias_configs: List[Any], location: str = 'us-central1', dataset_format: str = 'jsonl', - dataset_storage_source_uris: list = [], + dataset_storage_source_uris: List[str] = [], dataset: Input[VertexDataset] = None, - columns: list = [], + columns: List[str] = [], encryption_spec_key_name: str = '', ): # fmt: off diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py index 9f865cbb7b..b5c2fcc28d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any, List + from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics @@ -32,14 +34,14 @@ def model_evaluation_classification( predictions_gcs_source: dsl.Input[dsl.Artifact] = None, predictions_bigquery_source: dsl.Input[BQTable] = None, ground_truth_format: str = 'jsonl', - ground_truth_gcs_source: list = [], + ground_truth_gcs_source: List[str] = [], ground_truth_bigquery_source: str = '', classification_type: str = 'multiclass', - class_labels: list = [], + class_labels: List[str] = [], prediction_score_column: str = 'prediction.scores', prediction_label_column: str = 'prediction.classes', - slicing_specs: list = [], - positive_classes: list = [], + slicing_specs: List[Any] = [], + positive_classes: List[str] = [], dataflow_service_account: str = '', dataflow_disk_size_gb: int = 50, dataflow_machine_type: str = 'n1-standard-4', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index 881b3d4ef3..ad340aad7f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ErrorAnalysisAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluatedAnnotationOp @@ -36,8 +38,8 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v test_dataset_annotation_set_name: str = '', training_dataset_resource_name: str = '', training_dataset_annotation_set_name: str = '', - test_dataset_storage_source_uris: list = [], - training_dataset_storage_source_uris: list = [], + test_dataset_storage_source_uris: List[str] = [], + training_dataset_storage_source_uris: List[str] = [], batch_predict_instances_format: str = 'jsonl', batch_predict_predictions_format: str = 'jsonl', batch_predict_machine_type: str = 'n1-standard-32', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index fdd02830f1..9ee8b9c95d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDatasetPreprocessorOp as DatasetPreprocessorOp @@ -31,7 +33,7 @@ def evaluated_annotation_pipeline( batch_predict_gcs_destination_output_uri: str, test_dataset_resource_name: str = '', test_dataset_annotation_set_name: str = '', - test_dataset_storage_source_uris: list = [], + test_dataset_storage_source_uris: List[str] = [], batch_predict_instances_format: str = 'jsonl', batch_predict_predictions_format: str = 'jsonl', batch_predict_machine_type: str = 'n1-standard-32', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index b2d1e7d3e4..9fa4943fdf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict, List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp @@ -32,19 +34,19 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d target_field_name: str, batch_predict_instances_format: str, batch_predict_gcs_destination_output_uri: str, - batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic batch_predict_bigquery_source_uri: str = '', batch_predict_predictions_format: str = 'jsonl', batch_predict_bigquery_destination_output_uri: str = '', batch_predict_machine_type: str = 'n1-standard-16', batch_predict_starting_replica_count: int = 5, batch_predict_max_replica_count: int = 10, - batch_predict_explanation_metadata: dict = {}, # pylint: disable=g-bare-generic - batch_predict_explanation_parameters: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic batch_predict_explanation_data_sample_size: int = 10000, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, - slicing_specs: list = [], # pylint: disable=g-bare-generic + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index 0d2b7196b2..82d146d591 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import NamedTuple +from typing import Any, List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp @@ -31,7 +32,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val target_field_name: str, batch_predict_instances_format: str, batch_predict_gcs_destination_output_uri: str, - batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic batch_predict_bigquery_source_uri: str = '', batch_predict_predictions_format: str = 'jsonl', batch_predict_bigquery_destination_output_uri: str = '', @@ -40,7 +41,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val batch_predict_max_replica_count: int = 10, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, - slicing_specs: list = [], # pylint: disable=g-bare-generic + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index 89b3b4da43..61beb34b59 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp @@ -32,7 +34,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de target_field_name: str, batch_predict_instances_format: str, batch_predict_gcs_destination_output_uri: str, - batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic batch_predict_bigquery_source_uri: str = '', batch_predict_predictions_format: str = 'jsonl', batch_predict_bigquery_destination_output_uri: str = '', @@ -43,7 +45,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de batch_predict_accelerator_count: int = 0, evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', - evaluation_class_labels: list = [], # pylint: disable=g-bare-generic + evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 87f0d9666e..3270ce8a85 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any, Dict, List + from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp @@ -33,15 +35,15 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul target_field_name: str, batch_predict_instances_format: str, batch_predict_gcs_destination_output_uri: str, - batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic batch_predict_bigquery_source_uri: str = '', batch_predict_predictions_format: str = 'jsonl', batch_predict_bigquery_destination_output_uri: str = '', batch_predict_machine_type: str = 'n1-standard-16', batch_predict_starting_replica_count: int = 5, batch_predict_max_replica_count: int = 10, - batch_predict_explanation_metadata: dict = {}, # pylint: disable=g-bare-generic - batch_predict_explanation_parameters: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic batch_predict_explanation_data_sample_size: int = 10000, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py index 66cfac91aa..a8f165e997 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List + from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ForecastingMetrics @@ -32,10 +34,10 @@ def model_evaluation_forecasting( predictions_gcs_source: dsl.Input[dsl.Artifact] = None, predictions_bigquery_source: dsl.Input[BQTable] = None, ground_truth_format: str = 'jsonl', - ground_truth_gcs_source: list = [], + ground_truth_gcs_source: List[str] = [], ground_truth_bigquery_source: str = '', forecasting_type: str = 'point', - forecasting_quantiles: list = [], + forecasting_quantiles: List[float] = [], point_evaluation_quantile: float = 0.5, prediction_score_column: str = 'prediction.value', dataflow_service_account: str = '', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py index 13a69ef64f..b39557a4fb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import List + from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics @@ -32,7 +34,7 @@ def model_evaluation_regression( predictions_gcs_source: dsl.Input[dsl.Artifact] = None, predictions_bigquery_source: dsl.Input[BQTable] = None, ground_truth_format: str = 'jsonl', - ground_truth_gcs_source: list = [], + ground_truth_gcs_source: List[str] = [], ground_truth_bigquery_source: str = '', prediction_score_column: str = 'prediction.value', dataflow_service_account: str = '', From 2cfe4636ae362efff177329143feb6f7f6f6a8a5 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Thu, 27 Jul 2023 14:54:14 -0700 Subject: [PATCH 055/253] fix(components): Update package import for google protobuf PiperOrigin-RevId: 551648465 --- .../preview/model_evaluation/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py index 8157976ebe..5dbb96cd86 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Union from google.cloud.aiplatform_v1.types.model_evaluation_slice import ModelEvaluationSlice + +from google.protobuf.wrappers_pb2 import BoolValue from google.protobuf import json_format -from google.protobuf import wrappers_pb2 def create_slice_specs_list( @@ -46,7 +47,7 @@ def create_slice_specs_list( if isinstance(value, bool): # Bool must be checked first, bool is a child of int in Python. 
configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( - all_values=wrappers_pb2.BoolValue(value=value) + all_values=BoolValue(value=value) ) elif isinstance(value, int) or isinstance(value, float): configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( From ae804f471cb5ad7e4ba70ee44bf958a4a909d2a9 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 27 Jul 2023 15:00:39 -0700 Subject: [PATCH 056/253] feat(components): Implement LLM Safety Bias Component & E2E tests PiperOrigin-RevId: 551650193 --- .../model_evaluation/__init__.py | 2 + .../model_evaluation/rai_safety/__init__.py | 14 ++ .../rai_safety/llm_safety_bias/__init__.py | 14 ++ .../rai_safety/llm_safety_bias/component.py | 103 ++++++++++++ .../rai_safety/safety_metrics_pipeline.py | 159 ++++++++++++++++++ 5 files changed, 292 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 798f3596bc..bc66ca0531 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -23,6 +23,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as ModelEvaluationTextGenerationOp +from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_metrics_bias as SafetyMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -35,5 +36,6 @@ 'ModelEvaluationTextGenerationOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', + 'SafetyMetricsOp', 'TargetFieldDataRemoverOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py new file mode 100644 index 0000000000..5175407c81 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline RAI Components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py new file mode 100644 index 0000000000..10ad4cbf18 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline LLM Safety Bias Components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py new file mode 100644 index 0000000000..4fd3f7ce08 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py @@ -0,0 +1,103 @@ +"""Python LLM Safety Model Evaluation component used in KFP pipelines.""" + +from typing import List, Optional + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp.dsl import Artifact +from kfp.dsl import container_component +from kfp.dsl import Output +from kfp.dsl import OutputPath + + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:lakeyk-test' +# TODO(b/293198435): Update to publicly released image. + + +@container_component +def llm_safety_metrics_bias( + gcp_resources: OutputPath(str), + bias_llm_metrics: Output[Artifact], + project: str, + location: str = 'us-central1', + slice_spec_gcs_source: str = '', + predictions_gcs_source: str = '', + display_name: str = 'llm_safety_bias_component', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + enable_web_access: bool = True, + network: str = '', + reserved_ip_ranges: Optional[List[str]] = None, + encryption_spec_key_name: str = '', +): + """Reports aggregated safety metrics from a model's predictions based on specified data slices. + + Args: + project (str): Required. Project to run the component. + location (Optional[str]): Location for running the component. If not set, + defaulted to `us-central1`. + slice_spec_gcs_source (Optional[str]): Google Cloud Storage location to + file with jsonl slice spec definition. 
+ predictions_gcs_source (Optional[str]): A storage URI pointing toward a + GCS file or directory with prediction results to be used for this + evaluation. + display_name (Optional[str]): The name of the Evaluation job. + machine_type (Optional[str]): The machine type of this custom job. If not + set, defaulted to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account (Optional[str]): Sets the default service account for + workload run-as account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + enable_web_access (Optional[bool]): Whether you want Vertex AI to enable + [interactive shell + access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) + to training containers. If set to `true`, you can access interactive + shells at the URIs given by [CustomJob.web_access_uris][]. + network (Optional[str]): The full name of the Compute Engine network to + which the job should be peered. For example, + projects/12345/global/networks/myVPC. Format is of the form + projects/{project}/global/networks/{network}. Where {project} is a + project number, as in 12345, and {network} is a network name. Private + services access must already be configured for the network. If left + unspecified, the job is not peered with any network. + reserved_ip_ranges (Optional[Sequence[str]]): A list of names for the + reserved ip ranges under the VPC network that can be used for this job. + If set, we will deploy the job within the provided ip ranges. Otherwise, + the job will be deployed to any ip ranges under the provided VPC + network. + encryption_spec_key_name (Optional[str]): Customer-managed encryption key + options for the CustomJob. If this is set, then all resources created by + the CustomJob will be encrypted with the provided encryption key. + + Returns: + bias_llm_metrics (system.Artifact): + Artifact tracking the LLM model bias detection output. + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--safety_metrics={True}', + f'--predictions_gcs_source={predictions_gcs_source}', + f'--slice_spec_gcs_source={slice_spec_gcs_source}', + f'--bias_llm_metrics={bias_llm_metrics.path}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + reserved_ip_ranges=reserved_ip_ranges, + enable_web_access=enable_web_access, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py new file mode 100644 index 0000000000..95f39921cd --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py @@ -0,0 +1,159 @@ +"""Vertex LLM safety metrics pipeline.""" + +import sys + +import kfp +from vertexevaluation.llm.component import function_based +from vertexevaluation.llm.component.batch_predict import model_batch_predict +from google_cloud_pipeline_components._implementation.model_evaluation import SafetyMetricsOp +from vertexevaluation.llm.pipelines import utils + + +@kfp.dsl.pipeline(name='llm-safety-eval-pipeline') +def llm_safety_eval_pipeline( + project: str, + model_name: str, + batch_predict_gcs_destination_output_uri: str, + slice_spec_gcs_source: str = '', + location: str = 'us-central1', + batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_instances_format: str = 'jsonl', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + machine_type: str = 'n1-standard-4', + service_account: str = '', + enable_web_access: bool = True, + network: str = '', + reserved_ip_ranges: list = [], # pylint: disable=g-bare-generic + encryption_spec_key_name: str = '', +): + """The LLM Data Slicing and Safety Metrics Evaluation pipeline with batch prediction. + + Args: + project: Required. Project to run the component. + model_name: The Model name used to get predictions via this job. Must share + the same ancestor location. Starting this job has no impact on any + existing deployments of the Model and their resources. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). 
These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + slice_spec_gcs_source: The Google Cloud Storage location of the file where + the slice spec definition is located. + location: Location for running the component. If not set, defaulted to + `us-central1`. + batch_predict_gcs_source_uris: The Google Cloud Storage batch predict source + locations. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which predictions are given, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per `accelerator_count`. Only used if + `machine_type` is set. For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + `machine_type`. Only used if `machine_type` is set. For more details + about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Optional. Service account to run the dataflow job. If not + set, dataflow will use the default worker service account. For more + details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + enable_web_access (Optional[bool]): Whether you want Vertex AI to enable + [interactive shell access] + https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell + to training containers. If set to `true`, you can access interactive + shells at the URIs given by [CustomJob.web_access_uris][]. + network: Dataflow's fully qualified subnetwork name, when empty the default + subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + reserved_ip_ranges: The reserved ip ranges. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. 
+ """ + + batch_predict_task = model_batch_predict( + project=project, + location=location, + model=model_name, + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=batch_predict_gcs_source_uris, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + encryption_spec_key_name=encryption_spec_key_name, + accelerator_type=batch_predict_accelerator_type, + accelerator_count=batch_predict_accelerator_count, + ) + + converter_task = function_based.convert_artifact_to_string( + input_artifact=batch_predict_task.outputs['gcs_output_directory'] + ) + + SafetyMetricsOp( + project=project, + predictions_gcs_source=converter_task.output, + slice_spec_gcs_source=slice_spec_gcs_source, + location=location, + machine_type=machine_type, + service_account=service_account, + enable_web_access=enable_web_access, + network=network, + reserved_ip_ranges=reserved_ip_ranges, + encryption_spec_key_name=encryption_spec_key_name + ) + + +def main(argv: list[str]) -> None: + parsed_args = utils.parse_args('llm_safety_eval_pipeline', argv) + + parameters = utils.get_parameters_from_input_args_for_pipeline( + parsed_args, llm_safety_eval_pipeline + ) + + parameters.update( + { + 'batch_predict_gcs_source_uris': [ + 'gs://lakeyk-llm-test/golden_dataset/adversarial_with_gender_identity_1k_col_renamed.jsonl' + ] + } + ) + + job = utils.run_pipeline( + llm_safety_eval_pipeline, + parameters=parameters, + project=parameters['project'], + location=parameters['location'], + pipeline_root=parameters['batch_predict_gcs_destination_output_uri'], + ) + + if parsed_args.wait: + job.wait() + + +if __name__ == '__main__': + main(sys.argv) From 540294aedb9622b13063fdbee287411e68ba656a Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 27 Jul 2023 16:02:32 -0700 Subject: [PATCH 057/253] fix(sdk): fix GCPC break in KFP SDK (#9791) --- sdk/python/kfp/components/__init__.py | 4 +++ sdk/python/kfp/v2/__init__.py | 6 ++-- test/gcpc-tests/run_all_gcpc_modules.py | 38 +++++++++++++++++++++ test/presubmit-test-run-all-gcpc-modules.sh | 23 +++++++++++++ 4 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 test/gcpc-tests/run_all_gcpc_modules.py create mode 100755 test/presubmit-test-run-all-gcpc-modules.sh diff --git a/sdk/python/kfp/components/__init__.py b/sdk/python/kfp/components/__init__.py index 005c43f3cd..606fe843dd 100644 --- a/sdk/python/kfp/components/__init__.py +++ b/sdk/python/kfp/components/__init__.py @@ -27,6 +27,10 @@ from kfp.components.load_yaml_utilities import load_component_from_file from kfp.components.load_yaml_utilities import load_component_from_text from kfp.components.load_yaml_utilities import load_component_from_url +# keep this for backward compatibility with user code "from kfp.components import placholders" and similar +from kfp.dsl import base_component # noqa: keep unused import +from kfp.dsl import placeholders # noqa: keep unused import +# from kfp.dsl.base_component import BaseComponent from kfp.dsl.container_component_class import ContainerComponent from kfp.dsl.python_component import PythonComponent diff --git a/sdk/python/kfp/v2/__init__.py b/sdk/python/kfp/v2/__init__.py index 82c9014ab9..fe854231ae 100644 --- a/sdk/python/kfp/v2/__init__.py +++ b/sdk/python/kfp/v2/__init__.py @@ -21,6 +21,6 @@ category=DeprecationWarning, stacklevel=2) -from kfp import compiler -from kfp import 
components -from kfp import dsl +from kfp import compiler # noqa: keep unused import +from kfp import components # noqa: keep unused import +from kfp import dsl # noqa: keep unused import diff --git a/test/gcpc-tests/run_all_gcpc_modules.py b/test/gcpc-tests/run_all_gcpc_modules.py new file mode 100644 index 0000000000..053fe266f4 --- /dev/null +++ b/test/gcpc-tests/run_all_gcpc_modules.py @@ -0,0 +1,38 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test running all GCPC modules.""" +import importlib +import pkgutil +import unittest + + +def run_all_modules(package_name: str) -> None: + package = importlib.import_module(package_name) + for _, module_name, ispkg in pkgutil.walk_packages(package.__path__): + # use dots to avoid false positives on packages with google in name + # and train test split packages + if '.test' in package_name: + continue + if ispkg: + run_all_modules(f'{package_name}.{module_name}') + else: + importlib.import_module(f'{package_name}.{module_name}') + print(f'Successfully ran: {package_name}') + + +class TestRunAllGCPCModules(unittest.TestCase): + + def test_run_all_modules(self): + run_all_modules('google_cloud_pipeline_components.preview') + run_all_modules('google_cloud_pipeline_components.v1') diff --git a/test/presubmit-test-run-all-gcpc-modules.sh b/test/presubmit-test-run-all-gcpc-modules.sh new file mode 100755 index 0000000000..a8a1e05226 --- /dev/null +++ b/test/presubmit-test-run-all-gcpc-modules.sh @@ -0,0 +1,23 @@ +#!/bin/bash -ex +# Copyright 2023 Kubeflow Pipelines contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +source_root=$(pwd) + +pip install --upgrade pip +source $source_root/sdk/python/install_from_source.sh +pip install components/google-cloud +pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) + +pytest test/gcpc-tests/run_all_gcpc_modules.py From 25f6ee63895d89859bd0f652a82e3fd27462119e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 28 Jul 2023 09:47:34 -0700 Subject: [PATCH 058/253] chore(sdk): release KFP SDK 2.1.2 (#9797) --- docs/conf.py | 2 +- sdk/RELEASE.md | 2 +- sdk/python/kfp-dsl/setup.py | 2 +- sdk/python/kfp/__init__.py | 2 +- sdk/python/requirements.in | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8aa9576e05..fabc7e26ae 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -136,7 +136,7 @@ 'version': 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.1/', 'title': - '2.1.1', + '2.1.2', 'aliases': ['stable'], }, { diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 1da8922c62..a30a3d809d 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -9,7 +9,7 @@ ## Bug fixes and other changes ## Documentation updates -# 2.1.1 +# 2.1.2 ## Features * Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9738](https://github.com/kubeflow/pipelines/pull/9738) diff --git a/sdk/python/kfp-dsl/setup.py b/sdk/python/kfp-dsl/setup.py index 809bce3b10..5c2cdfaccc 100644 --- a/sdk/python/kfp-dsl/setup.py +++ b/sdk/python/kfp-dsl/setup.py @@ -16,7 +16,7 @@ setuptools.setup( name='kfp-dsl', - version='2.1.1', + version='2.1.2', description='A KFP SDK subpackage containing the DSL and runtime code.', author='google', author_email='kubeflow-pipelines@google.com', diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 0e9ad528f9..31a1d8253d 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.1.1' +__version__ = '2.1.2' TYPE_CHECK = True diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index 9f351c1e30..27604e8a18 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -10,7 +10,7 @@ google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0 google-auth>=1.6.1,<3 # https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md#221-2022-03-15 google-cloud-storage>=2.2.1,<3 -kfp-dsl==2.1.1 +kfp-dsl==2.1.2 # pin kfp-pipeline-spec to an exact version, since this is the contract between a given KFP SDK version and the BE. 
we don't want old version of the SDK to write new fields and to have the BE reject the new unsupported field (even if the new field backward compatible from a proto perspective) kfp-pipeline-spec==0.2.2 # Update the upper version whenever a new major version of the From 27ee817b7d8c09d3b8f0d789c44c2f6fb3c7ec1e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 28 Jul 2023 22:08:32 -0700 Subject: [PATCH 059/253] chore(components): add comment to GCPC utils and convert annotation to forward reference PiperOrigin-RevId: 552030788 --- .../google-cloud/google_cloud_pipeline_components/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/utils.py b/components/google-cloud/google_cloud_pipeline_components/utils.py index 3168e9be3d..7e7032e7e5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/utils.py @@ -21,6 +21,8 @@ from google_cloud_pipeline_components import _image from kfp import components from kfp import dsl +# do not follow this pattern! +# we should not depend on non-public modules of the KFP SDK! from kfp.components import placeholders from google.protobuf import json_format @@ -143,10 +145,7 @@ def unquote_nonstring_placeholders( def gcpc_output_name_converter( new_name: str, original_name: Optional[str] = None, -) -> Callable[ - [components.base_component.BaseComponent], - components.base_component.BaseComponent, -]: +) -> Callable[["BaseComponent"], "BaseComponent"]: # pytype: disable=name-error """Replace the output with original_name with a new_name in a component decorated with an @dsl.container_component decorator. Enables authoring components that have an input and output with the same From af753dc645ea2630a07dce3c0a1287ee3d2d5c87 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Tue, 1 Aug 2023 16:24:25 -0700 Subject: [PATCH 060/253] feat(components): Update RAI safety component with latest image fix(components): Remove 'enable_web_access' input from custom job utils function PiperOrigin-RevId: 552958256 --- .../model_evaluation/llm_evaluation/component.py | 7 ------- .../model_evaluation/rai_safety/__init__.py | 2 +- .../rai_safety/llm_safety_bias/component.py | 10 +--------- .../_implementation/model_evaluation/utils.py | 6 ------ 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py index 0711af300b..f909592dba 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -38,7 +38,6 @@ def model_evaluation_text_generation( display_name: str = 'model-evaluation-text-generation', machine_type: str = 'e2-highmem-16', service_account: str = '', - enable_web_access: bool = True, network: str = '', reserved_ip_ranges: List[str] = [], encryption_spec_key_name: str = '', @@ -88,11 +87,6 @@ def model_evaluation_text_generation( unspecified, the Vertex AI Custom Code Service Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. 
- enable_web_access (Optional[bool]): Whether you want Vertex AI to enable - [interactive shell - access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) - to training containers. If set to `true`, you can access interactive - shells at the URIs given by [CustomJob.web_access_uris][]. network (Optional[str]): The full name of the Compute Engine network to which the job should be peered. For example, projects/12345/global/networks/myVPC. Format is of the form @@ -139,7 +133,6 @@ def model_evaluation_text_generation( service_account=service_account, network=network, reserved_ip_ranges=reserved_ip_ranges, - enable_web_access=enable_web_access, encryption_spec_key_name=encryption_spec_key_name, ), gcp_resources=gcp_resources, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py index 5175407c81..b40cffdf86 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Google Cloud Pipeline RAI Components.""" +"""Google Cloud Pipeline RAI Safety Bias Components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py index 4fd3f7ce08..0fac124abf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py @@ -10,8 +10,7 @@ from kfp.dsl import OutputPath -_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:lakeyk-test' -# TODO(b/293198435): Update to publicly released image. +_IMAGE_URI = 'us-docker.pkg.dev/vertex-ai-restricted/llm-eval/llm-bias:v0.2' @container_component @@ -25,7 +24,6 @@ def llm_safety_metrics_bias( display_name: str = 'llm_safety_bias_component', machine_type: str = 'e2-highmem-16', service_account: str = '', - enable_web_access: bool = True, network: str = '', reserved_ip_ranges: Optional[List[str]] = None, encryption_spec_key_name: str = '', @@ -52,11 +50,6 @@ def llm_safety_metrics_bias( unspecified, the Vertex AI Custom Code Service Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. - enable_web_access (Optional[bool]): Whether you want Vertex AI to enable - [interactive shell - access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) - to training containers. If set to `true`, you can access interactive - shells at the URIs given by [CustomJob.web_access_uris][]. network (Optional[str]): The full name of the Compute Engine network to which the job should be peered. For example, projects/12345/global/networks/myVPC. 
Format is of the form @@ -96,7 +89,6 @@ def llm_safety_metrics_bias( service_account=service_account, network=network, reserved_ip_ranges=reserved_ip_ranges, - enable_web_access=enable_web_access, encryption_spec_key_name=encryption_spec_key_name, ), gcp_resources=gcp_resources, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py index 94b13278b7..3be30d9952 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/utils.py @@ -14,7 +14,6 @@ def build_custom_job_payload( service_account: str = '', network: str = '', reserved_ip_ranges: List[str] = [], - enable_web_access: bool = False, accelerator_type: str = 'ACCELERATOR_TYPE_UNSPECIFIED', accelerator_count: int = 0, encryption_spec_key_name: str = '', @@ -46,10 +45,6 @@ def build_custom_job_payload( network that can be used for this job. If set, we will deploy the job within the provided ip ranges. Otherwise, the job will be deployed to any ip ranges under the provided VPC network. - enable_web_access: Whether you want Vertex AI to enable [interactive shell - access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) - to training containers. If set to `true`, you can access interactive - shells at the URIs given by [CustomJob.web_access_uris][]. accelerator_type: The type of accelerator(s) that may be attached to the machine as per acceleratorCount. https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType @@ -79,7 +74,6 @@ def build_custom_job_payload( 'service_account': str(service_account), 'network': str(network), 'reserved_ip_ranges': reserved_ip_ranges, - 'enable_web_access': bool(enable_web_access), }, 'encryption_spec': {'kms_key_name': str(encryption_spec_key_name)}, } From 0153430206567e5c50c878bc7b2fcdf0a79817c0 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:10:18 -0700 Subject: [PATCH 061/253] fix(frontend): Missing pipeline version name in new run page. 
(#9799) --- frontend/src/pages/PipelineDetails.test.tsx | 15 +++++++++++++++ frontend/src/pages/PipelineDetails.tsx | 12 ++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/frontend/src/pages/PipelineDetails.test.tsx b/frontend/src/pages/PipelineDetails.test.tsx index 051c942f57..d238554b4e 100644 --- a/frontend/src/pages/PipelineDetails.test.tsx +++ b/frontend/src/pages/PipelineDetails.test.tsx @@ -760,6 +760,21 @@ describe('PipelineDetails', () => { expect(newRunBtn).toBeDefined(); }); + it('uses selected version ID to create run if URL does not contain version ID', async () => { + tree = shallow(); + await TestUtils.flushPromises(); + const instance = tree.instance() as PipelineDetails; + const newRunFromPipelineVersionBtn = instance.getInitialToolbarState().actions[ + ButtonKeys.NEW_RUN_FROM_PIPELINE_VERSION + ]; + newRunFromPipelineVersionBtn.action(); + expect(historyPushSpy).toHaveBeenCalledTimes(1); + expect(historyPushSpy).toHaveBeenLastCalledWith( + RoutePage.NEW_RUN + + `?${QUERY_PARAMS.pipelineId}=${testV2Pipeline.pipeline_id}&${QUERY_PARAMS.pipelineVersionId}=${originalTestV2PipelineVersion.pipeline_version_id}`, + ); + }); + it('clicking new run button navigates to the new run page', async () => { tree = shallow(); await TestUtils.flushPromises(); diff --git a/frontend/src/pages/PipelineDetails.tsx b/frontend/src/pages/PipelineDetails.tsx index d415797a99..107e0d68f4 100644 --- a/frontend/src/pages/PipelineDetails.tsx +++ b/frontend/src/pages/PipelineDetails.tsx @@ -131,10 +131,18 @@ class PipelineDetails extends Page<{}, PipelineDetailsState> { buttons .newRunFromPipelineVersion( () => { - return pipelineIdFromParams ? pipelineIdFromParams : ''; + return this.state.v2Pipeline + ? this.state.v2Pipeline.pipeline_id + : pipelineIdFromParams + ? pipelineIdFromParams + : ''; }, () => { - return pipelineVersionIdFromParams ? pipelineVersionIdFromParams : ''; + return this.state.v2SelectedVersion + ? this.state.v2SelectedVersion.pipeline_version_id + : pipelineVersionIdFromParams + ? 
pipelineVersionIdFromParams + : ''; }, ) .newPipelineVersion('Upload version', () => From 11a5ae1dd76048b1df99f23bcfa9c2b10e53938a Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 4 Aug 2023 10:12:20 -0700 Subject: [PATCH 062/253] chore(sdk): clean up artifact code (#9800) --- sdk/python/kfp-dsl/kfp/dsl/executor.py | 6 +-- .../kfp-dsl/kfp/dsl/types/artifact_types.py | 47 +---------------- .../kfp-dsl/runtime_tests/executor_test.py | 50 +------------------ 3 files changed, 4 insertions(+), 99 deletions(-) diff --git a/sdk/python/kfp-dsl/kfp/dsl/executor.py b/sdk/python/kfp-dsl/kfp/dsl/executor.py index db8a8a89bd..cc87f34b0a 100644 --- a/sdk/python/kfp-dsl/kfp/dsl/executor.py +++ b/sdk/python/kfp-dsl/kfp/dsl/executor.py @@ -357,11 +357,7 @@ def create_artifact_instance( artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE.get( schema_title, artifact_cls) - return artifact_cls._from_executor_fields( - uri=runtime_artifact.get('uri', ''), - name=runtime_artifact.get('name', ''), - metadata=runtime_artifact.get('metadata', {}), - ) if hasattr(artifact_cls, '_from_executor_fields') else artifact_cls( + return artifact_cls( uri=runtime_artifact.get('uri', ''), name=runtime_artifact.get('name', ''), metadata=runtime_artifact.get('metadata', {}), diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py b/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py index 163e3feec3..2c6999c2d8 100644 --- a/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py +++ b/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py @@ -11,10 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Classes for input/output Artifacts in KFP SDK. - -These are only compatible with v2 Pipelines. -""" +"""Classes for input/output Artifacts in KFP SDK.""" from typing import Dict, List, Optional, Type @@ -112,12 +109,6 @@ class Model(Artifact): """ schema_title = 'system.Model' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - super().__init__(uri=uri, name=name, metadata=metadata) - @property def framework(self) -> str: return self._get_framework() @@ -143,12 +134,6 @@ class Dataset(Artifact): """ schema_title = 'system.Dataset' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - super().__init__(uri=uri, name=name, metadata=metadata) - class Metrics(Artifact): """An artifact for storing key-value scalar metrics. @@ -160,12 +145,6 @@ class Metrics(Artifact): """ schema_title = 'system.Metrics' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - super().__init__(uri=uri, name=name, metadata=metadata) - def log_metric(self, metric: str, value: float) -> None: """Sets a custom scalar metric in the artifact's metadata. @@ -186,12 +165,6 @@ class ClassificationMetrics(Artifact): """ schema_title = 'system.ClassificationMetrics' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None): - super().__init__(uri=uri, name=name, metadata=metadata) - def log_roc_data_point(self, fpr: float, tpr: float, threshold: float) -> None: """Logs a single data point in the ROC curve to metadata. 
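Editor's aside (illustrative sketch, not part of the patch above): with the redundant `__init__` overrides and the `_from_executor_fields` hook removed in the hunks above, a custom artifact type only declares its schema fields and relies on the base `dsl.Artifact` constructor. The class name and schema title below are hypothetical, and the snippet assumes only that the `kfp` SDK is installed.

```python
from kfp import dsl


class ExampleDataset(dsl.Artifact):
    # Hypothetical schema title, used purely for illustration.
    schema_title = 'example.ExampleDataset'
    schema_version = '0.0.1'


# The inherited Artifact constructor already accepts name, uri, and metadata,
# so no per-subclass __init__ is needed.
dataset = ExampleDataset(
    name='example_dataset',
    uri='gs://example-bucket/example_dataset',
    metadata={'rows': 100},
)
# Artifact.path maps the gs:// URI onto the local /gcs/ fuse mount.
print(dataset.path)
```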
@@ -355,12 +328,6 @@ class SlicedClassificationMetrics(Artifact): schema_title = 'system.SlicedClassificationMetrics' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - super().__init__(uri=uri, name=name, metadata=metadata) - def _upsert_classification_metrics_for_slice(self, slice: str) -> None: """Upserts the classification metrics instance for a slice.""" if slice not in self._sliced_metrics: @@ -479,12 +446,6 @@ class HTML(Artifact): """ schema_title = 'system.HTML' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - super().__init__(uri=uri, name=name, metadata=metadata) - class Markdown(Artifact): """An artifact representing a markdown file. @@ -496,12 +457,6 @@ class Markdown(Artifact): """ schema_title = 'system.Markdown' - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None): - super().__init__(uri=uri, name=name, metadata=metadata) - _SCHEMA_TITLE_TO_TYPE: Dict[str, Type[Artifact]] = { x.schema_title: x for x in [ diff --git a/sdk/python/kfp-dsl/runtime_tests/executor_test.py b/sdk/python/kfp-dsl/runtime_tests/executor_test.py index 351e68a17a..4cc5969344 100644 --- a/sdk/python/kfp-dsl/runtime_tests/executor_test.py +++ b/sdk/python/kfp-dsl/runtime_tests/executor_test.py @@ -21,6 +21,7 @@ from unittest import mock from absl.testing import parameterized +from kfp import dsl from kfp.dsl import executor from kfp.dsl import Input from kfp.dsl import Output @@ -113,15 +114,10 @@ def test_input_artifact_custom_type(self): } """ - class VertexDataset: + class VertexDataset(dsl.Artifact): schema_title = 'google.VertexDataset' schema_version = '0.0.0' - def __init__(self, name: str, uri: str, metadata: dict) -> None: - self.name = name - self.uri = uri - self.metadata = metadata - @property def path(self) -> str: return self.uri.replace('gs://', @@ -1217,29 +1213,6 @@ def test_func(input_list: Input[List[Artifact]]): self.assertDictEqual(output_metadata, {}) -class VertexDataset: - schema_title = 'google.VertexDataset' - schema_version = '0.0.0' - - @classmethod - def _from_executor_fields( - cls, - name: str, - uri: str, - metadata: dict, - ) -> 'VertexDataset': - - instance = VertexDataset() - instance.name = name - instance.uri = uri - instance.metadata = metadata - return instance - - @property - def path(self) -> str: - return self.uri.replace('gs://', artifact_types._GCS_LOCAL_MOUNT_PREFIX) - - class TestDictToArtifact(parameterized.TestCase): @parameterized.parameters( @@ -1355,25 +1328,6 @@ def test_dict_to_artifact_kfp_artifact( self.assertIsInstance( executor.create_artifact_instance(runtime_artifact), expected_type) - def test_dict_to_artifact_google_artifact(self): - runtime_artifact = { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'google.VertexDataset' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - } - # with artifact_cls - self.assertIsInstance( - executor.create_artifact_instance( - runtime_artifact, artifact_cls=VertexDataset), VertexDataset) - - # without artifact_cls - self.assertIsInstance( - executor.create_artifact_instance(runtime_artifact), - artifact_types.Artifact) - if __name__ == '__main__': unittest.main() From a6af41c23be0fdc2a038c8b46725faa49e8909c1 Mon Sep 17 00:00:00 2001 From: deepk2u Date: Fri, 4 Aug 2023 13:34:20 -0700 Subject: [PATCH 063/253] fix(backend): fix timeouts with list run api. 
Fixes #9780 (#9806) * Update client_manager.go * Update client_manager.go --- backend/src/apiserver/client_manager.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/backend/src/apiserver/client_manager.go b/backend/src/apiserver/client_manager.go index 5abe07ca31..a02dcf53c0 100644 --- a/backend/src/apiserver/client_manager.go +++ b/backend/src/apiserver/client_manager.go @@ -279,6 +279,16 @@ func initDBClient(initConnectionTimeout time.Duration) *storage.DB { glog.Fatalf("Failed to create index experimentuuid_conditions_finishedatinsec on run_details. Error: %s", response.Error) } + response = db.Model(&model.Run{}).AddIndex("namespace_createatinsec", "Namespace", "CreatedAtInSec") + if response.Error != nil { + glog.Fatalf("Failed to create index namespace_createatinsec on run_details. Error: %s", response.Error) + } + + response = db.Model(&model.Run{}).AddIndex("namespace_conditions_finishedatinsec", "Namespace", "Conditions", "FinishedAtInSec") + if response.Error != nil { + glog.Fatalf("Failed to create index namespace_conditions_finishedatinsec on run_details. Error: %s", response.Error) + } + response = db.Model(&model.Pipeline{}).AddUniqueIndex("name_namespace_index", "Name", "Namespace") if response.Error != nil { glog.Fatalf("Failed to create index name_namespace_index on run_details. Error: %s", response.Error) From c92f40c59a77585570e0d6178a4ed6b8acfd4664 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Fri, 4 Aug 2023 15:45:53 -0700 Subject: [PATCH 064/253] chore(components): Refactor evaluation pipelines into graph components PiperOrigin-RevId: 553932748 --- .../preview/model_evaluation/__init__.py | 2 + .../feature_attribution_graph_component.py | 244 ++++++ ...ml_tabular_feature_attribution_pipeline.py | 723 ++++++++++++---- .../evaluation_automl_tabular_pipeline.py | 535 ++++++++++-- ...uation_automl_unstructure_data_pipeline.py | 590 ++++++++++--- ...evaluation_feature_attribution_pipeline.py | 776 ++++++++++++++---- 6 files changed, 2356 insertions(+), 514 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py index 366396e11f..b8de7ae626 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py @@ -15,10 +15,12 @@ from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp __all__ = [ 'ModelEvaluationFeatureAttributionOp', + 'FeatureAttributionGraphComponentOp', 'DetectModelBiasOp', 'DetectDataBiasOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py 
b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py new file mode 100644 index 0000000000..ecb436dbd4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py @@ -0,0 +1,244 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List, NamedTuple + +from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp +from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.types.artifact_types import VertexModel +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp +import kfp + + +@kfp.dsl.pipeline(name='feature-attribution-graph-component') +def feature_attribution_graph_component( # pylint: disable=dangerous-default-value + project: str, + location: str, + prediction_type: str, + vertex_model: VertexModel, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +) -> NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics): + """A pipeline to compute feature attributions by sampling data for batch explanations. + + This pipeline guarantees support for AutoML Tabular models that contain a + valid explanation_spec. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification", "regression", or "forecasting". + vertex_model: The Vertex model artifact used for batch explanation. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. 
For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. 
For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. 
Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + + Returns: + A system.Metrics artifact with feature attributions. + """ + outputs = NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics) + + # Sample the input dataset for a quicker batch explanation. + data_sampler_task = EvaluationDataSamplerOp( + project=project, + location=location, + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_uri=batch_predict_bigquery_source_uri, + instances_format=batch_predict_instances_format, + sample_size=batch_predict_explanation_data_sample_size, + force_runner_mode=force_runner_mode, + ) + + # Run batch explain. + batch_explain_task = ModelBatchPredictOp( + project=project, + location=location, + model=vertex_model, + job_display_name='model-registry-batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=data_sampler_task.outputs['gcs_output_directory'], + bigquery_source_input_uri=data_sampler_task.outputs[ + 'bigquery_output_table' + ], + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + generate_explanation=True, + explanation_parameters=batch_predict_explanation_parameters, + explanation_metadata=batch_predict_explanation_metadata, + machine_type=batch_predict_machine_type, + starting_replica_count=batch_predict_starting_replica_count, + max_replica_count=batch_predict_max_replica_count, + encryption_spec_key_name=encryption_spec_key_name, + accelerator_type=batch_predict_accelerator_type, + accelerator_count=batch_predict_accelerator_count, + ) + + # Generate feature attributions from explanations. 
+ feature_attribution_task = ModelEvaluationFeatureAttributionOp( + project=project, + location=location, + problem_type=prediction_type, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_explain_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_explain_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + return outputs( + feature_attributions=feature_attribution_task.outputs[ + 'feature_attributions' + ] + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index 9fa4943fdf..1011a11b76 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -12,24 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List +from typing import Any, Dict, List, NamedTuple from google_cloud_pipeline_components._implementation.model import GetVertexModelOp -from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp -from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp +from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics +from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -from google_cloud_pipeline_components.v1.model_evaluation.forecasting_component import model_evaluation_forecasting as ModelEvaluationForecastingOp from google_cloud_pipeline_components.v1.model_evaluation.regression_component import model_evaluation_regression as ModelEvaluationRegressionOp import kfp -@kfp.dsl.pipeline(name='evaluation-automl-tabular-feature-attribution-pipeline') -def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=dangerous-default-value +@kfp.dsl.pipeline( + name='evaluation-automl-tabular-feature-attribution-classification-pipeline' +) +def evaluation_automl_tabular_feature_attribution_classification_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, - prediction_type: str, model_name: str, target_field_name: str, batch_predict_instances_format: str, @@ -55,19 +56,22 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d dataflow_use_public_ips: 
bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, ): - """The evaluation AutoML tabular pipeline with feature attribution. + """The evaluation AutoML tabular pipeline with feature attribution for classification models. This pipeline guarantees support for AutoML Tabular models that contain a - valid explanation_spec. This pipeline does not include the data_splitter - component, which is needed for many tabular custom models. + valid explanation_spec. This pipeline does not include the + target_field_data_remover component, which is needed for many tabular custom + models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification", "regression", or "forecasting". model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find @@ -168,6 +172,12 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d batch_predict_accelerator_count: The number of accelerators to attach to the ``batch_predict_machine_type``. Only used if ``batch_predict_machine_type`` is set. + slicing_specs: List of + ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. + When provided, compute metrics for each defined slice. See sample code in + https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component + For more details on configuring slices, see + https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -187,7 +197,16 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + + Returns: + A google.ClassificationMetrics artifact. """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, + ) + evaluation_display_name = ( 'evaluation-automl-tabular-feature-attribution-pipeline' ) @@ -213,32 +232,271 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d accelerator_count=batch_predict_accelerator_count, ) - # Run the Batch Explain process (sampler -> batch explanation). - data_sampler_task = EvaluationDataSamplerOp( + # Run feature attribution steps. 
+ feature_attribution_graph = FeatureAttributionGraphComponentOp( project=project, location=location, - gcs_source_uris=batch_predict_gcs_source_uris, - bigquery_source_uri=batch_predict_bigquery_source_uri, - instances_format=batch_predict_instances_format, - sample_size=batch_predict_explanation_data_sample_size, + prediction_type='classification', + vertex_model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + dataflow_machine_type=dataflow_machine_type, + dataflow_max_num_workers=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, ) - batch_explain_task = ModelBatchPredictOp( + + # Run evaluation for a classification model. + eval_task = ModelEvaluationClassificationOp( project=project, location=location, - model=get_model_task.outputs['model'], - job_display_name='model-registry-batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', - gcs_source_uris=data_sampler_task.outputs['gcs_output_directory'], - bigquery_source_input_uri=data_sampler_task.outputs[ + target_field_name=target_field_name, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ 'bigquery_output_table' ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + slicing_specs=slicing_specs, + ) + + # Import the evaluation result to Vertex AI. 
+ import_evaluation_task = ModelImportEvaluationOp( + classification_metrics=eval_task.outputs['evaluation_metrics'], + feature_attributions=feature_attribution_graph.outputs[ + 'feature_attributions' + ], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline( + name='evaluation-automl-tabular-feature-attribution-regression-pipeline' +) +def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, +): + """The evaluation AutoML tabular pipeline with feature attribution for regression models. + + This pipeline guarantees support for AutoML Tabular models that contain a + valid explanation_spec. This pipeline does not include the + target_field_data_remover component, which is needed for many tabular custom + models. + + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. 
+ For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. 
+ force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + + Returns: + A google.RegressionMetrics artifact. + """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, + ) + + evaluation_display_name = ( + 'evaluation-automl-tabular-feature-attribution-pipeline' + ) + get_model_task = GetVertexModelOp(model_name=model_name) + + # Run Batch Prediction. + batch_predict_task = ModelBatchPredictOp( + project=project, + location=location, + model=get_model_task.outputs['model'], + job_display_name='model-registry-batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_input_uri=batch_predict_bigquery_source_uri, instances_format=batch_predict_instances_format, predictions_format=batch_predict_predictions_format, gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, - generate_explanation=True, - explanation_parameters=batch_predict_explanation_parameters, - explanation_metadata=batch_predict_explanation_metadata, machine_type=batch_predict_machine_type, starting_replica_count=batch_predict_starting_replica_count, max_replica_count=batch_predict_max_replica_count, @@ -247,46 +505,272 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d accelerator_count=batch_predict_accelerator_count, ) - # Run evaluation based on prediction type and feature attribution component. - # After, import the model evaluations to the Vertex model. + # Run feature attribution steps. + feature_attribution_graph = FeatureAttributionGraphComponentOp( + project=project, + location=location, + prediction_type='regression', + vertex_model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + dataflow_machine_type=dataflow_machine_type, + dataflow_max_num_workers=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + # Run evaluation for a regression model. 
+ eval_task = ModelEvaluationRegressionOp( + project=project, + location=location, + target_field_name=target_field_name, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. + import_evaluation_task = ModelImportEvaluationOp( + regression_metrics=eval_task.outputs['evaluation_metrics'], + feature_attributions=feature_attribution_graph.outputs[ + 'feature_attributions' + ], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-automl-tabular-feature-attribution-pipeline') +def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + prediction_type: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +): + """The evaluation AutoML tabular pipeline with feature attribution. + + This pipeline guarantees support for AutoML Tabular classification and + regression models that contain a valid explanation_spec. This pipeline does + not include the target_field_data_remover component, which is needed for many + tabular custom models. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". 
+ model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. 
If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. 
+ slicing_specs: List of + ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. + When provided, compute metrics for each defined slice. See sample code in + https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component + For more details on configuring slices, see + https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): - eval_task = ModelEvaluationClassificationOp( + evaluation_automl_tabular_feature_attribution_classification_pipeline( project=project, location=location, + model_name=model_name, target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, 
slicing_specs=slicing_specs, - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, @@ -294,122 +778,33 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, ) - ModelImportEvaluationOp( - classification_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) - - with kfp.dsl.Condition(prediction_type == 'forecasting', name='forecasting'): - eval_task = ModelEvaluationForecastingOp( - project=project, - location=location, - target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - ) - ModelImportEvaluationOp( - forecasting_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) with kfp.dsl.Condition(prediction_type == 'regression', name='regression'): - eval_task = ModelEvaluationRegressionOp( + evaluation_automl_tabular_feature_attribution_regression_pipeline( project=project, location=location, + 
model_name=model_name, target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - ) - ModelImportEvaluationOp( - regression_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index 82d146d591..296bfadf73 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # 
limitations under the License. -from typing import Any, List +from typing import Any, List, NamedTuple from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp +from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics +from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp from google_cloud_pipeline_components.v1.model_evaluation.forecasting_component import model_evaluation_forecasting as ModelEvaluationForecastingOp @@ -23,11 +25,10 @@ import kfp -@kfp.dsl.pipeline(name='evaluation-automl-tabular-pipeline') -def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-value +@kfp.dsl.pipeline(name='evaluation-automl-tabular-classification-pipeline') +def evaluation_automl_tabular_classification_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, - prediction_type: str, model_name: str, target_field_name: str, batch_predict_instances_format: str, @@ -50,8 +51,12 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, ): - """The evaluation AutoML tabular pipeline with no feature attribution. + """The evaluation AutoML tabular pipeline with no feature attribution for classification models. This pipeline guarantees support for AutoML Tabular models. This pipeline does not include the target_field_data_remover component, which is needed for many @@ -60,8 +65,6 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification", "regression", or "forecasting". model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find @@ -143,6 +146,12 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val batch_predict_accelerator_count: The number of accelerators to attach to the ``batch_predict_machine_type``. Only used if ``batch_predict_machine_type`` is set. + slicing_specs: List of + ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. + When provided, compute metrics for each defined slice. See sample code in + https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component + For more details on configuring slices, see + https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -162,11 +171,23 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. 
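Note on the pattern introduced above: the classification pipeline now declares its outputs through a NamedTuple return annotation, pairing a ClassificationMetrics artifact with the imported evaluation resource name. The following is a minimal sketch of that output pattern only, using a hypothetical component and the generic kfp.dsl.Metrics type rather than the GCPC artifact types imported in this patch; names and values are illustrative.

from typing import NamedTuple

from kfp import dsl
from kfp.dsl import Metrics, Output


@dsl.component
def evaluate(metrics: Output[Metrics]) -> str:
    # Hypothetical stand-in for the evaluation + import steps: log one
    # metric and return a resource-name-like string.
    metrics.log_metric('auPrc', 0.95)
    return 'projects/p/locations/l/models/m/evaluations/123'


@dsl.pipeline(name='typed-outputs-sketch')
def typed_outputs_pipeline() -> NamedTuple(
    'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str
):
    # Same shape as the per-type pipelines in this patch: bundle an artifact
    # output and a string parameter into one named output tuple.
    outputs = NamedTuple(
        'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str
    )
    eval_task = evaluate()
    return outputs(
        evaluation_metrics=eval_task.outputs['metrics'],
        evaluation_resource_name=eval_task.outputs['Output'],
    )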
+ + Returns: + A google.ClassificationMetrics artifact and imported + evaluation_resource_name. """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, + ) + evaluation_display_name = 'evaluation-automl-tabular-pipeline' + + # Get the Vertex AI Model. get_model_task = GetVertexModelOp(model_name=model_name) - # Run Batch Prediction. + # Run Vertex AI Batch Prediction. batch_predict_task = ModelBatchPredictOp( project=project, location=location, @@ -186,100 +207,460 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val accelerator_count=batch_predict_accelerator_count, ) - # Run evaluation based on prediction type. - # After, import the model evaluations to the Vertex model. + # Run evaluation for a classification model. + eval_task = ModelEvaluationClassificationOp( + project=project, + location=location, + target_field_name=target_field_name, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + slicing_specs=slicing_specs, + ) + + # Import the evaluation result to Vertex AI. + import_evaluation_task = ModelImportEvaluationOp( + classification_metrics=eval_task.outputs['evaluation_metrics'], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-automl-tabular-regression-pipeline') +def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, +): + """The evaluation AutoML tabular pipeline with no feature attribution for regression models. 
+ + This pipeline guarantees support for AutoML Tabular models. This pipeline does + not include the target_field_data_remover component, which is needed for many + tabular custom models. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. 
In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + + Returns: + A google.RegressionMetrics artifact and imported + evaluation_resource_name. 
+ """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, + ) + + evaluation_display_name = 'evaluation-automl-tabular-pipeline' + + # Get the Vertex AI Model. + get_model_task = GetVertexModelOp(model_name=model_name) + + # Run Vertex AI Batch Prediction. + batch_predict_task = ModelBatchPredictOp( + project=project, + location=location, + model=get_model_task.outputs['model'], + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_input_uri=batch_predict_bigquery_source_uri, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + machine_type=batch_predict_machine_type, + starting_replica_count=batch_predict_starting_replica_count, + max_replica_count=batch_predict_max_replica_count, + encryption_spec_key_name=encryption_spec_key_name, + accelerator_type=batch_predict_accelerator_type, + accelerator_count=batch_predict_accelerator_count, + ) + + # Run evaluation for a regression model. + eval_task = ModelEvaluationRegressionOp( + project=project, + location=location, + target_field_name=target_field_name, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. 
+ import_evaluation_task = ModelImportEvaluationOp( + regression_metrics=eval_task.outputs['evaluation_metrics'], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-automl-tabular-pipeline') +def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + prediction_type: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +): + """The evaluation AutoML tabular pipeline with no feature attribution. + + This pipeline guarantees support for AutoML Tabular classification and + regression models. This pipeline does not include the + target_field_data_remover component, which is needed for many tabular custom + models and AutoML Tabular Forecasting. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. 
If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. 
If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + slicing_specs: List of + ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. + When provided, compute metrics for each defined slice. See sample code in + https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component + For more details on configuring slices, see + https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. 
+ """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): - eval_task = ModelEvaluationClassificationOp( + evaluation_automl_tabular_classification_pipeline( project=project, location=location, + model_name=model_name, target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, slicing_specs=slicing_specs, - ) - ModelImportEvaluationOp( - classification_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) - - with kfp.dsl.Condition(prediction_type == 'forecasting', name='forecasting'): - eval_task = ModelEvaluationForecastingOp( - project=project, - location=location, - target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - ModelImportEvaluationOp( - forecasting_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) with kfp.dsl.Condition(prediction_type == 'regression', name='regression'): - eval_task = ModelEvaluationRegressionOp( + evaluation_automl_tabular_regression_pipeline( project=project, location=location, + model_name=model_name, 
target_field_name=target_field_name, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - ModelImportEvaluationOp( - regression_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index 61beb34b59..cfbc8685af 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -12,24 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
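Note on the dispatch pattern completed above: the wrapper pipelines keep their prediction_type parameter but now act as thin routers, where each kfp.dsl.Condition block invokes one of the single-type evaluation pipelines as a sub-pipeline. The sketch below reduces that routing pattern to its essentials, with hypothetical stand-ins for the real classification and regression evaluation pipelines.

from kfp import dsl


@dsl.component
def run_eval(kind: str) -> str:
    # Hypothetical stand-in for the per-type evaluation steps.
    return 'evaluated as ' + kind


@dsl.pipeline(name='classification-eval-sketch')
def classification_eval(model_name: str):
    run_eval(kind='classification')


@dsl.pipeline(name='regression-eval-sketch')
def regression_eval(model_name: str):
    run_eval(kind='regression')


@dsl.pipeline(name='eval-router-sketch')
def eval_router(prediction_type: str, model_name: str):
    # Mirrors the wrapper pipelines after this patch: one Condition block per
    # supported prediction type, each calling a single-type pipeline as a
    # sub-pipeline.
    with dsl.Condition(
        prediction_type == 'classification', name='classification'
    ):
        classification_eval(model_name=model_name)
    with dsl.Condition(prediction_type == 'regression', name='regression'):
        regression_eval(model_name=model_name)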
-from typing import List +from typing import List, NamedTuple from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp +from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics +from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -from google_cloud_pipeline_components.v1.model_evaluation.forecasting_component import model_evaluation_forecasting as ModelEvaluationForecastingOp from google_cloud_pipeline_components.v1.model_evaluation.regression_component import model_evaluation_regression as ModelEvaluationRegressionOp import kfp from kfp import dsl -@kfp.dsl.pipeline(name='evaluation-automl-unstructure-data-pipeline') -def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-default-value +@kfp.dsl.pipeline(name='evaluation-classification-pipeline') +def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, - prediction_type: str, model_name: str, target_field_name: str, batch_predict_instances_format: str, @@ -54,17 +54,19 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, ): - """The evaluation pipeline with ground truth and no feature attribution. + """The evaluation pipeline with ground truth and no feature attribution for classification models. - This pipeline is used for all unstructured AutoML models, including Text, - Video, Image and Custom imported models. + This pipeline is used for all classification unstructured AutoML models, + including Text, Video, Image and Custom models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification", "regression", or "forecasting". model_name: The Vertex model resource name to be imported and used for batch prediction. Formatted like projects/{project}/locations/{location}/models/{model} or @@ -179,7 +181,17 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + + Returns: + A Tuple of google.ClassificationMetrics artifact and the imported + evaluation metrics resource name. """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, + ) + evaluation_display_name = 'evaluation_automl_unstructure_data_pipeline' get_model_task = GetVertexModelOp(model_name=model_name) @@ -225,113 +237,505 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de accelerator_count=batch_predict_accelerator_count, ) - # Run evaluation based on prediction type. - # After, import the model evaluations to the Vertex model. 
+ # Run evaluation for a classification model. + eval_task = ModelEvaluationClassificationOp( + project=project, + location=location, + class_labels=evaluation_class_labels, + prediction_label_column=evaluation_prediction_label_column, + prediction_score_column=evaluation_prediction_score_column, + target_field_name=target_field_name, + ground_truth_format=batch_predict_instances_format, + ground_truth_gcs_source=batch_predict_gcs_source_uris, + ground_truth_bigquery_source=batch_predict_bigquery_source_uri, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. + import_evaluation_task = ModelImportEvaluationOp( + classification_metrics=eval_task.outputs['evaluation_metrics'], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-regression-pipeline') +def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + evaluation_prediction_score_column: str = '', + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, +): + """The evaluation pipeline with ground truth and no feature attribution for regression models. + + This pipeline is used for all custom tabular regression models. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: The Vertex model resource name to be imported and used for batch + prediction. 
Formatted like + projects/{project}/locations/{location}/models/{model} or + projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. 
If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + evaluation_prediction_score_column: The column name of the field containing + batch prediction scores. Formatted to be able to find nested columns, + delimited by ``.``. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. 
+ + Returns: + A Tuple of google.RegressionMetrics artifact and the imported evaluation + metrics resource name. + """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, + ) + + evaluation_display_name = 'evaluation_automl_unstructure_data_pipeline' + get_model_task = GetVertexModelOp(model_name=model_name) + + # Remove the ground truth from the given GCS data. + # This is required for many models as Vertex Batch Prediction can not have the + # ground truth in the data to run, but later the evaluation component requires + # the ground truth data. + target_field_data_remover_task = TargetFieldDataRemoverOp( + project=project, + location=location, + target_field_name=target_field_name, + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_uri=batch_predict_bigquery_source_uri, + instances_format=batch_predict_instances_format, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + # Run Batch Prediction. + batch_predict_task = ModelBatchPredictOp( + project=project, + location=location, + model=get_model_task.outputs['model'], + job_display_name=f'evaluation-batch-predict-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + gcs_source_uris=target_field_data_remover_task.outputs[ + 'gcs_output_directory' + ], + bigquery_source_input_uri=target_field_data_remover_task.outputs[ + 'bigquery_output_table' + ], + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + machine_type=batch_predict_machine_type, + starting_replica_count=batch_predict_starting_replica_count, + max_replica_count=batch_predict_max_replica_count, + encryption_spec_key_name=encryption_spec_key_name, + accelerator_type=batch_predict_accelerator_type, + accelerator_count=batch_predict_accelerator_count, + ) + + # Run evaluation for a regression model. + eval_task = ModelEvaluationRegressionOp( + project=project, + location=location, + target_field_name=target_field_name, + ground_truth_format=batch_predict_instances_format, + ground_truth_gcs_source=batch_predict_gcs_source_uris, + ground_truth_bigquery_source=batch_predict_bigquery_source_uri, + prediction_score_column=evaluation_prediction_score_column, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. 
+ import_evaluation_task = ModelImportEvaluationOp( + regression_metrics=eval_task.outputs['evaluation_metrics'], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-pipeline') +def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + prediction_type: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + evaluation_prediction_label_column: str = '', + evaluation_prediction_score_column: str = '', + evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +): + """The evaluation pipeline with ground truth and no feature attribution. + + This pipeline is used for all unstructured AutoML models, including Text, + Video, Image and Custom models. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch + prediction. Formatted like + projects/{project}/locations/{location}/models/{model} or + projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. 
If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
+ For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + evaluation_prediction_label_column: The column name of the field containing + classes the model is scoring. Formatted to be able to find nested columns, + delimited by ``.``. + evaluation_prediction_score_column: The column name of the field containing + batch prediction scores. Formatted to be able to find nested columns, + delimited by ``.``. + evaluation_class_labels: Required for classification prediction type. The + list of class names for the target_field_name, in the same order they + appear in a file in batch_predict_gcs_source_uris. For instance, if the + target_field_name could be either ``1`` or ``0``, then the class_labels + input will be ["1", "0"]. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. 
+ """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): - eval_task = ModelEvaluationClassificationOp( + evaluation_automl_unstructure_data_classification_pipeline( project=project, location=location, - class_labels=evaluation_class_labels, - prediction_label_column=evaluation_prediction_label_column, - prediction_score_column=evaluation_prediction_score_column, + model_name=model_name, target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + evaluation_prediction_label_column=evaluation_prediction_label_column, + evaluation_prediction_score_column=evaluation_prediction_score_column, + evaluation_class_labels=evaluation_class_labels, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - ModelImportEvaluationOp( - classification_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) - - with kfp.dsl.Condition(prediction_type == 'forecasting', name='forecasting'): - eval_task = ModelEvaluationForecastingOp( - project=project, - location=location, - target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - prediction_score_column=evaluation_prediction_score_column, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - 
dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - ModelImportEvaluationOp( - forecasting_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) with kfp.dsl.Condition(prediction_type == 'regression', name='regression'): - eval_task = ModelEvaluationRegressionOp( + evaluation_automl_unstructure_data_regression_pipeline( project=project, location=location, + model_name=model_name, target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - prediction_score_column=evaluation_prediction_score_column, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + evaluation_prediction_score_column=evaluation_prediction_score_column, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - ModelImportEvaluationOp( - regression_metrics=eval_task.outputs['evaluation_metrics'], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 3270ce8a85..c741df43bc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -12,25 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
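The wrapper evaluation_automl_unstructure_data_pipeline above keeps the prediction_type parameter and simply invokes the classification or regression sub-pipeline inside kfp.dsl.Condition blocks. A stripped-down sketch of that dispatch pattern with toy nested pipelines follows; every name and body below is illustrative, not part of the patch.

from kfp import dsl


@dsl.component
def report(kind: str):
    print(f'evaluating a {kind} model')


@dsl.pipeline(name='classification-branch')
def classification_branch():
    report(kind='classification')


@dsl.pipeline(name='regression-branch')
def regression_branch():
    report(kind='regression')


@dsl.pipeline(name='dispatch-by-prediction-type')
def dispatch_pipeline(prediction_type: str):
    # Only the branch whose condition matches the runtime value executes;
    # each branch calls a nested pipeline, as the wrapper above does.
    with dsl.Condition(prediction_type == 'classification', name='classification'):
        classification_branch()
    with dsl.Condition(prediction_type == 'regression', name='regression'):
        regression_branch()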
-from typing import Any, Dict, List +from typing import Any, Dict, List, NamedTuple from google_cloud_pipeline_components._implementation.model import GetVertexModelOp -from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp -from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp +from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics +from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -from google_cloud_pipeline_components.v1.model_evaluation.forecasting_component import model_evaluation_forecasting as ModelEvaluationForecastingOp from google_cloud_pipeline_components.v1.model_evaluation.regression_component import model_evaluation_regression as ModelEvaluationRegressionOp import kfp -@kfp.dsl.pipeline(name='evaluation-feature-attribution-pipeline') -def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-default-value +@kfp.dsl.pipeline(name='evaluation-feature-attribution-classification-pipeline') +def evaluation_feature_attribution_classification_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, - prediction_type: str, model_name: str, target_field_name: str, batch_predict_instances_format: str, @@ -49,7 +48,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul batch_predict_accelerator_count: int = 0, evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', - evaluation_class_labels: list = [], # pylint: disable=g-bare-generic + evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, @@ -58,19 +57,20 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, ): - """The evaluation AutoML tabular pipeline with feature attribution. + """The evaluation custom tabular pipeline with feature attribution for classification models. - This pipeline guarantees support for AutoML Tabular models that contain a - valid explanation_spec. This pipeline does not include the - target_field_data_remover component, which is needed for many tabular custom - models. + This pipeline gives support for custom models that contain a + valid explanation_spec. This pipeline includes the target_field_data_remover + component, which is needed for many tabular custom models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification", "regression", or "forecasting". 
model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find @@ -202,11 +202,20 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + + Returns: + A google.ClassificationMetrics artifact. """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, + ) + evaluation_display_name = 'evaluation-feature-attribution-pipeline' get_model_task = GetVertexModelOp(model_name=model_name) - # Remove the ground truth from the given GCS data. + # Remove the ground truth from the given GCS or BQ data. # This is required for many models as Vertex Batch Prediction can not have the # ground truth in the data to run, but later the evaluation component requires # the ground truth data. @@ -248,36 +257,301 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul accelerator_count=batch_predict_accelerator_count, ) - # Run the Batch Explain process (sampler -> batch explanation). - data_sampler_task = EvaluationDataSamplerOp( + # Run feature attribution steps. + feature_attribution_graph = FeatureAttributionGraphComponentOp( project=project, location=location, - gcs_source_uris=target_field_data_remover_task.outputs[ + prediction_type='classification', + vertex_model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=target_field_data_remover_task.outputs[ 'gcs_output_directory' ], - bigquery_source_uri=target_field_data_remover_task.outputs[ + batch_predict_bigquery_source_uri=target_field_data_remover_task.outputs[ + 'bigquery_output_table' + ], + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + dataflow_machine_type=dataflow_machine_type, + dataflow_max_num_workers=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + # Run evaluation for a classification model. 
+ eval_task = ModelEvaluationClassificationOp( + project=project, + location=location, + class_labels=evaluation_class_labels, + prediction_label_column=evaluation_prediction_label_column, + prediction_score_column=evaluation_prediction_score_column, + target_field_name=target_field_name, + ground_truth_format=batch_predict_instances_format, + ground_truth_gcs_source=batch_predict_gcs_source_uris, + ground_truth_bigquery_source=batch_predict_bigquery_source_uri, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ 'bigquery_output_table' ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. + import_evaluation_task = ModelImportEvaluationOp( + classification_metrics=eval_task.outputs['evaluation_metrics'], + feature_attributions=feature_attribution_graph.outputs[ + 'feature_attributions' + ], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-feature-attribution-regression-pipeline') +def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + evaluation_prediction_score_column: str = '', + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +) -> NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, +): + """The evaluation custom tabular pipeline with feature attribution for regression models. + + This pipeline gives support for custom models that contain a + valid explanation_spec. 
This pipeline includes the target_field_data_remover + component, which is needed for many tabular custom models. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. 
In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. 
Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + evaluation_prediction_score_column: The column name of the field containing + batch prediction scores. Formatted to be able to find nested columns, + delimited by ``.``. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + + Returns: + A google.RegressionMetrics artifact. + """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=RegressionMetrics, + evaluation_resource_name=str, + ) + + evaluation_display_name = 'evaluation-feature-attribution-pipeline' + get_model_task = GetVertexModelOp(model_name=model_name) + + # Remove the ground truth from the given GCS or BQ data. + # This is required for many models as Vertex Batch Prediction can not have the + # ground truth in the data to run, but later the evaluation component requires + # the ground truth data. + target_field_data_remover_task = TargetFieldDataRemoverOp( + project=project, + location=location, + target_field_name=target_field_name, + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_uri=batch_predict_bigquery_source_uri, instances_format=batch_predict_instances_format, - sample_size=batch_predict_explanation_data_sample_size, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, ) - batch_explain_task = ModelBatchPredictOp( + + # Run Batch Prediction. 
+ batch_predict_task = ModelBatchPredictOp( project=project, location=location, model=get_model_task.outputs['model'], - job_display_name='model-registry-batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', - gcs_source_uris=data_sampler_task.outputs['gcs_output_directory'], - bigquery_source_input_uri=data_sampler_task.outputs[ + job_display_name='model-registry-batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=target_field_data_remover_task.outputs[ + 'gcs_output_directory' + ], + bigquery_source_input_uri=target_field_data_remover_task.outputs[ 'bigquery_output_table' ], instances_format=batch_predict_instances_format, predictions_format=batch_predict_predictions_format, gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, - generate_explanation=True, - explanation_parameters=batch_predict_explanation_parameters, - explanation_metadata=batch_predict_explanation_metadata, machine_type=batch_predict_machine_type, starting_replica_count=batch_predict_starting_replica_count, max_replica_count=batch_predict_max_replica_count, @@ -286,167 +560,320 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul accelerator_count=batch_predict_accelerator_count, ) - # Run evaluation based on prediction type and feature attribution component. - # After, import the model evaluations to the Vertex model. + # Run feature attribution steps. + feature_attribution_graph = FeatureAttributionGraphComponentOp( + project=project, + location=location, + prediction_type='regression', + vertex_model=get_model_task.outputs['model'], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=target_field_data_remover_task.outputs[ + 'gcs_output_directory' + ], + batch_predict_bigquery_source_uri=target_field_data_remover_task.outputs[ + 'bigquery_output_table' + ], + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + dataflow_machine_type=dataflow_machine_type, + dataflow_max_num_workers=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + # Run evaluation for a regression model. 
+ eval_task = ModelEvaluationRegressionOp( + project=project, + location=location, + target_field_name=target_field_name, + ground_truth_format=batch_predict_instances_format, + ground_truth_gcs_source=batch_predict_gcs_source_uris, + ground_truth_bigquery_source=batch_predict_bigquery_source_uri, + prediction_score_column=evaluation_prediction_score_column, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_predict_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_predict_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + model=get_model_task.outputs['model'], + ) + + # Import the evaluation result to Vertex AI. + import_evaluation_task = ModelImportEvaluationOp( + regression_metrics=eval_task.outputs['evaluation_metrics'], + feature_attributions=feature_attribution_graph.outputs[ + 'feature_attributions' + ], + model=get_model_task.outputs['model'], + dataset_type=batch_predict_instances_format, + dataset_path=batch_predict_bigquery_source_uri, + dataset_paths=batch_predict_gcs_source_uris, + display_name=evaluation_display_name, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) + + +@kfp.dsl.pipeline(name='evaluation-feature-attribution-pipeline') +def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + prediction_type: str, + model_name: str, + target_field_name: str, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: Dict[str, Any] = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + evaluation_prediction_label_column: str = '', + evaluation_prediction_score_column: str = '', + evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', +): + """The evaluation custom tabular pipeline with feature attribution. + + This pipeline gives support for custom models that contain a + valid explanation_spec. This pipeline includes the target_field_data_remover + component, which is needed for many tabular custom models. 
+ + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch + prediction. + target_field_name: The target field's name. Formatted to be able to find + nested columns, delimited by ``.``. Prefixed with 'instance.' on the + component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. 
In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. 
Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + evaluation_prediction_label_column: The column name of the field containing + classes the model is scoring. Formatted to be able to find nested columns, + delimited by ``.``. + evaluation_prediction_score_column: The column name of the field containing + batch prediction scores. Formatted to be able to find nested columns, + delimited by ``.``. + evaluation_class_labels: Required for classification prediction type. The + list of class names for the target_field_name, in the same order they + appear in a file in batch_predict_gcs_source_uris. For instance, if the + target_field_name could be either ``1`` or ``0``, then the class_labels + input will be ["1", "0"]. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. 
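For orientation, the arguments documented above all feed one wrapper pipeline; a minimal sketch of compiling and submitting it is below. This is only a sketch: the import path, project, bucket, and model values are assumptions, not taken from this patch.

    from google.cloud import aiplatform
    from kfp import compiler

    # Assumed import path for the wrapper pipeline defined in this patch.
    from google_cloud_pipeline_components.preview.model_evaluation import (
        evaluation_feature_attribution_pipeline,
    )

    compiler.Compiler().compile(
        pipeline_func=evaluation_feature_attribution_pipeline,
        package_path='evaluation_feature_attribution_pipeline.yaml',
    )

    aiplatform.init(project='my-project', location='us-central1')
    aiplatform.PipelineJob(
        display_name='eval-feature-attribution',
        template_path='evaluation_feature_attribution_pipeline.yaml',
        parameter_values={
            'project': 'my-project',
            'location': 'us-central1',
            'prediction_type': 'regression',
            'model_name': 'projects/my-project/locations/us-central1/models/123',
            'target_field_name': 'target',
            'batch_predict_instances_format': 'jsonl',
            'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/output',
            'batch_predict_gcs_source_uris': ['gs://my-bucket/instances.jsonl'],
        },
    ).submit()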
+ """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): - eval_task = ModelEvaluationClassificationOp( - project=project, - location=location, - class_labels=evaluation_class_labels, - prediction_label_column=evaluation_prediction_label_column, - prediction_score_column=evaluation_prediction_score_column, - target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - ) - ModelImportEvaluationOp( - classification_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) - - with kfp.dsl.Condition(prediction_type == 'forecasting', name='forecasting'): - eval_task = ModelEvaluationForecastingOp( + evaluation_feature_attribution_classification_pipeline( project=project, location=location, + model_name=model_name, target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - prediction_score_column=evaluation_prediction_score_column, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + 
batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + evaluation_prediction_label_column=evaluation_prediction_label_column, + evaluation_prediction_score_column=evaluation_prediction_score_column, + evaluation_class_labels=evaluation_class_labels, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - ) - ModelImportEvaluationOp( - forecasting_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, ) with kfp.dsl.Condition(prediction_type == 'regression', name='regression'): - eval_task = ModelEvaluationRegressionOp( + evaluation_feature_attribution_regression_pipeline( project=project, location=location, + model_name=model_name, target_field_name=target_field_name, - ground_truth_format=batch_predict_instances_format, - ground_truth_gcs_source=batch_predict_gcs_source_uris, - ground_truth_bigquery_source=batch_predict_bigquery_source_uri, - prediction_score_column=evaluation_prediction_score_column, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_predict_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_predict_task.outputs[ - 'bigquery_output_table' - ], - dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, - dataflow_disk_size_gb=dataflow_disk_size_gb, - dataflow_service_account=dataflow_service_account, - dataflow_subnetwork=dataflow_subnetwork, - dataflow_use_public_ips=dataflow_use_public_ips, - 
encryption_spec_key_name=encryption_spec_key_name, - force_runner_mode=force_runner_mode, - model=get_model_task.outputs['model'], - ) - feature_attribution_task = ModelEvaluationFeatureAttributionOp( - project=project, - location=location, - problem_type=prediction_type, - predictions_format=batch_predict_predictions_format, - predictions_gcs_source=batch_explain_task.outputs[ - 'gcs_output_directory' - ], - predictions_bigquery_source=batch_explain_task.outputs[ - 'bigquery_output_table' - ], + batch_predict_instances_format=batch_predict_instances_format, + batch_predict_gcs_destination_output_uri=batch_predict_gcs_destination_output_uri, + batch_predict_gcs_source_uris=batch_predict_gcs_source_uris, + batch_predict_bigquery_source_uri=batch_predict_bigquery_source_uri, + batch_predict_predictions_format=batch_predict_predictions_format, + batch_predict_bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + batch_predict_machine_type=batch_predict_machine_type, + batch_predict_starting_replica_count=batch_predict_starting_replica_count, + batch_predict_max_replica_count=batch_predict_max_replica_count, + batch_predict_explanation_metadata=batch_predict_explanation_metadata, + batch_predict_explanation_parameters=batch_predict_explanation_parameters, + batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, + batch_predict_accelerator_type=batch_predict_accelerator_type, + batch_predict_accelerator_count=batch_predict_accelerator_count, + evaluation_prediction_score_column=evaluation_prediction_score_column, dataflow_machine_type=dataflow_machine_type, - dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_max_num_workers=dataflow_max_num_workers, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_service_account=dataflow_service_account, dataflow_subnetwork=dataflow_subnetwork, @@ -454,14 +881,3 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, ) - ModelImportEvaluationOp( - regression_metrics=eval_task.outputs['evaluation_metrics'], - feature_attributions=feature_attribution_task.outputs[ - 'feature_attributions' - ], - model=get_model_task.outputs['model'], - dataset_type=batch_predict_instances_format, - dataset_path=batch_predict_bigquery_source_uri, - dataset_paths=batch_predict_gcs_source_uris, - display_name=evaluation_display_name, - ) From f07aef7cb84249dcb788e49cfa5669670ac54698 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 7 Aug 2023 15:53:17 -0700 Subject: [PATCH 065/253] chore(components): release GCPC v2.1.1 [roll forward] PiperOrigin-RevId: 554617624 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 4 ++++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 0df9dedf2f..71ad870a43 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install 
"git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index c02e27f5a7..86e80b9fa7 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,9 @@ ## Upcoming release +## Release 2.1.1 +* Add `preview.model_evaluation.FeatureAttributionGraphComponentOp` pipeline +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.1.0 * Add AutoML tabular and forecasting components to `preview` namespace * Fix bug where `parent_model` parameter of `ModelUploadOp` ignored diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index c600b1005b..bbdf86081e 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.1", + "title": "2.1.1", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.0", "title": "2.1.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 50bf7a59b5..d60161b3c4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
"""Google Cloud Pipeline Components version.""" -__version__ = "2.1.0" +__version__ = "2.1.1" From 671bb0ca93fe14521730e2977d5fb26c8c175c63 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Mon, 7 Aug 2023 16:57:28 -0700 Subject: [PATCH 066/253] docs(components): Name change and update LLM safety bias metrics component documentation PiperOrigin-RevId: 554634603 --- .../model_evaluation/__init__.py | 4 +- .../rai_safety/llm_safety_bias/component.py | 89 +++++++++---------- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index bc66ca0531..ddd8f3bf37 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -23,7 +23,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as ModelEvaluationTextGenerationOp -from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_metrics_bias as SafetyMetricsOp +from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -36,6 +36,6 @@ 'ModelEvaluationTextGenerationOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', - 'SafetyMetricsOp', + 'LLMSafetyBiasMetricsOp', 'TargetFieldDataRemoverOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py index 0fac124abf..b5d16cc961 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py @@ -1,6 +1,5 @@ -"""Python LLM Safety Model Evaluation component used in KFP pipelines.""" +"""LLM Safety Bias Metrics component used in KFP pipelines.""" -from typing import List, Optional from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils @@ -14,63 +13,60 @@ @container_component -def llm_safety_metrics_bias( +def llm_safety_bias_metrics( gcp_resources: OutputPath(str), - bias_llm_metrics: Output[Artifact], + llm_safety_bias_evaluation_metrics: Output[Artifact], project: str, location: str = 'us-central1', slice_spec_gcs_source: str = '', predictions_gcs_source: str = '', - display_name: str = 'llm_safety_bias_component', + display_name: str = 
'llm-safety-bias-component', machine_type: str = 'e2-highmem-16', service_account: str = '', network: str = '', - reserved_ip_ranges: Optional[List[str]] = None, encryption_spec_key_name: str = '', ): - """Reports aggregated safety metrics from a model's predictions based on specified data slices. + """Aggregates LLM safety bias metrics based on specified data slices. Args: - project (str): Required. Project to run the component. - location (Optional[str]): Location for running the component. If not set, - defaulted to `us-central1`. - slice_spec_gcs_source (Optional[str]): Google Cloud Storage location to - file with jsonl slice spec definition. - predictions_gcs_source (Optional[str]): A storage URI pointing toward a - GCS file or directory with prediction results to be used for this - evaluation. - display_name (Optional[str]): The name of the Evaluation job. - machine_type (Optional[str]): The machine type of this custom job. If not - set, defaulted to `e2-highmem-16`. More details: - https://cloud.google.com/compute/docs/machine-resource - service_account (Optional[str]): Sets the default service account for - workload run-as account. The service account running the pipeline - (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - submitting jobs must have act-as permission on this run-as account. If - unspecified, the Vertex AI Custom Code Service - Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - network (Optional[str]): The full name of the Compute Engine network to - which the job should be peered. For example, - projects/12345/global/networks/myVPC. Format is of the form - projects/{project}/global/networks/{network}. Where {project} is a - project number, as in 12345, and {network} is a network name. Private - services access must already be configured for the network. If left - unspecified, the job is not peered with any network. - reserved_ip_ranges (Optional[Sequence[str]]): A list of names for the - reserved ip ranges under the VPC network that can be used for this job. - If set, we will deploy the job within the provided ip ranges. Otherwise, - the job will be deployed to any ip ranges under the provided VPC - network. - encryption_spec_key_name (Optional[str]): Customer-managed encryption key - options for the CustomJob. If this is set, then all resources created by - the CustomJob will be encrypted with the provided encryption key. + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + slice_spec_gcs_source: Google Cloud Storage location to file with JSONL + slicing spec definition. + predictions_gcs_source: A storage URI pointing toward a GCS file or + directory with prediction results to be used for this evaluation. + display_name: The display name of the evaluation custom job. + machine_type: The machine type of this custom job. If not set, defaulted to + ``e2-highmem-16``. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. 
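To make the renamed component concrete, a minimal sketch of wiring it into a pipeline follows. It assumes the private `_implementation` import alias shown in this patch remains importable; the GCS URIs are placeholders.

    from kfp import compiler, dsl
    from google_cloud_pipeline_components._implementation.model_evaluation import (
        LLMSafetyBiasMetricsOp,
    )


    @dsl.pipeline(name='llm-safety-bias-eval')
    def llm_safety_bias_eval(project: str, location: str = 'us-central1'):
        # Aggregate safety bias metrics over sliced LLM batch predictions.
        LLMSafetyBiasMetricsOp(
            project=project,
            location=location,
            predictions_gcs_source='gs://my-bucket/llm-predictions/',
            slice_spec_gcs_source='gs://my-bucket/slice_spec.jsonl',
        )


    compiler.Compiler().compile(llm_safety_bias_eval, 'llm_safety_bias_eval.yaml')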
+ network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. Returns: - bias_llm_metrics (system.Artifact): - Artifact tracking the LLM model bias detection output. - gcp_resources (str): - Serialized gcp_resources proto tracking the custom job. + llm_safety_bias_evaluation_metrics: ``Artifact`` tracking the LLM safety + bias evaluation metrics output. + gcp_resources: Serialized gcp_resources proto tracking the custom job. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ return gcpc_utils.build_serverless_customjob_container_spec( project=project, @@ -83,12 +79,11 @@ def llm_safety_metrics_bias( f'--safety_metrics={True}', f'--predictions_gcs_source={predictions_gcs_source}', f'--slice_spec_gcs_source={slice_spec_gcs_source}', - f'--bias_llm_metrics={bias_llm_metrics.path}', + f'--bias_llm_metrics={llm_safety_bias_evaluation_metrics.path}', '--executor_input={{$.json_escape[1]}}', ], service_account=service_account, network=network, - reserved_ip_ranges=reserved_ip_ranges, encryption_spec_key_name=encryption_spec_key_name, ), gcp_resources=gcp_resources, From f8c1f9cf21d2472017c1a07319d3fab4c22fa7b5 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Mon, 7 Aug 2023 17:48:53 -0700 Subject: [PATCH 067/253] feat(components): Internal change PiperOrigin-RevId: 554646568 --- .../model_evaluation/__init__.py | 4 +- .../llm_evaluation/component.py | 133 +++++++++--------- 2 files changed, 68 insertions(+), 69 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index ddd8f3bf37..f33517e78b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -22,7 +22,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp -from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as ModelEvaluationTextGenerationOp 
+from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -33,7 +33,7 @@ 'ErrorAnalysisAnnotationOp', 'EvaluatedAnnotationOp', 'FeatureExtractorOp', - 'ModelEvaluationTextGenerationOp', + 'LLMEvaluationTextGenerationOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', 'LLMSafetyBiasMetricsOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py index f909592dba..1144875449 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -19,15 +19,17 @@ from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp import dsl -from kfp.dsl import container_component +from kfp.dsl import Metrics +from kfp.dsl import Output +from kfp.dsl import OutputPath -@container_component +@dsl.container_component def model_evaluation_text_generation( - gcp_resources: dsl.OutputPath(str), - evaluation_metrics: dsl.Output[dsl.Metrics], + gcp_resources: OutputPath(str), + evaluation_metrics: Output[Metrics], project: str, - location: str = 'us-central1', + location: str, evaluation_task: str = 'text-generation', target_field_name: str = 'instance.ground_truth', prediction_field_name: str = 'predictions.content', @@ -45,72 +47,69 @@ def model_evaluation_text_generation( """Computes evaluation metrics of a text generation model. Supports evaluating large language models performing the following generative - tasks: - `summarization`,`question-answering`,`text-generation` + tasks: ``summarization``, ``question-answering``, and ``text-generation``. Args: - project (str): Required. Project to run the component. - location (Optional[str]): Location for running the component. If not set, - defaulted to `us-central1`. - evaluation_task (Optional[str]): The task that the large language model - will be evaluated on. The evaluation component computes a set of metrics - relevant to that specific task. Currently supported tasks are: - `summarization`,`question-answering`,`text-generation`. - target_field_name (Optional[str]): The full name path of the features - target field in the predictions file. Formatted to be able to find - nested columns, delimited by `.`. Alternatively referred to as the - ground truth (or ground_truth_column) field. If not set, defaulted to - `inputs.ground_truth`. - prediction_field_name (Optional[str]): The full name path of the - prediction field in the prediction file. Formatted to be able to find - nested columns, delimited by `.`. If not set, defaulted to - `predictions.content`. - predictions_format (Optional[str]): The file format for the LLM Batch - Prediction results. `jsonl` is currently the only allowed format. 
If not - set, defaulted to `jsonl`. - joined_predictions_gcs_source (Optional[str]): A storage URI pointing - toward a GCS directory or a GCS file with joined prediction & ground - truth files to be used for this evaluation. - predictions_gcs_source (Optional[str]): A storage URI pointing toward a - GCS directory with only prediction files to be used for this evaluation. - ground_truth_gcs_source (Optional[str]): A storage URI pointing toward a - GCS directory with only ground truth files to be used for this - evaluation. - display_name (Optional[str]): The name of the Evaluation job. - machine_type (Optional[str]): The machine type of this custom job. If not - set, defaulted to `e2-highmem-16`. More details: - https://cloud.google.com/compute/docs/machine-resource - service_account (Optional[str]): Sets the default service account for - workload run-as account. The service account running the pipeline - (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - submitting jobs must have act-as permission on this run-as account. If - unspecified, the Vertex AI Custom Code Service - Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - network (Optional[str]): The full name of the Compute Engine network to - which the job should be peered. For example, - projects/12345/global/networks/myVPC. Format is of the form - projects/{project}/global/networks/{network}. Where {project} is a - project number, as in 12345, and {network} is a network name. Private - services access must already be configured for the network. If left - unspecified, the job is not peered with any network. - reserved_ip_ranges (Optional[Sequence[str]]): A list of names for the - reserved ip ranges under the VPC network that can be used for this job. - If set, we will deploy the job within the provided ip ranges. Otherwise, - the job will be deployed to any ip ranges under the provided VPC - network. - encryption_spec_key_name (Optional[str]): Customer-managed encryption key - options for the CustomJob. If this is set, then all resources created by - the CustomJob will be encrypted with the provided encryption key. + project: The GCP project that runs the pipeline component. + location: The GCP region that runs the pipeline component. + evaluation_task: The task that the large language model will be evaluated + on. The evaluation component computes a set of metrics relevant to that + specific task. Currently supported tasks are: ``summarization``, + ``question-answering`, and ``text-generation``. + target_field_name: The full name path of the features target field in the + predictions file. Formatted to be able to find nested columns, delimited + by ``.``. Alternatively referred to as the ground truth (or + ground_truth_column) field. If not set, defaulted to + ``inputs.ground_truth``. + prediction_field_name: The full name path of the prediction field in the + prediction file. Formatted to be able to find nested columns, delimited by + ``.``. If not set, defaulted to ``predictions.content``. + predictions_format: The file format for the LLM Batch Prediction results. + ``jsonl`` is currently the only allowed format. If not set, defaulted to + ``jsonl``. + joined_predictions_gcs_source: A storage URI pointing toward a GCS directory + or a GCS file with joined prediction & ground truth files to be used for + this evaluation. 
+ predictions_gcs_source: A storage URI pointing toward a GCS directory with + only prediction files to be used for this evaluation. + ground_truth_gcs_source: A storage URI pointing toward a GCS directory with + only ground truth files to be used for this evaluation. + display_name: The name of the evaluation custom job. + machine_type: The machine type of this custom job. If not set, defaulted to + ``e2-highmem-16``. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC + network that can be used for this job. If set, we will deploy the job + within the provided ip ranges. Otherwise, the job will be deployed to any + ip ranges under the provided VPC network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. Returns: - evaluation_metrics (system.Metrics): - A Metrics artifact representing the language model evaluation metrics. - gcp_resources (str): - Serialized gcp_resources proto tracking the custom job. - - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + evaluation_metrics: ``Metrics`` artifact representing the language model + evaluation metrics. + gcp_resources: Serialized gcp_resources proto tracking the custom job. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
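For context, a minimal sketch of calling the renamed text-generation evaluation component from a pipeline, using the field names documented above. The `_implementation` import alias and the GCS URIs are assumptions for illustration only.

    from kfp import dsl
    from google_cloud_pipeline_components._implementation.model_evaluation import (
        LLMEvaluationTextGenerationOp,
    )


    @dsl.pipeline(name='llm-text-generation-eval')
    def llm_text_generation_eval(project: str, location: str = 'us-central1'):
        # Evaluate summarization-style outputs against separate ground truth files.
        LLMEvaluationTextGenerationOp(
            project=project,
            location=location,
            evaluation_task='summarization',
            target_field_name='instance.ground_truth',
            prediction_field_name='predictions.content',
            predictions_gcs_source='gs://my-bucket/predictions/',
            ground_truth_gcs_source='gs://my-bucket/ground_truth/',
        )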
""" return gcpc_utils.build_serverless_customjob_container_spec( project=project, From 1a2a752dc9048d5144d0a0990420a4a6ae2b0a2e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 7 Aug 2023 18:46:51 -0700 Subject: [PATCH 068/253] chore(components): internal PiperOrigin-RevId: 554658985 --- .../google_cloud_pipeline_components.gwsq | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 components/google-cloud/google_cloud_pipeline_components.gwsq diff --git a/components/google-cloud/google_cloud_pipeline_components.gwsq b/components/google-cloud/google_cloud_pipeline_components.gwsq deleted file mode 100644 index b221919d16..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components.gwsq +++ /dev/null @@ -1,14 +0,0 @@ -// corresponds to g/google-cloud-pipeline-components-approvers -send_cls_to('google-cloud-pipeline-components-approvers'); - -define Main { - // corresponds to ganpati google-cloud-pipeline-components-approvers.prod - list RequiredGcpcApprovers = mdb('google-cloud-pipeline-components-approvers'); - - // WANT_LGTM from a GCPC approver if the author is not a GCPC approver, a GCPC approver is not already assigned, and the author has begun assigning reviewers to their CL (not a WIP anymore) - if (!author_in(RequiredGcpcApprovers) && len(current_reviewers()) > 0) { - string SelectedApprover = list_to_string(select_from(RequiredGcpcApprovers)); - set_tag('WANT_LGTM', SelectedApprover, append=True); - } - -} \ No newline at end of file From 37b907f2a448e7b1da05b23172d956da08a3af5b Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 7 Aug 2023 19:46:55 -0700 Subject: [PATCH 069/253] test(sdk): add KFP SDK upgrade presubmit test (#9827) --- test/presubmit-test-sdk-upgrade.sh | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 test/presubmit-test-sdk-upgrade.sh diff --git a/test/presubmit-test-sdk-upgrade.sh b/test/presubmit-test-sdk-upgrade.sh new file mode 100755 index 0000000000..1ed8d923ee --- /dev/null +++ b/test/presubmit-test-sdk-upgrade.sh @@ -0,0 +1,31 @@ +#!/bin/bash -ex +# Copyright 2023 Kubeflow Pipelines contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -ex + +python3 -m pip install --upgrade pip + +python3 -m pip install kfp +LATEST_KFP_SDK_RELEASE=$(pip show kfp | grep "Version:" | awk '{print $2}' | awk '{$1=$1};1') +echo "Installed latest KFP SDK version: $LATEST_KFP_SDK_RELEASE" + +# install in normal mode, not editable mode, to emulate typical user upgrade behavior +pip3 install sdk/python/kfp-dsl +pip3 install sdk/python +HEAD_KFP_SDK_VERSION=$(pip show kfp | grep "Version:" | awk '{print $2}') +echo "Successfully upgraded to KFP SDK version @ HEAD: $HEAD_KFP_SDK_VERSION" + +python -c 'import kfp' +echo "Successfully ran 'import kfp' @ HEAD: $HEAD_KFP_SDK_VERSION" From f80d2b30e4c7d05c0f511f3bf3d6dd102a9a578a Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Tue, 8 Aug 2023 09:44:18 -0700 Subject: [PATCH 070/253] feat(components): Internal change PiperOrigin-RevId: 554853752 --- .../model_evaluation/__init__.py | 2 + .../__init__.py | 14 + .../component.py | 244 ++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index f33517e78b..90571e99d4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -22,6 +22,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -33,6 +34,7 @@ 'ErrorAnalysisAnnotationOp', 'EvaluatedAnnotationOp', 'FeatureExtractorOp', + 'LLMEvaluationClassificationPredictionsPostprocessorOp', 'LLMEvaluationTextGenerationOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/__init__.py 
b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/__init__.py new file mode 100644 index 0000000000..522a3bf396 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Evaluation Predictions Postprocessor Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py new file mode 100644 index 0000000000..ea29082db0 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py @@ -0,0 +1,244 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LLM Classification Postprocessor component.""" + +from typing import List, NamedTuple, Optional + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output +from kfp.dsl import OutputPath + + +@dsl.component +def add_json_escape_class_labels(class_labels: list) -> str: + import json + + json_escaped_class_labels = json.dumps(class_labels).replace('"', '\\"') + return json_escaped_class_labels + + +@dsl.container_component +def llm_classification_predictions_postprocessor_internal( + gcp_resources: OutputPath(str), + postprocessed_predictions_gcs_source: Output[Artifact], + postprocessed_class_labels: OutputPath(list), + project: str, + location: str, + batch_prediction_results: Input[Artifact], + class_labels: str, + display_name: str = 'llm-classification-predictions-postprocessor', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + reserved_ip_ranges: Optional[List[str]] = None, + encryption_spec_key_name: str = '', +): + """Postprocesses LLM predictions for evaluating classification task. 
+ + For each output string, find the first appearance of a class label in the + list of classes, and output the index of this class in a one-hot encoding + format for evaluation. If the output string does not contain any class labels + from the list, label it as “UNKNOWN”. + + Constraints + 1. In rare cases, if the model outputs verbose answers like "The topic of + the text is not business, but is health". In this case, the first answer in + the list the model outputs isn't what the model actually chose, and the + postprocessor output would be incorrect. + 2. Cannot handle cases where class names are substrings of each other. For + example, "toxic, nontoxic". + + Args: + project: The GCP project that runs the pipeline component. + location: The GCP region that runs the pipeline component. + batch_prediction_results: An Artifact pointing toward a GCS directory with + prediction files to be used for this component. + class_labels: String that is JSON array escapedclass names for the + target_field, in the same order they appear in the batch predictions input + file. + display_name: The name of the custom job. + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. + network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC + network that can be used for this job. If set, we will deploy the job + within the provided ip ranges. Otherwise, the job will be deployed to any + ip ranges under the provided VPC network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this Custom Job will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + + Returns: + postprocessed_predictions_gcs_source: A string URI pointing toward a GCS + directory with postprocessed prediction files to be used for Evaluation + component. + postprocessed_class_labels: The list of class names for the target_field + with an additional field named "UNKNOWN", in the same order they appear in + the batch predictions input file. + gcp_resources: Serialized gcp_resources proto tracking the custom job. 
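The matching rule described above (earliest class label found in the output string, one-hot encoded, with an UNKNOWN fallback) can be summarized with a small standalone sketch. This is illustrative only, not the component's actual implementation, and it shares the verbose-answer limitation noted in the constraints.

    def one_hot_postprocess(output_text: str, class_labels: list) -> list:
        """Picks the class label that appears earliest in the model output."""
        labels = list(class_labels) + ['UNKNOWN']
        # Position of each class label inside the output string, if present.
        found = {c: output_text.find(c) for c in class_labels if c in output_text}
        chosen = min(found, key=found.get) if found else 'UNKNOWN'
        return [1 if label == chosen else 0 for label in labels]


    # e.g. one_hot_postprocess('The topic of the text is business.',
    #                          ['business', 'health']) -> [1, 0, 0]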
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=version.LLM_EVAL_IMAGE_TAG, + args=[ + '--postprocessor', + 'true', + '--batch_prediction_results', + batch_prediction_results.path, + '--postprocessed_predictions_gcs_source', + postprocessed_predictions_gcs_source.path, + '--class_labels', + class_labels, + '--postprocessed_class_labels', + postprocessed_class_labels, + '--executor_input', + '{{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + reserved_ip_ranges=reserved_ip_ranges, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) + + +@dsl.pipeline(name='ModelEvaluationLLMClassificationPredictionsPostprocessorOp') +def llm_classification_predictions_postprocessor_graph_component( + project: str, + location: str, + batch_prediction_results: Input[Artifact], + class_labels: List[str], + display_name: str = 'llm-classification-predictions-postprocessor', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + reserved_ip_ranges: Optional[List[str]] = None, + encryption_spec_key_name: str = '', +) -> NamedTuple( + 'outputs', + postprocessed_predictions_gcs_source=Artifact, + postprocessed_class_labels=List[str], +): + """Graph component to postprocess LLM predictions for evaluating classification task. + + For each output string, find the first appearance of a class label in the + list of classes, and output the index of this class in a one-hot encoding + format for evaluation. If the output string does not contain any class labels + from the list, label it as “UNKNOWN”. + + Constraints + 1. In rare cases, if the model outputs verbose answers like "The topic of + the text is not business, but is health". In this case, the first answer in + the list the model outputs isn't what the model actually chose, and the + postprocessor output would be incorrect. + 2. Cannot handle cases where class names are substrings of each other. For + example, "toxic, nontoxic". + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + batch_prediction_results: An Artifact pointing toward a GCS directory with + prediction files to be used for this component. + class_labels: The JSON array of class names for the target_field, in the + same order they appear in the batch predictions input file. + display_name: The name of the custom job. + machine_type: The machine type of this custom job. If not set, defaulted to + ``e2-highmem-16``. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. + network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. 
To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC + network that can be used for this job. If set, we will deploy the job + within the provided ip ranges. Otherwise, the job will be deployed to any + ip ranges under the provided VPC network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this Custom Job will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + + Returns: + NamedTuple: + postprocessed_predictions_gcs_source: A string URI pointing toward a GCS + directory with postprocessed prediction files to be used for Evaluation + component. + postprocessed_class_labels: The list of class names for the target_field + with an additional field named "UNKNOWN", in the same order they appear + in the batch predictions input file. + """ + outputs = NamedTuple( + 'outputs', + postprocessed_predictions_gcs_source=Artifact, + postprocessed_class_labels=List[str], + ) + + postprocessor_task = llm_classification_predictions_postprocessor_internal( + project=project, + batch_prediction_results=batch_prediction_results, + class_labels=add_json_escape_class_labels( + class_labels=class_labels + ).output, + location=location, + display_name=display_name, + machine_type=machine_type, + service_account=service_account, + network=network, + reserved_ip_ranges=reserved_ip_ranges, + encryption_spec_key_name=encryption_spec_key_name, + ) + + return outputs( + postprocessed_predictions_gcs_source=postprocessor_task.outputs[ + 'postprocessed_predictions_gcs_source' + ], + postprocessed_class_labels=postprocessor_task.outputs[ + 'postprocessed_class_labels' + ], + ) From 1dc84534d406e1b3fd683fbc1504587e22d5f5d8 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Tue, 8 Aug 2023 09:46:24 -0700 Subject: [PATCH 071/253] feat(components): Internal change PiperOrigin-RevId: 554854392 --- .../llm_evaluation/component.py | 19 +- .../text_classification_pipeline.py | 195 ++++++++++++++++++ .../text_generation_pipeline.py | 157 ++++++++++++++ .../model_evaluation/version.py | 2 +- 4 files changed, 363 insertions(+), 10 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py index 1144875449..8628bc66fc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -19,6 +19,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp import dsl +from kfp.dsl 
import Artifact from kfp.dsl import Metrics from kfp.dsl import Output from kfp.dsl import OutputPath @@ -34,8 +35,8 @@ def model_evaluation_text_generation( target_field_name: str = 'instance.ground_truth', prediction_field_name: str = 'predictions.content', predictions_format: str = 'jsonl', - joined_predictions_gcs_source: str = '', - predictions_gcs_source: str = '', + joined_predictions_gcs_source: dsl.Input[Artifact] = None, + predictions_gcs_source: dsl.Input[Artifact] = None, ground_truth_gcs_source: str = '', display_name: str = 'model-evaluation-text-generation', machine_type: str = 'e2-highmem-16', @@ -67,11 +68,11 @@ def model_evaluation_text_generation( predictions_format: The file format for the LLM Batch Prediction results. ``jsonl`` is currently the only allowed format. If not set, defaulted to ``jsonl``. - joined_predictions_gcs_source: A storage URI pointing toward a GCS directory - or a GCS file with joined prediction & ground truth files to be used for - this evaluation. - predictions_gcs_source: A storage URI pointing toward a GCS directory with - only prediction files to be used for this evaluation. + joined_predictions_gcs_source: An Artifact with an URI pointing toward a GCS + directory or a GCS file with joined prediction & ground truth files to be + used for this evaluation. + predictions_gcs_source: An Artifact with an URI pointing toward a GCS + directory with only prediction files to be used for this evaluation. ground_truth_gcs_source: A storage URI pointing toward a GCS directory with only ground truth files to be used for this evaluation. display_name: The name of the evaluation custom job. @@ -123,8 +124,8 @@ def model_evaluation_text_generation( f'--target_field_name={target_field_name}', f'--prediction_field_name={prediction_field_name}', f'--predictions_format={predictions_format}', - f'--joined_predictions_gcs_source={joined_predictions_gcs_source}', - f'--predictions_gcs_source={predictions_gcs_source}', + f'--joined_predictions_gcs_source={joined_predictions_gcs_source.uri}', + f'--predictions_gcs_source={predictions_gcs_source.uri}', f'--ground_truth_gcs_source={ground_truth_gcs_source}', f'--evaluation_metrics_output_path={evaluation_metrics.path}', '--executor_input={{$.json_escape[1]}}', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py new file mode 100644 index 0000000000..b494171d62 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py @@ -0,0 +1,195 @@ +"""Vertex LLM standalone Evaluation for text classification task.""" + +from typing import List, NamedTuple + +from google_cloud_pipeline_components._implementation.model_evaluation import LLMEvaluationClassificationPredictionsPostprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp +from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics +from google_cloud_pipeline_components.types.artifact_types import VertexModel +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp +from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp +from kfp import dsl + + +_PIPELINE_NAME = 
'evaluation-llm-classification-pipeline'
+
+
+@dsl.pipeline(name=_PIPELINE_NAME)
+def llm_eval_classification_pipeline(  # pylint: disable=dangerous-default-value
+    project: str,
+    location: str,
+    target_field_name: str,
+    batch_predict_gcs_destination_output_uri: str,
+    model_name: str = 'publishers/google/models/text-bison@001',
+    evaluation_task: str = 'text-classification',
+    evaluation_class_labels: List[str] = [],
+    batch_predict_instances_format: str = 'jsonl',
+    batch_predict_gcs_source_uris: List[str] = [],
+    batch_predict_predictions_format: str = 'jsonl',
+    machine_type: str = 'e2-highmem-16',
+    service_account: str = '',
+    network: str = '',
+    dataflow_machine_type: str = 'n1-standard-4',
+    dataflow_disk_size_gb: int = 50,
+    dataflow_max_num_workers: int = 5,
+    dataflow_service_account: str = '',
+    dataflow_subnetwork: str = '',
+    dataflow_use_public_ips: bool = True,
+    encryption_spec_key_name: str = '',
+) -> NamedTuple(
+    'outputs',
+    evaluation_metrics=ClassificationMetrics,
+    evaluation_resource_name=str,
+):
+  """The LLM Text Classification Evaluation pipeline.
+
+  Args:
+    project: The GCP project that runs the pipeline components.
+    location: The GCP region that runs the pipeline components.
+    target_field_name: The target field's name. Formatted to be able to find
+      nested columns, delimited by ``.``. Prefixed with 'instance.' on the
+      component for Vertex Batch Prediction.
+    batch_predict_gcs_destination_output_uri: The Google Cloud Storage location
+      of the directory where the output is to be written to.
+    model_name: The Model name used to run evaluation. Must be a publisher Model
+      or a managed Model sharing the same ancestor location. Starting this job
+      has no impact on any existing deployments of the Model and their
+      resources.
+    evaluation_task: The task that the large language model will be evaluated
+      on. The evaluation component computes a set of metrics relevant to that
+      specific task. The currently supported classification task is
+      ``text-classification``.
+    evaluation_class_labels: The JSON array of class names for the target_field,
+      in the same order they appear in the batch predictions input file.
+    batch_predict_instances_format: The format in which instances are given,
+      must be one of the Model's supportedInputStorageFormats. For more details
+      about this input config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your
+      instances data to run batch prediction on. The instances data should also
+      contain the ground truth (target) data, used for evaluation. May contain
+      wildcards. For more information on wildcards, see
+      https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For
+      more details about this input config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_predictions_format: The format in which Vertex AI gives the
+      predictions. Must be one of the Model's supportedOutputStorageFormats. For
+      more details about this output config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.
+    machine_type: The machine type of the custom jobs in this pipeline. If not
+      set, defaulted to ``e2-highmem-16``. More details:
+      https://cloud.google.com/compute/docs/machine-resource
+    service_account: Sets the default service account for workload run-as
+      account. The service account running the pipeline
+      (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
+      submitting jobs must have act-as permission on this run-as account. If
+      unspecified, the Vertex AI Custom Code Service Agent
+      (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
+      for the CustomJob's project is used.
+    network: The full name of the Compute Engine network to which the job should
+      be peered. For example, ``projects/12345/global/networks/myVPC``. Format
+      is of the form ``projects/{project}/global/networks/{network}``. Where
+      ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a
+      network name, as in ``myVPC``. To specify this field, you must have
+      already configured VPC Network Peering for Vertex AI
+      (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left
+      unspecified, the job is not peered with any network.
+    dataflow_machine_type: The Dataflow machine type for evaluation components.
+    dataflow_disk_size_gb: The disk size (in GB) of the machine executing the
+      evaluation run. If not set, defaulted to ``50``.
+    dataflow_max_num_workers: The max number of workers executing the evaluation
+      run. If not set, defaulted to ``5``.
+    dataflow_service_account: Custom service account to run Dataflow jobs.
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty
+      the default subnetwork will be used. Example:
+      https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow workers use public IP
+      addresses.
+    encryption_spec_key_name: Customer-managed encryption key options. If set,
+      resources created by this pipeline will be encrypted with the provided
+      encryption key. Has the form:
+      ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
+      The key needs to be in the same region as where the compute resource is
+      created.
+
+  Returns:
+    NamedTuple:
+      evaluation_metrics: ClassificationMetrics Artifact for LLM Text
+        Classification.
+      evaluation_resource_name: If run on a user's managed VertexModel, the
+        imported evaluation resource name. Empty if run on a publisher model.
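A minimal usage sketch for the pipeline defined above, assuming the module can be imported from the path added in this patch and that runs are submitted through the Vertex AI SDK; the project, region, bucket, target field, and class labels are placeholders, not values taken from this patch:

# Illustrative only: project, location, and GCS paths are placeholders.
from google.cloud import aiplatform
from kfp import compiler

from google_cloud_pipeline_components._implementation.model_evaluation import (
    text_classification_pipeline,
)

# Compile the pipeline function to a local package file.
compiler.Compiler().compile(
    pipeline_func=text_classification_pipeline.llm_eval_classification_pipeline,
    package_path='llm_eval_classification_pipeline.yaml',
)

# Submit the compiled pipeline as a Vertex AI pipeline run.
aiplatform.init(
    project='my-project',
    location='us-central1',
    staging_bucket='gs://my-bucket/pipeline-root',
)
aiplatform.PipelineJob(
    display_name='llm-eval-classification',
    template_path='llm_eval_classification_pipeline.yaml',
    parameter_values={
        'project': 'my-project',
        'location': 'us-central1',
        'target_field_name': 'ground_truth',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/eval/instances.jsonl'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval/output',
        'evaluation_class_labels': ['business', 'health', 'sports'],
    },
).submit()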
+ """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=ClassificationMetrics, + evaluation_resource_name=str, + ) + + get_vertex_model_task = dsl.importer_node.importer( + artifact_uri=( + f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' + ), + artifact_class=VertexModel, + metadata={'resourceName': model_name}, + ) + get_vertex_model_task.set_display_name('get-vertex-model') + + batch_predict_task = ModelBatchPredictOp( + project=project, + location=location, + model=get_vertex_model_task.outputs['artifact'], + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=batch_predict_gcs_source_uris, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + encryption_spec_key_name=encryption_spec_key_name, + ) + + postprocessor_task = LLMEvaluationClassificationPredictionsPostprocessorOp( + project=project, + batch_prediction_results=batch_predict_task.outputs[ + 'gcs_output_directory' + ], + class_labels=evaluation_class_labels, + location=location, + machine_type=machine_type, + network=network, + service_account=service_account, + encryption_spec_key_name=encryption_spec_key_name, + ) + + eval_task = ModelEvaluationClassificationOp( + project=project, + location=location, + class_labels=postprocessor_task.outputs['postprocessed_class_labels'], + target_field_name=target_field_name, + predictions_gcs_source=postprocessor_task.outputs[ + 'postprocessed_predictions_gcs_source' + ], + prediction_label_column='prediction.classes', + prediction_score_column='prediction.scores', + predictions_format=batch_predict_predictions_format, + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + + import_evaluation_task = ModelImportEvaluationOp( + classification_metrics=eval_task.outputs['evaluation_metrics'], + model=get_vertex_model_task.outputs['artifact'], + dataset_type=batch_predict_instances_format, + dataset_paths=batch_predict_gcs_source_uris, + display_name=_PIPELINE_NAME, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py new file mode 100644 index 0000000000..2c7b818c4a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py @@ -0,0 +1,157 @@ +"""Vertex LLM standalone Evaluation for text generation task.""" + +from typing import List, NamedTuple + +from google_cloud_pipeline_components._implementation.model_evaluation import LLMEvaluationTextGenerationOp +from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp +from google_cloud_pipeline_components.types.artifact_types import VertexModel +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp +from kfp import dsl 
+from kfp.dsl import Metrics + + +_PIPELINE_NAME = 'evaluation-llm-text-generation-pipeline' + + +@dsl.pipeline(name=_PIPELINE_NAME) +def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-value + project: str, + location: str, + batch_predict_gcs_destination_output_uri: str, + model_name: str = 'publishers/google/models/text-bison@001', + evaluation_task: str = 'text-generation', + batch_predict_instances_format: str = 'jsonl', + batch_predict_gcs_source_uris: List[str] = [], + batch_predict_predictions_format: str = 'jsonl', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +) -> NamedTuple( + 'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str +): + """LLM Text Generation Evaluation pipeline. + + This pipeline supports evaluating large language models, publisher or managed + models, performing the following generative tasks: ``summarization``, + ``question-answering``, and ``text-generation``. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. + model_name: The Model name used to run evaluation. Must be a publisher Model + or a managed Model sharing the same ancestor location. Starting this job + has no impact on any existing deployments of the Model and their + resources. + evaluation_task: The task that the large language model will be evaluated + on. The evaluation component computes a set of metrics relevant to that + specific task. Currently supported tasks are: ``summarization``, + ``question-answering``, ``text-generation``. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. Only "jsonl" is + currently supported. For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. + Only "jsonl" is currently supported. For more details about this output + config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + machine_type: The machine type of this custom job. If not set, defaulted to + ``e2-highmem-16``. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. 
If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + + Returns: + NamedTuple: + evaluation_metrics: Metrics Artifact for LLM Text Generation. + evaluation_resource_name: If run on an user's managed VertexModel, the + imported evaluation resource name. Empty if run on a publisher model. + """ + outputs = NamedTuple( + 'outputs', + evaluation_metrics=Metrics, + evaluation_resource_name=str, + ) + + get_vertex_model_task = dsl.importer_node.importer( + artifact_uri=( + f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' + ), + artifact_class=VertexModel, + metadata={'resourceName': model_name}, + ) + get_vertex_model_task.set_display_name('get-vertex-model') + + batch_predict_task = ModelBatchPredictOp( + project=project, + location=location, + model=get_vertex_model_task.outputs['artifact'], + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=batch_predict_gcs_source_uris, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + encryption_spec_key_name=encryption_spec_key_name, + ) + + eval_task = LLMEvaluationTextGenerationOp( + project=project, + location=location, + evaluation_task=evaluation_task, + target_field_name='instance.ground_truth', + prediction_field_name='predictions.content', + predictions_format=batch_predict_predictions_format, + joined_predictions_gcs_source=batch_predict_task.outputs[ + 'gcs_output_directory' + ], + display_name=_PIPELINE_NAME, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) + + import_evaluation_task = ModelImportEvaluationOp( + metrics=eval_task.outputs['evaluation_metrics'], + model=get_vertex_model_task.outputs['artifact'], + problem_type=evaluation_task, + dataset_type=batch_predict_predictions_format, + dataset_paths=batch_predict_gcs_source_uris, + display_name=_PIPELINE_NAME, + ) + + return outputs( + evaluation_metrics=eval_task.outputs['evaluation_metrics'], + evaluation_resource_name=import_evaluation_task.outputs[ + 'evaluation_resource_name' + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py index 3c40e9ddeb..3bd9886e11 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py @@ -14,7 +14,7 @@ """Version constants for model evaluation components.""" _EVAL_VERSION = 'v0.9.2' -_LLM_EVAL_VERSION = 'v0.1' +_LLM_EVAL_VERSION = 'v0.2' _EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/model-evaluation' _LLM_EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/llm-model-evaluation' From d98fa90bff79f3c13853d6ed9044c308253deba4 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 8 Aug 2023 10:54:24 -0700 Subject: [PATCH 072/253] feat: Adding new test infrastructure for e2e pipeline tests PiperOrigin-RevId: 554877529 --- .../google-cloud/ncl/blueprint_defs.ncl | 47 +++++++++++++++++++ .../google-cloud/ncl/blueprint_defs_test.ncl | 45 ++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 components/google-cloud/ncl/blueprint_defs.ncl create mode 100644 components/google-cloud/ncl/blueprint_defs_test.ncl diff --git a/components/google-cloud/ncl/blueprint_defs.ncl b/components/google-cloud/ncl/blueprint_defs.ncl new file mode 100644 index 0000000000..5320f839a8 --- /dev/null +++ b/components/google-cloud/ncl/blueprint_defs.ncl @@ -0,0 +1,47 @@ +include "devtools/blueprint/ncl/blueprint_file.ncl"; +include "testing/integration/config/blueprint_extension.proto"; +include "testing/integration/config/buganizer.proto"; + +def PipelinesBuildableUnitName(service, version, test_type) = + service + "-" + version + "-" + test_type + "-" + "workflows"; + +def PipelinesTargetName(service, version, test_type) = + "//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_" + service + "_" + version + "_" + test_type + "_" + "workflow"; + +def PipelinesBuildableUnit(service, version, test_type) = + ::blueprint::BuildableUnit( + name = "buildable-unit-" + PipelinesBuildableUnitName(service, version, test_type), + test_patterns = [PipelinesTargetName(service, version, test_type)], + enable_release = false, + ); + +def PipelinesContinuousIntegrationTest( + service, + version, + test_type, + throttle_rule = "every 24 hours", + cluster_name = "vertex-pipelines-platform-e2e-test-cluster", + requester = "cloud-aiplatform-guitar", + env_params = [], + bug_component = 1088378) = let + unit_name = PipelinesBuildableUnitName(service, version, test_type); + in + ::blueprint::ContinuousIntegrationTest( + name = "continuous-integration-" + unit_name, + buildable_unit_name = "buildable-unit-" + unit_name, + base_version_spec = "cl:HEAD", + throttle = throttle_rule, + cluster_name = cluster_name, + requester = requester, + env_params = env_params, + notification_info = guitar::update_buganizer_config( + ::blueprint::ContinuousIntegrationNotificationInfo( + email_notification_criteria = ::devtools_blueprint::ContinuousIntegrationNotificationInfo::NotificationCriteria::NO_NOTIFICATION, + ), + ::guitar::BuganizerConfig( + component_id = bug_component, + reporter = "cloud-ml-pipelines+e2e-tests@google.com", + cc = ["cjmccarthy@google.com"], + ), + ), + ); diff --git a/components/google-cloud/ncl/blueprint_defs_test.ncl b/components/google-cloud/ncl/blueprint_defs_test.ncl new file mode 100644 index 0000000000..820b518756 --- /dev/null +++ b/components/google-cloud/ncl/blueprint_defs_test.ncl @@ -0,0 +1,45 @@ +include "third_party/py/google_cloud_pipeline_components/ncl/blueprint_defs.ncl"; + +def test_PipelinesBuildableUnitName() = let + assert PipelinesBuildableUnitName(service = 
"bigquery", version = "v1", test_type = "public") == "bigquery-v1-public-workflows"; + in (); + +def test_PipelinesTargetName() = let + assert PipelinesTargetName(service = "bigquery", version = "v1", test_type = "public") == "//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_bigquery_v1_public_workflow"; + in (); + +def test_PipelinesBuildableUnit() = let + buildable_unit = ::blueprint::BuildableUnit( + name = "buildable-unit-bigquery-v1-public-workflows", + test_patterns = ["//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_bigquery_v1_public_workflow"], + enable_release = false, + ); + assert PipelinesBuildableUnit( + service = "bigquery", + version = "v1", + test_type = "public", + ) == buildable_unit; + in (); + +def test_PipelinesContinuousIntegrationTest() = let + continuous_test = ::blueprint::ContinuousIntegrationTest( + name = "continuous-integration-bigquery-v1-public-workflows", + buildable_unit_name = "buildable-unit-bigquery-v1-public-workflows", + base_version_spec = "cl:HEAD", + throttle = "every 24 hours", + cluster_name = "vertex-pipelines-platform-e2e-test-cluster", + requester = "cloud-aiplatform-guitar", + env_params = [], + notification_info = guitar::update_buganizer_config( + ::blueprint::ContinuousIntegrationNotificationInfo( + email_notification_criteria = ::devtools_blueprint::ContinuousIntegrationNotificationInfo::NotificationCriteria::NO_NOTIFICATION, + ), + ::guitar::BuganizerConfig( + component_id = 1088378, + reporter = "cloud-ml-pipelines+e2e-tests@google.com", + cc = ["cjmccarthy@google.com"], + ), + ), + ); + assert PipelinesContinuousIntegrationTest(service = "bigquery", version = "v1", test_type = "public") == continuous_test; + in (); From 82386d790690c7d1894adf9fa2186714c92ccd8a Mon Sep 17 00:00:00 2001 From: gkcalat <35157096+gkcalat@users.noreply.github.com> Date: Tue, 8 Aug 2023 12:06:56 -0700 Subject: [PATCH 073/253] chore: Add config for stale GHA (#9817) * Add config for stale GHA As [stale-bot is deprecated](https://github.com/probot/stale), we need to migrate to [Stale GHA](https://github.com/actions/stale) * Mimic current logic in stalebot --- .github/workflows/stale.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 0000000000..725e6b632c --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,37 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + - cron: '39 7 * * *' + +jobs: + stale: + + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v5 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-stale: -1 + days-before-close: -1 + days-before-issue-stale: 90 + days-before-issue-close: 90 + stale-issue-message: > + This issue has been automatically marked as stale because it has not had + recent activity. It will be closed if no further activity occurs. Thank you + for your contributions. + close-issue-message: > + This issue has been automatically closed because it has not had recent + activity. Please comment "/reopen" to reopen it. 
+ stale-issue-label: 'lifecycle/stale' + exempt-issue-labels: lifecycle/frozen + exempt-pr-labels: lifecycle/frozen From 443a67438ad1a2fba0f284ede6f1378aaee2a549 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:33 -0700 Subject: [PATCH 074/253] test(frontend): Improve v1 frontend-integration-test (#9811) * test: access specific field via "id". * test: access specific field via "id". * update snapshots. * Add test to validata description changes. * Change id def for input tag. * Skip unit tests * Add unit tests back and change id * Add a helper for clear default value in input field. Remove unnecessary 'tab' command * Add comment to explain the parameter name for tests Remove default value for runName before typing new value --- frontend/src/pages/NewRun.tsx | 2 + .../pages/__snapshots__/NewRun.test.tsx.snap | 16 ++++++ .../helloworld.spec.js | 51 +++++++++---------- 3 files changed, 43 insertions(+), 26 deletions(-) diff --git a/frontend/src/pages/NewRun.tsx b/frontend/src/pages/NewRun.tsx index 0d86fe6eef..3ac7aded21 100644 --- a/frontend/src/pages/NewRun.tsx +++ b/frontend/src/pages/NewRun.tsx @@ -466,6 +466,7 @@ export class NewRun extends Page { {/* Run metadata inputs */} { variant='outlined' /> { before(async () => { await browser.url('/'); @@ -49,7 +54,9 @@ describe('deploy helloworld sample run', () => { await $('#localPackageBtn').click(); const remoteFilePath = await browser.uploadFile('./helloworld.yaml'); await $('#dropZone input[type="file"]').addValue(remoteFilePath); - await $('#newPipelineName').setValue(pipelineName); + await $('#newPipelineName').click(); + await clearDefaultInput() + await browser.keys(pipelineName) await $('#createNewPipelineOrVersionBtn').click(); await browser.waitUntil(async () => { return new URL(await browser.getUrl()).hash.startsWith('#/pipelines/details'); @@ -99,22 +106,16 @@ describe('deploy helloworld sample run', () => { reverse: true, }); - await browser.keys('Tab'); + await $('#runNameInput').click(); + await clearDefaultInput() await browser.keys(runName); - await browser.keys('Tab'); + await $('#descriptionInput').click(); await browser.keys(runDescription); - - // Skip over "choose experiment" button - await browser.keys('Tab'); - // Skip over service account help button - await browser.keys('Tab'); - // Skip over "service account" textbox - await browser.keys('Tab'); - // Skip over "Run Type" radio button - await browser.keys('Tab'); - - await browser.keys('Tab'); + + // the parameter name is "message" in this testing pipeline + await $('input#newRunPipelineParam0').click(); + await clearDefaultInput() await browser.keys(outputParameterValue); // Deploy @@ -179,6 +180,11 @@ describe('deploy helloworld sample run', () => { ); }); + it('displays run description inputs correctly', async () => { + const descriptionValue = await getValueFromDetailsTable('Description'); + assert.equal(descriptionValue, runDescription, 'run description is not shown correctly'); + }); + it('displays run inputs correctly', async () => { const paramValue = await getValueFromDetailsTable('message'); assert.equal(paramValue, outputParameterValue, 'run message is not shown correctly'); @@ -230,21 +236,14 @@ describe('deploy helloworld sample run', () => { await $('#pipelineSelectorDialog').waitForDisplayed({ timeout: waitTimeout, reverse: true }); + await $('#runNameInput').click(); await browser.keys(runWithoutExperimentName); - await browser.keys('Tab'); + await $('#descriptionInput').click(); await 
browser.keys(runWithoutExperimentDescription); - - // Skip over "choose experiment" button - await browser.keys('Tab'); - // Skip over service account help button - await browser.keys('Tab'); - // Skip over "service account" textbox - await browser.keys('Tab'); - // Skip over "Run Type" radio button - await browser.keys('Tab'); - - await browser.keys('Tab'); + + await $('input#newRunPipelineParam0').click(); + await clearDefaultInput() await browser.keys(outputParameterValue); // Deploy From f9e9efab8c1e33570ef4378cb8a0079db830f464 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 8 Aug 2023 15:52:38 -0700 Subject: [PATCH 075/253] chore(components): internal PiperOrigin-RevId: 554966570 --- .../google-cloud/ncl/blueprint_defs.ncl | 47 ------------------- .../google-cloud/ncl/blueprint_defs_test.ncl | 45 ------------------ 2 files changed, 92 deletions(-) delete mode 100644 components/google-cloud/ncl/blueprint_defs.ncl delete mode 100644 components/google-cloud/ncl/blueprint_defs_test.ncl diff --git a/components/google-cloud/ncl/blueprint_defs.ncl b/components/google-cloud/ncl/blueprint_defs.ncl deleted file mode 100644 index 5320f839a8..0000000000 --- a/components/google-cloud/ncl/blueprint_defs.ncl +++ /dev/null @@ -1,47 +0,0 @@ -include "devtools/blueprint/ncl/blueprint_file.ncl"; -include "testing/integration/config/blueprint_extension.proto"; -include "testing/integration/config/buganizer.proto"; - -def PipelinesBuildableUnitName(service, version, test_type) = - service + "-" + version + "-" + test_type + "-" + "workflows"; - -def PipelinesTargetName(service, version, test_type) = - "//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_" + service + "_" + version + "_" + test_type + "_" + "workflow"; - -def PipelinesBuildableUnit(service, version, test_type) = - ::blueprint::BuildableUnit( - name = "buildable-unit-" + PipelinesBuildableUnitName(service, version, test_type), - test_patterns = [PipelinesTargetName(service, version, test_type)], - enable_release = false, - ); - -def PipelinesContinuousIntegrationTest( - service, - version, - test_type, - throttle_rule = "every 24 hours", - cluster_name = "vertex-pipelines-platform-e2e-test-cluster", - requester = "cloud-aiplatform-guitar", - env_params = [], - bug_component = 1088378) = let - unit_name = PipelinesBuildableUnitName(service, version, test_type); - in - ::blueprint::ContinuousIntegrationTest( - name = "continuous-integration-" + unit_name, - buildable_unit_name = "buildable-unit-" + unit_name, - base_version_spec = "cl:HEAD", - throttle = throttle_rule, - cluster_name = cluster_name, - requester = requester, - env_params = env_params, - notification_info = guitar::update_buganizer_config( - ::blueprint::ContinuousIntegrationNotificationInfo( - email_notification_criteria = ::devtools_blueprint::ContinuousIntegrationNotificationInfo::NotificationCriteria::NO_NOTIFICATION, - ), - ::guitar::BuganizerConfig( - component_id = bug_component, - reporter = "cloud-ml-pipelines+e2e-tests@google.com", - cc = ["cjmccarthy@google.com"], - ), - ), - ); diff --git a/components/google-cloud/ncl/blueprint_defs_test.ncl b/components/google-cloud/ncl/blueprint_defs_test.ncl deleted file mode 100644 index 820b518756..0000000000 --- a/components/google-cloud/ncl/blueprint_defs_test.ncl +++ /dev/null @@ -1,45 +0,0 @@ -include "third_party/py/google_cloud_pipeline_components/ncl/blueprint_defs.ncl"; - -def test_PipelinesBuildableUnitName() = let - assert PipelinesBuildableUnitName(service = "bigquery", version 
= "v1", test_type = "public") == "bigquery-v1-public-workflows"; - in (); - -def test_PipelinesTargetName() = let - assert PipelinesTargetName(service = "bigquery", version = "v1", test_type = "public") == "//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_bigquery_v1_public_workflow"; - in (); - -def test_PipelinesBuildableUnit() = let - buildable_unit = ::blueprint::BuildableUnit( - name = "buildable-unit-bigquery-v1-public-workflows", - test_patterns = ["//third_party/py/google_cloud_pipeline_components/google/centralized_testing:gcpc_bigquery_v1_public_workflow"], - enable_release = false, - ); - assert PipelinesBuildableUnit( - service = "bigquery", - version = "v1", - test_type = "public", - ) == buildable_unit; - in (); - -def test_PipelinesContinuousIntegrationTest() = let - continuous_test = ::blueprint::ContinuousIntegrationTest( - name = "continuous-integration-bigquery-v1-public-workflows", - buildable_unit_name = "buildable-unit-bigquery-v1-public-workflows", - base_version_spec = "cl:HEAD", - throttle = "every 24 hours", - cluster_name = "vertex-pipelines-platform-e2e-test-cluster", - requester = "cloud-aiplatform-guitar", - env_params = [], - notification_info = guitar::update_buganizer_config( - ::blueprint::ContinuousIntegrationNotificationInfo( - email_notification_criteria = ::devtools_blueprint::ContinuousIntegrationNotificationInfo::NotificationCriteria::NO_NOTIFICATION, - ), - ::guitar::BuganizerConfig( - component_id = 1088378, - reporter = "cloud-ml-pipelines+e2e-tests@google.com", - cc = ["cjmccarthy@google.com"], - ), - ), - ); - assert PipelinesContinuousIntegrationTest(service = "bigquery", version = "v1", test_type = "public") == continuous_test; - in (); From 7bb24e70a7dad82228ddc21b23aa07d4982d98a2 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 8 Aug 2023 22:11:26 -0700 Subject: [PATCH 076/253] chore(components): clean up PiperOrigin-RevId: 555043188 --- components/google-cloud/OWNERS | 11 -- components/google-cloud/docs/Makefile | 20 --- .../google-cloud/docs/add_gcpc_version.sh | 26 --- .../v1/forecasting/README.md | 152 ------------------ 4 files changed, 209 deletions(-) delete mode 100644 components/google-cloud/OWNERS delete mode 100644 components/google-cloud/docs/Makefile delete mode 100644 components/google-cloud/docs/add_gcpc_version.sh delete mode 100644 components/google-cloud/google_cloud_pipeline_components/v1/forecasting/README.md diff --git a/components/google-cloud/OWNERS b/components/google-cloud/OWNERS deleted file mode 100644 index 4d945bea26..0000000000 --- a/components/google-cloud/OWNERS +++ /dev/null @@ -1,11 +0,0 @@ -# Google Cloud components - MB SDK -approvers: - - chensun - - IronPan - - neuromage - - sasha-gitg - - sinachavoshi -reviewers: - - chensun - - sasha-gitg - - sinachavoshi diff --git a/components/google-cloud/docs/Makefile b/components/google-cloud/docs/Makefile deleted file mode 100644 index d0c3cbf102..0000000000 --- a/components/google-cloud/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". 
-help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/components/google-cloud/docs/add_gcpc_version.sh b/components/google-cloud/docs/add_gcpc_version.sh deleted file mode 100644 index 836c9c9a1f..0000000000 --- a/components/google-cloud/docs/add_gcpc_version.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# read the current version from environment variable -GCPC_VERSION=$1 -SCRIPT_DIR=$(dirname "$0") - -# check if jq is installed -if ! command -v jq &> /dev/null -then - echo "jq could not be found" - echo "Please install jq using the following command:" - echo "sudo apt-get install jq" - exit -fi - -# create a new JSON object -new_version=$(cat < $SCRIPT_DIR/temp.json && mv $SCRIPT_DIR/temp.json $SCRIPT_DIR/source/versions.json diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/README.md b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/README.md deleted file mode 100644 index b28f8f8fdc..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# Forecasting Components Inputs - -## input_tables - -**input_tables** is a seriazlied JSON array required by both ForecastingPreprocessingOp and ForecastingValidationOp. - -Proto definition of TableSpecs: -```protobuf -// Desribes a BigQuery user input table for Vertex AI validation, preprocessing -// and training. -message TableSpecs { - // [Required] BigQuery table path of the table. e.g.: - // bq://projectId.datasetId.tableId - string bigquery_uri = 1; - - // [Required] The table type from the eligible types: FORECASTING_PRIMARY, - // FORECASTING_ATTRIBUTE and FORECASTING_PLAN - string table_type = 2; - - // Some table types require additional information about the table. If - // table_type is FORECASTING_PRIMARY, forecasting_primary_table_metadata is - // required. If table_type is FORECASTING_ATTRIBUTE, - // forecasting_attribute_table_metadata is required. - oneof metadata { - ForecastingPrimaryTableMetadata forecasting_primary_table_metadata = 3; - ForecastingAttributeTableMetadata forecasting_attribute_table_metadata = 4; - } -} - -// The metadata that desribes the primary table in Vertex forecasting. -// -// The primary table must contain historical data at the granularity it will -// predict at. For example, if the task is to predict daily sales, this table -// should have a target column with historical sales data at daily granularity. -// -// One or more time series identifier columns are needed in this table. If this -// table has 2 time series identifier columns - "channel_id" and "product_id", -// a time series will be identified by the combination of these 2 columns. -// -// A time column must be present in this table with DATE, DATETIME or TIMESTAMP -// type that reflects the specified granularity. -// -// Two rows cannot have the same value in both the time column and the time -// series identifier column(s). -// -// Except for the time series identifier column(s), every column in the -// primary table will be considered time variant. For example, a holiday -// column or promotion column could have different values at different time -// given a specific time series identifier. 
If a column has fixed value given a -// time series identifier, i.e. the color of a product given the product ID as -// time series identifier, the column should be moved to the attribute table. -message ForecastingPrimaryTableMetadata { - // [Required] The name of the column that identifies time order in the time - // series. - string time_column = 1; - // [Required] The name of the column that the model is to predict. - string target_column = 2; - // [Required] Names of columns that jointly identify the time series. - repeated string time_series_identifier_columns = 3; - // [Optional] Names of columns that are unavailable when a forecast is - // requested. This column contains information for the given entity - // (identified by the time_series_identifier_columns) that is unknown before - // the forecast For example, actual weather on a given day. - repeated string unavailable_at_forecast_columns = 4; - - // [Required] The granularity of time units presented in the time_column. - TimeGranularity time_granularity = 5; - // [Optional] The name of the column that splits the table. Eligible values - // are: TRAIN, VALIDATE, TEST - string predefined_splits_column = 6; - // [Optional] The name of the column that measures the importance of the row. - // Higher weight values give more importance to the corresponding row during - // model training. For example, to let the model pay more attention to - // holidays, the holiday rows can have weight value 1.0 and the rest rows have - // a weight value 0.5. Weight value must be >= 0. 0 means ignored in training, - // validation or test. If not specified, all rows will have an equal weight of - // 1. - string weight_column = 7; -} - -// A duration of time expressed in time granularity units. -message TimeGranularity { - // [Required] The unit of this time period. Eligible values are: MINUTE, HOUR, - // DAY, WEEK, MONTH, YEAR - string unit = 1; - // [Required] The number of units per period, e.g. 3 weeks or 2 months. - int64 quantity = 2; -} - -// The metadata that desribes the attribute table in Vertex forecasting. -// -// Attribute table contains features that desribe time series that are not -// changed with time. For example, if the primary table has 2 -// time_series_identifier_columns columns - product_id and channel_id, an -// optional attribute table can provide product attributes such as category, -// color etc. -// -// The attribute table should have a single identifier column that is the same -// as one of the time_series_identifier_columns in -// ForecastingPrimaryTableMetadata. -message ForecastingAttributeTableMetadata { - // [Required] The name of the primary key column. 
- string primary_key_column = 1; -} -``` - -# Example pipeline using ForecastingPreprocessingOp and ForecastingValidationOp -```python -import json -from google_cloud_pipeline_components.v1 import forecasting - - -primary_table_specs = { - "bigquery_uri": "bq://endless-forms-most-beautiful.iowa_liquor_sales_forecast.sales_table", - "table_type": "FORECASTING_PRIMARY", - "forecasting_primary_table_metadata": { - "time_column": "datetime", - "target_column": "gross_quantity", - "time_series_identifier_columns": ["product_id", "location_id"], - "unavailable_at_forecast_columns": ['sale_dollars', 'state_bottle_cost', 'state_bottle_retail'], - "time_granularity": {"unit": "DAY", "quantity": 1 }, - "predefined_splits_column": "ml_use" - } -} - -attribute_table_specs1 = { - "bigquery_uri": "bq://endless-forms-most-beautiful.iowa_liquor_sales_forecast.product_table", - "table_type": "FORECASTING_ATTRIBUTE", - "forecasting_attribute_table_metadata": { - "primary_key_column": "product_id" - } -} - -attribute_table_specs2 = { - "bigquery_uri": "bq://endless-forms-most-beautiful.iowa_liquor_sales_forecast.location_table", - "table_type": "FORECASTING_ATTRIBUTE", - "forecasting_attribute_table_metadata": { - "primary_key_column": "location_id" - } -} - -input_table_specs = [primary_table_specs, attribute_table_specs1, attribute_table_specs2] -input_tables = json.dumps(input_table_specs) - - -@dsl.pipeline(name='forecasting-pipeline-training') -def pipeline(input_tables: str): - # A workflow consists of training validation and preprocessing: - validation = forecasting.ForecastingValidationOp(input_tables=input_tables, validation_theme='FORECASTING_TRAINING') - preprocess = forecasting.ForecastingPreprocessingOp(project_id='endless-forms-most-beautiful', input_tables=input_tables) - preprocess.after(validation) -``` From 2f32b23c1ec4fe21baa8c234151921ef75536a05 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 9 Aug 2023 19:48:03 -0400 Subject: [PATCH 077/253] chore(test): change hard-coded GKE version to 1.25 (#9841) * update gke version * try stable channel * use 1.25.10-gke.2100 * use 1.25 --- test/deploy-cluster.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/deploy-cluster.sh b/test/deploy-cluster.sh index 0662dd6b04..39436fadd6 100755 --- a/test/deploy-cluster.sh +++ b/test/deploy-cluster.sh @@ -92,7 +92,9 @@ else # reference: https://github.com/kubeflow/pipelines/issues/6696 # Hard-coded GKE to 1.25.10-gke.1200 (the latest 1.25 in STABLE channel). Reference: # https://github.com/kubeflow/pipelines/issues/9704#issuecomment-1622310358 - gcloud container clusters create ${TEST_CLUSTER} --image-type cos_containerd --release-channel stable --cluster-version 1.25.10-gke.1200 ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} + # 08/09/2023 update: 1.25.10-gke.1200 no longer supported, use 1.25.10-gke.2100 instead. 
Reference: + # https://cloud.google.com/kubernetes-engine/docs/release-notes-nochannel#2023-r17_version_updates + gcloud container clusters create ${TEST_CLUSTER} --image-type cos_containerd --release-channel stable --cluster-version 1.25 ${SCOPE_ARG} ${NODE_POOL_CONFIG_ARG} ${WI_ARG} fi gcloud container clusters get-credentials ${TEST_CLUSTER} From b350ac4ddc32bd699c4cf92e3f6774088fb89f4f Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Thu, 10 Aug 2023 10:07:17 -0700 Subject: [PATCH 078/253] feat(components): Upgrade LLM evaluation classification and text generation pipelines to preview PiperOrigin-RevId: 555540517 --- ...evaluation_llm_classification_pipeline.py} | 29 +++++++++---------- ...valuation_llm_text_generation_pipeline.py} | 27 +++++++++-------- 2 files changed, 27 insertions(+), 29 deletions(-) rename components/google-cloud/google_cloud_pipeline_components/{_implementation/model_evaluation/text_classification_pipeline.py => preview/model_evaluation/evaluation_llm_classification_pipeline.py} (95%) rename components/google-cloud/google_cloud_pipeline_components/{_implementation/model_evaluation/text_generation_pipeline.py => preview/model_evaluation/evaluation_llm_text_generation_pipeline.py} (94%) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py similarity index 95% rename from components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py rename to components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index b494171d62..afd93a668b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -15,16 +15,16 @@ @dsl.pipeline(name=_PIPELINE_NAME) -def llm_eval_classification_pipeline( # pylint: disable=dangerous-default-value +def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, target_field_name: str, + batch_predict_gcs_source_uris: List[str], batch_predict_gcs_destination_output_uri: str, model_name: str = 'publishers/google/models/text-bison@001', evaluation_task: str = 'text-classification', evaluation_class_labels: List[str] = [], batch_predict_instances_format: str = 'jsonl', - batch_predict_gcs_source_uris: List[str] = [], batch_predict_predictions_format: str = 'jsonl', machine_type: str = 'e2-highmem-16', service_account: str = '', @@ -49,6 +49,13 @@ def llm_eval_classification_pipeline( # pylint: disable=dangerous-default-value target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by ``.``. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. 
For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. model_name: The Model name used to run evaluation. Must be a publisher Model @@ -65,13 +72,6 @@ def llm_eval_classification_pipeline( # pylint: disable=dangerous-default-value must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see @@ -113,11 +113,10 @@ def llm_eval_classification_pipeline( # pylint: disable=dangerous-default-value created. Returns: - NamedTuple: - evaluation_metrics: ClassificationMetrics Artifact for LLM Text - Classification. - evaluation_resource_name: If run on an user's managed VertexModel, the - imported evaluation resource name. Empty if run on a publisher model. + evaluation_metrics: ClassificationMetrics Artifact for LLM Text + Classification. + evaluation_resource_name: If run on an user's managed VertexModel, the + imported evaluation resource name. Empty if run on a publisher model. 
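With the move to the preview namespace, a caller would import the renamed pipeline from its new module path and must now supply the batch prediction source URIs explicitly; a minimal sketch with placeholder values, assuming a direct import of the renamed module:

# Illustrative only: the GCS URI below is a placeholder.
from kfp import compiler

from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_classification_pipeline import (
    evaluation_llm_classification_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_llm_classification_pipeline,
    package_path='evaluation_llm_classification_pipeline.yaml',
)

# batch_predict_gcs_source_uris is now a required pipeline parameter, so every
# run must supply it (for example ['gs://my-bucket/eval/instances.jsonl'])
# together with project, location, target_field_name, and
# batch_predict_gcs_destination_output_uri.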
""" outputs = NamedTuple( 'outputs', @@ -125,7 +124,7 @@ def llm_eval_classification_pipeline( # pylint: disable=dangerous-default-value evaluation_resource_name=str, ) - get_vertex_model_task = dsl.importer_node.importer( + get_vertex_model_task = dsl.importer( artifact_uri=( f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' ), diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py similarity index 94% rename from components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py rename to components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index 2c7b818c4a..f7e33159e5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -14,14 +14,14 @@ @dsl.pipeline(name=_PIPELINE_NAME) -def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-value +def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-default-value project: str, location: str, + batch_predict_gcs_source_uris: List[str], batch_predict_gcs_destination_output_uri: str, model_name: str = 'publishers/google/models/text-bison@001', evaluation_task: str = 'text-generation', batch_predict_instances_format: str = 'jsonl', - batch_predict_gcs_source_uris: List[str] = [], batch_predict_predictions_format: str = 'jsonl', machine_type: str = 'e2-highmem-16', service_account: str = '', @@ -39,6 +39,13 @@ def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-valu Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances data to run batch prediction on. The instances data should also + contain the ground truth (target) data, used for evaluation. May contain + wildcards. For more information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. model_name: The Model name used to run evaluation. Must be a publisher Model @@ -53,13 +60,6 @@ def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-valu must be one of the Model's supportedInputStorageFormats. Only "jsonl" is currently supported. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. 
For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. Only "jsonl" is currently supported. For more details about this output @@ -91,10 +91,9 @@ def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-valu created. Returns: - NamedTuple: - evaluation_metrics: Metrics Artifact for LLM Text Generation. - evaluation_resource_name: If run on an user's managed VertexModel, the - imported evaluation resource name. Empty if run on a publisher model. + evaluation_metrics: Metrics Artifact for LLM Text Generation. + evaluation_resource_name: If run on an user's managed VertexModel, the + imported evaluation resource name. Empty if run on a publisher model. """ outputs = NamedTuple( 'outputs', @@ -102,7 +101,7 @@ def llm_eval_text_generation_pipeline( # pylint: disable=dangerous-default-valu evaluation_resource_name=str, ) - get_vertex_model_task = dsl.importer_node.importer( + get_vertex_model_task = dsl.importer( artifact_uri=( f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' ), From e1f0c010f80031ea09af69f9bbedf2e24509605f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 10 Aug 2023 17:51:05 -0400 Subject: [PATCH 079/253] feat(backend): add postgres initialization (#9798) * add postgres initialization * remove load balancer * go mod tidy * update license * license update for viewer * (test) disable controller license check * (test) disable persistence agence licence check * (test) disable scheduled workflow license check * (test) disable cacheserver license check * fix db config location * fix mysql support * test * test * no long set host address * address comments * address comments and enable license check * format * remove extra blank line * update licenses * cache server license * address comments * centralize error message * remove pv in postgres deployment --- backend/src/apiserver/client/sql.go | 36 ++- .../{ => client_manager}/client_manager.go | 272 ++++++++++++------ backend/src/apiserver/config/config.json | 15 +- backend/src/apiserver/main.go | 7 +- backend/test/integration/README.md | 15 + backend/test/integration/db_test.go | 79 +++++ backend/test/integration/flags.go | 2 + backend/test/integration/run_tests_locally.sh | 25 +- backend/third_party_licenses/apiserver.csv | 10 +- backend/third_party_licenses/cache_server.csv | 10 +- .../persistence_agent.csv | 10 +- backend/third_party_licenses/swf.csv | 10 +- backend/third_party_licenses/viewer.csv | 8 +- go.mod | 5 +- go.sum | 49 +++- .../third-party/postgresql/README.md | 4 +- .../postgresql/base/kustomization.yaml | 2 +- .../postgresql/base/pg-deployment.yaml | 12 +- .../postgresql/base/pg-service.yaml | 7 +- .../postgresql/base/pg-serviceaccount.yaml | 1 - 20 files changed, 428 insertions(+), 151 deletions(-) rename backend/src/apiserver/{ => client_manager}/client_manager.go (63%) create mode 100644 backend/test/integration/db_test.go diff --git a/backend/src/apiserver/client/sql.go b/backend/src/apiserver/client/sql.go index c4d44d61ae..026ef05619 100644 --- a/backend/src/apiserver/client/sql.go +++ b/backend/src/apiserver/client/sql.go @@ -15,13 +15,22 @@ package client import ( + "bytes" "fmt" "github.com/go-sql-driver/mysql" ) -func CreateMySQLConfig(user, password string, mysqlServiceHost string, - mysqlServicePort string, dbName string, 
mysqlGroupConcatMaxLen string, mysqlExtraParams map[string]string, +const ( + MYSQL_TEXT_FORMAT string = "longtext not null" + MYSQL_EXIST_ERROR string = "database exists" + + PGX_TEXT_FORMAT string = "text" + PGX_EXIST_ERROR string = "already exists" +) + +func CreateMySQLConfig(user, password, mysqlServiceHost, mysqlServicePort, + dbName, mysqlGroupConcatMaxLen string, mysqlExtraParams map[string]string, ) *mysql.Config { params := map[string]string{ "charset": "utf8", @@ -44,3 +53,26 @@ func CreateMySQLConfig(user, password string, mysqlServiceHost string, AllowNativePasswords: true, } } + +func CreatePostgreSQLConfig(user, password, postgresHost, dbName string, postgresPort uint16, +) string { + var b bytes.Buffer + if dbName != "" { + fmt.Fprintf(&b, "database=%s ", dbName) + } + if user != "" { + fmt.Fprintf(&b, "user=%s ", user) + } + if password != "" { + fmt.Fprintf(&b, "password=%s ", password) + } + if postgresHost != "" { + fmt.Fprintf(&b, "host=%s ", postgresHost) + } + if postgresPort != 0 { + fmt.Fprintf(&b, "port=%d ", postgresPort) + } + fmt.Fprint(&b, "sslmode=disable") + + return b.String() +} diff --git a/backend/src/apiserver/client_manager.go b/backend/src/apiserver/client_manager/client_manager.go similarity index 63% rename from backend/src/apiserver/client_manager.go rename to backend/src/apiserver/client_manager/client_manager.go index a02dcf53c0..ec247be375 100644 --- a/backend/src/apiserver/client_manager.go +++ b/backend/src/apiserver/client_manager/client_manager.go @@ -1,4 +1,4 @@ -// Copyright 2018 The Kubeflow Authors +// Copyright 2018-2023 The Kubeflow Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,15 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
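
The new `CreatePostgreSQLConfig` helper above assembles a libpq-style keyword/value connection string rather than a MySQL DSN. A minimal usage sketch, assuming placeholder credentials and the in-cluster service name used elsewhere in this patch (`postgres-service`):

```go
package main

import (
	"fmt"

	"github.com/kubeflow/pipelines/backend/src/apiserver/client"
)

func main() {
	// Placeholder values; the API server reads them from the
	// DBConfig.PostgreSQLConfig.* keys shown later in config.json.
	dsn := client.CreatePostgreSQLConfig("user", "password", "postgres-service", "mlpipeline", 5432)
	fmt.Println(dsn)
	// Prints a keyword/value string of the form:
	// database=mlpipeline user=user password=password host=postgres-service port=5432 sslmode=disable
}
```

Unlike the MySQL path, there is no `ClientFoundRows`-style option here; the string is handed straight to `sql.Open` with the `pgx` driver, as the client-manager changes below show.
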
-package main +package clientmanager import ( "database/sql" "fmt" "os" + "strings" "time" "github.com/cenkalti/backoff" + "github.com/go-sql-driver/mysql" "github.com/golang/glog" "github.com/jinzhu/gorm" _ "github.com/jinzhu/gorm/dialects/sqlite" @@ -35,25 +37,33 @@ import ( ) const ( - minioServiceHost = "MINIO_SERVICE_SERVICE_HOST" - minioServicePort = "MINIO_SERVICE_SERVICE_PORT" - minioServiceRegion = "MINIO_SERVICE_REGION" - minioServiceSecure = "MINIO_SERVICE_SECURE" - pipelineBucketName = "MINIO_PIPELINE_BUCKET_NAME" - pipelinePath = "MINIO_PIPELINE_PATH" - mysqlServiceHost = "DBConfig.Host" - mysqlServicePort = "DBConfig.Port" - mysqlUser = "DBConfig.User" - mysqlPassword = "DBConfig.Password" - mysqlDBName = "DBConfig.DBName" - mysqlGroupConcatMaxLen = "DBConfig.GroupConcatMaxLen" - mysqlExtraParams = "DBConfig.ExtraParams" - archiveLogFileName = "ARCHIVE_CONFIG_LOG_FILE_NAME" - archiveLogPathPrefix = "ARCHIVE_CONFIG_LOG_PATH_PREFIX" - dbConMaxLifeTime = "DBConfig.ConMaxLifeTime" - - visualizationServiceHost = "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST" - visualizationServicePort = "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT" + minioServiceHost = "MINIO_SERVICE_SERVICE_HOST" + minioServicePort = "MINIO_SERVICE_SERVICE_PORT" + minioServiceRegion = "MINIO_SERVICE_REGION" + minioServiceSecure = "MINIO_SERVICE_SECURE" + pipelineBucketName = "MINIO_PIPELINE_BUCKET_NAME" + pipelinePath = "MINIO_PIPELINE_PATH" + + mysqlServiceHost = "DBConfig.MySQLConfig.Host" + mysqlServicePort = "DBConfig.MySQLConfig.Port" + mysqlUser = "DBConfig.MySQLConfig.User" + mysqlPassword = "DBConfig.MySQLConfig.Password" + mysqlDBName = "DBConfig.MySQLConfig.DBName" + mysqlGroupConcatMaxLen = "DBConfig.MySQLConfig.GroupConcatMaxLen" + mysqlExtraParams = "DBConfig.MySQLConfig.ExtraParams" + + postgresHost = "DBConfig.PostgreSQLConfig.Host" + postgresPort = "DBConfig.PostgreSQLConfig.Port" + postgresUser = "DBConfig.PostgreSQLConfig.User" + postgresPassword = "DBConfig.PostgreSQLConfig.Password" + postgresDBName = "DBConfig.PostgreSQLConfig.DBName" + + archiveLogFileName = "ARCHIVE_CONFIG_LOG_FILE_NAME" + archiveLogPathPrefix = "ARCHIVE_CONFIG_LOG_PATH_PREFIX" + dbConMaxLifeTime = "DBConfig.ConMaxLifeTime" + + VisualizationServiceHost = "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST" + VisualizationServicePort = "ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT" initConnectionTimeout = "InitConnectionTimeout" @@ -158,7 +168,7 @@ func (c *ClientManager) Authenticators() []auth.Authenticator { func (c *ClientManager) init() { glog.Info("Initializing client manager") - db := initDBClient(common.GetDurationConfig(initConnectionTimeout)) + db := InitDBClient(common.GetDurationConfig(initConnectionTimeout)) db.SetConnMaxLifetime(common.GetDurationConfig(dbConMaxLifeTime)) // time @@ -208,16 +218,12 @@ func (c *ClientManager) Close() { c.db.Close() } -func initDBClient(initConnectionTimeout time.Duration) *storage.DB { - driverName := common.GetStringConfig("DBConfig.DriverName") - var arg string - - switch driverName { - case "mysql": - arg = initMysql(driverName, initConnectionTimeout) - default: - glog.Fatalf("Driver %v is not supported", driverName) - } +func InitDBClient(initConnectionTimeout time.Duration) *storage.DB { + // Allowed driverName values: + // 1) To use MySQL, use `mysql` + // 2) To use PostgreSQL, use `pgx` + driverName := common.GetStringConfig("DBDriverName") + arg := initDBDriver(driverName, initConnectionTimeout) // db is safe for concurrent use by multiple goroutines // and maintains its own 
pool of idle connections. @@ -250,8 +256,18 @@ func initDBClient(initConnectionTimeout time.Duration) *storage.DB { &model.ResourceReference{}, ) - if response.Error != nil { - glog.Fatalf("Failed to initialize the databases.") + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to initialize the databases. Error: %s", response.Error) + } + + var textFormat string + switch driverName { + case "mysql": + textFormat = client.MYSQL_TEXT_FORMAT + case "pgx": + textFormat = client.PGX_TEXT_FORMAT + default: + glog.Fatalf("Unsupported database driver %s, please use `mysql` for MySQL, or `pgx` for PostgreSQL.", driverName) } response = db.Model(&model.Experiment{}).RemoveIndex("Name") @@ -264,50 +280,71 @@ func initDBClient(initConnectionTimeout time.Duration) *storage.DB { glog.Fatalf("Failed to drop unique key on pipeline name. Error: %s", response.Error) } - response = db.Model(&model.ResourceReference{}).ModifyColumn("Payload", "longtext not null") + response = db.Model(&model.ResourceReference{}).ModifyColumn("Payload", textFormat) if response.Error != nil { glog.Fatalf("Failed to update the resource reference payload type. Error: %s", response.Error) } response = db.Model(&model.Run{}).AddIndex("experimentuuid_createatinsec", "ExperimentUUID", "CreatedAtInSec") - if response.Error != nil { + if ignoreAlreadyExistError(driverName, response.Error) != nil { glog.Fatalf("Failed to create index experimentuuid_createatinsec on run_details. Error: %s", response.Error) } response = db.Model(&model.Run{}).AddIndex("experimentuuid_conditions_finishedatinsec", "ExperimentUUID", "Conditions", "FinishedAtInSec") - if response.Error != nil { + if ignoreAlreadyExistError(driverName, response.Error) != nil { glog.Fatalf("Failed to create index experimentuuid_conditions_finishedatinsec on run_details. Error: %s", response.Error) } response = db.Model(&model.Run{}).AddIndex("namespace_createatinsec", "Namespace", "CreatedAtInSec") - if response.Error != nil { + if ignoreAlreadyExistError(driverName, response.Error) != nil { glog.Fatalf("Failed to create index namespace_createatinsec on run_details. Error: %s", response.Error) } response = db.Model(&model.Run{}).AddIndex("namespace_conditions_finishedatinsec", "Namespace", "Conditions", "FinishedAtInSec") - if response.Error != nil { + if ignoreAlreadyExistError(driverName, response.Error) != nil { glog.Fatalf("Failed to create index namespace_conditions_finishedatinsec on run_details. Error: %s", response.Error) } response = db.Model(&model.Pipeline{}).AddUniqueIndex("name_namespace_index", "Name", "Namespace") - if response.Error != nil { + if ignoreAlreadyExistError(driverName, response.Error) != nil { glog.Fatalf("Failed to create index name_namespace_index on run_details. Error: %s", response.Error) } - response = db.Model(&model.RunMetric{}). - AddForeignKey("RunUUID", "run_details(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* update */) - if response.Error != nil { - glog.Fatalf("Failed to create a foreign key for RunID in run_metrics table. Error: %s", response.Error) - } - response = db.Model(&model.PipelineVersion{}). - AddForeignKey("PipelineId", "pipelines(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* update */) - if response.Error != nil { - glog.Fatalf("Failed to create a foreign key for PipelineId in pipeline_versions table. Error: %s", response.Error) - } - response = db.Model(&model.Task{}). 
- AddForeignKey("RunUUID", "run_details(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* update */) - if response.Error != nil { - glog.Fatalf("Failed to create a foreign key for RunUUID in task table. Error: %s", response.Error) + switch driverName { + case "pgx": + response = db.Model(&model.RunMetric{}). + AddForeignKey("\"RunUUID\"", "run_details(\"UUID\")", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for RunUUID in run_metrics table. Error: %s", response.Error) + } + response = db.Model(&model.PipelineVersion{}). + AddForeignKey("\"PipelineId\"", "pipelines(\"UUID\")", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for PipelineId in pipeline_versions table. Error: %s", response.Error) + } + response = db.Model(&model.Task{}). + AddForeignKey("\"RunUUID\"", "run_details(\"UUID\")", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for RunUUID in task table. Error: %s", response.Error) + } + case "mysql": + response = db.Model(&model.RunMetric{}). + AddForeignKey("RunUUID", "run_details(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for RunUUID in run_metrics table. Error: %s", response.Error) + } + response = db.Model(&model.PipelineVersion{}). + AddForeignKey("PipelineId", "pipelines(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for PipelineId in pipeline_versions table. Error: %s", response.Error) + } + response = db.Model(&model.Task{}). + AddForeignKey("RunUUID", "run_details(UUID)", "CASCADE" /* onDelete */, "CASCADE" /* onUpdate */) + if ignoreAlreadyExistError(driverName, response.Error) != nil { + glog.Fatalf("Failed to create a foreign key for RunUUID in task table. Error: %s", response.Error) + } + default: + glog.Fatalf("Driver %v is not supported, use \"mysql\" for MySQL, or \"pgx\" for PostgreSQL", driverName) } // Data backfill for pipeline_versions if this is the first time for @@ -320,44 +357,66 @@ func initDBClient(initConnectionTimeout time.Duration) *storage.DB { glog.Fatalf("Failed to backfill experiment UUID in run_details table: %s", err) } - response = db.Model(&model.Pipeline{}).ModifyColumn("Description", "longtext not null") + response = db.Model(&model.Pipeline{}).ModifyColumn("Description", textFormat) if response.Error != nil { glog.Fatalf("Failed to update pipeline description type. Error: %s", response.Error) } - // If the old unique index idx_pipeline_version_uuid_name on pipeline_versions exists, remove it. - rows, err := db.Raw(`show index from pipeline_versions where Key_name='idx_pipeline_version_uuid_name'`).Rows() - if err != nil { - glog.Fatalf("Failed to query pipeline_version table's indices. Error: %s", err) - } - if err := rows.Err(); err != nil { - glog.Fatalf("Failed to query pipeline_version table's indices. 
Error: %s", err) - } - if rows.Next() { - db.Exec(`drop index idx_pipeline_version_uuid_name on pipeline_versions`) + // Because PostgreSQL was supported later, there's no need to delete the relic index + if driverName == "mysql" { + // If the old unique index idx_pipeline_version_uuid_name on pipeline_versions exists, remove it. + rows, err := db.Raw(`show index from pipeline_versions where Key_name='idx_pipeline_version_uuid_name'`).Rows() + if err != nil { + glog.Fatalf("Failed to query pipeline_version table's indices. Error: %s", err) + } + if err := rows.Err(); err != nil { + glog.Fatalf("Failed to query pipeline_version table's indices. Error: %s", err) + } + if rows.Next() { + db.Exec(`drop index idx_pipeline_version_uuid_name on pipeline_versions`) + } + defer rows.Close() } - defer rows.Close() return storage.NewDB(db.DB(), storage.NewMySQLDialect()) } -// Initialize the connection string for connecting to Mysql database -// Format would be something like root@tcp(ip:port)/dbname?charset=utf8&loc=Local&parseTime=True. -func initMysql(driverName string, initConnectionTimeout time.Duration) string { - mysqlConfig := client.CreateMySQLConfig( - common.GetStringConfigWithDefault(mysqlUser, "root"), - common.GetStringConfigWithDefault(mysqlPassword, ""), - common.GetStringConfigWithDefault(mysqlServiceHost, "mysql"), - common.GetStringConfigWithDefault(mysqlServicePort, "3306"), - "", - common.GetStringConfigWithDefault(mysqlGroupConcatMaxLen, "1024"), - common.GetMapConfig(mysqlExtraParams), - ) +// Initializes Database driver. Use `driverName` to indicate which type of DB to use: +// 1) "mysql" for MySQL +// 2) "pgx" for PostgreSQL +func initDBDriver(driverName string, initConnectionTimeout time.Duration) string { + var sqlConfig, dbName string + var mysqlConfig *mysql.Config + switch driverName { + case "mysql": + mysqlConfig = client.CreateMySQLConfig( + common.GetStringConfigWithDefault(mysqlUser, "root"), + common.GetStringConfigWithDefault(mysqlPassword, ""), + common.GetStringConfigWithDefault(mysqlServiceHost, "mysql"), + common.GetStringConfigWithDefault(mysqlServicePort, "3306"), + "", + common.GetStringConfigWithDefault(mysqlGroupConcatMaxLen, "1024"), + common.GetMapConfig(mysqlExtraParams), + ) + sqlConfig = mysqlConfig.FormatDSN() + dbName = common.GetStringConfig(mysqlDBName) + case "pgx": + sqlConfig = client.CreatePostgreSQLConfig( + common.GetStringConfigWithDefault(postgresUser, "user"), + common.GetStringConfigWithDefault(postgresPassword, "password"), + common.GetStringConfigWithDefault(postgresHost, "postgresql"), + "postgres", + uint16(common.GetIntConfigWithDefault(postgresPort, 5432)), + ) + dbName = common.GetStringConfig(postgresDBName) + default: + glog.Fatalf("Driver %v is not supported, use \"mysql\" for MySQL, or \"pgx\" for PostgreSQL", driverName) + } var db *sql.DB var err error operation := func() error { - db, err = sql.Open(driverName, mysqlConfig.FormatDSN()) + db, err = sql.Open(driverName, sqlConfig) if err != nil { return err } @@ -365,7 +424,6 @@ func initMysql(driverName string, initConnectionTimeout time.Duration) string { } b := backoff.NewExponentialBackOff() b.MaxElapsedTime = initConnectionTimeout - // err = backoff.Retry(operation, b) err = backoff.RetryNotify(operation, b, func(e error, duration time.Duration) { glog.Errorf("%v", e) }) @@ -374,10 +432,9 @@ func initMysql(driverName string, initConnectionTimeout time.Duration) string { util.TerminateIfError(err) // Create database if not exist - dbName := 
common.GetStringConfig(mysqlDBName) operation = func() error { - _, err = db.Exec(fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", dbName)) - if err != nil { + _, err = db.Exec(fmt.Sprintf("CREATE DATABASE %s", dbName)) + if ignoreAlreadyExistError(driverName, err) != nil { return err } return nil @@ -387,13 +444,30 @@ func initMysql(driverName string, initConnectionTimeout time.Duration) string { err = backoff.Retry(operation, b) util.TerminateIfError(err) - mysqlConfig.DBName = dbName - // When updating, return rows matched instead of rows affected. This counts rows that are being - // set as the same values as before. If updating using a primary key and rows matched is 0, then - // it means this row is not found. - // Config reference: https://github.com/go-sql-driver/mysql#clientfoundrows - mysqlConfig.ClientFoundRows = true - return mysqlConfig.FormatDSN() + + switch driverName { + case "mysql": + mysqlConfig.DBName = dbName + // When updating, return rows matched instead of rows affected. This counts rows that are being + // set as the same values as before. If updating using a primary key and rows matched is 0, then + // it means this row is not found. + // Config reference: https://github.com/go-sql-driver/mysql#clientfoundrows + mysqlConfig.ClientFoundRows = true + sqlConfig = mysqlConfig.FormatDSN() + case "pgx": + // Note: postgreSQL does not have the option `ClientFoundRows` + // Config reference: https://www.postgresql.org/docs/current/libpq-connect.html + sqlConfig = client.CreatePostgreSQLConfig( + common.GetStringConfigWithDefault(postgresUser, "root"), + common.GetStringConfigWithDefault(postgresPassword, ""), + common.GetStringConfigWithDefault(postgresHost, "postgresql"), + dbName, + uint16(common.GetIntConfigWithDefault(postgresPort, 5432)), + ) + default: + glog.Fatalf("Driver %v is not supported, use \"mysql\" for MySQL, or \"pgx\" for PostgreSQL", driverName) + } + return sqlConfig } func initMinioClient(initConnectionTimeout time.Duration) storage.ObjectStoreInterface { @@ -448,8 +522,8 @@ func initLogArchive() (logArchive archive.LogArchiveInterface) { return } -// newClientManager creates and Init a new instance of ClientManager. -func newClientManager() ClientManager { +// NewClientManager creates and Init a new instance of ClientManager. +func NewClientManager() ClientManager { clientManager := ClientManager{} clientManager.init() @@ -489,7 +563,7 @@ func initPipelineVersionsFromPipelines(db *gorm.DB) { func backfillExperimentIDToRunTable(db *gorm.DB) error { // check if there is any row in the run table has experiment ID being empty - rows, err := db.CommonDB().Query(`SELECT ExperimentUUID FROM run_details WHERE ExperimentUUID = '' LIMIT 1`) + rows, err := db.CommonDB().Query("SELECT \"ExperimentUUID\" FROM run_details WHERE \"ExperimentUUID\" = '' LIMIT 1") if err != nil { return err } @@ -516,3 +590,15 @@ func backfillExperimentIDToRunTable(db *gorm.DB) error { `) return err } + +// Returns the same error, if it's not "already exists" related. +// Otherwise, return nil. 
+func ignoreAlreadyExistError(driverName string, err error) error { + if driverName == "pgx" && err != nil && strings.Contains(err.Error(), client.PGX_EXIST_ERROR) { + return nil + } + if driverName == "mysql" && err != nil && strings.Contains(err.Error(), client.MYSQL_EXIST_ERROR) { + return nil + } + return err +} diff --git a/backend/src/apiserver/config/config.json b/backend/src/apiserver/config/config.json index e27bab65f6..251d22a387 100644 --- a/backend/src/apiserver/config/config.json +++ b/backend/src/apiserver/config/config.json @@ -1,10 +1,13 @@ { "DBConfig": { - "DriverName": "mysql", - "DataSourceName": "", - "DBName": "mlpipeline", - "GroupConcatMaxLen": "4194304", - "ConMaxLifeTime": "120s" + "MySQLConfig": { + "DataSourceName": "", + "DBName": "mlpipeline", + "GroupConcatMaxLen": "4194304" + }, + "PostgreSQLConfig": { + "DBName": "mlpipeline" + } }, "ObjectStoreConfig": { "AccessKey": "minio", @@ -12,6 +15,8 @@ "BucketName": "mlpipeline", "PipelinePath": "pipelines" }, + "DBDriverName": "mysql", + "ConMaxLifeTime": "120s", "ARCHIVE_CONFIG_LOG_FILE_NAME": "main.log", "ARCHIVE_CONFIG_LOG_PATH_PREFIX": "/artifacts", "InitConnectionTimeout": "6m", diff --git a/backend/src/apiserver/main.go b/backend/src/apiserver/main.go index 4efab33d56..276a39ee1b 100644 --- a/backend/src/apiserver/main.go +++ b/backend/src/apiserver/main.go @@ -35,6 +35,7 @@ import ( "github.com/grpc-ecosystem/grpc-gateway/runtime" apiv1beta1 "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" apiv2beta1 "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client" + cm "github.com/kubeflow/pipelines/backend/src/apiserver/client_manager" "github.com/kubeflow/pipelines/backend/src/apiserver/common" "github.com/kubeflow/pipelines/backend/src/apiserver/model" "github.com/kubeflow/pipelines/backend/src/apiserver/resource" @@ -59,7 +60,7 @@ func main() { flag.Parse() initConfig() - clientManager := newClientManager() + clientManager := cm.NewClientManager() resourceManager := resource.NewResourceManager( &clientManager, ) @@ -119,8 +120,8 @@ func startRpcServer(resourceManager *resource.ResourceManager) { s, server.NewVisualizationServer( resourceManager, - common.GetStringConfig(visualizationServiceHost), - common.GetStringConfig(visualizationServicePort), + common.GetStringConfig(cm.VisualizationServiceHost), + common.GetStringConfig(cm.VisualizationServicePort), )) apiv1beta1.RegisterAuthServiceServer(s, server.NewAuthServer(resourceManager)) diff --git a/backend/test/integration/README.md b/backend/test/integration/README.md index ee6e510971..d87a330310 100644 --- a/backend/test/integration/README.md +++ b/backend/test/integration/README.md @@ -5,7 +5,22 @@ ### How to run +The default integration test will test the default Database, MySQL. + 1. Configure kubectl to connect to your kfp cluster. 2. Run the following for all integration tests: `NAMESPACE= ./run_tests_locally.sh`. 3. Or run the following to select certain tests: `NAMESPACE= ./run_tests_locally.sh -testify.m Job`. Reference: https://stackoverflow.com/a/43312451 + +### Run database tests with PostgreSQL + +To run this test, you need to first deploy the PostgreSQL images on your Kubernetes cluster. For how to deploy, +see [instructions here](../../../manifests/kustomize/third-party/postgresql/README.md). + +When testing against postgreSQL, all integration tests with MySQL will be disabled. 
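
One subtlety in the client-manager changes above is worth a short illustration: PostgreSQL has no `CREATE DATABASE IF NOT EXISTS`, so the code now issues a plain `CREATE DATABASE` and tolerates the driver-specific "already exists" errors (the `MYSQL_EXIST_ERROR` / `PGX_EXIST_ERROR` strings added to `sql.go`). A minimal sketch of that pattern, assuming a reachable local PostgreSQL and placeholder credentials:

```go
package main

import (
	"database/sql"
	"fmt"
	"log"
	"strings"

	_ "github.com/jackc/pgx/v5/stdlib" // registers the "pgx" driver

	"github.com/kubeflow/pipelines/backend/src/apiserver/client"
)

// ensureDatabase issues CREATE DATABASE unconditionally and treats a
// duplicate-database error as success, mirroring ignoreAlreadyExistError.
func ensureDatabase(db *sql.DB, name string) error {
	_, err := db.Exec(fmt.Sprintf("CREATE DATABASE %s", name))
	if err != nil && strings.Contains(err.Error(), client.PGX_EXIST_ERROR) {
		return nil
	}
	return err
}

func main() {
	// Placeholder DSN; the server builds the real one from DBConfig.PostgreSQLConfig.*.
	db, err := sql.Open("pgx", "host=localhost port=5432 user=user password=password sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	if err := ensureDatabase(db, "mlpipeline"); err != nil {
		log.Fatal(err)
	}
}
```
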
Use an argument `postgres` to run +test against a PostgreSQL database: +``` +NAMESPACE= ./run_tests_locally.sh postgres +``` + + diff --git a/backend/test/integration/db_test.go b/backend/test/integration/db_test.go new file mode 100644 index 0000000000..be65889eff --- /dev/null +++ b/backend/test/integration/db_test.go @@ -0,0 +1,79 @@ +// Copyright 2023 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package integration + +import ( + "testing" + "time" + + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + _ "github.com/jackc/pgx/v5/stdlib" + cm "github.com/kubeflow/pipelines/backend/src/apiserver/client_manager" +) + +type DBTestSuite struct { + suite.Suite +} + +// Skip if it's not integration test running. +func (s *DBTestSuite) SetupTest() { + if !*runIntegrationTests { + s.T().SkipNow() + return + } +} + +// Test MySQL initializes correctly +func (s *DBTestSuite) TestInitDBClient_MySQL() { + if *runPostgreSQLTests { + s.T().SkipNow() + return + } + t := s.T() + viper.Set("DBDriverName", "mysql") + viper.Set("DBConfig.MySQLConfig.DBName", "mlpipeline") + // The default port-forwarding IP address that test uses is different compared to production + if *localTest { + viper.Set("DBConfig.MySQLConfig.Host", "localhost") + } + duration, _ := time.ParseDuration("1m") + db := cm.InitDBClient(duration) + assert.NotNil(t, db) +} + +// Test PostgreSQL initializes correctly +func (s *DBTestSuite) TestInitDBClient_PostgreSQL() { + if !*runPostgreSQLTests { + s.T().SkipNow() + return + } + t := s.T() + viper.Set("DBDriverName", "pgx") + viper.Set("DBConfig.PostgreSQLConfig.DBName", "mlpipeline") + // The default port-forwarding IP address that test uses is different compared to production + viper.Set("DBConfig.PostgreSQLConfig.Host", "127.0.0.3") + viper.Set("DBConfig.PostgreSQLConfig.User", "user") + viper.Set("DBConfig.PostgreSQLConfig.Password", "password") + duration, _ := time.ParseDuration("1m") + db := cm.InitDBClient(duration) + assert.NotNil(t, db) +} + +func TestDB(t *testing.T) { + suite.Run(t, new(DBTestSuite)) +} diff --git a/backend/test/integration/flags.go b/backend/test/integration/flags.go index 7585d2d75c..95af0e0d55 100644 --- a/backend/test/integration/flags.go +++ b/backend/test/integration/flags.go @@ -24,6 +24,8 @@ var ( initializeTimeout = flag.Duration("initializeTimeout", 2*time.Minute, "Duration to wait for test initialization") runIntegrationTests = flag.Bool("runIntegrationTests", false, "Whether to also run integration tests that call the service") runUpgradeTests = flag.Bool("runUpgradeTests", false, "Whether to run upgrade tests") + runPostgreSQLTests = flag.Bool("runPostgreSQLTests", false, "Run integration test with PostgreSQL") + localTest = flag.Bool("localTest", false, "Run integration test locally") ) /** diff --git a/backend/test/integration/run_tests_locally.sh b/backend/test/integration/run_tests_locally.sh index 8dfe095e00..371ad83f4e 100755 --- 
a/backend/test/integration/run_tests_locally.sh +++ b/backend/test/integration/run_tests_locally.sh @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -set -e +set -ex if [ -z "${NAMESPACE}" ]; then echo "NAMESPACE env var is not provided, please set it to your KFP namespace" @@ -32,7 +32,26 @@ case "$response" in ;; esac +function cleanup() { + echo "killing kubectl port forward before exit" + kill "$PORT_FORWARD_PID" +} +trap cleanup EXIT + echo "Starting integration tests..." -command="go test -v ./... -namespace ${NAMESPACE} -args -runIntegrationTests=true -isDevMode=true" -echo $command "$@" + +if [ "$1" == "postgres" ]; then + echo "Starting PostgreSQL DB port forwarding..." + kubectl -n "$NAMESPACE" port-forward svc/postgres-service 5432:5432 --address="127.0.0.3" & PORT_FORWARD_PID=$! + # wait for kubectl port forward + sleep 10 + command="go test -v ./... -namespace ${NAMESPACE} -args -runIntegrationTests=true -isDevMode=true -runPostgreSQLTests=true -localTest=true" +else + echo "Starting MySQL DB port forwarding..." + kubectl -n "$NAMESPACE" port-forward svc/mysql 3306:3306 --address=localhost & PORT_FORWARD_PID=$! + # wait for kubectl port forward + sleep 10 + command="go test -v ./... -namespace ${NAMESPACE} -args -runIntegrationTests=true -isDevMode=true -localTest=true" +fi + $command "$@" diff --git a/backend/third_party_licenses/apiserver.csv b/backend/third_party_licenses/apiserver.csv index 2e2bb84ee4..ef4893cbb1 100644 --- a/backend/third_party_licenses/apiserver.csv +++ b/backend/third_party_licenses/apiserver.csv @@ -104,12 +104,12 @@ github.com/valyala/fasttemplate,https://github.com/valyala/fasttemplate/blob/v1. go.mongodb.org/mongo-driver,https://github.com/mongodb/mongo-go-driver/blob/v1.8.2/LICENSE,Apache-2.0 go.opencensus.io,https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/LICENSE,Apache-2.0 gocloud.dev,https://github.com/google/go-cloud/blob/v0.22.0/LICENSE,Apache-2.0 -golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/86341886:LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/v0.9.0:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause golang.org/x/xerrors,https://cs.opensource.google/go/x/xerrors/+/5ec99f83:LICENSE,BSD-3-Clause google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/LICENSE,BSD-3-Clause diff --git a/backend/third_party_licenses/cache_server.csv b/backend/third_party_licenses/cache_server.csv index a920b3116c..20d1fe62be 100644 --- a/backend/third_party_licenses/cache_server.csv +++ b/backend/third_party_licenses/cache_server.csv @@ -71,12 
+71,12 @@ github.com/spf13/pflag,https://github.com/spf13/pflag/blob/v1.0.5/LICENSE,BSD-3- github.com/valyala/bytebufferpool,https://github.com/valyala/bytebufferpool/blob/v1.0.0/LICENSE,MIT github.com/valyala/fasttemplate,https://github.com/valyala/fasttemplate/blob/v1.2.1/LICENSE,MIT go.mongodb.org/mongo-driver,https://github.com/mongodb/mongo-go-driver/blob/v1.8.2/LICENSE,Apache-2.0 -golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/86341886:LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/v0.9.0:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/persistence_agent.csv b/backend/third_party_licenses/persistence_agent.csv index edaf797e87..102c483cbd 100644 --- a/backend/third_party_licenses/persistence_agent.csv +++ b/backend/third_party_licenses/persistence_agent.csv @@ -75,12 +75,12 @@ github.com/subosito/gotenv,https://github.com/subosito/gotenv/blob/v1.2.0/LICENS github.com/valyala/bytebufferpool,https://github.com/valyala/bytebufferpool/blob/v1.0.0/LICENSE,MIT github.com/valyala/fasttemplate,https://github.com/valyala/fasttemplate/blob/v1.2.1/LICENSE,MIT go.mongodb.org/mongo-driver,https://github.com/mongodb/mongo-go-driver/blob/v1.8.2/LICENSE,Apache-2.0 -golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/86341886:LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/v0.9.0:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 
google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/swf.csv b/backend/third_party_licenses/swf.csv index da73199a71..54d644960f 100644 --- a/backend/third_party_licenses/swf.csv +++ b/backend/third_party_licenses/swf.csv @@ -77,12 +77,12 @@ github.com/subosito/gotenv,https://github.com/subosito/gotenv/blob/v1.2.0/LICENS github.com/valyala/bytebufferpool,https://github.com/valyala/bytebufferpool/blob/v1.0.0/LICENSE,MIT github.com/valyala/fasttemplate,https://github.com/valyala/fasttemplate/blob/v1.2.1/LICENSE,MIT go.mongodb.org/mongo-driver,https://github.com/mongodb/mongo-go-driver/blob/v1.8.2/LICENSE,Apache-2.0 -golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/86341886:LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/crypto,https://cs.opensource.google/go/x/crypto/+/v0.9.0:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/viewer.csv b/backend/third_party_licenses/viewer.csv index 678302087b..f6589f7589 100644 --- a/backend/third_party_licenses/viewer.csv +++ b/backend/third_party_licenses/viewer.csv @@ -35,11 +35,11 @@ github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/L github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 github.com/spf13/pflag,https://github.com/spf13/pflag/blob/v1.0.5/LICENSE,BSD-3-Clause -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause 
gomodules.xyz/jsonpatch/v2,https://github.com/gomodules/jsonpatch/blob/v2.2.0/v2/LICENSE,Apache-2.0 google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause diff --git a/go.mod b/go.mod index c0aed537fe..d88deba061 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/gorilla/mux v1.8.0 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/grpc-ecosystem/grpc-gateway v1.16.0 + github.com/jackc/pgx/v5 v5.4.2 github.com/jinzhu/gorm v1.9.1 github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.4 // indirect @@ -41,9 +42,9 @@ require ( github.com/robfig/cron v1.2.0 github.com/sirupsen/logrus v1.8.1 github.com/spf13/viper v1.10.1 - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.8.1 gocloud.dev v0.22.0 - golang.org/x/net v0.0.0-20220225172249-27dd8689420f + golang.org/x/net v0.10.0 google.golang.org/genproto v0.0.0-20220310185008-1973136f34c6 google.golang.org/grpc v1.44.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0 diff --git a/go.sum b/go.sum index 7c0e71a44d..70508caa62 100644 --- a/go.sum +++ b/go.sum @@ -834,6 +834,13 @@ github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/C github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/itchyny/gojq v0.12.6/go.mod h1:ZHrkfu7A+RbZLy5J1/JKpS4poEqrzItSTGDItqsfP0A= github.com/itchyny/timefmt-go v0.1.3/go.mod h1:0osSSCQSASBJMsIZnhAaF1C2fCBTJZXrnj37mG8/c+A= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk= +github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.4.2 h1:u1gmGDwbdRUZiwisBm/Ky2M14uQyUP65bG8+20nnyrg= +github.com/jackc/pgx/v5 v5.4.2/go.mod h1:q6iHT8uDNXWiFNOlRqJzBTaSH3+2xCXkokxHZC5qWFY= +github.com/jackc/puddle/v2 v2.2.0/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/jawher/mow.cli v1.0.4/go.mod h1:5hQj2V8g+qYmLUVWqu4Wuja1pI57M83EChYLVZ0sMKk= github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= @@ -1257,6 +1264,8 @@ github.com/streadway/amqp v1.0.0/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1Sd github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -1264,8 +1273,11 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stripe/stripe-go v70.15.0+incompatible/go.mod h1:A1dQZmO/QypXmsL0T8axYZkSN/uA/T/A64pfKdBAMiY= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= @@ -1332,6 +1344,7 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9/go.mod h1:E1AXubJBdNmFERAOucpDIxNzeGfLzg0mYh+UfMWdChA= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= @@ -1445,8 +1458,9 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220112180741-5e0467b6c7ce/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220128200615-198e4374d7ed/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.0.0-20220214200702-86341886e292 h1:f+lwQ+GtmgoY+A2YaQxlSOnDjXcQ7ZRLWOHbC6HtRqE= golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g= +golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -1492,8 +1506,10 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0 
h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1570,8 +1586,11 @@ golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220121210141-e204ce36a2ba/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/oauth2 v0.0.0-20180227000427-d7d64896b5ff/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -1605,8 +1624,10 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180224232135-f6cff0780e54/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1722,13 +1743,19 @@ golang.org/x/sys v0.0.0-20220111092808-5a964db01320/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8 h1:OH54vjqzRWmbJ62fjuhxy7AxFFgoHN0/DPc/UrL8cAs= golang.org/x/sys 
v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1738,8 +1765,10 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1835,8 +1864,10 @@ golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210820212750-d4cc65f0b2ff/go.mod h1:YD9qOF0M9xpSpdWTBbzEl5e/RnCefISl8E5Noe10jFM= golang.org/x/tools v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= -golang.org/x/tools v0.1.10 h1:QjFRCZxdOhBJ/UNgnBZLbNV13DlbnK0quyivTnXJM20= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/manifests/kustomize/third-party/postgresql/README.md b/manifests/kustomize/third-party/postgresql/README.md index 5096dddf30..c0c1cda433 100644 --- a/manifests/kustomize/third-party/postgresql/README.md +++ b/manifests/kustomize/third-party/postgresql/README.md @@ -3,7 +3,7 @@ ```bash # In this folder of manifests/kustomize/third-party/postgresql rm -rf build -mkdir buidl +mkdir build kustomize build ./base -o build ``` @@ -11,5 +11,5 @@ kustomize build ./base -o build ```bash # In this folder of manifests/kustomize/third-party/postgresql -kubectl apply -f build +kubectl -n apply -f build ``` \ No newline at end of file diff --git a/manifests/kustomize/third-party/postgresql/base/kustomization.yaml b/manifests/kustomize/third-party/postgresql/base/kustomization.yaml index cc22ca5f7e..e7951ef9f4 100644 --- a/manifests/kustomize/third-party/postgresql/base/kustomization.yaml +++ b/manifests/kustomize/third-party/postgresql/base/kustomization.yaml @@ -5,4 +5,4 @@ resources: - pg-pvc.yaml - pg-service.yaml - pg-secret.yaml -- pg-serviceaccount.yaml \ No newline at end of file +- pg-serviceaccount.yaml diff --git a/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml b/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml index 9979be0238..bd0bf4baaf 100644 --- a/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml +++ b/manifests/kustomize/third-party/postgresql/base/pg-deployment.yaml @@ -1,14 +1,15 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: postgres-deployment + name: postgres labels: app: postgres spec: - replicas: 2 selector: matchLabels: app: postgres + strategy: + type: Recreate template: metadata: labels: @@ -33,10 +34,15 @@ spec: ports: - containerPort: 5432 name: postgres + readinessProbe: + exec: + command: ["psql", "-U", "user", "-d", "postgres", "-c", "SELECT 1"] + initialDelaySeconds: 15 + timeoutSeconds: 2 volumeMounts: - name: postgres-stateful-data mountPath: /var/lib/postgresql/data volumes: - name: postgres-stateful-data persistentVolumeClaim: - claimName: postgres-pvc \ No newline at end of file + claimName: postgres-pvc diff --git a/manifests/kustomize/third-party/postgresql/base/pg-service.yaml b/manifests/kustomize/third-party/postgresql/base/pg-service.yaml index 3e365fbdd4..002eff982d 100644 --- a/manifests/kustomize/third-party/postgresql/base/pg-service.yaml +++ b/manifests/kustomize/third-party/postgresql/base/pg-service.yaml @@ -6,7 +6,8 @@ metadata: app: postgres spec: ports: - - port: 5432 - type: LoadBalancer + - protocol: TCP + port: 5432 + targetPort: 5432 selector: - app: postgres \ No newline at end of file + app: postgres diff --git a/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml b/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml index 87dacc7a3d..4397c93c10 100644 --- a/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml +++ b/manifests/kustomize/third-party/postgresql/base/pg-serviceaccount.yaml @@ -2,4 +2,3 @@ apiVersion: v1 kind: ServiceAccount metadata: name: postgresql - From 3e453289816ab2e2b0cb7d00365e9e29335c3553 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 10 Aug 2023 15:50:16 -0700 Subject: [PATCH 080/253] chore(components): INTERNAL PiperOrigin-RevId: 555686552 --- 
.../v1/automl/tabular/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py index 2522350d36..840cd055f5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py @@ -14,6 +14,8 @@ """GA AutoML tabular components.""" +import os + from google_cloud_pipeline_components.v1.automl.tabular.cv_trainer import automl_tabular_cv_trainer as CvTrainerOp from google_cloud_pipeline_components.v1.automl.tabular.ensemble import automl_tabular_ensemble as EnsembleOp from google_cloud_pipeline_components.v1.automl.tabular.finalizer import automl_tabular_finalizer as FinalizerOp @@ -23,6 +25,7 @@ from google_cloud_pipeline_components.v1.automl.tabular.stats_and_example_gen import tabular_stats_and_example_gen as StatsAndExampleGenOp from google_cloud_pipeline_components.v1.automl.tabular.training_configurator_and_validator import training_configurator_and_validator as TrainingConfiguratorAndValidatorOp from google_cloud_pipeline_components.v1.automl.tabular.transform import automl_tabular_transform as TransformOp +from kfp import components __all__ = [ 'CvTrainerOp', @@ -35,3 +38,9 @@ 'SplitMaterializedDataOp', 'TrainingConfiguratorAndValidatorOp', ] + +automl_tabular_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join(os.path.dirname(__file__), 'automl_tabular_pipeline.yaml') +) From 83d7e719d08c73c2c535722b66b77cdf0cb4cd08 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:44:33 -0700 Subject: [PATCH 081/253] fix(frontend): Introduce ALLOWED_ARTIFACT_DOMAIN_REGEX flag to prevent accessing undesired domains. Remove user input string from server response. (#9844) * fix(frontend): Introduce ALLOWED_ARTIFACT_DOMAIN_REGEX flag to prevent accessing undesired domains. Remove user input string from server response. 1. Remove user query parameter string from server response. 2. Introduce ALLOWED_ARTIFACT_DOMAIN_REGEX to allow configuration of allowed domains querying. 3. By default, ALLOWED_ARTIFACT_DOMAIN_REGEX is match all. Default configuration is meant to be demo purpose and not for production. 4. Users can provide ALLOWED_ARTIFACT_DOMAIN_REGEX environment variable to restrict object storage endpoint querying domains. 
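As an illustrative sketch only (the helper name and relative path are taken from this PR's new frontend/server/handlers/domain-checker.ts; the sample pattern is an assumption, not a recommended value), the flag behaves as follows. Note the helper matches against the scheme-prefixed host, so stricter patterns should generally be left unanchored:

    // Sketch: exercising the new helper directly from frontend/server.
    import { isAllowedDomain } from './handlers/domain-checker';

    const allowAll = '^.*$';                      // default ALLOWED_ARTIFACT_DOMAIN_REGEX (demo only)
    const minioOnly = 'minio-service\\.kubeflow'; // assumed stricter pattern for an in-cluster MinIO
    isAllowedDomain('http://minio-service.kubeflow:9000/mlpipeline/a.csv', allowAll);  // true
    isAllowedDomain('http://minio-service.kubeflow:9000/mlpipeline/a.csv', minioOnly); // true
    isAllowedDomain('https://attacker.example.com/exfil', minioOnly);                  // false; the rejected URL is logged server-side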
* add test --- frontend/server/app.ts | 1 + frontend/server/configs.ts | 4 ++ frontend/server/handlers/artifacts.ts | 48 +++++++++++-------- frontend/server/handlers/domain-checker.ts | 32 +++++++++++++ .../integration-tests/artifact-get.test.ts | 18 ++----- 5 files changed, 68 insertions(+), 35 deletions(-) create mode 100644 frontend/server/handlers/domain-checker.ts diff --git a/frontend/server/app.ts b/frontend/server/app.ts index 9498d98283..f6ae1988bf 100644 --- a/frontend/server/app.ts +++ b/frontend/server/app.ts @@ -131,6 +131,7 @@ function createUIServer(options: UIConfigs) { '/artifacts/*', getArtifactsProxyHandler({ enabled: options.artifacts.proxy.enabled, + allowedDomain: options.artifacts.allowedDomain, namespacedServiceGetter: getArtifactServiceGetter(options.artifacts.proxy), }), ); diff --git a/frontend/server/configs.ts b/frontend/server/configs.ts index 09b2884acd..f6ef044d12 100644 --- a/frontend/server/configs.ts +++ b/frontend/server/configs.ts @@ -64,6 +64,8 @@ export function loadConfigs(argv: string[], env: ProcessEnv): UIConfigs { AWS_S3_ENDPOINT, /** http/https base URL */ HTTP_BASE_URL = '', + /** By default, allowing access to all domains. Modify this flag to allow querying matching domains */ + ALLOWED_ARTIFACT_DOMAIN_REGEX = '^.*$', /** http/https fetch with this authorization header key (for example: 'Authorization') */ HTTP_AUTHORIZATION_KEY = '', /** http/https fetch with this authorization header value by default when absent in client request at above key */ @@ -155,6 +157,7 @@ export function loadConfigs(argv: string[], env: ProcessEnv): UIConfigs { }, proxy: loadArtifactsProxyConfig(env), streamLogsFromServerApi: asBool(STREAM_LOGS_FROM_SERVER_API), + allowedDomain: ALLOWED_ARTIFACT_DOMAIN_REGEX, }, metadata: { envoyService: { @@ -274,6 +277,7 @@ export interface UIConfigs { http: HttpConfigs; proxy: ArtifactsProxyConfig; streamLogsFromServerApi: boolean; + allowedDomain: string; }; pod: { logContainerName: string; diff --git a/frontend/server/handlers/artifacts.ts b/frontend/server/handlers/artifacts.ts index 7af04b4aac..cbc8a5da84 100644 --- a/frontend/server/handlers/artifacts.ts +++ b/frontend/server/handlers/artifacts.ts @@ -23,6 +23,7 @@ import proxy from 'http-proxy-middleware'; import { HACK_FIX_HPM_PARTIAL_RESPONSE_HEADERS } from '../consts'; import * as fs from 'fs'; +import { isAllowedDomain } from './domain-checker'; /** * ArtifactsQueryStrings describes the expected query strings key value pairs @@ -56,11 +57,12 @@ export function getArtifactsHandler({ aws: AWSConfigs; http: HttpConfigs; minio: MinioConfigs; + allowedDomain: string; }; tryExtract: boolean; useParameter: boolean; }): Handler { - const { aws, http, minio } = artifactsConfigs; + const { aws, http, minio, allowedDomain } = artifactsConfigs; return async (req, res) => { const source = useParameter ? req.params.source : req.query.source; const bucket = useParameter ? 
req.params.bucket : req.query.bucket; @@ -109,6 +111,7 @@ export function getArtifactsHandler({ case 'http': case 'https': getHttpArtifactsHandler( + allowedDomain, getHttpUrl(source, http.baseUrl || '', bucket, key), http.auth, peek, @@ -126,7 +129,7 @@ export function getArtifactsHandler({ break; default: - res.status(500).send('Unknown storage source: ' + source); + res.status(500).send('Unknown storage source'); return; } }; @@ -146,6 +149,7 @@ function getHttpUrl(source: 'http' | 'https', baseUrl: string, bucket: string, k } function getHttpArtifactsHandler( + allowedDomain: string, url: string, auth: { key: string; @@ -162,9 +166,13 @@ function getHttpArtifactsHandler( headers[auth.key] = req.headers[auth.key] || req.headers[auth.key.toLowerCase()] || auth.defaultValue; } + if (!isAllowedDomain(url, allowedDomain)) { + res.status(500).send(`Domain not allowed.`); + return; + } const response = await fetch(url, { headers }); response.body - .on('error', err => res.status(500).send(`Unable to retrieve artifact at ${url}: ${err}`)) + .on('error', err => res.status(500).send(`Unable to retrieve artifact: ${err}`)) .pipe(new PreviewStream({ peek })) .pipe(res); }; @@ -178,20 +186,12 @@ function getMinioArtifactHandler( try { const stream = await getObjectStream(options); stream - .on('error', err => - res - .status(500) - .send( - `Failed to get object in bucket ${options.bucket} at path ${options.key}: ${err}`, - ), - ) + .on('error', err => res.status(500).send(`Failed to get object in bucket: ${err}`)) .pipe(new PreviewStream({ peek })) .pipe(res); } catch (err) { console.error(err); - res - .status(500) - .send(`Failed to get object in bucket ${options.bucket} at path ${options.key}: ${err}`); + res.status(500).send(`Failed to get object in bucket: ${err}`); } }; } @@ -288,7 +288,8 @@ function getVolumeArtifactsHandler(options: { bucket: string; key: string }, pee filePathInVolume: key, }); if (parseError) { - res.status(404).send(`Failed to open volume://${bucket}/${key}, ${parseError}`); + console.log(`Failed to open volume: ${parseError}`); + res.status(404).send(`Failed to open volume.`); return; } @@ -297,9 +298,7 @@ function getVolumeArtifactsHandler(options: { bucket: string; key: string }, pee if (stat.isDirectory()) { res .status(400) - .send( - `Failed to open volume://${bucket}/${key}, file ${filePath} is directory, does not support now`, - ); + .send(`Failed to open volume file ${filePath} is directory, does not support now`); return; } @@ -307,7 +306,8 @@ function getVolumeArtifactsHandler(options: { bucket: string; key: string }, pee .pipe(new PreviewStream({ peek })) .pipe(res); } catch (err) { - res.status(500).send(`Failed to open volume://${bucket}/${key}: ${err}`); + console.log(`Failed to open volume: ${err}`); + res.status(500).send(`Failed to open volume.`); } }; } @@ -341,9 +341,11 @@ const QUERIES = { export function getArtifactsProxyHandler({ enabled, + allowedDomain, namespacedServiceGetter, }: { enabled: boolean; + allowedDomain: string; namespacedServiceGetter: NamespacedServiceGetter; }): Handler { if (!enabled) { @@ -367,9 +369,15 @@ export function getArtifactsProxyHandler({ router: req => { const namespace = getNamespaceFromUrl(req.url || ''); if (!namespace) { - throw new Error(`namespace query param expected in ${req.url}.`); + console.log(`namespace query param expected in ${req.url}.`); + throw new Error(`namespace query param expected.`); + } + const urlStr = namespacedServiceGetter(namespace!); + if (!isAllowedDomain(urlStr, allowedDomain)) { + 
console.log(`Domain is not allowed.`); + throw new Error(`Domain is not allowed.`); } - return namespacedServiceGetter(namespace); + return namespacedServiceGetter(namespace!); }, target: '/artifacts', headers: HACK_FIX_HPM_PARTIAL_RESPONSE_HEADERS, diff --git a/frontend/server/handlers/domain-checker.ts b/frontend/server/handlers/domain-checker.ts new file mode 100644 index 0000000000..88ad491759 --- /dev/null +++ b/frontend/server/handlers/domain-checker.ts @@ -0,0 +1,32 @@ +// Copyright 2023 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +export function isAllowedDomain(urlStr: string, allowedDomain: string): boolean { + const allowedRegExp = new RegExp(allowedDomain); + const domain = domain_from_url(urlStr); + const allowed = allowedRegExp.test(domain); + if (!allowed) { + console.log(`Domain not allowed: ${urlStr}`); + } + return allowed; +} + +function domain_from_url(url: string): string { + let result: string = ''; + let match = url.match(/^(?:https?:\/\/)?(?:[^@\/\n]+@)?([^:\/?\n]+)/); + if (match) { + result = match[0]; + } + return result; +} diff --git a/frontend/server/integration-tests/artifact-get.test.ts b/frontend/server/integration-tests/artifact-get.test.ts index 868678233d..52cce57d7c 100644 --- a/frontend/server/integration-tests/artifact-get.test.ts +++ b/frontend/server/integration-tests/artifact-get.test.ts @@ -438,11 +438,7 @@ describe('/artifacts', () => { const request = requests(app.start()); request .get(`/artifacts/get?source=volume&bucket=notexist&key=content`) - .expect( - 404, - 'Failed to open volume://notexist/content, Cannot find file "volume://notexist/content" in pod "ml-pipeline-ui": volume "notexist" not configured', - done, - ); + .expect(404, 'Failed to open volume.', done); }); it('responds error with a not exist volume mount path if source=volume', done => { @@ -485,11 +481,7 @@ describe('/artifacts', () => { const request = requests(app.start()); request .get(`/artifacts/get?source=volume&bucket=artifact&key=notexist/config`) - .expect( - 404, - 'Failed to open volume://artifact/notexist/config, Cannot find file "volume://artifact/notexist/config" in pod "ml-pipeline-ui": volume "artifact" not mounted or volume "artifact" with subPath (which is prefix of notexist/config) not mounted', - done, - ); + .expect(404, 'Failed to open volume.', done); }); it('responds error with a not exist volume mount artifact if source=volume', done => { @@ -529,11 +521,7 @@ describe('/artifacts', () => { const request = requests(app.start()); request .get(`/artifacts/get?source=volume&bucket=artifact&key=subartifact/notxist.csv`) - .expect( - 500, - "Failed to open volume://artifact/subartifact/notxist.csv: Error: ENOENT: no such file or directory, stat '/foo/bar/notxist.csv'", - done, - ); + .expect(500, 'Failed to open volume.', done); }); }); From 1002e0cf8cbb452c8839d4631ce90851e293581c Mon Sep 17 00:00:00 2001 From: Ryan Jaemun Jung Date: Sat, 12 Aug 2023 04:53:33 +0900 Subject: [PATCH 082/253] fix(samples): update samples to 
v2 pipelines (#9851) * Update samples to v2 pipelines * lint --- samples/core/XGBoost/xgboost_sample.py | 6 +- samples/core/XGBoost/xgboost_sample_test.py | 4 +- samples/core/caching/caching.ipynb | 27 ++++----- samples/core/caching/caching_sample.py | 28 ++++------ samples/core/caching/caching_test.py | 6 +- samples/core/exit_handler/exit_handler.py | 55 ++++++++++--------- .../core/exit_handler/exit_handler_test.py | 9 +-- samples/core/exit_handler/exit_handler_v2.py | 53 ------------------ samples/core/sequential/sequential.py | 27 +++++---- 9 files changed, 73 insertions(+), 142 deletions(-) mode change 100755 => 100644 samples/core/exit_handler/exit_handler.py delete mode 100644 samples/core/exit_handler/exit_handler_v2.py diff --git a/samples/core/XGBoost/xgboost_sample.py b/samples/core/XGBoost/xgboost_sample.py index d44cfaa73a..e11388a8a0 100644 --- a/samples/core/XGBoost/xgboost_sample.py +++ b/samples/core/XGBoost/xgboost_sample.py @@ -1,5 +1,5 @@ -import kfp.deprecated as kfp -from kfp.deprecated import components +import kfp as kfp +from kfp import components chicago_taxi_dataset_op = components.load_component_from_url( 'https://raw.githubusercontent.com/kubeflow/pipelines/e3337b8bdcd63636934954e592d4b32c95b49129/components/datasets/Chicago%20Taxi/component.yaml' @@ -47,7 +47,7 @@ def xgboost_pipeline(): # Training and prediction on dataset in Apache Parquet format training_data_parquet = convert_csv_to_apache_parquet_op( - training_data_csv).output + data=training_data_csv).output model_trained_on_parquet = xgboost_train_on_parquet_op( training_data=training_data_parquet, diff --git a/samples/core/XGBoost/xgboost_sample_test.py b/samples/core/XGBoost/xgboost_sample_test.py index 44e2c2a6b7..8f43c17edc 100644 --- a/samples/core/XGBoost/xgboost_sample_test.py +++ b/samples/core/XGBoost/xgboost_sample_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import kfp.deprecated as kfp +import kfp as kfp from .xgboost_sample import xgboost_pipeline from kfp.samples.test.utils import run_pipeline_func, TestCase run_pipeline_func([ TestCase( pipeline_func=xgboost_pipeline, - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, + mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, ), ]) diff --git a/samples/core/caching/caching.ipynb b/samples/core/caching/caching.ipynb index 0a03f8e761..8efc1e6fbc 100644 --- a/samples/core/caching/caching.ipynb +++ b/samples/core/caching/caching.ipynb @@ -11,11 +11,11 @@ "import datetime\n", "import time\n", "\n", - "import kfp.deprecated as kfp\n", - "from kfp.deprecated.components import create_component_from_func\n", + "import kfp as kfp\n", + "from kfp import dsl\n", "\n", "\n", - "@create_component_from_func\n", + "@dsl.component\n", "def do_work_op(seconds: float = 60) -> str:\n", " import datetime\n", " import time\n", @@ -27,9 +27,11 @@ " return datetime.datetime.now().isoformat()\n", "\n", "\n", + "@kfp.dsl.pipeline(name='caching-pipeline')\n", "def caching_pipeline(seconds: float = 60):\n", " # All outputs of successful executions are cached\n", - " work_task = do_work_op(seconds)\n" + " work_task = do_work_op(seconds=seconds)\n", + "\n" ] }, { @@ -81,21 +83,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Test 3\n", - "# For each task we can specify the maximum cached data staleness.\n", - "# For example: task.execution_options.caching_strategy.max_cache_staleness = \"P7D\" # (7 days)\n", - "# The `max_cache_staleness` attribute uses the [RFC3339 duration format](https://tools.ietf.org/html/rfc3339#appendix-A). For example: \"P0D\" (0 days), \"PT5H\" (5 hours; notice the \"T\")\n", - "# Cached results that are older than the specified time span, are not reused.\n", - "# In this case, the pipeline should not reuse the cached result, since they will be stale.\n", "\n", + "# Test 3\n", + "# In this case, the pipeline should not reuse the cached result, since they are disabled.\n", + "@kfp.dsl.pipeline(name='caching-pipeline3')\n", "def caching_pipeline3(seconds: float = 60):\n", - " # All outputs of successful executions are cached\n", - " work_task = do_work_op(seconds)\n", - " # TODO(Ark-kun): Fix handling non-zero periods in the backend\n", - " work_task.execution_options.caching_strategy.max_cache_staleness = 'P0D' # = Period: Time: 0 seconds\n", + " work_task = do_work_op(seconds=seconds)\n", + " work_task.set_caching_options(enable_caching=False)\n", "\n", - "# Waiting for some time for the cached data to become stale:\n", - "time.sleep(10)\n", "print(\"Starting test 3\")\n", "start_time = datetime.datetime.now()\n", "kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func(\n", diff --git a/samples/core/caching/caching_sample.py b/samples/core/caching/caching_sample.py index bd1220bddd..12ca3780d1 100644 --- a/samples/core/caching/caching_sample.py +++ b/samples/core/caching/caching_sample.py @@ -3,11 +3,11 @@ import datetime import time -import kfp.deprecated as kfp -from kfp.deprecated.components import create_component_from_func +import kfp as kfp +from kfp import dsl -@create_component_from_func +@dsl.component def do_work_op(seconds: float = 60) -> str: import datetime import time @@ -19,9 +19,10 @@ def do_work_op(seconds: float = 60) -> str: return datetime.datetime.now().isoformat() +@kfp.dsl.pipeline(name='caching-pipeline') def caching_pipeline(seconds: float = 60): # All outputs of successful executions are cached - work_task = do_work_op(seconds) + work_task = do_work_op(seconds=seconds) # 
Test 1 @@ -37,7 +38,6 @@ def caching_pipeline(seconds: float = 60): elapsed_time = datetime.datetime.now() - start_time print(f"Total run time: {int(elapsed_time.total_seconds())} seconds") - # Test 2 # Running the pipeline the second time. # The pipeline should reuse the cached results and complete faster. @@ -56,20 +56,14 @@ def caching_pipeline(seconds: float = 60): # Test 3 -# For each task we can specify the maximum cached data staleness. -# For example: task.execution_options.caching_strategy.max_cache_staleness = "P7D" # (7 days) -# The `max_cache_staleness` attribute uses the [RFC3339 duration format](https://tools.ietf.org/html/rfc3339#appendix-A). For example: "P0D" (0 days), "PT5H" (5 hours; notice the "T") -# Cached results that are older than the specified time span, are not reused. -# In this case, the pipeline should not reuse the cached result, since they will be stale. - +# In this case, the pipeline should not reuse the cached result, since they are +# disabled. +@kfp.dsl.pipeline(name='caching-pipeline3') def caching_pipeline3(seconds: float = 60): - # All outputs of successful executions are cached - work_task = do_work_op(seconds) - # TODO(Ark-kun): Fix handling non-zero periods in the backend - work_task.execution_options.caching_strategy.max_cache_staleness = 'P0D' # = Period: Time: 0 seconds + work_task = do_work_op(seconds=seconds) + work_task.set_caching_options(enable_caching=False) + -# Waiting for some time for the cached data to become stale: -time.sleep(10) print("Starting test 3") start_time = datetime.datetime.now() kfp.Client(host=kfp_endpoint).create_run_from_pipeline_func( diff --git a/samples/core/caching/caching_test.py b/samples/core/caching/caching_test.py index 43c9970914..a81618e69b 100644 --- a/samples/core/caching/caching_test.py +++ b/samples/core/caching/caching_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import kfp.deprecated as kfp +import kfp as kfp from kfp.samples.test.utils import TestCase, relative_path, run_pipeline_func run_pipeline_func([ TestCase( pipeline_file=relative_path(__file__, 'caching.ipynb'), - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, - run_pipeline=False, + mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, + run_pipeline=False, ), ]) diff --git a/samples/core/exit_handler/exit_handler.py b/samples/core/exit_handler/exit_handler.py old mode 100755 new mode 100644 index eaf0effb7c..a02122ab4e --- a/samples/core/exit_handler/exit_handler.py +++ b/samples/core/exit_handler/exit_handler.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python3 -# Copyright 2019-2023 The Kubeflow Authors +# Copyright 2021 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,41 +11,43 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Pipeline using ExitHandler.""" -from kfp import dsl, components, compiler -from kfp.components import InputPath, load_component_from_url +import os +from kfp import dsl +from kfp import compiler +from kfp.dsl import component -gcs_download_op = load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/pipelines/961b17fa6844e1d79e5d3686bb557d830d7b5a95/components/google-cloud/storage/download_blob/component.yaml' -) +# In tests, we install a KFP package from the PR under test. 
Users should not +# normally need to specify `kfp_package_path` in their component definitions. +_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') -@components.create_component_from_func -def print_file(file_path: InputPath('Any')): - """Print a file.""" - with open(file_path) as f: - print(f.read()) +@component(kfp_package_path=_KFP_PACKAGE_PATH) +def print_op(message: str): + """Prints a message.""" + print(message) -@components.create_component_from_func -def echo_msg(msg: str): - """Echo a message by parameter.""" - print(msg) +@component(kfp_package_path=_KFP_PACKAGE_PATH) +def fail_op(message: str): + """Fails.""" + import sys + print(message) + sys.exit(1) -@dsl.pipeline( - name='exit-handler', - description= - 'Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).' -) -def pipeline_exit_handler(url: str = 'gs://ml-pipeline/shakespeare1.txt'): - """A sample pipeline showing exit handler.""" - exit_task = echo_msg('exit!') +@dsl.pipeline(name='pipeline-with-exit-handler') +def pipeline_exit_handler(message: str = 'Hello World!'): + + exit_task = print_op(message='Exit handler has worked!') with dsl.ExitHandler(exit_task): - download_task = gcs_download_op(url) - echo_task = print_file(download_task.output) + print_op(message=message) + fail_op(message='Task failed.') if __name__ == '__main__': - compiler.Compiler().compile(pipeline_exit_handler, __file__ + '.yaml') + compiler.Compiler().compile( + pipeline_func=pipeline_exit_handler, + package_path=__file__.replace('.py', '.yaml')) diff --git a/samples/core/exit_handler/exit_handler_test.py b/samples/core/exit_handler/exit_handler_test.py index 94b19f5380..5e801d4dc6 100644 --- a/samples/core/exit_handler/exit_handler_test.py +++ b/samples/core/exit_handler/exit_handler_test.py @@ -17,11 +17,10 @@ import unittest from pprint import pprint -import kfp.deprecated as kfp +import kfp as kfp import kfp_server_api -from .exit_handler import pipeline_exit_handler -from .exit_handler_v2 import pipeline_exit_handler as pipeline_exit_handler_v2 +from .exit_handler import pipeline_exit_handler as pipeline_exit_handler from kfp.samples.test.utils import run_pipeline_func, TestCase, KfpMlmdClient @@ -53,10 +52,6 @@ def verify(mlmd_connection_config, run: kfp_server_api.ApiRun, **kwargs): run_pipeline_func([ TestCase( pipeline_func=pipeline_exit_handler, - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, - ), - TestCase( - pipeline_func=pipeline_exit_handler_v2, mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, ), ]) diff --git a/samples/core/exit_handler/exit_handler_v2.py b/samples/core/exit_handler/exit_handler_v2.py deleted file mode 100644 index a02122ab4e..0000000000 --- a/samples/core/exit_handler/exit_handler_v2.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Pipeline using ExitHandler.""" - -import os -from kfp import dsl -from kfp import compiler -from kfp.dsl import component - -# In tests, we install a KFP package from the PR under test. Users should not -# normally need to specify `kfp_package_path` in their component definitions. -_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') - - -@component(kfp_package_path=_KFP_PACKAGE_PATH) -def print_op(message: str): - """Prints a message.""" - print(message) - - -@component(kfp_package_path=_KFP_PACKAGE_PATH) -def fail_op(message: str): - """Fails.""" - import sys - print(message) - sys.exit(1) - - -@dsl.pipeline(name='pipeline-with-exit-handler') -def pipeline_exit_handler(message: str = 'Hello World!'): - - exit_task = print_op(message='Exit handler has worked!') - - with dsl.ExitHandler(exit_task): - print_op(message=message) - fail_op(message='Task failed.') - - -if __name__ == '__main__': - compiler.Compiler().compile( - pipeline_func=pipeline_exit_handler, - package_path=__file__.replace('.py', '.yaml')) diff --git a/samples/core/sequential/sequential.py b/samples/core/sequential/sequential.py index 39819c3dc1..db63a1fff2 100755 --- a/samples/core/sequential/sequential.py +++ b/samples/core/sequential/sequential.py @@ -17,35 +17,34 @@ from kfp import dsl, compiler -def gcs_download_op(url): - return dsl.ContainerOp( - name='GCS - Download', +@dsl.container_component +def gcs_download_op(url: str, output: dsl.OutputPath(str)): + return dsl.ContainerSpec( image='google/cloud-sdk:279.0.0', command=['sh', '-c'], - arguments=['gsutil cat $0 | tee $1', url, '/tmp/results.txt'], - file_outputs={ - 'data': '/tmp/results.txt', - } + args=['gsutil cat $0 | tee $1', url, output], ) -def echo_op(text): - return dsl.ContainerOp( - name='echo', +@dsl.container_component +def echo_op(text: str): + return dsl.ContainerSpec( image='library/bash:4.4.23', command=['sh', '-c'], - arguments=['echo "$0"', text] + args=['echo "$0"', text] ) + @dsl.pipeline( name='sequential-pipeline', description='A pipeline with two sequential steps.' 
) -def sequential_pipeline(url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'): +def sequential_pipeline(url: str = 'gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'): """A pipeline with two sequential steps.""" - download_task = gcs_download_op(url) - echo_task = echo_op(download_task.output) + download_task = gcs_download_op(url=url) + echo_task = echo_op(text=download_task.output) + if __name__ == '__main__': compiler.Compiler().compile(sequential_pipeline, __file__ + '.yaml') From b630d5c8ae7559be0011e67f01e3aec1946ef765 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 11 Aug 2023 13:46:29 -0700 Subject: [PATCH 083/253] feat(components): add Vertex RAI safety bias evaluation pipeline PiperOrigin-RevId: 556075708 --- .../model_evaluation/__init__.py | 2 +- .../__init__.py | 2 +- .../llm_safety_bias/component.py | 20 +- .../evaluation_llm_safety_bias_pipeline.py | 178 ++++++++++++++++++ .../rai_safety/llm_safety_bias/__init__.py | 14 -- .../rai_safety/safety_metrics_pipeline.py | 159 ---------------- .../evaluation_llm_classification_pipeline.py | 13 ++ ...evaluation_llm_text_generation_pipeline.py | 13 ++ 8 files changed, 223 insertions(+), 178 deletions(-) rename components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/{rai_safety => llm_safety_bias}/__init__.py (90%) rename components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/{rai_safety => }/llm_safety_bias/component.py (82%) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py delete mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 90571e99d4..32e3798599 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -24,7 +24,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp -from google_cloud_pipeline_components._implementation.model_evaluation.rai_safety.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp diff --git 
a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/__init__.py similarity index 90% rename from components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py rename to components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/__init__.py index b40cffdf86..febcd8f962 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Google Cloud Pipeline RAI Safety Bias Components.""" +"""Google Cloud Pipeline LLM Safety Bias Evaluation Components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py similarity index 82% rename from components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py rename to components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py index b5d16cc961..002bb7aeae 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py @@ -1,6 +1,19 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """LLM Safety Bias Metrics component used in KFP pipelines.""" - +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils from kfp.dsl import Artifact @@ -16,7 +29,6 @@ def llm_safety_bias_metrics( gcp_resources: OutputPath(str), llm_safety_bias_evaluation_metrics: Output[Artifact], - project: str, location: str = 'us-central1', slice_spec_gcs_source: str = '', predictions_gcs_source: str = '', @@ -25,11 +37,11 @@ def llm_safety_bias_metrics( service_account: str = '', network: str = '', encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): """Aggregates LLM safety bias metrics based on specified data slices. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. 
slice_spec_gcs_source: Google Cloud Storage location to file with JSONL slicing spec definition. @@ -60,6 +72,8 @@ def llm_safety_bias_metrics( ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. Returns: llm_safety_bias_evaluation_metrics: ``Artifact`` tracking the LLM safety diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py new file mode 100644 index 0000000000..da06d0fdc1 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py @@ -0,0 +1,178 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Vertex LLM Safety Bias Evaluation Pipeline.""" + +from typing import NamedTuple + +from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components._implementation.model_evaluation import LLMSafetyBiasMetricsOp +from google_cloud_pipeline_components.types.artifact_types import VertexBatchPredictionJob +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import ConcatPlaceholder +from kfp.dsl import container_component +from kfp.dsl import ContainerSpec +from kfp.dsl import Output +from kfp.dsl import OutputPath + +_PRIVATE_BP_IMAGE = ( + 'us-docker.pkg.dev/vertex-ai-restricted/llm-eval/private-bp:v0.1' +) + + +@container_component +def private_model_batch_predict( + location: str, + model_name: str, + gcp_resources: OutputPath(str), + batchpredictionjob: Output[VertexBatchPredictionJob], + gcs_output_directory: Output[Artifact], + bp_output_gcs_uri: OutputPath(str), + predictions_format: str = 'jsonl', + job_display_name: str = 'evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + accelerator_type: str = '', + accelerator_count: int = 0, + encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, +): + return ContainerSpec( + image=_image.GCPC_IMAGE_TAG, + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--payload', + ConcatPlaceholder([ + '{', + '"display_name": "', + job_display_name, + '", ', + '"job_spec": {"worker_pool_specs": [{"replica_count":"1', + '", "machine_spec": {"machine_type": "e2-standard-4', + '"},', + '"container_spec": {"image_uri":"', + _PRIVATE_BP_IMAGE, + '", "args": ["--project=', + project, + '", "--location=', + location, 
+ '", "--model=', + model_name, + '", "--instances_format=', + 'jsonl', + '", "--predictions_format=', + predictions_format, + '", "--accelerator_type=', + accelerator_type, + '", "--accelerator_count=', + accelerator_count, + '", "--bp_output_gcs_uri=', + bp_output_gcs_uri, + '", "--executor_input={{$.json_escape[1]}}"]}}]', + ', "encryption_spec": {"kms_key_name":"', + encryption_spec_key_name, + '"}', + '}}', + ]), + ], + ) + + +@dsl.pipeline(name='evaluation-llm-safety-bias-pipeline') +def evaluation_llm_safety_bias_pipeline( + project: str, + location: str, + model_name: str, + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +) -> NamedTuple('outputs', llm_safety_bias_evaluation_metrics=Artifact): + """LLM RAI Safety Bias Evaluation pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: The Model name used to run evaluation. Must be a publisher Model + or a managed Model sharing the same ancestor location. Starting this job + has no impact on any existing deployments of the Model and their + resources. + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should + be peered. For example, ``projects/12345/global/networks/myVPC``. Format + is of the form ``projects/{project}/global/networks/{network}``. Where + ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + network name, as in ``myVPC``. To specify this field, you must have + already configured VPC Network Peering for Vertex AI + (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left + unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + + Returns: + llm_safety_bias_evaluation_metrics: Metrics Artifact for LLM Safety Bias. 
+ """ + outputs = NamedTuple( + 'outputs', + llm_safety_bias_evaluation_metrics=Artifact, + ) + batch_predict_task = private_model_batch_predict( + project=project, + location=location, + model_name=model_name, + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + predictions_format='jsonl', + encryption_spec_key_name=encryption_spec_key_name, + ) + + llm_safety_bias_task = LLMSafetyBiasMetricsOp( + project=project, + predictions_gcs_source=batch_predict_task.outputs['bp_output_gcs_uri'], + slice_spec_gcs_source=( + 'gs://vertex-evaluation-llm-dataset-us-central1/safety_slicing_spec/all_subgroup.json' + ), + location=location, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) + return outputs( + llm_safety_bias_evaluation_metrics=llm_safety_bias_task.outputs[ + 'llm_safety_bias_evaluation_metrics' + ] + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py deleted file mode 100644 index 10ad4cbf18..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/llm_safety_bias/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Google Cloud Pipeline LLM Safety Bias Components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py deleted file mode 100644 index 95f39921cd..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/rai_safety/safety_metrics_pipeline.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Vertex LLM safety metrics pipeline.""" - -import sys - -import kfp -from vertexevaluation.llm.component import function_based -from vertexevaluation.llm.component.batch_predict import model_batch_predict -from google_cloud_pipeline_components._implementation.model_evaluation import SafetyMetricsOp -from vertexevaluation.llm.pipelines import utils - - -@kfp.dsl.pipeline(name='llm-safety-eval-pipeline') -def llm_safety_eval_pipeline( - project: str, - model_name: str, - batch_predict_gcs_destination_output_uri: str, - slice_spec_gcs_source: str = '', - location: str = 'us-central1', - batch_predict_gcs_source_uris: list = [], # pylint: disable=g-bare-generic - batch_predict_instances_format: str = 'jsonl', - batch_predict_predictions_format: str = 'jsonl', - batch_predict_accelerator_type: str = '', - batch_predict_accelerator_count: int = 0, - machine_type: str = 'n1-standard-4', - service_account: str = '', - enable_web_access: bool = True, - network: str = '', - reserved_ip_ranges: list = [], # pylint: disable=g-bare-generic - encryption_spec_key_name: str = '', -): - """The LLM Data Slicing and Safety Metrics Evaluation pipeline with batch prediction. - - Args: - project: Required. Project to run the component. - model_name: The Model name used to get predictions via this job. Must share - the same ancestor location. Starting this job has no impact on any - existing deployments of the Model and their resources. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - slice_spec_gcs_source: The Google Cloud Storage location of the file where - the slice spec definition is located. - location: Location for running the component. If not set, defaulted to - `us-central1`. - batch_predict_gcs_source_uris: The Google Cloud Storage batch predict source - locations. 
- batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. If not set, - default to "jsonl". For more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which predictions are given, - must be one of the Model's supportedInputStorageFormats. If not set, - default to "jsonl". - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `accelerator_count`. Only used if - `machine_type` is set. For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `machine_type`. Only used if `machine_type` is set. For more details - about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - machine_type: The machine type of this custom job. If not set, defaulted to - `e2-highmem-16`. More details: - https://cloud.google.com/compute/docs/machine-resource - service_account: Optional. Service account to run the dataflow job. If not - set, dataflow will use the default worker service account. For more - details, see - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account - enable_web_access (Optional[bool]): Whether you want Vertex AI to enable - [interactive shell access] - https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell - to training containers. If set to `true`, you can access interactive - shells at the URIs given by [CustomJob.web_access_uris][]. - network: Dataflow's fully qualified subnetwork name, when empty the default - subnetwork will be used. More details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - reserved_ip_ranges: The reserved ip ranges. - encryption_spec_key_name: Customer-managed encryption key options for the - CustomJob. If this is set, then all resources created by the CustomJob - will be encrypted with the provided encryption key. 
- """ - - batch_predict_task = model_batch_predict( - project=project, - location=location, - model=model_name, - job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', - gcs_source_uris=batch_predict_gcs_source_uris, - instances_format=batch_predict_instances_format, - predictions_format=batch_predict_predictions_format, - gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, - encryption_spec_key_name=encryption_spec_key_name, - accelerator_type=batch_predict_accelerator_type, - accelerator_count=batch_predict_accelerator_count, - ) - - converter_task = function_based.convert_artifact_to_string( - input_artifact=batch_predict_task.outputs['gcs_output_directory'] - ) - - SafetyMetricsOp( - project=project, - predictions_gcs_source=converter_task.output, - slice_spec_gcs_source=slice_spec_gcs_source, - location=location, - machine_type=machine_type, - service_account=service_account, - enable_web_access=enable_web_access, - network=network, - reserved_ip_ranges=reserved_ip_ranges, - encryption_spec_key_name=encryption_spec_key_name - ) - - -def main(argv: list[str]) -> None: - parsed_args = utils.parse_args('llm_safety_eval_pipeline', argv) - - parameters = utils.get_parameters_from_input_args_for_pipeline( - parsed_args, llm_safety_eval_pipeline - ) - - parameters.update( - { - 'batch_predict_gcs_source_uris': [ - 'gs://lakeyk-llm-test/golden_dataset/adversarial_with_gender_identity_1k_col_renamed.jsonl' - ] - } - ) - - job = utils.run_pipeline( - llm_safety_eval_pipeline, - parameters=parameters, - project=parameters['project'], - location=parameters['location'], - pipeline_root=parameters['batch_predict_gcs_destination_output_uri'], - ) - - if parsed_args.wait: - job.wait() - - -if __name__ == '__main__': - main(sys.argv) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index afd93a668b..2df0303395 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -1,3 +1,16 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Vertex LLM standalone Evaluation for text classification task.""" from typing import List, NamedTuple diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index f7e33159e5..827bf9dcc5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -1,3 +1,16 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Vertex LLM standalone Evaluation for text generation task.""" from typing import List, NamedTuple From b71d43eff3d4cf95b9b67eb3a890524f9d115807 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 14 Aug 2023 10:28:16 -0700 Subject: [PATCH 084/253] feat(eval): Implement embedding metrics importing PiperOrigin-RevId: 556837125 --- .../model_evaluation/import_model_evaluation.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py index 4ee02b6041..d82b8340e2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/model_evaluation/import_model_evaluation.py @@ -36,12 +36,15 @@ 'text-generation': 'gs://google-cloud-aiplatform/schema/modelevaluation/general_text_generation_metrics_1.0.0.yaml', 'question-answering': 'gs://google-cloud-aiplatform/schema/modelevaluation/question_answering_metrics_1.0.0.yaml', 'summarization': 'gs://google-cloud-aiplatform/schema/modelevaluation/summarization_metrics_1.0.0.yaml', + 'embedding': 'gs://google-cloud-aiplatform/schema/modelevaluation/embedding_metrics_1.0.0.yaml', } MODEL_EVALUATION_RESOURCE_TYPE = 'ModelEvaluation' MODEL_EVALUATION_SLICE_RESOURCE_TYPE = 'ModelEvaluationSlice' SLICE_BATCH_IMPORT_LIMIT = 50 -ULM_TASKS = set(['text-generation', 'question-answering', 'summarization']) +ULM_TASKS = set( + ['text-generation', 'question-answering', 'summarization', 'embedding'] +) def _make_parent_dirs_and_return_path(file_path: str): @@ -83,6 +86,12 @@ def _make_parent_dirs_and_return_path(file_path: str): type=str, default='', ) +parser.add_argument( + '--embedding_metrics', + dest='embedding_metrics', + type=str, + default='', +) parser.add_argument( '--feature_attributions', dest='feature_attributions', @@ -167,6 +176,9 @@ def main(argv): elif parsed_args.summarization_metrics: metrics_file_path = parsed_args.summarization_metrics 
problem_type = 'summarization' + elif parsed_args.embedding_metrics: + metrics_file_path = parsed_args.embedding_metrics + problem_type = 'embedding' else: metrics_file_path = parsed_args.metrics problem_type = parsed_args.problem_type From 88e1045c116a6dc8adac83b5936821fe2ef9b263 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 14 Aug 2023 10:49:31 -0700 Subject: [PATCH 085/253] feat(components): use GCPC project id placeholder as project parameter default PiperOrigin-RevId: 556844000 --- .../preview/dataflow/flex_template/component.py | 7 ++++--- .../preview/model_evaluation/data_bias_component.py | 5 +++-- .../model_evaluation/feature_attribution_component.py | 5 +++-- .../feature_attribution_graph_component.py | 6 ++++-- .../preview/model_evaluation/model_bias_component.py | 5 +++-- .../v1/batch_predict_job/component.py | 5 +++-- .../v1/bigquery/create_model/component.py | 5 +++-- .../v1/bigquery/detect_anomalies_model/component.py | 5 +++-- .../v1/bigquery/drop_model/component.py | 5 +++-- .../v1/bigquery/evaluate_model/component.py | 5 +++-- .../v1/bigquery/explain_forecast_model/component.py | 5 +++-- .../v1/bigquery/explain_predict_model/component.py | 5 +++-- .../v1/bigquery/export_model/component.py | 5 +++-- .../v1/bigquery/feature_importance/component.py | 6 ++++-- .../v1/bigquery/forecast_model/component.py | 6 ++++-- .../v1/bigquery/global_explain/component.py | 5 +++-- .../v1/bigquery/ml_advanced_weights/component.py | 5 +++-- .../v1/bigquery/ml_arima_coefficients/component.py | 5 +++-- .../v1/bigquery/ml_arima_evaluate/component.py | 5 +++-- .../v1/bigquery/ml_centroids/component.py | 5 +++-- .../v1/bigquery/ml_confusion_matrix/component.py | 5 +++-- .../v1/bigquery/ml_feature_info/component.py | 5 +++-- .../v1/bigquery/ml_principal_component_info/component.py | 6 +++--- .../v1/bigquery/ml_principal_components/component.py | 7 ++++--- .../v1/bigquery/ml_recommend/component.py | 5 +++-- .../v1/bigquery/ml_reconstruction_loss/component.py | 7 ++++--- .../v1/bigquery/ml_roc_curve/component.py | 5 +++-- .../v1/bigquery/ml_training_info/component.py | 5 +++-- .../v1/bigquery/ml_trial_info/component.py | 5 +++-- .../v1/bigquery/ml_weights/component.py | 5 +++-- .../v1/bigquery/predict_model/component.py | 5 +++-- .../v1/bigquery/query_job/component.py | 5 +++-- .../v1/custom_job/component.py | 5 +++-- .../v1/dataflow/python_job/component.py | 5 +++-- .../v1/dataproc/create_pyspark_batch/component.py | 5 +++-- .../v1/dataproc/create_spark_batch/component.py | 5 +++-- .../v1/dataproc/create_spark_r_batch/component.py | 5 +++-- .../v1/dataproc/create_spark_sql_batch/component.py | 5 +++-- .../v1/dataset/create_image_dataset/component.py | 5 +++-- .../v1/dataset/create_tabular_dataset/component.py | 6 ++++-- .../v1/dataset/create_text_dataset/component.py | 5 +++-- .../v1/dataset/create_time_series_dataset/component.py | 5 +++-- .../v1/dataset/create_video_dataset/component.py | 5 +++-- .../v1/dataset/export_image_dataset/component.py | 5 +++-- .../v1/dataset/export_tabular_dataset/component.py | 5 +++-- .../v1/dataset/export_text_dataset/component.py | 5 +++-- .../v1/dataset/export_time_series_dataset/component.py | 5 +++-- .../v1/dataset/export_video_dataset/component.py | 5 +++-- .../v1/dataset/import_image_dataset/component.py | 5 +++-- .../v1/dataset/import_text_dataset/component.py | 5 +++-- .../v1/dataset/import_video_dataset/component.py | 5 +++-- .../v1/endpoint/create_endpoint/component.py | 5 +++-- .../v1/hyperparameter_tuning_job/component.py | 5 +++-- 
.../v1/model/upload_model/component.py | 5 +++-- .../v1/model_evaluation/classification_component.py | 5 +++-- .../v1/model_evaluation/error_analysis_pipeline.py | 6 ++++-- .../v1/model_evaluation/evaluated_annotation_pipeline.py | 6 ++++-- ...aluation_automl_tabular_feature_attribution_pipeline.py | 6 ++++-- .../model_evaluation/evaluation_automl_tabular_pipeline.py | 7 ++++--- .../evaluation_automl_unstructure_data_pipeline.py | 6 ++++-- .../evaluation_feature_attribution_pipeline.py | 6 ++++-- .../v1/model_evaluation/forecasting_component.py | 6 ++++-- .../v1/model_evaluation/regression_component.py | 6 +++--- 63 files changed, 203 insertions(+), 132 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py index 80b56b729f..677cb50021 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -23,7 +24,6 @@ @container_component def dataflow_flex_template( - project: str, container_spec_gcs_path: str, gcp_resources: OutputPath(str), location: str = 'us-central1', @@ -56,13 +56,12 @@ def dataflow_flex_template( update: bool = False, transform_name_mappings: Dict[str, str] = {}, validate_only: bool = False, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a job with a Dataflow Flex Template. Args: - project: The ID of the Cloud Platform project that the job - belongs to. location: The regional endpoint to which to direct the request. E.g., us-central1, us-west1. Defaults to `us-central1` if not set. job_name: The job name to use for the created job. For update job requests, the job @@ -157,6 +156,8 @@ def dataflow_flex_template( https://cloud.google.com/dataflow/docs/guides/updating-a-pipeline#Mapping validate_only: If true, the request is validated but not actually executed. Defaults to false. + project: The ID of the Cloud Platform project that the job + belongs to. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. 
For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py index 05a10896c4..3b25fac5a0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py @@ -14,6 +14,7 @@ from typing import Any, List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp.dsl import Artifact @@ -31,7 +32,6 @@ def detect_data_bias( gcp_resources: OutputPath(str), data_bias_metrics: Output[Artifact], - project: str, target_field_name: str, bias_configs: List[Any], location: str = 'us-central1', @@ -40,6 +40,7 @@ def detect_data_bias( dataset: Input[VertexDataset] = None, columns: List[str] = [], encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Detects data bias metrics in a dataset. @@ -49,7 +50,6 @@ def detect_data_bias( bias metrics for the dataset. Args: - project: Project to run data bias detection. location: Location for running data bias detection. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, @@ -92,6 +92,7 @@ def detect_data_bias( ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. + project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. Returns: data_bias_metrics: diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py index 4510ff8abf..e0f210f6ee 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import Artifact @@ -32,7 +33,6 @@ def feature_attribution( gcp_resources: OutputPath(str), feature_attributions: Output[Metrics], - project: str, problem_type: str, location: str = 'us-central1', predictions_format: str = 'jsonl', @@ -47,6 +47,7 @@ def feature_attribution( dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Compute feature attribution on a trained model's batch explanation @@ -57,7 +58,6 @@ def feature_attribution( possible, typically possible for AutoML Classification models. Args: - project: Project to run feature attribution container. location: Location running feature attribution. 
If not set, defaulted to `us-central1`. problem_type: Problem type of the pipeline: one of `classification`, @@ -95,6 +95,7 @@ def feature_attribution( Dataflow job will be encrypted with the provided encryption key. force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` and `Dataflow`. + project: Project to run feature attribution container. Defaults to the project in which the PipelineJob is run. Returns: gcs_output_directory: JsonArray of the downsampled dataset GCS diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py index ecb436dbd4..5a12ee186f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py @@ -14,6 +14,7 @@ from typing import List, NamedTuple +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp from google_cloud_pipeline_components.types.artifact_types import VertexModel @@ -23,7 +24,6 @@ @kfp.dsl.pipeline(name='feature-attribution-graph-component') def feature_attribution_graph_component( # pylint: disable=dangerous-default-value - project: str, location: str, prediction_type: str, vertex_model: VertexModel, @@ -49,6 +49,7 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics): """A pipeline to compute feature attributions by sampling data for batch explanations. @@ -56,7 +57,6 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va valid explanation_spec. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. prediction_type: The type of prediction the model is to produce. "classification", "regression", or "forecasting". @@ -175,6 +175,8 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. Returns: A system.Metrics artifact with feature attributions. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py index 513cc7363d..1442977634 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import Artifact @@ -30,7 +31,6 @@ def detect_model_bias( gcp_resources: OutputPath(str), bias_model_metrics: Output[Artifact], - project: str, target_field_name: str, bias_configs: list, location: str = 'us-central1', @@ -39,6 +39,7 @@ def detect_model_bias( predictions_bigquery_source: Input[BQTable] = None, thresholds: list = [0.5], encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Detects bias metrics from a model's predictions. @@ -48,7 +49,6 @@ def detect_model_bias( bias metrics for classification problems. Args: - project: Project to run data bias detection. location: Location for running data bias detection. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited @@ -94,6 +94,7 @@ def detect_model_bias( ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. + project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. Returns: bias_model_metrics: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py index 75c86ab906..78fd2027a7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel from google_cloud_pipeline_components.types.artifact_types import VertexBatchPredictionJob @@ -31,7 +32,6 @@ @container_component def model_batch_predict( - project: str, job_display_name: str, gcp_resources: OutputPath(str), batchpredictionjob: Output[VertexBatchPredictionJob], @@ -62,6 +62,7 @@ def model_batch_predict( explanation_parameters: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a Google Cloud Vertex `BatchPredictionJob `_ and waits for it to complete. @@ -69,7 +70,6 @@ def model_batch_predict( For more details, see `BatchPredictionJob.Create `_. Args: - project: Project to create the BatchPredictionJob. job_display_name: The user-defined name of this BatchPredictionJob. location: Location for creating the BatchPredictionJob. instances_format: The format in which instances are @@ -262,6 +262,7 @@ def model_batch_predict( ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. + project: Project to create the BatchPredictionJob. Defaults to the project in which the PipelineJob is run. Returns: batchpredictionjob: [**Deprecated. 
Use gcs_output_directory and bigquery_output_table diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py index 77852d3935..d68ca55473 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -25,7 +26,6 @@ @container_component def bigquery_create_model_job( - project: str, query: str, model: Output[BQMLModel], gcp_resources: OutputPath(str), @@ -33,12 +33,12 @@ def bigquery_create_model_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery create model job and waits for it to finish. Args: - project: Project to run BigQuery model creation job. location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -59,6 +59,7 @@ def bigquery_create_model_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. Returns: model: Describes the model which is created. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py index 022b2333c7..95f080f23d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_detect_anomalies_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -40,12 +40,12 @@ def bigquery_detect_anomalies_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery detect anomalies model job and waits for it to finish. Args: - project: Project to run BigQuery model prediction job. location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -92,6 +92,7 @@ def bigquery_detect_anomalies_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the model prediction results should be diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py index 2b383e4e6a..32660189e8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -25,19 +26,18 @@ @container_component def bigquery_drop_model_job( - project: str, model: Input[BQMLModel], gcp_resources: OutputPath(str), location: str = 'us-central1', query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery drop model job and waits for it to finish. Args: - project: Project to run BigQuery model drop job. location: Location of the job to drop the BigQuery model. If not set, default to `US` multi-region. For more details, see @@ -57,6 +57,7 @@ def bigquery_drop_model_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery model drop job. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py index 91b6ecbc82..cc1db25803 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_evaluate_model_job( - project: str, model: Input[BQMLModel], evaluation_metrics: Output[Artifact], gcp_resources: OutputPath(str), @@ -39,12 +39,12 @@ def bigquery_evaluate_model_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery evaluate model job and waits for it to finish. 
Args: - project: Project to run BigQuery model evaluation job. location: Location to run the BigQuery model evaluation job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -82,6 +82,7 @@ def bigquery_evaluate_model_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model evaluation job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the model prediction results should be diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py index b0043c645b..861385cef8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_explain_forecast_model_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -38,6 +38,7 @@ def bigquery_explain_forecast_model_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.EXPLAIN_FORECAST job and let you explain forecast an @@ -46,7 +47,6 @@ def bigquery_explain_forecast_model_job( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - project: Project to run the BigQuery job. location: Location to run the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -80,6 +80,7 @@ def bigquery_explain_forecast_model_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. 
Returns: destination_table: Describes the table where the model explain forecast results should diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py index cb6e2f086b..f133af8a9e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py @@ -14,6 +14,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -26,7 +27,6 @@ @container_component def bigquery_explain_predict_model_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -40,12 +40,12 @@ def bigquery_explain_predict_model_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery explain predict model job and waits for it to finish. Args: - project: Project to run BigQuery model prediction job. location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -95,6 +95,7 @@ def bigquery_explain_predict_model_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the model prediction results should be diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py index 032246d387..772b00d04c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py @@ -15,6 +15,7 @@ from typing import Dict from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -25,7 +26,6 @@ @container_component def bigquery_export_model_job( - project: str, model: Input[BQMLModel], model_destination_path: str, exported_model_path: OutputPath(str), @@ -33,12 +33,12 @@ def bigquery_export_model_job( location: str = 'us-central1', job_configuration_extract: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery export model job and waits for it to finish. Args: - project: Project to run BigQuery model export job. location: Location of the job to export the BigQuery model. If not set, default to `US` multi-region. For more details, see @@ -57,6 +57,7 @@ def bigquery_export_model_job( are allowed. 
Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery model export job. Defaults to the project in which the PipelineJob is run. Returns: exported_model_path: The gcs bucket path where you export the model to. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py index 446049504a..68fa94f944 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_feature_importance_job( - project: str, model: Input[BQMLModel], feature_importance: Output[Artifact], gcp_resources: OutputPath(str), @@ -36,13 +36,13 @@ def bigquery_ml_feature_importance_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery feature importance fetching job and waits for it to finish. Args: - project: Project to run BigQuery model creation job. location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -71,6 +71,8 @@ def bigquery_ml_feature_importance_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. + Returns: feature_importance: Describes common metrics applicable to the type of model supplied. 
For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py index 2e8363b5a4..e2c0ce7f72 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_forecast_model_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -38,6 +38,7 @@ def bigquery_forecast_model_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.FORECAST job and let you forecast an ARIMA_PLUS or @@ -46,7 +47,6 @@ def bigquery_forecast_model_job( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - project: Project to run the BigQuery job. location: Location to run the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -79,6 +79,8 @@ def bigquery_forecast_model_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. + Returns: destination_table: Describes the table where the model forecast results should be diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py index 6e7c55ef3a..3791cc5298 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_global_explain_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -37,12 +37,12 @@ def bigquery_ml_global_explain_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery global explain fetching job and waits for it to finish. Args: - project: Project to run BigQuery model creation job. location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. 
For more details, see @@ -57,6 +57,7 @@ def bigquery_ml_global_explain_job( of each class. By default, class_level_explain is set to FALSE. This option only applies to classification models. Regression models only have model-level global feature importance. + project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the global explain results should be stored. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py index f6a9536d32..d41ccee4dc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_advanced_weights_job( - project: str, model: Input[BQMLModel], advanced_weights: Output[Artifact], gcp_resources: OutputPath(str), @@ -35,12 +35,12 @@ def bigquery_ml_advanced_weights_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ml advanced weights job and waits for it to finish. Args: - project: Project to run BigQuery ml advanced weights job. location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see @@ -62,6 +62,7 @@ def bigquery_ml_advanced_weights_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery ml advanced weights job. Defaults to the project in which the PipelineJob is run. Returns: weights: Describes different output columns for different models. 
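Every component touched by this patch follows the same mechanical change: the required leading project parameter is dropped from the signature, `_placeholders` is imported, and project is re-added as the last parameter with `_placeholders.PROJECT_ID_PLACEHOLDER` as its default, with the docstring entry moved to the end to note the new fallback. A minimal sketch of the resulting component shape, using a hypothetical image and launcher module rather than any real GCPC container:

from kfp import dsl
from google_cloud_pipeline_components import _placeholders


@dsl.container_component
def example_bigquery_job(
    gcp_resources: dsl.OutputPath(str),
    location: str = 'us-central1',
    # project is now last and optional; at runtime it resolves to the
    # project in which the PipelineJob is run.
    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
):
  return dsl.ContainerSpec(
      image='gcr.io/example/launcher:latest',  # hypothetical image
      command=['python3', '-m', 'example.launcher'],  # hypothetical module
      args=[
          '--project', project,
          '--location', location,
          '--gcp_resources', gcp_resources,
      ],
  )

Pipelines that already pass project explicitly are unaffected; the placeholder default only applies when the argument is omitted.
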
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py index 60f817b277..ac1e2eea70 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_arima_coefficients( - project: str, model: Input[BQMLModel], arima_coefficients: Output[Artifact], gcp_resources: OutputPath(str), @@ -36,6 +36,7 @@ def bigquery_ml_arima_coefficients( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.ARIMA_COEFFICIENTS job and let you see the ARIMA @@ -44,7 +45,6 @@ def bigquery_ml_arima_coefficients( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - project: Project to run the BigQuery job. location: Location to run the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -66,6 +66,7 @@ def bigquery_ml_arima_coefficients( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. Returns: arima_coefficients: Describes arima_coefficients to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py index 519d60db1d..130a3e105b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_arima_evaluate_job( - project: str, model: Input[BQMLModel], arima_evaluation_metrics: Output[Artifact], gcp_resources: OutputPath(str), @@ -37,12 +37,12 @@ def bigquery_ml_arima_evaluate_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.ARIMA_EVALUATE job and waits for it to finish. Args: - project: Project to run BigQuery model evaluation job. location: Location to run the BigQuery model evaluation job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -77,6 +77,7 @@ def bigquery_ml_arima_evaluate_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model evaluation job. Defaults to the project in which the PipelineJob is run. Returns: arima_evaluation_metrics: Describes arima metrics. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py index 62dfaa6a63..71e2493914 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_centroids_job( - project: str, model: Input[BQMLModel], centroids: Output[Artifact], gcp_resources: OutputPath(str), @@ -37,12 +37,12 @@ def bigquery_ml_centroids_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.CENTROIDS job and waits for it to finish. Args: - project: Project to run BigQuery ML.CENTROIDS job. location: Location to run the BigQuery ML.CENTROIDS job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -75,6 +75,7 @@ def bigquery_ml_centroids_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ML.CENTROIDS job. Defaults to the project in which the PipelineJob is run. Returns: centroids: Information about the centroids in a k-means model. 
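At the call site, the practical effect is that pipeline authors no longer have to thread a project value into every GCPC task. A small usage sketch, assuming the BigqueryQueryJobOp alias exported from google_cloud_pipeline_components.v1.bigquery and a hypothetical pipeline name and output file:

from kfp import compiler, dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryQueryJobOp


@dsl.pipeline(name='bq-query-with-default-project')
def bq_pipeline(query: str):
  # 'project' is omitted; with this change it defaults to the project
  # in which the PipelineJob itself runs.
  BigqueryQueryJobOp(query=query, location='US')


if __name__ == '__main__':
  compiler.Compiler().compile(bq_pipeline, package_path='bq_pipeline.yaml')

The same call pattern applies to the other components in this patch, since each one now carries the placeholder default.
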
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py index 8005826b64..fa86384ac7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_confusion_matrix_job( - project: str, model: Input[BQMLModel], confusion_matrix: Output[BQTable], gcp_resources: OutputPath(str), @@ -38,12 +38,12 @@ def bigquery_ml_confusion_matrix_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery confusion matrix job and waits for it to finish. Args: - project: Project to run BigQuery confusion matrix job. location: Location to run the BigQuery confusion matrix job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -73,6 +73,7 @@ def bigquery_ml_confusion_matrix_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery confusion matrix job. Defaults to the project in which the PipelineJob is run. Returns: confusion_matrix: Describes common metrics applicable to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py index d4e2dfd3bb..6af06e09a3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_feature_info_job( - project: str, model: Input[BQMLModel], feature_info: Output[Artifact], gcp_resources: OutputPath(str), @@ -35,12 +35,12 @@ def bigquery_ml_feature_info_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery feature info job and waits for it to finish. Args: - project: Project to run BigQuery feature info job. location: Location of the job to run BigQuery feature info job. If not set, default to `US` multi-region. For more details, see @@ -62,6 +62,7 @@ def bigquery_ml_feature_info_job( are allowed. Label values are optional. 
Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery feature info job. Defaults to the project in which the PipelineJob is run. Returns: feature_info: Describes common metrics applicable to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py index ad0032f43f..16af856380 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_principal_component_info_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -36,14 +36,13 @@ def bigquery_ml_principal_component_info_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.principal_component_info job and waits for it to finish. Args: - project: Project to run BigQuery - ML.principal_component_info job. location: Location to run the BigQuery ML.principal_component_info job. If not set, default to `US` multi-region. For more details, see @@ -73,6 +72,7 @@ def bigquery_ml_principal_component_info_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ML.principal_component_info job. Defaults to the project in which PipelineJob is run. 
Returns: destination_table: Describes the table which stores common metrics applicable to the type diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py index aafae8803d..bf1858dd8f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_principal_components_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -36,13 +36,12 @@ def bigquery_ml_principal_components_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.principal_components job and waits for it to finish. Args: - project: Project to run BigQuery ML.principal_components - job. location: Location to run the BigQuery ML.principal_components job. If not set, default to `US` multi-region. For more details, see @@ -70,6 +69,8 @@ def bigquery_ml_principal_components_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ML.principal_components + job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table which stores common metrics applicable to the type diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py index 2fd7a6e1b0..da86596e7e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_recommend_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -38,12 +38,12 @@ def bigquery_ml_recommend_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ML.Recommend job and waits for it to finish. Args: - project: Project to run BigQuery ML.Recommend job. location: Location to run the BigQuery ML.Recommend job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -76,6 +76,7 @@ def bigquery_ml_recommend_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ML.Recommend job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the recommendation results should be stored. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py index dfaa6b08ff..b65bac2645 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py @@ -14,6 +14,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -26,7 +27,6 @@ @container_component def bigquery_ml_reconstruction_loss_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -37,13 +37,12 @@ def bigquery_ml_reconstruction_loss_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ml reconstruction loss job and waits for it to finish. Args: - project: Project to run BigQuery ml reconstruction loss - job. location: Location to run the BigQuery ml reconstruction loss job. If not set, default to `US` multi-region. For more details, see @@ -77,6 +76,8 @@ def bigquery_ml_reconstruction_loss_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ml reconstruction loss + job. Defaults to the project in which the PipelineJob is run. 
Returns: destination_table: Describes the table where the ml reconstruction loss job results diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py index 4e1de3db2d..c10e723a49 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_roc_curve_job( - project: str, model: Input[BQMLModel], roc_curve: Output[BQTable], gcp_resources: OutputPath(str), @@ -38,12 +38,12 @@ def bigquery_ml_roc_curve_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery roc curve job and waits for it to finish. Args: - project: Project to run BigQuery roc curve job. location: Location of the job to run BigQuery roc curve job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -73,6 +73,7 @@ def bigquery_ml_roc_curve_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery roc curve job. Defaults to the project in which the PipelineJob is run. Returns: roc_curve: Describes common metrics applicable to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py index bb8695d666..1cb6aceb43 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_training_info_job( - project: str, model: Input[BQMLModel], ml_training_info: Output[Artifact], gcp_resources: OutputPath(str), @@ -35,13 +35,13 @@ def bigquery_ml_training_info_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ml training info fetching job and waits for it to finish. Args: - project: Project to run BigQuery ML training info job. location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see @@ -64,6 +64,7 @@ def bigquery_ml_training_info_job( are allowed. 
Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery ML training info job. Defaults to the project in which the PipelineJob is run. Returns: ml_training_info: Describes common metrics applicable to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py index 5adbe7f5cf..f3dda16415 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_trial_info_job( - project: str, model: Input[BQMLModel], trial_info: Output[Artifact], gcp_resources: OutputPath(str), @@ -36,12 +36,12 @@ def bigquery_ml_trial_info_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ml trial info job and waits for it to finish. Args: - project: Project to run BigQuery ml trial info job. location: Location to run the BigQuery ml trial info job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -68,6 +68,7 @@ def bigquery_ml_trial_info_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ml trial info job. Defaults to the project in which the PipelineJob is run. Returns: trial_info: Describes the trial info applicable to the type of model supplied. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py index 4f799e8edf..eae4732d05 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from kfp.dsl import Artifact from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_ml_weights_job( - project: str, model: Input[BQMLModel], weights: Output[Artifact], gcp_resources: OutputPath(str), @@ -35,12 +35,12 @@ def bigquery_ml_weights_job( query_parameters: List[str] = [], job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery ml weights job and waits for it to finish. Args: - project: Project to run BigQuery ml weights job. 
location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see @@ -63,6 +63,7 @@ def bigquery_ml_weights_job( are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. + project: Project to run BigQuery ml weights job. Defaults to the project in which the PipelineJob is run. Returns: weights: Describes different output columns for different models. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py index fa912330dc..bc6a9456c9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQMLModel from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder @@ -27,7 +28,6 @@ @container_component def bigquery_predict_model_job( - project: str, model: Input[BQMLModel], destination_table: Output[BQTable], gcp_resources: OutputPath(str), @@ -39,12 +39,12 @@ def bigquery_predict_model_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery predict model job and waits for it to finish. Args: - project: Project to run BigQuery model prediction job. location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -82,6 +82,7 @@ def bigquery_predict_model_job( requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. 
Returns: destination_table: Describes the table where the model prediction results should be diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py index 4daa7cd03a..1662156b4f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import BQTable from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -25,7 +26,6 @@ @container_component def bigquery_query_job( - project: str, destination_table: Output[BQTable], gcp_resources: OutputPath(str), query: str = '', @@ -34,12 +34,12 @@ def bigquery_query_job( job_configuration_query: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a BigQuery query job and waits for it to finish. Args: - project: Project to run the BigQuery query job. location: Location for creating the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location @@ -69,6 +69,7 @@ def bigquery_query_job( encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run the BigQuery query job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the query results should be stored. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py index 2667f04c87..737d7c1bc7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py @@ -14,6 +14,7 @@ from typing import Dict, List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components import utils from kfp import dsl @@ -21,7 +22,6 @@ # keep identical to create_custom_training_job_from_component @dsl.container_component def custom_training_job( - project: str, display_name: str, gcp_resources: dsl.OutputPath(str), location: str = 'us-central1', @@ -36,6 +36,7 @@ def custom_training_job( base_output_directory: str = '', labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a Vertex AI `custom training job `_ using the `CustomJob `_ API. @@ -45,7 +46,6 @@ def custom_training_job( more information. Args: - project: Project to create the custom training job in. location: Location for creating the custom training job. If not set, default to us-central1. display_name: The name of the CustomJob. @@ -91,6 +91,7 @@ def custom_training_job( encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. + project: Project to create the custom training job in. 
Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the CustomJob. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py index 48f8e5aa49..01a077aa9e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py @@ -14,6 +14,7 @@ from typing import List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import container_component from kfp.dsl import ContainerSpec from kfp.dsl import OutputPath @@ -21,20 +22,19 @@ @container_component def dataflow_python( - project: str, python_module_path: str, temp_location: str, gcp_resources: OutputPath(str), location: str = 'us-central1', requirements_file_path: str = '', args: List[str] = [], + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Launch a self-executing Beam Python file on Google Cloud using the Dataflow Runner. Args: - project: Project to create the Dataflow job. location: Location of the Dataflow job. If not set, defaults to ``'us-central1'``. python_module_path: The GCS path to the Python file to run. @@ -43,6 +43,7 @@ def dataflow_python( requirements_file_path: The GCS path to the pip requirements file. args: The list of args to pass to the Python file. Can include additional parameters for the Dataflow Runner. + project: Project to create the Dataflow job. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py index bb210c49a7..850631a77e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -23,7 +24,6 @@ @container_component def dataproc_create_pyspark_batch( - project: str, main_python_file_uri: str, gcp_resources: OutputPath(str), location: str = 'us-central1', @@ -44,12 +44,12 @@ def dataproc_create_pyspark_batch( file_uris: List[str] = [], archive_uris: List[str] = [], args: List[str] = [], + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Create a Dataproc PySpark batch workload and wait for it to finish. Args: - project: Project to run the Dataproc batch workload. location: Location of the Dataproc batch workload. If not set, defaults to ``"us-central1"``. batch_id: The ID to use for the batch, which will become @@ -94,6 +94,7 @@ def dataproc_create_pyspark_batch( include arguments that can be set as batch properties, such as ``--conf``, since a collision can occur that causes an incorrect batch submission. + project: Project to run the Dataproc batch workload. 
Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py index 8bb14a8d7d..ed8e3136e3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -23,7 +24,6 @@ @container_component def dataproc_create_spark_batch( - project: str, gcp_resources: OutputPath(str), location: str = 'us-central1', batch_id: str = '', @@ -44,12 +44,12 @@ def dataproc_create_spark_batch( file_uris: List[str] = [], archive_uris: List[str] = [], args: List[str] = [], + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Create a Dataproc Spark batch workload and wait for it to finish. Args: - project: Project to run the Dataproc batch workload. location: Location of the Dataproc batch workload. If not set, defaults to ``"us-central1"``. batch_id: The ID to use for the batch, which will become @@ -94,6 +94,7 @@ def dataproc_create_spark_batch( include arguments that can be set as batch properties, such as ``--conf``, since a collision can occur that causes an incorrect batch submission. + project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py index a599780af1..9e0923d072 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py @@ -15,6 +15,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -23,7 +24,6 @@ @container_component def dataproc_create_spark_r_batch( - project: str, gcp_resources: OutputPath(str), location: str = 'us-central1', batch_id: str = '', @@ -42,12 +42,12 @@ def dataproc_create_spark_r_batch( file_uris: List[str] = [], archive_uris: List[str] = [], args: List[str] = [], + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Create a Dataproc SparkR batch workload and wait for it to finish. Args: - project: Project to run the Dataproc batch workload. location: Location of the Dataproc batch workload. If not set, defaults to ``"us-central1"``. 
batch_id: The ID to use for the batch, which will become @@ -86,6 +86,7 @@ def dataproc_create_spark_r_batch( include arguments that can be set as batch properties, such as ``--conf``, since a collision can occur that causes an incorrect batch submission. + project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py index 51e31fa9a5..ed2d615ec8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py @@ -14,6 +14,7 @@ from typing import Dict, List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -22,7 +23,6 @@ @container_component def dataproc_create_spark_sql_batch( - project: str, gcp_resources: OutputPath(str), location: str = 'us-central1', batch_id: str = '', @@ -40,12 +40,12 @@ def dataproc_create_spark_sql_batch( query_file_uri: str = '', query_variables: Dict[str, str] = {}, jar_file_uris: List[str] = [], + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Create a Dataproc Spark SQL batch workload and wait for it to finish. Args: - project: Project to run the Dataproc batch workload. location: Location of the Dataproc batch workload. If not set, defaults to ``"us-central1"``. batch_id: The ID to use for the batch, which will become @@ -81,6 +81,7 @@ def dataproc_create_spark_sql_batch( Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. jar_file_uris: HCFS URIs of jar files to be added to the Spark ``CLASSPATH``. + project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. 
For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py index 345a7514e0..780d09448d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py @@ -15,6 +15,7 @@ from typing import Dict, Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Output @@ -22,7 +23,6 @@ @dsl.container_component def image_dataset_create( - project: str, display_name: str, dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', @@ -31,6 +31,7 @@ def image_dataset_create( import_schema_uri: Optional[str] = None, labels: Optional[Dict[str, str]] = {}, encryption_spec_key_name: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a new image `Dataset `_ and optionally imports data into Dataset when @@ -63,7 +64,6 @@ def image_dataset_create( pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by ``import_schema_uri``, e.g. jsonl file. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -82,6 +82,7 @@ def image_dataset_create( resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed image Dataset resource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py index 3f19637740..45ea84b29c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Output @@ -23,7 +24,6 @@ @dsl.container_component def tabular_dataset_create( - project: str, display_name: str, dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', @@ -31,6 +31,7 @@ def tabular_dataset_create( bq_source: Optional[str] = None, labels: Optional[dict] = {}, encryption_spec_key_name: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a new tabular `Dataset `_. @@ -46,7 +47,6 @@ def tabular_dataset_create( https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. 
bq_source: BigQuery URI to the input table. For example, "bq://project.dataset.table_name". - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -65,6 +65,8 @@ def tabular_dataset_create( resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. + Returns: dataset: Instantiated representation of the managed tabular Dataset resource. """ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py index 93395aca13..6c5417370b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Output @@ -23,7 +24,6 @@ @dsl.container_component def text_dataset_create( - project: str, display_name: str, dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', @@ -32,6 +32,7 @@ def text_dataset_create( import_schema_uri: Optional[str] = None, labels: Optional[dict] = {}, encryption_spec_key_name: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a new text `Dataset `_ and optionally imports data into Dataset when @@ -64,7 +65,6 @@ def text_dataset_create( pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by ``import_schema_uri``, e.g. jsonl file. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -83,6 +83,7 @@ def text_dataset_create( resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed text Datasetresource. 
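Note on the recurring pattern in this patch: each component drops the required ``project`` parameter and re-adds it at the end of the signature with a default of ``_placeholders.PROJECT_ID_PLACEHOLDER``, so that parameters without defaults continue to precede defaulted ones. A minimal sketch of the shape, under the assumption that the placeholder is a string constant the Vertex AI Pipelines backend resolves to the project of the running PipelineJob (the image URI and launcher module below are illustrative and not part of this patch):

    # Sketch of the signature change only; image and command are stand-ins.
    from kfp import dsl
    from google_cloud_pipeline_components import _placeholders


    @dsl.container_component
    def example_job(
        gcp_resources: dsl.OutputPath(str),
        location: str = 'us-central1',
        # Appended after the other defaulted parameters so the signature stays
        # valid; the placeholder is substituted with the PipelineJob's project
        # at runtime.
        project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
    ):
        return dsl.ContainerSpec(
            image='gcr.io/example/launcher:latest',  # illustrative image
            command=['python3', '-u', '-m', 'example.launcher'],
            args=['--project', project, '--location', location,
                  '--gcp_resources', gcp_resources],
        )

Since the KFP SDK expects components to be instantiated with keyword arguments, moving ``project`` to the tail of the signature should not affect existing call sites; callers can now either rely on the runtime default or keep passing ``project=...`` explicitly.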
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py index 93bc3ea3f9..4119729f66 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Output @@ -23,7 +24,6 @@ @dsl.container_component def time_series_dataset_create( - project: str, display_name: str, dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', @@ -31,6 +31,7 @@ def time_series_dataset_create( bq_source: Optional[str] = None, labels: Optional[dict] = {}, encryption_spec_key_name: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a new time series `Dataset `_. @@ -46,7 +47,6 @@ def time_series_dataset_create( https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. bq_source: BigQuery URI to the input table. For example, bq://project.dataset.table_name". - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -65,6 +65,7 @@ def time_series_dataset_create( resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed time series Datasetresource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py index e80bd89e9d..51a4b29f8c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Output @@ -23,7 +24,6 @@ @dsl.container_component def video_dataset_create( - project: str, display_name: str, dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', @@ -32,6 +32,7 @@ def video_dataset_create( import_schema_uri: Optional[str] = None, labels: Optional[dict] = {}, encryption_spec_key_name: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a new video `Dataset `_ and optionally imports data into Dataset when @@ -65,7 +66,6 @@ def video_dataset_create( pdf bytes). 
These labels will be overridden by Annotation labels specified inside index file refenced by ``import_schema_uri``, - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -84,6 +84,7 @@ def video_dataset_create( resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed video Datasetresource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py index d53d51ecc6..5351e3b6e4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Input @@ -23,11 +24,11 @@ @dsl.container_component def image_dataset_export( - project: str, dataset: Input[VertexDataset], output_dir: str, exported_dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Exports `Dataset `_ to a GCS output directory. @@ -45,8 +46,8 @@ def image_dataset_export( schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: exported_dataset: All of the files that are exported in this export operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py index 140a524743..f10358c4d6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Input @@ -23,11 +24,11 @@ @dsl.container_component def tabular_dataset_export( - project: str, dataset: Input[VertexDataset], output_dir: str, exported_dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Exports `Dataset `_ to a GCS output directory. 
@@ -45,8 +46,8 @@ def tabular_dataset_export( schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: exported_dataset: All of the files that are exported in this export operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py index 5c453264e5..7450cedd5c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Input @@ -23,11 +24,11 @@ @dsl.container_component def text_dataset_export( - project: str, dataset: Input[VertexDataset], output_dir: str, exported_dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Exports `Dataset `_ to a GCS output directory. @@ -45,8 +46,8 @@ def text_dataset_export( schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: exported_dataset: All of the files that are exported in this export operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py index d186adc71a..10aa1cf34e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Input @@ -23,11 +24,11 @@ @dsl.container_component def time_series_dataset_export( - project: str, dataset: Input[VertexDataset], output_dir: str, exported_dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Exports `Dataset `_ to a GCS output directory. @@ -45,8 +46,8 @@ def time_series_dataset_export( schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. - project: Project to retrieve Datasetfrom. location: Optional location to retrieve Datasetfrom. 
+ project: Project to retrieve Datasetfrom. Defaults to the project in which the PipelineJob is run. Returns: exported_dataset: All of the files that are exported in this export operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py index 6f73809245..83c27efeb0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl from kfp.dsl import Input @@ -23,11 +24,11 @@ @dsl.container_component def video_dataset_export( - project: str, dataset: Input[VertexDataset], output_dir: str, exported_dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Exports `Dataset `_ to a GCS output directory. @@ -45,8 +46,8 @@ def video_dataset_export( schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: exported_dataset: All of the files that are exported in this export operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py index 9dc153bea7..2d0727f97f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components import utils from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl @@ -26,19 +27,18 @@ @utils.gcpc_output_name_converter('dataset') @dsl.container_component def image_dataset_import( - project: str, dataset: Input[VertexDataset], output__dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', data_item_labels: Optional[dict] = {}, gcs_source: Optional[str] = None, import_schema_uri: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Uploads data to an existing managed `Dataset `_. Args: - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. dataset: The Dataset to be updated. gcs_source: @@ -64,6 +64,7 @@ def image_dataset_import( pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by ``import_schema_uri``, e.g. jsonl file. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
Returns: dataset: Instantiated representation of the managed Dataset resource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py index dce6754955..2528b6dd40 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py @@ -15,6 +15,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components import utils from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl @@ -25,19 +26,18 @@ @utils.gcpc_output_name_converter('dataset') @dsl.container_component def text_dataset_import( - project: str, dataset: Input[VertexDataset], output__dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', data_item_labels: Optional[dict] = {}, gcs_source: Optional[str] = None, import_schema_uri: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Uploads data to an existing managed `Dataset `_. Args: - project: Project to retrieve Datasetfrom. location: Optional location to retrieve Datasetfrom. dataset: The Datasetto be updated. gcs_source: @@ -65,6 +65,7 @@ def text_dataset_import( labels specified inside index file refenced by ``import_schema_uri``, e.g. jsonl file. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed Datasetresource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py index 920c20862c..fbbaf05aec 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py @@ -16,6 +16,7 @@ from typing import Optional from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components import utils from google_cloud_pipeline_components.types.artifact_types import VertexDataset from kfp import dsl @@ -26,19 +27,18 @@ @utils.gcpc_output_name_converter('dataset') @dsl.container_component def video_dataset_import( - project: str, dataset: Input[VertexDataset], output__dataset: Output[VertexDataset], location: Optional[str] = 'us-central1', data_item_labels: Optional[dict] = {}, gcs_source: Optional[str] = None, import_schema_uri: Optional[str] = None, + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Uploads data to an existing managed `Dataset `_. Args: - project: Project to retrieve Dataset from. location: Optional location to retrieve Dataset from. dataset: The Dataset to be updated. gcs_source: @@ -66,6 +66,7 @@ def video_dataset_import( labels specified inside index file refenced by ``import_schema_uri``, e.g. jsonl file. + project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: dataset: Instantiated representation of the managed Dataset resource. 
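For callers of the dataset components above, the practical effect is that ``project`` can simply be omitted. A hedged usage sketch (the display name and GCS path are invented; the import targets the module path shown in the diff, though the usual public aliases such as ``ImageDatasetCreateOp`` could be used instead):

    # Illustrative pipeline: no 'project' argument is passed to the task.
    from kfp import dsl
    from google_cloud_pipeline_components.v1.dataset.create_image_dataset.component import image_dataset_create


    @dsl.pipeline(name='image-dataset-demo')
    def image_dataset_demo():
        # 'project' now falls back to the PipelineJob's own project;
        # import_schema_uri, labels, etc. keep their defaults.
        image_dataset_create(
            display_name='demo-image-dataset',
            gcs_source='gs://example-bucket/annotations.jsonl',  # illustrative path
        )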
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py index be799c7238..9fb29d1380 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py @@ -15,6 +15,7 @@ from typing import Dict from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import VertexEndpoint from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component @@ -25,7 +26,6 @@ @container_component def endpoint_create( - project: str, display_name: str, gcp_resources: OutputPath(str), endpoint: Output[VertexEndpoint], @@ -34,6 +34,7 @@ def endpoint_create( labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', network: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """`Creates `_ a Google Cloud Vertex `Endpoint `_ and waits for it to be ready. @@ -42,7 +43,6 @@ def endpoint_create( See the `Endpoint create `_ method for more information. Args: - project: Project to create the Endpoint. location: Location to create the Endpoint. If not set, default to us-central1. display_name: The user-defined name of the Endpoint. The @@ -69,6 +69,7 @@ def endpoint_create( `Format `_: ``projects/{project}/global/networks/{network}``. Where ``{project}`` is a project number, as in ``'12345'``, and ``{network}`` is network name. + project: Project to create the Endpoint. Defaults to the project in which the PipelineJob is run. Returns: endpoint: Artifact tracking the created Endpoint. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py index 6f326ddff5..3de88c0aa6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py @@ -15,6 +15,7 @@ from typing import List from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from kfp.dsl import ConcatPlaceholder from kfp.dsl import container_component from kfp.dsl import ContainerSpec @@ -23,7 +24,6 @@ @container_component def hyperparameter_tuning_job( - project: str, display_name: str, base_output_directory: str, worker_pool_specs: List[str], @@ -39,6 +39,7 @@ def hyperparameter_tuning_job( encryption_spec_key_name: str = '', service_account: str = '', network: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Creates a Vertex AI hyperparameter tuning job and waits for @@ -51,7 +52,6 @@ def hyperparameter_tuning_job( display_name: The user-defined name of the HyperparameterTuningJob. The name can be up to 128 characters long and can be consist of any UTF-8 characters. - project: Project to run the HyperparameterTuningJob in. base_output_directory: The Cloud Storage location to store the output of this HyperparameterTuningJob. The base_output_directory of each child CustomJob backing a Trial is set @@ -142,6 +142,7 @@ def hyperparameter_tuning_job( ``projects/12345/global/networks/myVPC``. 
Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. + project: Project to run the HyperparameterTuningJob in. Defaults to the project in which the PipelineJob is run. Returns: gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which contains the GCP resource ID of the Hyperparameter Tuning job. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py index 412cd5c94d..b4c321c4be 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py @@ -15,6 +15,7 @@ from typing import Dict from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel from google_cloud_pipeline_components.types.artifact_types import VertexModel from kfp import dsl @@ -29,7 +30,6 @@ @container_component def model_upload( - project: str, display_name: str, gcp_resources: OutputPath(str), model: Output[VertexModel], @@ -41,6 +41,7 @@ def model_upload( explanation_parameters: Dict[str, str] = {}, labels: Dict[str, str] = {}, encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """`Uploads `_ a Google Cloud Vertex `Model `_ and returns a Model artifact representing the uploaded Model @@ -49,7 +50,6 @@ def model_upload( See `Model upload `_ method for more information. Args: - project: Project to upload this Model to. location: Optional location to upload this Model to. If not set, defaults to ``us-central1``. display_name: The display name of the Model. The name @@ -90,6 +90,7 @@ def model_upload( numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + project: Project to upload this Model to. Defaults to the project in which the PipelineJob is run. Returns: model: Artifact tracking the created Model. 
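One way to sanity-check the change is to compile an updated component and confirm that ``project`` has become an optional input with a default. A hedged verification sketch, assuming the compiled PipelineSpec YAML keeps the usual ``root.inputDefinitions.parameters`` layout:

    # Verification sketch: compile endpoint_create and inspect its inputs.
    import yaml
    from kfp import compiler
    from google_cloud_pipeline_components.v1.endpoint.create_endpoint.component import endpoint_create

    compiler.Compiler().compile(
        pipeline_func=endpoint_create,
        package_path='endpoint_create.yaml',
    )

    with open('endpoint_create.yaml') as f:
        spec = yaml.safe_load(f)

    project_param = spec['root']['inputDefinitions']['parameters']['project']
    print(project_param.get('isOptional'))    # expected: True
    print(project_param.get('defaultValue'))  # expected: the project-ID placeholder string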
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py index b5c2fcc28d..d38c1a1dd6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py @@ -14,6 +14,7 @@ from typing import Any, List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics @@ -26,7 +27,6 @@ def model_evaluation_classification( gcp_resources: dsl.OutputPath(str), evaluation_metrics: dsl.Output[ClassificationMetrics], - project: str, target_field_name: str, model: dsl.Input[VertexModel] = None, location: str = 'us-central1', @@ -51,6 +51,7 @@ def model_evaluation_classification( dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Computes a ``google.ClassificationMetrics`` Artifact, containing evaluation @@ -62,7 +63,6 @@ def model_evaluation_classification( text data. Args: - project: Project to run evaluation container. location: Location for running the evaluation. predictions_format: The file format for the batch prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed @@ -163,6 +163,7 @@ def model_evaluation_classification( created. force_runner_mode: Flag to choose Beam runner. Valid options are ``DirectRunner`` and ``Dataflow``. + project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: evaluation_metrics: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index ad340aad7f..496259454a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -14,6 +14,7 @@ from typing import List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ErrorAnalysisAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluatedAnnotationOp @@ -30,7 +31,6 @@ @kfp.dsl.pipeline(name='vision-model-error-analysis-pipeline') def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-value - project: str, location: str, model_name: str, batch_predict_gcs_destination_output_uri: str, @@ -55,6 +55,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): """The evaluation vision error analysis pipeline. 
@@ -64,7 +65,6 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v including Dataflow and BatchPrediction. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of @@ -158,6 +158,8 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. """ evaluation_display_name = 'vision-model-error-analysis-pipeline' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index 9ee8b9c95d..5892f0d63e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -14,6 +14,7 @@ from typing import List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDatasetPreprocessorOp as DatasetPreprocessorOp @@ -27,7 +28,6 @@ @kfp.dsl.pipeline(name='evaluated-annotation-pipeline') def evaluated_annotation_pipeline( - project: str, location: str, model_name: str, batch_predict_gcs_destination_output_uri: str, @@ -49,11 +49,11 @@ def evaluated_annotation_pipeline( dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): """The evaluation evaluated annotation pipeline. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction. @@ -134,6 +134,8 @@ def evaluated_annotation_pipeline( created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. 
""" evaluation_display_name = 'evaluated-annotation-pipeline' get_model_task = GetVertexModelOp(model_name=model_name) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index 1011a11b76..23f0f8db01 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -14,6 +14,7 @@ from typing import Any, Dict, List, NamedTuple +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp @@ -29,7 +30,6 @@ name='evaluation-automl-tabular-feature-attribution-classification-pipeline' ) def evaluation_automl_tabular_feature_attribution_classification_pipeline( # pylint: disable=dangerous-default-value - project: str, location: str, model_name: str, target_field_name: str, @@ -56,6 +56,7 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -70,7 +71,6 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction. @@ -197,6 +197,8 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. Returns: A google.ClassificationMetrics artifact. 
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index 296bfadf73..ad596db057 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -14,20 +14,19 @@ from typing import Any, List, NamedTuple +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -from google_cloud_pipeline_components.v1.model_evaluation.forecasting_component import model_evaluation_forecasting as ModelEvaluationForecastingOp from google_cloud_pipeline_components.v1.model_evaluation.regression_component import model_evaluation_regression as ModelEvaluationRegressionOp import kfp @kfp.dsl.pipeline(name='evaluation-automl-tabular-classification-pipeline') def evaluation_automl_tabular_classification_pipeline( # pylint: disable=dangerous-default-value - project: str, location: str, model_name: str, target_field_name: str, @@ -51,6 +50,7 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -63,7 +63,6 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger tabular custom models. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction. @@ -171,6 +170,8 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. 
Returns: A google.ClassificationMetrics artifact and imported diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index cfbc8685af..5b14991b91 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -14,6 +14,7 @@ from typing import List, NamedTuple +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp @@ -28,7 +29,6 @@ @kfp.dsl.pipeline(name='evaluation-classification-pipeline') def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disable=dangerous-default-value - project: str, location: str, model_name: str, target_field_name: str, @@ -54,6 +54,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -65,7 +66,6 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab including Text, Video, Image and Custom models. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction. Formatted like @@ -181,6 +181,8 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. 
Returns: A Tuple of google.ClassificationMetrics artifact and the imported diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index c741df43bc..5055dcd0d7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -14,6 +14,7 @@ from typing import Any, Dict, List, NamedTuple +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp @@ -28,7 +29,6 @@ @kfp.dsl.pipeline(name='evaluation-feature-attribution-classification-pipeline') def evaluation_feature_attribution_classification_pipeline( # pylint: disable=dangerous-default-value - project: str, location: str, model_name: str, target_field_name: str, @@ -57,6 +57,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -69,7 +70,6 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d component, which is needed for many tabular custom models. Args: - project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction. @@ -202,6 +202,8 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d created. force_runner_mode: Indicate the runner mode to use forcely. Valid options are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. Returns: A google.ClassificationMetrics artifact. 
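The three pipeline hunks above all make the same change: `project` moves from a required positional parameter to a trailing keyword parameter defaulting to `_placeholders.PROJECT_ID_PLACEHOLDER`, so the pipelines pick up the project of the PipelineJob that runs them. A minimal caller-side sketch of the effect, assuming the pipeline is imported from the module shown in the diff; the display name, model resource name, and output path are placeholders, and only the parameters visible in the hunk are spelled out:

```python
# Sketch: compile the updated classification evaluation pipeline and submit it
# without an explicit `project` value; the placeholder default resolves to the
# project in which the PipelineJob is run.
from google.cloud import aiplatform
from kfp import compiler

from google_cloud_pipeline_components.v1.model_evaluation.evaluation_automl_tabular_pipeline import (
    evaluation_automl_tabular_classification_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_automl_tabular_classification_pipeline,
    package_path='evaluation_automl_tabular_classification_pipeline.yaml',
)

job = aiplatform.PipelineJob(
    display_name='automl-tabular-classification-eval',
    template_path='evaluation_automl_tabular_classification_pipeline.yaml',
    parameter_values={
        # `project` is intentionally omitted; it now defaults to the project
        # that runs this PipelineJob.
        'location': 'us-central1',
        'model_name': 'projects/123/locations/us-central1/models/456',
        'target_field_name': 'species',
        # ...plus the batch prediction source/destination arguments the
        # pipeline requires.
    },
)
job.submit()
```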
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py index a8f165e997..f45c05d5e1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py @@ -14,6 +14,7 @@ from typing import List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import ForecastingMetrics @@ -26,7 +27,6 @@ def model_evaluation_forecasting( gcp_resources: dsl.OutputPath(str), evaluation_metrics: dsl.Output[ForecastingMetrics], - project: str, target_field_name: str, model: dsl.Input[VertexModel] = None, location: str = 'us-central1', @@ -49,6 +49,7 @@ def model_evaluation_forecasting( dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Computes a ``google.ForecastingMetrics`` Artifact, containing evaluation @@ -59,7 +60,6 @@ def model_evaluation_forecasting( Supports point forecasting and quantile forecasting for tabular data. Args: - project: Project to run evaluation container. location: Location for running the evaluation. predictions_format: The file format for the batch prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed @@ -128,6 +128,8 @@ def model_evaluation_forecasting( created. force_runner_mode: Flag to choose Beam runner. Valid options are ``DirectRunner`` and ``Dataflow``. + project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. + Returns: evaluation_metrics: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py index b39557a4fb..1502f91824 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py @@ -14,6 +14,7 @@ from typing import List +from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import version from google_cloud_pipeline_components.types.artifact_types import BQTable from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics @@ -26,7 +27,6 @@ def model_evaluation_regression( gcp_resources: dsl.OutputPath(str), evaluation_metrics: dsl.Output[RegressionMetrics], - project: str, target_field_name: str, model: dsl.Input[VertexModel] = None, location: str = 'us-central1', @@ -46,6 +46,7 @@ def model_evaluation_regression( dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off """Computes a ``google.RegressionMetrics`` Artifact, containing evaluation @@ -56,7 +57,6 @@ def model_evaluation_regression( Supports regression for tabular data. Args: - project: Project to run evaluation container. 
location: Location for running the evaluation. predictions_format: The file format for the batch prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed @@ -114,9 +114,9 @@ def model_evaluation_regression( ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. - force_runner_mode: Flag to choose Beam runner. Valid options are ``DirectRunner`` and ``Dataflow``. + project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: evaluation_metrics: From ff90ceae9a4f403a14dd01c5468068c6079d511f Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 14 Aug 2023 11:48:57 -0700 Subject: [PATCH 086/253] feat(components): Internal change PiperOrigin-RevId: 556864524 --- .../model_evaluation/import_evaluation/component.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py index 5c98c4b950..eb8e991d16 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py @@ -70,13 +70,13 @@ def model_evaluation_import( regression_metrics: google.ClassificationMetrics artifact generated from the ModelEvaluationRegressionOp component. text_generation_metrics: system.Metrics artifact generated from - the ModelEvaluationTextGenerationOp component. Subject to change to + the LLMEvaluationTextGenerationOp component. Subject to change to google.TextGenerationMetrics. question_answering_metrics: system.Metrics artifact generated from - the ModelEvaluationTextGenerationOp component. Subject to change to + the LLMEvaluationTextGenerationOp component. Subject to change to google.QuestionAnsweringMetrics. summarization_metrics: system.Metrics artifact generated from - the ModelEvaluationTextGenerationOp component. Subject to change to + the LLMEvaluationTextGenerationOp component. Subject to change to google.SummarizationMetrics. explanation: Path for model explanation metrics generated from an evaluation component. 
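The forecasting and regression evaluation components patched above receive the same `project` defaulting, so they can be dropped into a pipeline without threading a project ID through. A minimal sketch under stated assumptions: `GetVertexModelOp` is imported from the path used elsewhere in this patch series, `predictions_gcs_source` is assumed to be an optional input of the component (it is not shown in the excerpt), and the model name and predictions URI are placeholders:

```python
# Sketch: wiring the regression evaluation component into a pipeline while
# relying on the new `project` default. The importer feeds an assumed
# `predictions_gcs_source` input; adjust to the component's real signature.
from google_cloud_pipeline_components._implementation.model import GetVertexModelOp
from google_cloud_pipeline_components.v1.model_evaluation.regression_component import (
    model_evaluation_regression,
)
from kfp import dsl


@dsl.pipeline(name='regression-evaluation-example')
def regression_evaluation_pipeline(
    model_name: str,
    target_field_name: str,
    predictions_gcs_uri: str,
):
    get_model_task = GetVertexModelOp(model_name=model_name)

    predictions_importer = dsl.importer(
        artifact_uri=predictions_gcs_uri,
        artifact_class=dsl.Artifact,
        reimport=False,
    )

    # `project` is no longer passed; it defaults to the project in which the
    # PipelineJob is run.
    model_evaluation_regression(
        model=get_model_task.outputs['model'],
        target_field_name=target_field_name,
        predictions_gcs_source=predictions_importer.output,
    )
```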
From 450e9108172b5a4eb76abb6647bb65661581747a Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Mon, 14 Aug 2023 12:11:11 -0700 Subject: [PATCH 087/253] feat(chore): Change AutoML Vision Error Analysis pipeline names PiperOrigin-RevId: 556871987 --- .../error_analysis_pipeline.py | 19 ++++++++------- .../evaluated_annotation_pipeline.py | 23 ++++++++++--------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index 496259454a..d5139f6542 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -25,11 +25,10 @@ from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.dataset import GetVertexDatasetOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -import kfp from kfp import dsl -@kfp.dsl.pipeline(name='vision-model-error-analysis-pipeline') +@dsl.pipeline(name='automl-vision-error-analysis-pipeline') def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-value location: str, model_name: str, @@ -68,9 +67,9 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of - projects/{project}/locations/{location}/models/{model} or - projects/{project}/locations/{location}/models/{model}@{model_version_id - or model_version_alias} + ``projects/{project}/locations/{location}/models/{model}`` or + ``projects/{project}/locations/{location}/models/{model}@{model_version_id + or model_version_alias}`` batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is @@ -142,8 +141,8 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. dataflow_service_account: Custom service account to run Dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: @@ -161,9 +160,9 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. 
""" - evaluation_display_name = 'vision-model-error-analysis-pipeline' + evaluation_display_name = 'automl-vision-error-analysis-pipeline' - with kfp.dsl.Condition( + with dsl.Condition( ( test_dataset_resource_name != '' and training_dataset_resource_name != '' @@ -293,7 +292,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v ], ) - with kfp.dsl.Condition( + with dsl.Condition( ( ( test_dataset_resource_name == '' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index 5892f0d63e..c41cc81715 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -23,11 +23,11 @@ from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from google_cloud_pipeline_components.v1.dataset import GetVertexDatasetOp from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp -import kfp +from kfp import dsl -@kfp.dsl.pipeline(name='evaluated-annotation-pipeline') -def evaluated_annotation_pipeline( +@dsl.pipeline(name='automl-vision-evaluated-annotation-pipeline') +def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value location: str, model_name: str, batch_predict_gcs_destination_output_uri: str, @@ -56,7 +56,10 @@ def evaluated_annotation_pipeline( Args: location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch - prediction. + prediction, in the format of + ``projects/{project}/locations/{location}/models/{model}`` or + ``projects/{project}/locations/{location}/models/{model}@{model_version_id + or model_version_alias}`` batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is @@ -118,8 +121,8 @@ def evaluated_annotation_pipeline( dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. dataflow_service_account: Custom service account to run Dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: @@ -137,8 +140,8 @@ def evaluated_annotation_pipeline( project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. 
""" - evaluation_display_name = 'evaluated-annotation-pipeline' - get_model_task = GetVertexModelOp(model_name=model_name) + evaluation_display_name = 'automl-vision-evaluated-annotation-pipeline' + get_test_dataset_task = GetVertexDatasetOp( dataset_resource_name=test_dataset_resource_name ) @@ -149,7 +152,7 @@ def evaluated_annotation_pipeline( test_dataset_annotation_set_name=test_dataset_annotation_set_name, test_dataset_storage_source_uris=test_dataset_storage_source_uris, ) - + get_model_task = GetVertexModelOp(model_name=model_name) batch_predict_task = ModelBatchPredictOp( project=project, location=location, @@ -207,7 +210,6 @@ def evaluated_annotation_pipeline( dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, ) - model_evaluation_importer_task = ModelImportEvaluationOp( classification_metrics=eval_task.outputs['evaluation_metrics'], model=get_model_task.outputs['model'], @@ -217,7 +219,6 @@ def evaluated_annotation_pipeline( ], display_name=evaluation_display_name, ) - ModelImportEvaluatedAnnotationOp( model=get_model_task.outputs['model'], evaluated_annotation_output_uri=evaluated_annotation_task.outputs[ From 7b4ddf658c77bc5d4663ca100bdd2392d2c4f109 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Tue, 15 Aug 2023 20:07:16 +0300 Subject: [PATCH 088/253] test: enabe back archiving experiment action in kfp_functional_test (#9748) * re-enabe archiving experiment action in kfp_functional_test Signed-off-by: diana * Use kfp.v2 instead of kfp.depricated Signed-off-by: diana * Update periodic kfp_functional_test to work with kfp=2.0.1 Signed-off-by: diana * Updata dependencies and clean up instructions --------- Signed-off-by: diana Co-authored-by: gkcalat --- test/kfp-functional-test/README.md | 26 +++---- test/kfp-functional-test/constants.py | 1 + .../kfp-functional-test.sh | 4 +- test/kfp-functional-test/requirements.in | 2 +- test/kfp-functional-test/requirements.txt | 71 +++++++---------- .../run_kfp_functional_test.py | 78 +++++++++---------- 6 files changed, 81 insertions(+), 101 deletions(-) diff --git a/test/kfp-functional-test/README.md b/test/kfp-functional-test/README.md index 8e2414bd07..c08795d322 100644 --- a/test/kfp-functional-test/README.md +++ b/test/kfp-functional-test/README.md @@ -6,8 +6,8 @@ dependencies. To update dependencies: 1. edit [requirements.in](requirements.in) 1. run - ``` - ../../backend/update_requirements.sh requirements.txt + ```bash + pip-compile requirements.in ``` to update and pin the transitive dependencies. @@ -16,42 +16,42 @@ dependencies. To update dependencies: ### Via python 1. run - ``` + ```bash gcloud auth application-default login ``` acquire new user credentials to use for Application Default Credentials. 1. go to the root directory of kubeflow pipelines project, run - ``` + ```bash cd {YOUR_ROOT_DIRECTORY_OF_KUBEFLOW_PIPELINES} python3 ./test/kfp-functional-test/run_kfp_functional_test.py --host "https://$(curl https://raw.githubusercontent.com/kubeflow/testing/master/test-infra/kfp/endpoint)" ``` ### Via docker 1. run - ``` + ```bash gcloud auth application-default login ``` - acquire new user credentials to use for Application Default Credentials. + acquire new user credentials to use for Application Default Credentials. Credentials saved to file with {CREDENTIALS_PATH} similar to: [$HOME/.config/gcloud/application_default_credentials.json] 1. copy the Credentials to the temp folder - ```` + ````bash cp {CREDENTIALS_PATH} /tmp/keys/{FILENAME}.json ``` -1. 
Provide authentication credentials by setting the environment variable GOOGLE_APPLICATION_CREDENTIALS. +1. Provide authentication credentials by setting the environment variable GOOGLE_APPLICATION_CREDENTIALS. Replace [PATH] with the file path of the JSON file that contains your credentials. - run + run - ``` + ```bash export GOOGLE_APPLICATION_CREDENTIALS="/tmp/keys/{FILENAME}.json" ``` 1. go to the root directory of kubeflow pipelines project and run - ``` + ```bash cd {YOUR_ROOT_DIRECTORY_OF_KUBEFLOW_PIPELINES} docker run -it -v $(pwd):/tmp/src -w /tmp/src -e GOOGLE_APPLICATION_CREDENTIALS=/tmp/keys/{FILENAME}.json \ -v $GOOGLE_APPLICATION_CREDENTIALS:/tmp/keys/{FILENAME}.json:ro \ - python:3.7-slim /tmp/src/test/kfp-functional-test/kfp-functional-test.sh - ``` \ No newline at end of file + python:3.9-slim /tmp/src/test/kfp-functional-test/kfp-functional-test.sh + ``` diff --git a/test/kfp-functional-test/constants.py b/test/kfp-functional-test/constants.py index fd88b6f420..145be20dcc 100644 --- a/test/kfp-functional-test/constants.py +++ b/test/kfp-functional-test/constants.py @@ -14,3 +14,4 @@ # Common test params RUN_TIMEOUT_SECONDS = 1800 +DEFAULT_USER_NAMESPACE = 'kubeflow-user-example-com' diff --git a/test/kfp-functional-test/kfp-functional-test.sh b/test/kfp-functional-test/kfp-functional-test.sh index 4fb7052fd3..2e07f48f44 100755 --- a/test/kfp-functional-test/kfp-functional-test.sh +++ b/test/kfp-functional-test/kfp-functional-test.sh @@ -1,5 +1,5 @@ #!/bin/sh -ex -# Copyright 2020 The Kubeflow Authors +# Copyright 2023 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,8 +16,6 @@ apt-get update -y apt --no-install-recommends -y -q install curl source_root="$(pwd)" -# TODO(#4853) Skipping pip 20.3 due to a bad version resolution logic. 
-python3 -m pip install --upgrade pip!=20.3.* python3 -m pip install -r "${source_root}/test/kfp-functional-test/requirements.txt" HOST="https://$(curl https://raw.githubusercontent.com/kubeflow/testing/master/test-infra/kfp/endpoint)" diff --git a/test/kfp-functional-test/requirements.in b/test/kfp-functional-test/requirements.in index db6f93edcb..6dd00f161e 100644 --- a/test/kfp-functional-test/requirements.in +++ b/test/kfp-functional-test/requirements.in @@ -1 +1 @@ -kfp==1.1.2 +kfp==2.0.1 diff --git a/test/kfp-functional-test/requirements.txt b/test/kfp-functional-test/requirements.txt index f3c8124283..a0cba588a7 100644 --- a/test/kfp-functional-test/requirements.txt +++ b/test/kfp-functional-test/requirements.txt @@ -2,10 +2,8 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile --output-file=- - +# pip-compile requirements.in # -attrs==20.3.0 - # via jsonschema cachetools==4.1.1 # via google-auth certifi==2020.12.5 @@ -17,70 +15,70 @@ cffi==1.14.4 # via google-crc32c chardet==3.0.4 # via requests -click==7.1.2 - # via kfp -cloudpickle==1.6.0 - # via kfp -deprecated==1.2.10 +click==8.1.6 # via kfp docstring-parser==0.7.3 # via kfp -google-api-core==1.23.0 - # via google-cloud-core -google-auth==1.23.0 +google-api-core==2.11.1 + # via + # google-cloud-core + # google-cloud-storage + # kfp +google-auth==2.22.0 # via # google-api-core + # google-cloud-core # google-cloud-storage # kfp # kubernetes -google-cloud-core==1.4.4 +google-cloud-core==2.3.3 # via google-cloud-storage -google-cloud-storage==1.32.0 +google-cloud-storage==2.10.0 # via kfp google-crc32c==1.0.0 # via google-resumable-media -google-resumable-media==1.1.0 +google-resumable-media==2.5.0 # via google-cloud-storage -googleapis-common-protos==1.52.0 +googleapis-common-protos==1.60.0 # via google-api-core idna==2.10 # via requests -jsonschema==3.2.0 +kfp-pipeline-spec==0.2.2 # via kfp -kfp==1.1.2 - # via -r requirements.in -kfp-pipeline-spec==0.1.3.1 - # via kfp -kfp-server-api==1.1.2rc1 +kfp-server-api==2.0.0 # via kfp +kfp==2.0.1 + # via -r requirements.in kubernetes==11.0.0 # via kfp oauthlib==3.1.0 # via requests-oauthlib -protobuf==3.14.0 +protobuf==3.20.3 # via # google-api-core # googleapis-common-protos + # kfp + # kfp-pipeline-spec +pyasn1-modules==0.2.8 + # via google-auth pyasn1==0.4.8 # via # pyasn1-modules # rsa -pyasn1-modules==0.2.8 - # via google-auth pycparser==2.20 # via cffi -pyrsistent==0.17.3 - # via jsonschema python-dateutil==2.8.1 # via # kfp-server-api # kubernetes -pytz==2020.4 - # via google-api-core pyyaml==5.3.1 # via # kfp # kubernetes +requests-oauthlib==1.3.0 + # via kubernetes +requests-toolbelt==0.9.1 + # via kfp requests==2.25.0 # via # google-api-core @@ -88,39 +86,26 @@ requests==2.25.0 # kubernetes # requests-oauthlib # requests-toolbelt -requests-oauthlib==1.3.0 - # via kubernetes -requests-toolbelt==0.9.1 - # via kfp rsa==4.6 # via google-auth six==1.15.0 # via - # google-api-core # google-auth - # google-cloud-core - # google-resumable-media - # jsonschema # kfp-server-api # kubernetes - # protobuf # python-dateutil # websocket-client -strip-hints==0.1.9 - # via kfp tabulate==0.8.7 # via kfp urllib3==1.26.2 # via + # google-auth + # kfp # kfp-server-api # kubernetes # requests websocket-client==0.57.0 # via kubernetes -wheel==0.38.1 - # via strip-hints -wrapt==1.12.1 - # via deprecated # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/test/kfp-functional-test/run_kfp_functional_test.py 
b/test/kfp-functional-test/run_kfp_functional_test.py index faf1083a9c..00f031f3a4 100644 --- a/test/kfp-functional-test/run_kfp_functional_test.py +++ b/test/kfp-functional-test/run_kfp_functional_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 The Kubeflow Authors +# Copyright 2023 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,30 +13,24 @@ # limitations under the License. import argparse +from datetime import datetime import random import string -from datetime import datetime -import kfp.deprecated as kfp -from kfp.deprecated import dsl import constants +import kfp +import kfp.dsl as dsl -def echo_op(): - return dsl.ContainerOp( - name='echo', - image='library/bash:4.4.23', - command=['sh', '-c'], - arguments=['echo "hello world"'] - ) +@dsl.container_component +def say_hello(name: str): + return dsl.ContainerSpec( + image='library/bash:4.4.23', command=['echo'], args=[f'Hello, {name}!']) -@dsl.pipeline( - name='My first pipeline', - description='A hello world pipeline.' -) -def hello_world_pipeline(): - echo_task = echo_op() +@dsl.pipeline(name='My first pipeline', description='A hello pipeline.') +def hello_pipeline(name: str): + say_hello(name=name) # Parsing the input arguments @@ -44,10 +38,8 @@ def parse_arguments(): """Parse command line arguments.""" parser = argparse.ArgumentParser() - parser.add_argument('--host', - type=str, - required=True, - help='The host of kfp.') + parser.add_argument( + '--host', type=str, required=True, help='The host of kfp.') args = parser.parse_args() return args @@ -57,40 +49,44 @@ def main(): ###### Initialization ###### client = kfp.Client(args.host) - print("host is {}".format(args.host)) + print('host is {}'.format(args.host)) ###### Create Experiment ###### - print("Creating experiment") - experiment_name = "kfp-functional-e2e-expriment-" + "".join(random.choices(string.ascii_uppercase + - string.digits, k=5)) - response = client.create_experiment(experiment_name) - experiment_id = response.id - print("Experiment with id {} created".format(experiment_id)) + print('Creating experiment') + experiment_name = 'kfp-functional-e2e-expriment-' + ''.join( + random.choices(string.ascii_uppercase + string.digits, k=5)) + response = client.create_experiment( + experiment_name, namespace=constants.DEFAULT_USER_NAMESPACE) + experiment_id = response.experiment_id + print('Experiment with id {} created'.format(experiment_id)) try: ###### Create Run from Pipeline Func ###### - print("Creating Run from Pipeline Func") - response = client.create_run_from_pipeline_func(hello_world_pipeline, arguments={}, experiment_name=experiment_name) + print('Creating Run from Pipeline Func') + response = client.create_run_from_pipeline_func( + hello_pipeline, + arguments={'name': 'World'}, + experiment_name=experiment_name, + namespace=constants.DEFAULT_USER_NAMESPACE) run_id = response.run_id - print("Run {} created".format(run_id)) + print('Run {} created'.format(run_id)) ###### Monitor Run ###### start_time = datetime.now() - response = client.wait_for_run_completion(run_id, constants.RUN_TIMEOUT_SECONDS) - succ = (response.run.status.lower() == 'succeeded') + response = client.wait_for_run_completion(run_id, + constants.RUN_TIMEOUT_SECONDS) + success = (response.state.lower() == 'succeeded') end_time = datetime.now() elapsed_time = (end_time - start_time).seconds - if succ: - print("Run succeeded in {} seconds".format(elapsed_time)) + if success: + print('Run 
succeeded in {} seconds'.format(elapsed_time)) else: print("Run can't complete in {} seconds".format(elapsed_time)) finally: ###### Archive Experiment ###### - print("Archive experiment has a serious performance problem right now, so we temporarily disable it.") - print("TODO(Bobgy): re-enable archiving experiment action after fixing https://github.com/kubeflow/pipelines/issues/6815#issuecomment-955938098") - # print("Archiving experiment") - # client.experiments.archive_experiment(experiment_id) - # print("Archived experiment with id {}".format(experiment_id)) + print('Archiving experiment') + client.archive_experiment(experiment_id) + print('Archived experiment with id {}'.format(experiment_id)) -if __name__ == "__main__": +if __name__ == '__main__': main() From ff2e002157472cd69eef74c2010756797e4ed460 Mon Sep 17 00:00:00 2001 From: Junggil Lee Date: Wed, 16 Aug 2023 02:59:04 +0900 Subject: [PATCH 089/253] fix(samples): Update loop_parameter, loop_static samples to v2 pipelines (#9870) --- samples/core/loop_parameter/loop_parameter.py | 18 +++++----- .../loop_parameter/loop_parameter_test.py | 11 ++---- .../core/loop_parameter/loop_parameter_v2.py | 36 ------------------- samples/core/loop_static/loop_static.py | 23 ++++++------ samples/core/loop_static/loop_static_test.py | 11 ++---- samples/core/loop_static/loop_static_v2.py | 31 ---------------- 6 files changed, 27 insertions(+), 103 deletions(-) delete mode 100644 samples/core/loop_parameter/loop_parameter_v2.py delete mode 100644 samples/core/loop_static/loop_static_v2.py diff --git a/samples/core/loop_parameter/loop_parameter.py b/samples/core/loop_parameter/loop_parameter.py index 1c5c786ed2..f171589601 100644 --- a/samples/core/loop_parameter/loop_parameter.py +++ b/samples/core/loop_parameter/loop_parameter.py @@ -1,31 +1,33 @@ -from kfp.deprecated import components, dsl -from typing import List +from kfp import compiler, dsl -@components.create_component_from_func +@dsl.component def print_op(text: str) -> str: print(text) return text -@components.create_component_from_func +@dsl.component def concat_op(a: str, b: str) -> str: print(a + b) return a + b -@components.create_component_from_func +@dsl.component def generate_op() -> str: import json return json.dumps([{'a': i, 'b': i * 10} for i in range(1, 5)]) @dsl.pipeline(name='pipeline-with-loop-parameter') -def my_pipeline(greeting:str='this is a test for looping through parameters'): +def my_pipeline( + greeting: str = 'this is a test for looping through parameters'): print_task = print_op(text=greeting) generate_task = generate_op() with dsl.ParallelFor(generate_task.output) as item: concat_task = concat_op(a=item.a, b=item.b) - concat_task.after(print_task) - print_task_2 = print_op(concat_task.output) + print_task_2 = print_op(text=concat_task.output) + +if __name__ == '__main__': + compiler.Compiler().compile(my_pipeline, __file__ + '.yaml') diff --git a/samples/core/loop_parameter/loop_parameter_test.py b/samples/core/loop_parameter/loop_parameter_test.py index 74c49622e4..648d96cb62 100644 --- a/samples/core/loop_parameter/loop_parameter_test.py +++ b/samples/core/loop_parameter/loop_parameter_test.py @@ -14,10 +14,9 @@ from __future__ import annotations import unittest -import kfp.deprecated as kfp +import kfp import kfp_server_api -from .loop_parameter import my_pipeline -from .loop_parameter_v2 import my_pipeline as my_pipeline_v2 +from loop_parameter import my_pipeline from kfp.samples.test.utils import KfpTask, debug_verify, run_pipeline_func, TestCase @@ -35,11 
+34,7 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun, run_pipeline_func([ TestCase( - pipeline_func=my_pipeline_v2, + pipeline_func=my_pipeline, mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, verify_func=verify), - TestCase( - pipeline_func=my_pipeline, - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, - ), ]) diff --git a/samples/core/loop_parameter/loop_parameter_v2.py b/samples/core/loop_parameter/loop_parameter_v2.py deleted file mode 100644 index 478f782056..0000000000 --- a/samples/core/loop_parameter/loop_parameter_v2.py +++ /dev/null @@ -1,36 +0,0 @@ -import os - -from kfp import dsl - -# In tests, we install a KFP package from the PR under test. Users should not -# normally need to specify `kfp_package_path` in their component definitions. -_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def print_op(text: str) -> str: - print(text) - return text - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def concat_op(a: str, b: str) -> str: - print(a + b) - return a + b - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def generate_op() -> str: - import json - return json.dumps([{'a': i, 'b': i * 10} for i in range(1, 5)]) - - -@dsl.pipeline(name='pipeline-with-loop-parameter') -def my_pipeline( - greeting: str = 'this is a test for looping through parameters'): - print_task = print_op(text=greeting) - - generate_task = generate_op() - with dsl.ParallelFor(generate_task.output) as item: - concat_task = concat_op(a=item.a, b=item.b) - print_task_2 = print_op(text=concat_task.output) diff --git a/samples/core/loop_static/loop_static.py b/samples/core/loop_static/loop_static.py index 202869338e..d19fe38915 100644 --- a/samples/core/loop_static/loop_static.py +++ b/samples/core/loop_static/loop_static.py @@ -1,30 +1,29 @@ -from kfp.deprecated import components, dsl -from typing import List +from kfp import compiler, dsl -@components.create_component_from_func +@dsl.component def print_op(text: str) -> str: print(text) return text -@components.create_component_from_func +@dsl.component def concat_op(a: str, b: str) -> str: print(a + b) return a + b -_DEFAULT_LOOP_ARGUMENTS = [{'a': '1', 'b': '2'}, {'a': '10', 'b': '20'}] - - @dsl.pipeline(name='pipeline-with-loop-static') def my_pipeline( - static_loop_arguments: List[dict] = _DEFAULT_LOOP_ARGUMENTS, - greeting: str = 'this is a test for looping through parameters', -): + greeting: str = 'this is a test for looping through parameters',): + import json print_task = print_op(text=greeting) + static_loop_arguments = [json.dumps({'a': '1', 'b': '2'}), + json.dumps({'a': '10', 'b': '20'})] with dsl.ParallelFor(static_loop_arguments) as item: concat_task = concat_op(a=item.a, b=item.b) - concat_task.after(print_task) - print_task_2 = print_op(concat_task.output) + print_task_2 = print_op(text=concat_task.output) + +if __name__ == '__main__': + compiler.Compiler().compile(my_pipeline, __file__ + '.yaml') diff --git a/samples/core/loop_static/loop_static_test.py b/samples/core/loop_static/loop_static_test.py index 909d6261f9..276e525997 100644 --- a/samples/core/loop_static/loop_static_test.py +++ b/samples/core/loop_static/loop_static_test.py @@ -14,10 +14,9 @@ from __future__ import annotations import unittest -import kfp.deprecated as kfp +import kfp import kfp_server_api -from .loop_static import my_pipeline -from .loop_static_v2 import my_pipeline as my_pipeline_v2 +from loop_static import my_pipeline from kfp.samples.test.utils import KfpTask, 
run_pipeline_func, TestCase @@ -50,12 +49,8 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun, run_pipeline_func([ TestCase( - pipeline_func=my_pipeline_v2, + pipeline_func=my_pipeline, mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, verify_func=verify, ), - TestCase( - pipeline_func=my_pipeline, - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, - ), ]) diff --git a/samples/core/loop_static/loop_static_v2.py b/samples/core/loop_static/loop_static_v2.py deleted file mode 100644 index 2f634f77a0..0000000000 --- a/samples/core/loop_static/loop_static_v2.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from typing import List - -from kfp import dsl - -# In tests, we install a KFP package from the PR under test. Users should not -# normally need to specify `kfp_package_path` in their component definitions. -_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def print_op(text: str) -> str: - print(text) - return text - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def concat_op(a: str, b: str) -> str: - print(a + b) - return a + b - - -@dsl.pipeline(name='pipeline-with-loop-static') -def my_pipeline( - greeting: str = 'this is a test for looping through parameters',): - print_task = print_op(text=greeting) - static_loop_arguments = [{'a': '1', 'b': '2'}, {'a': '10', 'b': '20'}] - - with dsl.ParallelFor(static_loop_arguments) as item: - concat_task = concat_op(a=item.a, b=item.b) - print_task_2 = print_op(text=concat_task.output) From f454a86177b85b5cc11a7c57f63fa7f03f45604c Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Tue, 15 Aug 2023 13:11:28 -0700 Subject: [PATCH 090/253] feat(components): Add Feature Attribution components to _implementation/model_evaluation. Add LLM Eval text generation and text classification pipelines to preview namespace init file PiperOrigin-RevId: 557226606 --- .../model_evaluation/__init__.py | 8 +- .../feature_attribution/__init__.py | 14 + .../feature_attribution_component.py | 179 +++++++++++++ .../feature_attribution_graph_component.py | 247 ++++++++++++++++++ .../evaluation_llm_safety_bias_pipeline.py | 2 +- .../preview/model_evaluation/__init__.py | 4 + .../feature_attribution_graph_component.py | 2 +- ...ml_tabular_feature_attribution_pipeline.py | 2 +- ...evaluation_feature_attribution_pipeline.py | 2 +- 9 files changed, 455 insertions(+), 5 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 32e3798599..190eac1cb1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -19,25 +19,31 @@ from google_cloud_pipeline_components._implementation.model_evaluation.dataset_preprocessor.component import dataset_preprocessor_error_analysis as EvaluationDatasetPreprocessorOp from 
google_cloud_pipeline_components._implementation.model_evaluation.error_analysis_annotation.component import error_analysis_annotation as ErrorAnalysisAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.evaluated_annotation.component import evaluated_annotation as EvaluatedAnnotationOp +from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp __all__ = [ + 'evaluation_llm_safety_bias_pipeline', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', 'ErrorAnalysisAnnotationOp', 'EvaluatedAnnotationOp', + 'FeatureAttributionGraphComponentOp', 'FeatureExtractorOp', 'LLMEvaluationClassificationPredictionsPostprocessorOp', 'LLMEvaluationTextGenerationOp', + 'LLMSafetyBiasMetricsOp', + 'ModelEvaluationFeatureAttributionOp', 'ModelImportEvaluatedAnnotationOp', 'ModelImportEvaluationOp', - 'LLMSafetyBiasMetricsOp', 'TargetFieldDataRemoverOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/__init__.py new file mode 100644 index 0000000000..67319b3f75 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation Feature Extractor Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_component.py new file mode 100644 index 0000000000..e0f210f6ee --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_component.py @@ -0,0 +1,179 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components._implementation.model_evaluation import version +from google_cloud_pipeline_components.types.artifact_types import BQTable +from kfp.dsl import Artifact +from kfp.dsl import ConcatPlaceholder +from kfp.dsl import container_component +from kfp.dsl import ContainerSpec +from kfp.dsl import IfPresentPlaceholder +from kfp.dsl import Input +from kfp.dsl import Metrics +from kfp.dsl import Output +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_JOB_ID_PLACEHOLDER +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER +from kfp.dsl import PIPELINE_TASK_ID_PLACEHOLDER + + +@container_component +def feature_attribution( + gcp_resources: OutputPath(str), + feature_attributions: Output[Metrics], + problem_type: str, + location: str = 'us-central1', + predictions_format: str = 'jsonl', + predictions_gcs_source: Input[Artifact] = None, + predictions_bigquery_source: Input[BQTable] = None, + dataflow_service_account: str = '', + dataflow_disk_size_gb: int = 50, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_workers_num: int = 1, + dataflow_max_workers_num: int = 5, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, +): + # fmt: off + """Compute feature attribution on a trained model's batch explanation + results. + + Creates a dataflow job with Apache Beam and TFMA to compute feature + attributions. Will compute feature attribution for every target label if + possible, typically possible for AutoML Classification models. + + Args: + location: Location running feature attribution. If not + set, defaulted to `us-central1`. + problem_type: Problem type of the pipeline: one of `classification`, + `regression` and `forecasting`. 
+ predictions_format: The file format for the batch + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`. + predictions_gcs_source: An artifact with its + URI pointing toward a GCS directory with prediction or explanation files + to be used for this evaluation. For prediction results, the files should + be named "prediction.results-*" or "predictions_". For explanation + results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table + with prediction or explanation data to be used for this evaluation. For + prediction results, the table column should be named "predicted_*". + dataflow_service_account: Service account to run the + dataflow job. If not set, dataflow will use the default worker service + account. For more details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine + executing the evaluation run. If not set, defaulted to `50`. + dataflow_machine_type: The machine type executing the + evaluation run. If not set, defaulted to `n1-standard-4`. + dataflow_workers_num: The number of workers executing the + evaluation run. If not set, defaulted to `10`. + dataflow_max_workers_num: The max number of workers + executing the evaluation run. If not set, defaulted to `25`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow + workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key + for the Dataflow job. If this is set, then all resources created by the + Dataflow job will be encrypted with the provided encryption key. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` + and `Dataflow`. + project: Project to run feature attribution container. Defaults to the project in which the PipelineJob is run. + + Returns: + gcs_output_directory: JsonArray of the downsampled dataset GCS + output. + bigquery_output_table: String of the downsampled dataset BigQuery + output. + gcp_resources: Serialized gcp_resources proto tracking the dataflow + job. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ """ + # fmt: on + return ContainerSpec( + image=version.EVAL_IMAGE_TAG, + command=[ + 'python3', + '/main.py', + ], + args=[ + '--task', + 'explanation', + '--setup_file', + '/setup.py', + '--project_id', + project, + '--location', + location, + '--problem_type', + problem_type, + '--root_dir', + f'{PIPELINE_ROOT_PLACEHOLDER}/{PIPELINE_JOB_ID_PLACEHOLDER}-{PIPELINE_TASK_ID_PLACEHOLDER}', + '--batch_prediction_format', + predictions_format, + IfPresentPlaceholder( + input_name='predictions_gcs_source', + then=[ + '--batch_prediction_gcs_source', + predictions_gcs_source.uri, + ], + ), + IfPresentPlaceholder( + input_name='predictions_bigquery_source', + then=[ + '--batch_prediction_bigquery_source', + ConcatPlaceholder([ + 'bq://', + predictions_bigquery_source.metadata['projectId'], + '.', + predictions_bigquery_source.metadata['datasetId'], + '.', + predictions_bigquery_source.metadata['tableId'], + ]), + ], + ), + '--dataflow_job_prefix', + f'evaluation-feautre-attribution-{PIPELINE_JOB_ID_PLACEHOLDER}-{PIPELINE_TASK_ID_PLACEHOLDER}', + '--dataflow_service_account', + dataflow_service_account, + '--dataflow_disk_size', + dataflow_disk_size_gb, + '--dataflow_machine_type', + dataflow_machine_type, + '--dataflow_workers_num', + dataflow_workers_num, + '--dataflow_max_workers_num', + dataflow_max_workers_num, + '--dataflow_subnetwork', + dataflow_subnetwork, + '--dataflow_use_public_ips', + dataflow_use_public_ips, + '--kms_key_name', + encryption_spec_key_name, + '--force_runner_mode', + force_runner_mode, + '--gcs_output_path', + feature_attributions.path, + '--gcp_resources', + gcp_resources, + '--executor_input', + '{{$}}', + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py new file mode 100644 index 0000000000..f0ed330f1f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py @@ -0,0 +1,247 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Graph Component for feature attribution evaluation.""" + +from typing import List, NamedTuple + +from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp +from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.types.artifact_types import VertexModel +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp +import kfp + + +@kfp.dsl.pipeline(name='feature-attribution-graph-component') +def feature_attribution_graph_component( # pylint: disable=dangerous-default-value + location: str, + prediction_type: str, + vertex_model: VertexModel, + batch_predict_instances_format: str, + batch_predict_gcs_destination_output_uri: str, + batch_predict_gcs_source_uris: List[str] = [], # pylint: disable=g-bare-generic + batch_predict_bigquery_source_uri: str = '', + batch_predict_predictions_format: str = 'jsonl', + batch_predict_bigquery_destination_output_uri: str = '', + batch_predict_machine_type: str = 'n1-standard-16', + batch_predict_starting_replica_count: int = 5, + batch_predict_max_replica_count: int = 10, + batch_predict_explanation_metadata: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_parameters: dict = {}, # pylint: disable=g-bare-generic + batch_predict_explanation_data_sample_size: int = 10000, + batch_predict_accelerator_type: str = '', + batch_predict_accelerator_count: int = 0, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_max_num_workers: int = 5, + dataflow_disk_size_gb: int = 50, + dataflow_service_account: str = '', + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', + force_runner_mode: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, +) -> NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics): + """A pipeline to compute feature attributions by sampling data for batch explanations. + + This pipeline guarantees support for AutoML Tabular models that contain a + valid explanation_spec. + + Args: + location: The GCP region that runs the pipeline components. + prediction_type: The type of prediction the model is to produce. + "classification", "regression", or "forecasting". + vertex_model: The Vertex model artifact used for batch explanation. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. For more details + about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location + of the directory where the output is to be written to. In the given + directory a new directory is created. Its name is + ``prediction--``, where timestamp is + in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + ``predictions_0001.``, ``predictions_0002.``, ..., + ``predictions_N.`` are created where ```` depends on + chosen ``predictions_format``, and N may equal 0001 and depends on the + total number of successfully predicted instances. If the Model has both + ``instance`` and ``prediction`` schemata defined then each such file + contains predictions as per the ``predictions_format``. 
If prediction for + any instance failed (partially or completely), then an additional + ``errors_0001.``, ``errors_0002.``,..., + ``errors_N.`` files are created (N depends on total number of + failed predictions). These files contain the failed instances, as per + their schema, followed by an additional ``error`` field which as value has + ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your + instances to run batch prediction on. May contain wildcards. For more + information on wildcards, see + https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For + more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to + run batch prediction on. May contain wildcards. For more details about + this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the + predictions. Must be one of the Model's supportedOutputStorageFormats. For + more details about this output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location + where the output is to be written to. In the given project a new dataset + is created with name ``prediction__`` + where is made BigQuery-dataset-name compatible (for example, most special + characters become underscores), and timestamp is in + YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two + tables will be created, ``predictions``, and ``errors``. If the Model has + both ``instance`` and ``prediction`` schemata defined then the tables have + columns as follows: The ``predictions`` table contains instances for which + the prediction succeeded, it has columns as per a concatenation of the + Model's instance and prediction schemata. The ``errors`` table contains + rows for which the prediction has failed, it has instance columns, as per + the instance schema, followed by a single "errors" column, which as values + has ````google.rpc.Status`` ``__ represented as a STRUCT, and + containing only ``code`` and ``message``. For more details about this + output config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction + on dedicated resources. If the Model supports DEDICATED_RESOURCES this + config may be provided (and the job will use these resources). If the + Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. + For more details about the BatchDedicatedResources, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + For more details about the machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at + the start of the batch operation. 
If not set, Vertex AI decides starting + number, not greater than ``max_replica_count``. Only used if + ``machine_type`` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the + batch operation may be scaled to. Only used if ``machine_type`` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for + this BatchPredictionJob. Can be specified only if ``generate_explanation`` + is set to ``True``. This value overrides the value of + ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are + optional in the request. If a field of the ``explanation_metadata`` object + is not populated, the corresponding field of the + ``Model.explanation_metadata`` object is inherited. For more details, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for + Model's predictions. Can be specified only if ``generate_explanation`` is + set to ``True``. This value overrides the value of + ``Model.explanation_parameters``. All fields of ``explanation_parameters`` + are optional in the request. If a field of the ``explanation_parameters`` + object is not populated, the corresponding field of the + ``Model.explanation_parameters`` object is inherited. For more details, + see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the + input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be + attached to the machine as per ``batch_predict_accelerator_count``. Only + used if ``batch_predict_machine_type`` is set. For more details about the + machine spec, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the + ``batch_predict_machine_type``. Only used if + ``batch_predict_machine_type`` is set. + dataflow_machine_type: The Dataflow machine type for evaluation components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation + components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation + components. + dataflow_service_account: Custom service account to run Dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. Example: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, + resources created by this pipeline will be encrypted with the provided + encryption key. Has the form: + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute resource is + created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options + are ``Dataflow`` and ``DirectRunner``. + project: The GCP project that runs the pipeline components. Defaults to the + project in which the PipelineJob is run. + + Returns: + A system.Metrics artifact with feature attributions. + """ + outputs = NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics) + + # Sample the input dataset for a quicker batch explanation. 
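# A descriptive note on the wiring that follows: EvaluationDataSamplerOp first
# downsamples the prediction input to batch_predict_explanation_data_sample_size
# instances, ModelBatchPredictOp then runs over that sample with
# generate_explanation=True, and ModelEvaluationFeatureAttributionOp aggregates
# the per-instance explanations into the feature_attributions Metrics artifact
# returned by this graph component.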
+ data_sampler_task = EvaluationDataSamplerOp( + project=project, + location=location, + gcs_source_uris=batch_predict_gcs_source_uris, + bigquery_source_uri=batch_predict_bigquery_source_uri, + instances_format=batch_predict_instances_format, + sample_size=batch_predict_explanation_data_sample_size, + force_runner_mode=force_runner_mode, + ) + + # Run batch explain. + batch_explain_task = ModelBatchPredictOp( + project=project, + location=location, + model=vertex_model, + job_display_name='model-registry-batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=data_sampler_task.outputs['gcs_output_directory'], + bigquery_source_input_uri=data_sampler_task.outputs[ + 'bigquery_output_table' + ], + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri, + generate_explanation=True, + explanation_parameters=batch_predict_explanation_parameters, + explanation_metadata=batch_predict_explanation_metadata, + machine_type=batch_predict_machine_type, + starting_replica_count=batch_predict_starting_replica_count, + max_replica_count=batch_predict_max_replica_count, + encryption_spec_key_name=encryption_spec_key_name, + accelerator_type=batch_predict_accelerator_type, + accelerator_count=batch_predict_accelerator_count, + ) + + # Generate feature attributions from explanations. + feature_attribution_task = ModelEvaluationFeatureAttributionOp( + project=project, + location=location, + problem_type=prediction_type, + predictions_format=batch_predict_predictions_format, + predictions_gcs_source=batch_explain_task.outputs['gcs_output_directory'], + predictions_bigquery_source=batch_explain_task.outputs[ + 'bigquery_output_table' + ], + dataflow_machine_type=dataflow_machine_type, + dataflow_max_workers_num=dataflow_max_num_workers, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_service_account=dataflow_service_account, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + force_runner_mode=force_runner_mode, + ) + + return outputs( + feature_attributions=feature_attribution_task.outputs[ + 'feature_attributions' + ] + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py index da06d0fdc1..0c57b6d7ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py @@ -17,7 +17,7 @@ from google_cloud_pipeline_components import _image from google_cloud_pipeline_components import _placeholders -from google_cloud_pipeline_components._implementation.model_evaluation import LLMSafetyBiasMetricsOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components.types.artifact_types import VertexBatchPredictionJob from kfp import dsl from kfp.dsl import 
Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py index b8de7ae626..6cf10f1d07 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/__init__.py @@ -14,11 +14,15 @@ """Model evaluation preview components.""" from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp +from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline +from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp __all__ = [ + 'evaluation_llm_classification_pipeline', + 'evaluation_llm_text_generation_pipeline', 'ModelEvaluationFeatureAttributionOp', 'FeatureAttributionGraphComponentOp', 'DetectModelBiasOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py index 5a12ee186f..7eda608857 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py @@ -16,7 +16,7 @@ from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp -from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp +from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp from google_cloud_pipeline_components.types.artifact_types import VertexModel from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp import kfp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index 23f0f8db01..d6398dca48 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -16,8 +16,8 @@ from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp +from 
google_cloud_pipeline_components._implementation.model_evaluation import FeatureAttributionGraphComponentOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp -from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 5055dcd0d7..609290f0d8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -16,9 +16,9 @@ from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp +from google_cloud_pipeline_components._implementation.model_evaluation import FeatureAttributionGraphComponentOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp -from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp From e88e7a66381e70cd06a4195aae668c0cba450a49 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 15 Aug 2023 13:26:26 -0700 Subject: [PATCH 091/253] chore(components): GCPC v2.2.0 release PiperOrigin-RevId: 557231395 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 5 +++++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 71ad870a43..01a4d3e13f 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.1.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.2.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. 
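# For reference, the pinned GCPC install above relies on pip's VCS support: a
# git+https URL with the release tag after '@', plus an
# '#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud'
# fragment that selects the package directory inside the kubeflow/pipelines repo.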
ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 86e80b9fa7..d8fb87a929 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,10 @@ ## Upcoming release +## Release 2.2.0 +* Add `preview.model_evaluation.evaluation_llm_classification_pipeline.evaluation_llm_classification_pipeline` +* Change AutoML Vision Error Analysis pipeline names (`v1.model_evaluation.vision_model_error_analysis_pipeline` and `v1.model_evaluation.evaluated_annotation_pipeline`) +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.1.1 * Add `preview.model_evaluation.FeatureAttributionGraphComponentOp` pipeline * Apply latest GCPC image vulnerability resolutions (base OS and software updates) diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index bbdf86081e..38a3204abb 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.2.0", + "title": "2.2.0", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.1.1", "title": "2.1.1", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index d60161b3c4..14b0d50a81 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License.
"""Google Cloud Pipeline Components version.""" -__version__ = "2.1.1" +__version__ = "2.2.0" From c9e54798cd9fdd8a29f44ed0cfc2f4957f10766b Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 15 Aug 2023 16:24:52 -0700 Subject: [PATCH 092/253] chore(sdk): undo creation of kfp-dsl namespace package (#9874) * revert to c8204d0 * reapply #9742 * modify #9791 * reapply #9800 * reapply #9827 * revert parts of #9738 * reapply parts of #9785 * remove duplicated dsl-test code * reapply parts of #9791 * correct version --- sdk/python/build.sh | 2 +- sdk/python/kfp/compiler/compiler_test.py | 3 +- .../kfp/compiler/pipeline_spec_builder.py | 1 - sdk/python/kfp/compiler/read_write_test.py | 3 +- sdk/python/kfp/components/__init__.py | 1 - .../kfp/components/load_yaml_utilities.py | 102 +- .../components/load_yaml_utilities_test.py | 43 - sdk/python/kfp/dsl/__init__.py | 249 +++ sdk/python/kfp/dsl/base_component.py | 149 ++ .../{dsl-test => dsl}/base_component_test.py | 0 sdk/python/kfp/dsl/component_decorator.py | 127 ++ .../component_decorator_test.py | 5 +- sdk/python/kfp/dsl/component_factory.py | 639 ++++++++ .../component_factory_test.py | 0 sdk/python/kfp/dsl/constants.py | 29 + .../container_component_artifact_channel.py | 46 + ...ntainer_component_artifact_channel_test.py | 0 .../kfp/dsl/container_component_class.py | 40 + .../kfp/dsl/container_component_decorator.py | 53 + .../container_component_decorator_test.py | 0 sdk/python/kfp/dsl/executor.py | 368 +++++ sdk/python/kfp/dsl/executor_main.py | 105 ++ sdk/python/kfp/dsl/executor_test.py | 1333 +++++++++++++++++ sdk/python/kfp/dsl/for_loop.py | 315 ++++ .../kfp/{dsl-test => dsl}/for_loop_test.py | 0 sdk/python/kfp/dsl/graph_component.py | 91 ++ sdk/python/kfp/dsl/importer_component.py | 30 + sdk/python/kfp/dsl/importer_node.py | 145 ++ .../{dsl-test => dsl}/importer_node_test.py | 0 sdk/python/kfp/dsl/kfp_config.py | 106 ++ sdk/python/kfp/dsl/pipeline_channel.py | 379 +++++ .../pipeline_channel_test.py | 0 sdk/python/kfp/dsl/pipeline_context.py | 203 +++ sdk/python/kfp/dsl/pipeline_task.py | 685 +++++++++ .../{dsl-test => dsl}/pipeline_task_test.py | 49 +- sdk/python/kfp/dsl/placeholders.py | 458 ++++++ .../{dsl-test => dsl}/placeholders_test.py | 0 sdk/python/kfp/dsl/python_component.py | 44 + sdk/python/kfp/dsl/structures.py | 1075 +++++++++++++ .../kfp/{dsl-test => dsl}/structures_test.py | 60 +- sdk/python/kfp/dsl/task_final_status.py | 55 + sdk/python/kfp/dsl/tasks_group.py | 230 +++ .../kfp/{dsl-test => dsl}/tasks_group_test.py | 0 .../python/kfp/dsl/types/__init__.py | 13 +- sdk/python/kfp/dsl/types/artifact_types.py | 472 ++++++ .../types/artifact_types_test.py | 0 .../kfp/dsl/types/custom_artifact_types.py | 191 +++ .../types/custom_artifact_types_test.py | 0 ...expected_bulk_loaded_confusion_matrix.json | 0 .../test_data/expected_confusion_matrix.json | 0 ...ypes_bulk_load_classification_metrics.json | 0 ...ected_io_types_classification_metrics.json | 0 sdk/python/kfp/dsl/types/type_annotations.py | 245 +++ .../types/type_annotations_test.py | 0 sdk/python/kfp/dsl/types/type_utils.py | 543 +++++++ .../types/type_utils_test.py | 2 - sdk/python/kfp/dsl/utils.py | 128 ++ .../kfp/{dsl-test => dsl}/utils_test.py | 0 sdk/python/kfp/dsl/v1_components.py | 44 + sdk/python/kfp/dsl/v1_modelbase.py | 379 +++++ sdk/python/kfp/dsl/v1_structures.py | 851 +++++++++++ sdk/python/kfp/dsl/yaml_component.py | 54 + sdk/python/requirements.in | 1 - .../test_data/components/add_numbers.yaml | 2 +- .../component_with_metadata_fields.yaml | 2 +- 
.../component_with_pip_install.yaml | 2 +- .../component_with_task_final_status.yaml | 2 +- .../test_data/components/concat_message.yaml | 2 +- .../test_data/components/dict_input.yaml | 2 +- sdk/python/test_data/components/identity.yaml | 2 +- .../test_data/components/input_artifact.yaml | 2 +- .../test_data/components/nested_return.yaml | 2 +- .../test_data/components/output_metrics.yaml | 2 +- .../test_data/components/preprocess.yaml | 2 +- .../component_with_optional_inputs.yaml | 2 +- .../component_with_pip_index_urls.yaml | 2 +- .../components_with_optional_artifacts.yaml | 4 +- ...lightweight_python_functions_pipeline.yaml | 4 +- ...tweight_python_functions_with_outputs.yaml | 8 +- .../parallelfor_fan_in/artifacts_complex.yaml | 10 +- .../parallelfor_fan_in/artifacts_simple.yaml | 4 +- .../conditional_producer_and_consumers.yaml | 4 +- .../nested_with_parameters.yaml | 8 +- .../parameters_complex.yaml | 14 +- .../parallelfor_fan_in/parameters_simple.yaml | 4 +- .../pipeline_producer_consumer.yaml | 8 +- .../pipelines/pipeline_as_exit_task.yaml | 8 +- .../pipelines/pipeline_in_pipeline.yaml | 4 +- .../pipeline_in_pipeline_complex.yaml | 4 +- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 6 +- .../pipelines/pipeline_with_condition.yaml | 10 +- ...peline_with_dynamic_importer_metadata.yaml | 2 +- .../pipelines/pipeline_with_env.yaml | 2 +- .../pipelines/pipeline_with_exit_handler.yaml | 6 +- .../pipeline_with_google_artifact_type.yaml | 4 +- .../pipelines/pipeline_with_importer.yaml | 4 +- .../pipelines/pipeline_with_loops.yaml | 16 +- .../pipeline_with_loops_and_conditions.yaml | 26 +- .../pipeline_with_metadata_fields.yaml | 4 +- .../pipeline_with_metrics_outputs.yaml | 4 +- .../pipeline_with_multiple_exit_handlers.yaml | 14 +- .../pipeline_with_nested_conditions.yaml | 16 +- .../pipelines/pipeline_with_nested_loops.yaml | 6 +- .../pipelines/pipeline_with_outputs.yaml | 4 +- ...pipeline_with_parallelfor_parallelism.yaml | 12 +- ...ipeline_with_params_containing_format.yaml | 6 +- .../pipelines/pipeline_with_placeholders.yaml | 10 +- .../pipelines/pipeline_with_retry.yaml | 2 +- .../pipeline_with_task_final_status.yaml | 6 +- ...th_task_using_ignore_upstream_failure.yaml | 4 +- test/presubmit-component-yaml.sh | 2 +- test/presubmit-test-run-all-gcpc-modules.sh | 2 +- test/presubmit-test-sdk-upgrade.sh | 10 +- test/presubmit-tests-sdk.sh | 2 +- test/presubmit-tests-tfx.sh | 2 +- 115 files changed, 10089 insertions(+), 349 deletions(-) create mode 100644 sdk/python/kfp/dsl/__init__.py create mode 100644 sdk/python/kfp/dsl/base_component.py rename sdk/python/kfp/{dsl-test => dsl}/base_component_test.py (100%) create mode 100644 sdk/python/kfp/dsl/component_decorator.py rename sdk/python/kfp/{dsl-test => dsl}/component_decorator_test.py (97%) create mode 100644 sdk/python/kfp/dsl/component_factory.py rename sdk/python/kfp/{dsl-test => dsl}/component_factory_test.py (100%) create mode 100644 sdk/python/kfp/dsl/constants.py create mode 100644 sdk/python/kfp/dsl/container_component_artifact_channel.py rename sdk/python/kfp/{dsl-test => dsl}/container_component_artifact_channel_test.py (100%) create mode 100644 sdk/python/kfp/dsl/container_component_class.py create mode 100644 sdk/python/kfp/dsl/container_component_decorator.py rename sdk/python/kfp/{dsl-test => dsl}/container_component_decorator_test.py (100%) create mode 100644 sdk/python/kfp/dsl/executor.py create mode 100644 sdk/python/kfp/dsl/executor_main.py create mode 100644 sdk/python/kfp/dsl/executor_test.py create mode 100644 
sdk/python/kfp/dsl/for_loop.py rename sdk/python/kfp/{dsl-test => dsl}/for_loop_test.py (100%) create mode 100644 sdk/python/kfp/dsl/graph_component.py create mode 100644 sdk/python/kfp/dsl/importer_component.py create mode 100644 sdk/python/kfp/dsl/importer_node.py rename sdk/python/kfp/{dsl-test => dsl}/importer_node_test.py (100%) create mode 100644 sdk/python/kfp/dsl/kfp_config.py create mode 100644 sdk/python/kfp/dsl/pipeline_channel.py rename sdk/python/kfp/{dsl-test => dsl}/pipeline_channel_test.py (100%) create mode 100644 sdk/python/kfp/dsl/pipeline_context.py create mode 100644 sdk/python/kfp/dsl/pipeline_task.py rename sdk/python/kfp/{dsl-test => dsl}/pipeline_task_test.py (88%) create mode 100644 sdk/python/kfp/dsl/placeholders.py rename sdk/python/kfp/{dsl-test => dsl}/placeholders_test.py (100%) create mode 100644 sdk/python/kfp/dsl/python_component.py create mode 100644 sdk/python/kfp/dsl/structures.py rename sdk/python/kfp/{dsl-test => dsl}/structures_test.py (94%) create mode 100644 sdk/python/kfp/dsl/task_final_status.py create mode 100644 sdk/python/kfp/dsl/tasks_group.py rename sdk/python/kfp/{dsl-test => dsl}/tasks_group_test.py (100%) rename test/presubmit-test-kfp-dsl-runtime-code.sh => sdk/python/kfp/dsl/types/__init__.py (62%) mode change 100755 => 100644 create mode 100644 sdk/python/kfp/dsl/types/artifact_types.py rename sdk/python/kfp/{dsl-test => dsl}/types/artifact_types_test.py (100%) create mode 100644 sdk/python/kfp/dsl/types/custom_artifact_types.py rename sdk/python/kfp/{dsl-test => dsl}/types/custom_artifact_types_test.py (100%) rename sdk/python/kfp/{dsl-test => dsl}/types/test_data/expected_bulk_loaded_confusion_matrix.json (100%) rename sdk/python/kfp/{dsl-test => dsl}/types/test_data/expected_confusion_matrix.json (100%) rename sdk/python/kfp/{dsl-test => dsl}/types/test_data/expected_io_types_bulk_load_classification_metrics.json (100%) rename sdk/python/kfp/{dsl-test => dsl}/types/test_data/expected_io_types_classification_metrics.json (100%) create mode 100644 sdk/python/kfp/dsl/types/type_annotations.py rename sdk/python/kfp/{dsl-test => dsl}/types/type_annotations_test.py (100%) create mode 100644 sdk/python/kfp/dsl/types/type_utils.py rename sdk/python/kfp/{dsl-test => dsl}/types/type_utils_test.py (99%) create mode 100644 sdk/python/kfp/dsl/utils.py rename sdk/python/kfp/{dsl-test => dsl}/utils_test.py (100%) create mode 100644 sdk/python/kfp/dsl/v1_components.py create mode 100644 sdk/python/kfp/dsl/v1_modelbase.py create mode 100644 sdk/python/kfp/dsl/v1_structures.py create mode 100644 sdk/python/kfp/dsl/yaml_component.py diff --git a/sdk/python/build.sh b/sdk/python/build.sh index 6ec5cc49c4..a18d0d3c0e 100755 --- a/sdk/python/build.sh +++ b/sdk/python/build.sh @@ -21,7 +21,7 @@ # ./build.sh [output_file] -target_archive_file=$1 +target_archive_file=${1:-kfp.tar.gz} pushd "$(dirname "$0")" dist_dir=$(mktemp -d) diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 597af49cc7..92b1f6a1b7 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -1299,8 +1299,7 @@ def ignore_kfp_version_helper(spec: Dict[str, Any]) -> Dict[str, Any]: pipeline_spec['deploymentSpec']['executors'][ executor] = yaml.safe_load( re.sub( - r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", - 'kfp', + r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", 'kfp', yaml.dump( pipeline_spec['deploymentSpec']['executors'] [executor], diff --git 
a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 5a7141d5c8..b276f892c1 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -1748,7 +1748,6 @@ def _validate_dag_output_types( output_spec, error_message_prefix, checks_input=False, - raise_on_error=kfp.TYPE_CHECK, ) diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index fc4dc7d3e1..29c76db03e 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -74,8 +74,7 @@ def ignore_kfp_version_helper(spec: Dict[str, Any]) -> Dict[str, Any]: pipeline_spec['deploymentSpec']['executors'][ executor] = yaml.safe_load( re.sub( - r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", - 'kfp', + r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", 'kfp', yaml.dump( pipeline_spec['deploymentSpec']['executors'] [executor], diff --git a/sdk/python/kfp/components/__init__.py b/sdk/python/kfp/components/__init__.py index 606fe843dd..01eb072e12 100644 --- a/sdk/python/kfp/components/__init__.py +++ b/sdk/python/kfp/components/__init__.py @@ -30,7 +30,6 @@ # keep this for backward compatibility with user code "from kfp.components import placholders" and similar from kfp.dsl import base_component # noqa: keep unused import from kfp.dsl import placeholders # noqa: keep unused import -# from kfp.dsl.base_component import BaseComponent from kfp.dsl.container_component_class import ContainerComponent from kfp.dsl.python_component import PythonComponent diff --git a/sdk/python/kfp/components/load_yaml_utilities.py b/sdk/python/kfp/components/load_yaml_utilities.py index 01af00d338..34342d3b0b 100644 --- a/sdk/python/kfp/components/load_yaml_utilities.py +++ b/sdk/python/kfp/components/load_yaml_utilities.py @@ -13,15 +13,11 @@ # limitations under the License. """Functions for loading components from compiled YAML.""" -import hashlib -from typing import Optional, Tuple, Union -import warnings +from typing import Optional, Tuple from kfp.dsl import structures -from kfp.dsl import v1_structures from kfp.dsl import yaml_component import requests -import yaml def load_component_from_text(text: str) -> yaml_component.YamlComponent: @@ -34,7 +30,7 @@ def load_component_from_text(text: str) -> yaml_component.YamlComponent: Component loaded from YAML. """ return yaml_component.YamlComponent( - component_spec=_load_component_spec_from_yaml_documents(text), + component_spec=structures.ComponentSpec.from_yaml_documents(text), component_yaml=text) @@ -90,97 +86,3 @@ def load_component_from_url( resp.raise_for_status() return load_component_from_text(resp.content.decode('utf-8')) - - -def _load_documents_from_yaml(component_yaml: str) -> Tuple[dict, dict]: - """Loads up to two YAML documents from a YAML string. - - First document must always be present. If second document is - present, it is returned as a dict, else an empty dict. - """ - documents = list(yaml.safe_load_all(component_yaml)) - num_docs = len(documents) - if num_docs == 1: - pipeline_spec_dict = documents[0] - platform_spec_dict = {} - elif num_docs == 2: - pipeline_spec_dict = documents[0] - platform_spec_dict = documents[1] - else: - raise ValueError( - f'Expected one or two YAML documents in the IR YAML file. Got: {num_docs}.' 
- ) - return pipeline_spec_dict, platform_spec_dict - - -def _load_component_spec_from_yaml_documents( - component_yaml: str) -> structures.ComponentSpec: - """Loads V1 or V2 component YAML into a ComponentSpec. - - Args: - component_yaml: PipelineSpec and optionally PlatformSpec YAML documents as a single string. - - Returns: - ComponentSpec: The ComponentSpec object. - """ - - def extract_description(component_yaml: str) -> Union[str, None]: - heading = '# Description: ' - multi_line_description_prefix = '# ' - index_of_heading = 2 - if heading in component_yaml: - description = component_yaml.splitlines()[index_of_heading] - - # Multi line - comments = component_yaml.splitlines() - index = index_of_heading + 1 - while comments[index][:len(multi_line_description_prefix - )] == multi_line_description_prefix: - description += '\n' + comments[index][ - len(multi_line_description_prefix) + 1:] - index += 1 - - return description[len(heading):] - else: - return None - - pipeline_spec_dict, platform_spec_dict = _load_documents_from_yaml( - component_yaml) - - is_v1 = 'implementation' in set(pipeline_spec_dict.keys()) - if is_v1: - v1_component = load_v1_component_spec_from_component_text( - component_yaml) - return structures.ComponentSpec.from_v1_component_spec(v1_component) - else: - component_spec = structures.ComponentSpec.from_ir_dicts( - pipeline_spec_dict, platform_spec_dict) - if not component_spec.description: - component_spec.description = extract_description( - component_yaml=component_yaml) - return component_spec - - -def load_v1_component_spec_from_component_text( - text) -> v1_structures.ComponentSpec: - component_dict = yaml.safe_load(text) - component_spec = v1_structures.ComponentSpec.from_dict(component_dict) - - if isinstance(component_spec.implementation, - v1_structures.ContainerImplementation) and ( - component_spec.implementation.container.command is None): - warnings.warn( - 'Container component must specify command to be compatible with KFP ' - 'v2 compatible mode and emissary executor, which will be the default' - ' executor for KFP v2.' - 'https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/', - category=FutureWarning, - ) - - # Calculating hash digest for the component - data = text if isinstance(text, bytes) else text.encode('utf-8') - data = data.replace(b'\r\n', b'\n') # Normalizing line endings - digest = hashlib.sha256(data).hexdigest() - component_spec._digest = digest - - return component_spec diff --git a/sdk/python/kfp/components/load_yaml_utilities_test.py b/sdk/python/kfp/components/load_yaml_utilities_test.py index dff93c2257..55ba29cf57 100644 --- a/sdk/python/kfp/components/load_yaml_utilities_test.py +++ b/sdk/python/kfp/components/load_yaml_utilities_test.py @@ -19,7 +19,6 @@ import unittest from kfp import components -from kfp.components import load_yaml_utilities from kfp.dsl import structures SAMPLE_YAML = textwrap.dedent("""\ @@ -125,47 +124,5 @@ def test_load_component_from_url(self): 'python:3.7') -class TestLoadDocumentsFromYAML(unittest.TestCase): - - def test_no_documents(self): - with self.assertRaisesRegex( - ValueError, - r'Expected one or two YAML documents in the IR YAML file\. Got\: 0\.' 
- ): - load_yaml_utilities._load_documents_from_yaml('') - - def test_one_document(self): - doc1, doc2 = load_yaml_utilities._load_documents_from_yaml( - textwrap.dedent("""\ - key1: value1 - """)) - self.assertEqual(doc1, {'key1': 'value1'}) - self.assertEqual(doc2, {}) - - def test_two_documents(self): - doc1, doc2 = load_yaml_utilities._load_documents_from_yaml( - textwrap.dedent("""\ - key1: value1 - --- - key2: value2 - """)) - self.assertEqual(doc1, {'key1': 'value1'}) - self.assertEqual(doc2, {'key2': 'value2'}) - - def test_three_documents(self): - with self.assertRaisesRegex( - ValueError, - r'Expected one or two YAML documents in the IR YAML file\. Got\: 3\.' - ): - load_yaml_utilities._load_documents_from_yaml( - textwrap.dedent("""\ - key3: value3 - --- - key3: value3 - --- - key3: value3 - """)) - - if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py new file mode 100644 index 0000000000..d3502a7287 --- /dev/null +++ b/sdk/python/kfp/dsl/__init__.py @@ -0,0 +1,249 @@ +"""The `kfp.dsl` module contains domain-specific language objects used to +compose pipelines.""" +# Copyright 2020 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + 'component', + 'container_component', + 'pipeline', + 'importer', + 'ContainerSpec', + 'Condition', + 'ExitHandler', + 'ParallelFor', + 'Collected', + 'Input', + 'Output', + 'InputPath', + 'OutputPath', + 'IfPresentPlaceholder', + 'ConcatPlaceholder', + 'PipelineTaskFinalStatus', + 'PIPELINE_JOB_NAME_PLACEHOLDER', + 'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER', + 'PIPELINE_JOB_ID_PLACEHOLDER', + 'PIPELINE_TASK_NAME_PLACEHOLDER', + 'PIPELINE_TASK_ID_PLACEHOLDER', + 'PIPELINE_ROOT_PLACEHOLDER', + 'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER', + 'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER', + 'Artifact', + 'ClassificationMetrics', + 'Dataset', + 'HTML', + 'Markdown', + 'Metrics', + 'Model', + 'SlicedClassificationMetrics', + 'PipelineTask', +] + +try: + from typing import Annotated +except ImportError: + from typing_extensions import Annotated + +from typing import TypeVar + +from kfp.dsl.component_decorator import component +from kfp.dsl.container_component_decorator import container_component +from kfp.dsl.for_loop import Collected +from kfp.dsl.importer_node import importer +from kfp.dsl.pipeline_context import pipeline +from kfp.dsl.pipeline_task import PipelineTask +from kfp.dsl.placeholders import ConcatPlaceholder +from kfp.dsl.placeholders import IfPresentPlaceholder +from kfp.dsl.structures import ContainerSpec +from kfp.dsl.task_final_status import PipelineTaskFinalStatus +from kfp.dsl.tasks_group import Condition +from kfp.dsl.tasks_group import ExitHandler +from kfp.dsl.tasks_group import ParallelFor +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import ClassificationMetrics +from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.artifact_types import HTML +from kfp.dsl.types.artifact_types import 
Markdown +from kfp.dsl.types.artifact_types import Metrics +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.artifact_types import SlicedClassificationMetrics +from kfp.dsl.types.type_annotations import InputAnnotation +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputAnnotation +from kfp.dsl.types.type_annotations import OutputPath + +# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py + +PIPELINE_JOB_NAME_PLACEHOLDER = '{{$.pipeline_job_name}}' +"""A placeholder used to obtain a pipeline job name within a task at pipeline runtime. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Job name:', + value=dsl.PIPELINE_JOB_NAME_PLACEHOLDER, + ) +""" + +PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER = '{{$.pipeline_job_resource_name}}' +"""A placeholder used to obtain a pipeline job resource name within a task at pipeline runtime. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Job resource name:', + value=dsl.PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER, + ) +""" + +PIPELINE_JOB_ID_PLACEHOLDER = '{{$.pipeline_job_uuid}}' +"""A placeholder used to obtain a pipeline job ID within a task at pipeline runtime. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Job ID:', + value=dsl.PIPELINE_JOB_ID_PLACEHOLDER, + ) +""" + +PIPELINE_TASK_NAME_PLACEHOLDER = '{{$.pipeline_task_name}}' +"""A placeholder used to obtain a task name within a task at pipeline runtime. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Task name:', + value=dsl.PIPELINE_TASK_NAME_PLACEHOLDER, + ) +""" + +PIPELINE_TASK_ID_PLACEHOLDER = '{{$.pipeline_task_uuid}}' +"""A placeholder used to obtain a task ID within a task at pipeline runtime. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Task ID:', + value=dsl.PIPELINE_TASK_ID_PLACEHOLDER, + ) +""" + +PIPELINE_ROOT_PLACEHOLDER = '{{$.pipeline_root}}' +"""A placeholder used to obtain the pipeline root. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + store_model( + tmp_dir=dsl.PIPELINE_ROOT_PLACEHOLDER+'/tmp', + ) +""" + +PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER = '{{$.pipeline_job_create_time_utc}}' +"""A placeholder used to obtain the time that a pipeline job was created. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Job created at:', + value=dsl.PIPELINE_JOB_CREATE_TIME_UTC, + ) +""" +PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER = '{{$.pipeline_job_schedule_time_utc}}' +"""A placeholder used to obtain the time for which a pipeline job is scheduled. + + Example: + :: + + @dsl.pipeline + def my_pipeline(): + print_op( + msg='Job scheduled at:', + value=dsl.PIPELINE_JOB_SCHEDULE_TIME_UTC, + ) +""" + +T = TypeVar('T') +Input = Annotated[T, InputAnnotation] +"""Type generic used to represent an input artifact of type ``T``, where ``T`` is an artifact class. + +Use ``Input[Artifact]`` or ``Output[Artifact]`` to indicate whether the enclosed artifact is a component input or output. + +Args: + T: The type of the input artifact. 
+ +Example: + :: + + @dsl.component + def artifact_producer(model: Output[Artifact]): + with open(model.path, 'w') as f: + f.write('my model') + + @dsl.component + def artifact_consumer(model: Input[Artifact]): + print(model) + + @dsl.pipeline + def my_pipeline(): + producer_task = artifact_producer() + artifact_consumer(model=producer_task.output) +""" + +Output = Annotated[T, OutputAnnotation] +"""A type generic used to represent an output artifact of type ``T``, where ``T`` is an artifact class. The argument typed with this annotation is provided at runtime by the executing backend and does not need to be passed as an input by the pipeline author (see example). + +Use ``Input[Artifact]`` or ``Output[Artifact]`` to indicate whether the enclosed artifact is a component input or output. + +Args: + T: The type of the output artifact. + +Example: + :: + + @dsl.component + def artifact_producer(model: Output[Artifact]): + with open(model.path, 'w') as f: + f.write('my model') + + @dsl.component + def artifact_consumer(model: Input[Artifact]): + print(model) + + @dsl.pipeline + def my_pipeline(): + producer_task = artifact_producer() + artifact_consumer(model=producer_task.output) +""" diff --git a/sdk/python/kfp/dsl/base_component.py b/sdk/python/kfp/dsl/base_component.py new file mode 100644 index 0000000000..25a10f84df --- /dev/null +++ b/sdk/python/kfp/dsl/base_component.py @@ -0,0 +1,149 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Base class for KFP components.""" + +import abc +from typing import List + +from kfp.dsl import pipeline_task +from kfp.dsl import structures +from kfp.dsl.types import type_utils +from kfp.pipeline_spec import pipeline_spec_pb2 + + +class BaseComponent(abc.ABC): + """Base class for a component. + + **Note:** ``BaseComponent`` is not intended to be used to construct components directly. Use ``@kfp.dsl.component`` or ``kfp.components.load_component_from_*()`` instead. + + Attributes: + name: Name of the component. + component_spec: Component definition. + """ + + def __init__(self, component_spec: structures.ComponentSpec): + """Init function for BaseComponent. + + Args: + component_spec: The component definition. + """ + self.component_spec = component_spec + self.name = component_spec.name + self.description = component_spec.description or None + + # Arguments typed as PipelineTaskFinalStatus are special arguments that + # do not count as user inputs. Instead, they are reserved to for the + # (backend) system to pass a value. 
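# The set comprehension below therefore collects every declared input except
# those typed as PipelineTaskFinalStatus; __call__ later validates
# user-supplied keyword arguments against this set.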
+ self._component_inputs = { + input_name for input_name, input_spec in ( + self.component_spec.inputs or {}).items() + if not type_utils.is_task_final_status_type(input_spec.type) + } + + def _prevent_using_output_lists_of_artifacts(self): + """This method should be called at the end of __init__ for + PythonComponent and ContainerComponent subclasses to temporarily block + outputting lists of artifacts from a component.""" + # TODO: remove when output lists of artifacts from primitive components is supported + for output_name, output_spec in (self.component_spec.outputs or + {}).items(): + if output_spec.is_artifact_list: + raise ValueError( + f'Output lists of artifacts are only supported for pipelines. Got output list of artifacts for output parameter {output_name!r} of component {self.name!r}.' + ) + + def __call__(self, *args, **kwargs) -> pipeline_task.PipelineTask: + """Creates a PipelineTask object. + + The arguments are generated on the fly based on component input + definitions. + """ + task_inputs = {} + + if args: + raise TypeError( + 'Components must be instantiated using keyword arguments. Positional ' + f'parameters are not allowed (found {len(args)} such parameters for ' + f'component "{self.name}").') + + for k, v in kwargs.items(): + if k not in self._component_inputs: + raise TypeError( + f'{self.name}() got an unexpected keyword argument "{k}".') + task_inputs[k] = v + + # Skip optional inputs and arguments typed as PipelineTaskFinalStatus. + missing_arguments = [ + arg for arg in self.required_inputs if arg not in kwargs + ] + if missing_arguments: + argument_or_arguments = 'argument' if len( + missing_arguments) == 1 else 'arguments' + arguments = ', '.join( + arg_name.replace('-', '_') for arg_name in missing_arguments) + + raise TypeError( + f'{self.name}() missing {len(missing_arguments)} required ' + f'{argument_or_arguments}: {arguments}.') + + return pipeline_task.PipelineTask( + component_spec=self.component_spec, + args=task_inputs, + ) + + @property + def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Returns the pipeline spec of the component.""" + with BlockPipelineTaskRegistration(): + return self.component_spec.to_pipeline_spec() + + @property + def platform_spec(self) -> pipeline_spec_pb2.PlatformSpec: + """Returns the PlatformSpec of the component. + + Useful when the component is a GraphComponent, else will be + empty per component_spec.platform_spec default. + """ + return self.component_spec.platform_spec + + @abc.abstractmethod + def execute(self, **kwargs): + """Executes the component locally if implemented by the inheriting + subclass.""" + + @property + def required_inputs(self) -> List[str]: + return [ + input_name for input_name, input_spec in ( + self.component_spec.inputs or {}).items() + if not input_spec.optional + ] + + +class BlockPipelineTaskRegistration: + """Temporarily stop registering tasks to the default pipeline. + + Handles special, uncommon functions that decorate and mutate a + component, possibly by using the component's .pipeline_spec + attribute. This is exhibited in the version of + google_cloud_pipeline_components compatible with KFP SDK v2. + """ + + # TODO: this handles the special case of a compiled component (when compiled inside a pipeline), which should not have any concept of a default pipeline. Perhaps there is a way to unify component/pipeline compilation concepts to remove this workaround? 
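# In practice: __enter__ swaps PipelineTask._register_task_handler for the
# module-level default handler, so tasks instantiated while computing
# pipeline_spec are not attached to an enclosing default pipeline, and
# __exit__ restores the previously active handler.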
+ + def __enter__(self): + self.task_handler, pipeline_task.PipelineTask._register_task_handler = pipeline_task.PipelineTask._register_task_handler, pipeline_task._register_task_handler + + def __exit__(self, *args): + pipeline_task.PipelineTask._register_task_handler = self.task_handler diff --git a/sdk/python/kfp/dsl-test/base_component_test.py b/sdk/python/kfp/dsl/base_component_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/base_component_test.py rename to sdk/python/kfp/dsl/base_component_test.py diff --git a/sdk/python/kfp/dsl/component_decorator.py b/sdk/python/kfp/dsl/component_decorator.py new file mode 100644 index 0000000000..7c6589589d --- /dev/null +++ b/sdk/python/kfp/dsl/component_decorator.py @@ -0,0 +1,127 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +from typing import Callable, List, Optional +import warnings + +from kfp.dsl import component_factory + + +def component(func: Optional[Callable] = None, + *, + base_image: Optional[str] = None, + target_image: Optional[str] = None, + packages_to_install: List[str] = None, + pip_index_urls: Optional[List[str]] = None, + output_component_file: Optional[str] = None, + install_kfp_package: bool = True, + kfp_package_path: Optional[str] = None): + """Decorator for Python-function based components. + + A KFP component can either be a lightweight component or a containerized + component. + + If ``target_image`` is not specified, this function creates a lightweight + component. A lightweight component is a self-contained Python function that + includes all necessary imports and dependencies. In lightweight components, + ``packages_to_install`` will be used to install dependencies at runtime. The + parameters ``install_kfp_package`` and ``kfp_package_path`` can be used to control + how and from where KFP should be installed when the lightweight component is executed. + + If ``target_image`` is specified, this function creates a component definition + based around the ``target_image``. The assumption is that the function in ``func`` + will be packaged by KFP into this ``target_image``. You can use the KFP CLI's ``build`` + command to package the function into ``target_image``. + + Args: + func: Python function from which to create a component. The function + should have type annotations for all its arguments, indicating how + each argument is intended to be used (e.g. as an input/output artifact, + a plain parameter, or a path to a file). + base_image: Image to use when executing the Python function. It should + contain a default Python interpreter that is compatible with KFP. + target_image: Image to when creating containerized components. + packages_to_install: List of packages to install before + executing the Python function. These will always be installed at component runtime. + pip_index_urls: Python Package Index base URLs from which to + install ``packages_to_install``. Defaults to installing from only PyPI + (``'https://pypi.org/simple'``). 
For more information, see `pip install docs `_. + output_component_file: If specified, this function will write a + shareable/loadable version of the component spec into this file. + + **Warning:** This compilation approach is deprecated. + install_kfp_package: Specifies if the KFP SDK should add the ``kfp`` Python package to + ``packages_to_install``. Lightweight Python functions always require + an installation of KFP in ``base_image`` to work. If you specify + a ``base_image`` that already contains KFP, you can set this to ``False``. + This flag is ignored when ``target_image`` is specified, which implies + a choice to build a containerized component. Containerized components + will always install KFP as part of the build process. + kfp_package_path: Specifies the location from which to install KFP. By + default, this will try to install from PyPI using the same version + as that used when this component was created. Component authors can + choose to override this to point to a GitHub pull request or + other pip-compatible package server. + + Returns: + A component task factory that can be used in pipeline definitions. + + Example: + :: + + from kfp import dsl + + @dsl.component + def my_function_one(input: str, output: Output[Model]): + ... + + @dsl.component( + base_image='python:3.9', + output_component_file='my_function.yaml' + ) + def my_function_two(input: Input[Mode])): + ... + + @dsl.pipeline(name='my-pipeline', pipeline_root='...') + def pipeline(): + my_function_one_task = my_function_one(input=...) + my_function_two_task = my_function_two(input=my_function_one_task.outputs) + """ + if output_component_file is not None: + warnings.warn( + 'output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.', + DeprecationWarning, + stacklevel=2) + + if func is None: + return functools.partial( + component, + base_image=base_image, + target_image=target_image, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + output_component_file=output_component_file, + install_kfp_package=install_kfp_package, + kfp_package_path=kfp_package_path) + + return component_factory.create_component_from_func( + func, + base_image=base_image, + target_image=target_image, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + output_component_file=output_component_file, + install_kfp_package=install_kfp_package, + kfp_package_path=kfp_package_path) diff --git a/sdk/python/kfp/dsl-test/component_decorator_test.py b/sdk/python/kfp/dsl/component_decorator_test.py similarity index 97% rename from sdk/python/kfp/dsl-test/component_decorator_test.py rename to sdk/python/kfp/dsl/component_decorator_test.py index 1358fa691d..4b51de638f 100644 --- a/sdk/python/kfp/dsl-test/component_decorator_test.py +++ b/sdk/python/kfp/dsl/component_decorator_test.py @@ -17,8 +17,8 @@ from typing import Dict, List, NamedTuple import unittest -from kfp.components import load_yaml_utilities from kfp.dsl import python_component +from kfp.dsl import structures from kfp.dsl.component_decorator import component @@ -104,8 +104,7 @@ def comp(text: str) -> str: with open(filepath, 'r') as f: yaml_text = f.read() - component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( - yaml_text) + component_spec = structures.ComponentSpec.from_yaml_documents(yaml_text) self.assertEqual(component_spec.name, comp.component_spec.name) def test_output_named_tuple_with_dict(self): diff --git 
a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py new file mode 100644 index 0000000000..99d34f7828 --- /dev/null +++ b/sdk/python/kfp/dsl/component_factory.py @@ -0,0 +1,639 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import dataclasses +import inspect +import itertools +import pathlib +import re +import textwrap +from typing import Callable, List, Mapping, Optional, Tuple, Type, Union +import warnings + +import docstring_parser +from kfp.dsl import container_component_artifact_channel +from kfp.dsl import container_component_class +from kfp.dsl import graph_component +from kfp.dsl import placeholders +from kfp.dsl import python_component +from kfp.dsl import structures +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import custom_artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils + +_DEFAULT_BASE_IMAGE = 'python:3.7' + + +@dataclasses.dataclass +class ComponentInfo(): + """A dataclass capturing registered components. + + This will likely be subsumed/augmented with BaseComponent. + """ + name: str + function_name: str + func: Callable + target_image: str + module_path: pathlib.Path + component_spec: structures.ComponentSpec + output_component_file: Optional[str] = None + base_image: str = _DEFAULT_BASE_IMAGE + packages_to_install: Optional[List[str]] = None + pip_index_urls: Optional[List[str]] = None + + +# A map from function_name to components. This is always populated when a +# module containing KFP components is loaded. Primarily used by KFP CLI +# component builder to package components in a file into containers. +REGISTERED_MODULES = None + + +def _python_function_name_to_component_name(name): + name_with_spaces = re.sub(' +', ' ', name.replace('_', ' ')).strip(' ') + return name_with_spaces[0].upper() + name_with_spaces[1:] + + +def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str: + """Generates index url options for pip install command based on provided + pip_index_urls. + + Args: + pip_index_urls: Optional list of pip index urls + + Returns: + - Empty string if pip_index_urls is empty/None. + - '--index-url url --trusted-host url ' if pip_index_urls contains 1 + url + - the above followed by '--extra-index-url url --trusted-host url ' + for + each next url in pip_index_urls if pip_index_urls contains more than 1 + url + + Note: In case pip_index_urls is not empty, the returned string will + contain space at the end. + """ + if not pip_index_urls: + return '' + + index_url = pip_index_urls[0] + extra_index_urls = pip_index_urls[1:] + + options = [f'--index-url {index_url} --trusted-host {index_url}'] + options.extend( + f'--extra-index-url {extra_index_url} --trusted-host {extra_index_url}' + for extra_index_url in extra_index_urls) + + return ' '.join(options) + ' ' + + +_install_python_packages_script_template = ''' +if ! 
[ -x "$(command -v pip)" ]; then + python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip +fi + +PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \ + --no-warn-script-location {index_url_options}{concat_package_list} && "$0" "$@" +''' + + +def _get_packages_to_install_command( + package_list: Optional[List[str]] = None, + pip_index_urls: Optional[List[str]] = None) -> List[str]: + + if not package_list: + return [] + + concat_package_list = ' '.join( + [repr(str(package)) for package in package_list]) + index_url_options = make_index_url_options(pip_index_urls) + install_python_packages_script = _install_python_packages_script_template.format( + index_url_options=index_url_options, + concat_package_list=concat_package_list) + return ['sh', '-c', install_python_packages_script] + + +def _get_default_kfp_package_path() -> str: + import kfp + return f'kfp=={kfp.__version__}' + + +def _get_function_source_definition(func: Callable) -> str: + func_code = inspect.getsource(func) + + # Function might be defined in some indented scope (e.g. in another + # function). We need to handle this and properly dedent the function source + # code + func_code = textwrap.dedent(func_code) + func_code_lines = func_code.split('\n') + + # Removing possible decorators (can be multiline) until the function + # definition is found + func_code_lines = itertools.dropwhile(lambda x: not x.startswith('def'), + func_code_lines) + + if not func_code_lines: + raise ValueError( + f'Failed to dedent and clean up the source of function "{func.__name__}". It is probably not properly indented.' + ) + + return '\n'.join(func_code_lines) + + +def _maybe_make_unique(name: str, names: List[str]): + if name not in names: + return name + + for i in range(2, 100): + unique_name = f'{name}_{i}' + if unique_name not in names: + return unique_name + + raise RuntimeError(f'Too many arguments with the name {name}') + + +def extract_component_interface( + func: Callable, + containerized: bool = False, + description: Optional[str] = None, + name: Optional[str] = None, +) -> structures.ComponentSpec: + single_output_name_const = 'Output' + + signature = inspect.signature(func) + parameters = list(signature.parameters.values()) + + original_docstring = inspect.getdoc(func) + parsed_docstring = docstring_parser.parse(original_docstring) + + inputs = {} + outputs = {} + + input_names = set() + output_names = set() + for parameter in parameters: + parameter_type = type_annotations.maybe_strip_optional_from_annotation( + parameter.annotation) + passing_style = None + io_name = parameter.name + is_artifact_list = False + + if type_annotations.is_Input_Output_artifact_annotation(parameter_type): + # passing_style is either type_annotations.InputAnnotation or + # type_annotations.OutputAnnotation. + passing_style = type_annotations.get_io_artifact_annotation( + parameter_type) + + # parameter_type is a type like typing_extensions.Annotated[kfp.dsl.types.artifact_types.Artifact, ] OR typing_extensions.Annotated[typing.List[kfp.dsl.types.artifact_types.Artifact], ] + + is_artifact_list = type_annotations.is_list_of_artifacts( + parameter_type.__origin__) + + parameter_type = type_annotations.get_io_artifact_class( + parameter_type) + if not type_annotations.is_artifact_class(parameter_type): + raise ValueError( + f'Input[T] and Output[T] are only supported when T is an artifact or list of artifacts. 
Found `{io_name} with type {parameter_type}`' + ) + + if parameter.default is not inspect.Parameter.empty: + if passing_style in [ + type_annotations.OutputAnnotation, + type_annotations.OutputPath, + ]: + raise ValueError( + 'Default values for Output artifacts are not supported.' + ) + elif parameter.default is not None: + raise ValueError( + f'Optional Input artifacts may only have default value None. Got: {parameter.default}.' + ) + + elif isinstance( + parameter_type, + (type_annotations.InputPath, type_annotations.OutputPath)): + passing_style = type(parameter_type) + parameter_type = parameter_type.type + if parameter.default is not inspect.Parameter.empty and not ( + passing_style == type_annotations.InputPath and + parameter.default is None): + raise ValueError( + 'Path inputs only support default values of None. Default' + ' values for outputs are not supported.') + + type_struct = type_utils._annotation_to_type_struct(parameter_type) + if type_struct is None: + raise TypeError( + f'Missing type annotation for argument: {parameter.name}') + + if passing_style in [ + type_annotations.OutputAnnotation, type_annotations.OutputPath + ]: + if io_name == single_output_name_const: + raise ValueError( + f'"{single_output_name_const}" is an invalid parameter name.' + ) + io_name = _maybe_make_unique(io_name, output_names) + output_names.add(io_name) + if type_annotations.is_artifact_class(parameter_type): + schema_version = parameter_type.schema_version + output_spec = structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + type_struct, schema_version), + is_artifact_list=is_artifact_list) + else: + output_spec = structures.OutputSpec(type=type_struct) + outputs[io_name] = output_spec + else: + io_name = _maybe_make_unique(io_name, input_names) + input_names.add(io_name) + type_ = type_utils.create_bundled_artifact_type( + type_struct, parameter_type.schema_version + ) if type_annotations.is_artifact_class( + parameter_type) else type_struct + default = None if parameter.default == inspect.Parameter.empty or type_annotations.is_artifact_class( + parameter_type) else parameter.default + optional = parameter.default is not inspect.Parameter.empty or type_utils.is_task_final_status_type( + type_struct) + input_spec = structures.InputSpec( + type=type_, + default=default, + optional=optional, + is_artifact_list=is_artifact_list, + ) + + inputs[io_name] = input_spec + + #Analyzing the return type annotations. + return_ann = signature.return_annotation + if not containerized: + if hasattr(return_ann, '_fields'): #NamedTuple + # Getting field type annotations. 
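+            # For example, a return annotation such as
+            # `NamedTuple('Outputs', [('model', Model), ('accuracy', float)])`
+            # (field names here are illustrative) yields one output per field,
+            # named after that field.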
+ # __annotations__ does not exist in python 3.5 and earlier + # _field_types does not exist in python 3.9 and later + field_annotations = getattr(return_ann, '__annotations__', + None) or getattr( + return_ann, '_field_types', None) + for field_name in return_ann._fields: + output_name = _maybe_make_unique(field_name, output_names) + output_names.add(output_name) + type_var = field_annotations.get(field_name) + if type_annotations.is_list_of_artifacts(type_var): + artifact_cls = type_var.__args__[0] + output_spec = structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + artifact_cls.schema_title, + artifact_cls.schema_version), + is_artifact_list=True) + elif type_annotations.is_artifact_class(type_var): + output_spec = structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + type_var.schema_title, type_var.schema_version)) + else: + type_struct = type_utils._annotation_to_type_struct( + type_var) + output_spec = structures.OutputSpec(type=type_struct) + outputs[output_name] = output_spec + # Deprecated dict-based way of declaring multiple outputs. Was only used by + # the @component decorator + elif isinstance(return_ann, dict): + warnings.warn( + 'The ability to specify multiple outputs using the dict syntax' + ' has been deprecated. It will be removed soon after release' + ' 0.1.32. Please use typing.NamedTuple to declare multiple' + ' outputs.') + for output_name, output_type_annotation in return_ann.items(): + output_type_struct = type_utils._annotation_to_type_struct( + output_type_annotation) + output_spec = structures.OutputSpec(type=output_type_struct) + outputs[name] = output_spec + elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty: + output_name = _maybe_make_unique(single_output_name_const, + output_names) + # Fixes exotic, but possible collision: + # `def func(output_path: OutputPath()) -> str: ...` + output_names.add(output_name) + return_ann = signature.return_annotation + if type_annotations.is_list_of_artifacts(return_ann): + artifact_cls = return_ann.__args__[0] + output_spec = structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + artifact_cls.schema_title, artifact_cls.schema_version), + is_artifact_list=True) + elif type_annotations.is_artifact_class(return_ann): + output_spec = structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + return_ann.schema_title, return_ann.schema_version), + is_artifact_list=False) + else: + type_struct = type_utils._annotation_to_type_struct(return_ann) + output_spec = structures.OutputSpec(type=type_struct) + + outputs[output_name] = output_spec + elif return_ann != inspect.Parameter.empty and return_ann != structures.ContainerSpec: + raise TypeError( + 'Return annotation should be either ContainerSpec or omitted for container components.' 
+ ) + + component_name = name or _python_function_name_to_component_name( + func.__name__) + + def assign_descriptions( + inputs_or_outputs: Mapping[str, Union[structures.InputSpec, + structures.OutputSpec]], + docstring_params: List[docstring_parser.DocstringParam], + ) -> None: + """Assigns descriptions to InputSpec or OutputSpec for each component + input/output found in the parsed docstring parameters.""" + docstring_inputs = {param.arg_name: param for param in docstring_params} + for name, spec in inputs_or_outputs.items(): + if name in docstring_inputs: + spec.description = docstring_inputs[name].description + + def parse_docstring_with_return_as_args( + docstring: Union[str, + None]) -> Union[docstring_parser.Docstring, None]: + """Modifies docstring so that a return section can be treated as an + args section, then parses the docstring.""" + if docstring is None: + return None + + # Returns and Return are the only two keywords docstring_parser uses for returns + # use newline to avoid replacements that aren't in the return section header + return_keywords = ['Returns:\n', 'Returns\n', 'Return:\n', 'Return\n'] + for keyword in return_keywords: + if keyword in docstring: + modified_docstring = docstring.replace(keyword.strip(), 'Args:') + return docstring_parser.parse(modified_docstring) + + return None + + assign_descriptions(inputs, parsed_docstring.params) + + modified_parsed_docstring = parse_docstring_with_return_as_args( + original_docstring) + if modified_parsed_docstring is not None: + assign_descriptions(outputs, modified_parsed_docstring.params) + + description = get_pipeline_description( + decorator_description=description, + docstring=parsed_docstring, + ) + + return structures.ComponentSpec( + name=component_name, + description=description, + inputs=inputs or None, + outputs=outputs or None, + implementation=structures.Implementation(), + ) + + +def _get_command_and_args_for_lightweight_component( + func: Callable) -> Tuple[List[str], List[str]]: + imports_source = [ + 'import kfp', + 'from kfp import dsl', + 'from kfp.dsl import *', + 'from typing import *', + ] + custom_artifact_types.get_custom_artifact_type_import_statements(func) + + func_source = _get_function_source_definition(func) + source = textwrap.dedent(''' + {imports_source} + + {func_source}\n''').format( + imports_source='\n'.join(imports_source), func_source=func_source) + command = [ + 'sh', + '-ec', + textwrap.dedent('''\ + program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" + python3 -m kfp.dsl.executor_main \ + --component_module_path \ + "$program_path/ephemeral_component.py" \ + "$@" + '''), + source, + ] + + args = [ + '--executor_input', + placeholders.ExecutorInputPlaceholder(), + '--function_to_execute', + func.__name__, + ] + + return command, args + + +def _get_command_and_args_for_containerized_component( + function_name: str) -> Tuple[List[str], List[str]]: + command = [ + 'python3', + '-m', + 'kfp.dsl.executor_main', + ] + + args = [ + '--executor_input', + placeholders.ExecutorInputPlaceholder()._to_string(), + '--function_to_execute', + function_name, + ] + return command, args + + +def create_component_from_func( + func: Callable, + base_image: Optional[str] = None, + target_image: Optional[str] = None, + packages_to_install: List[str] = None, + pip_index_urls: Optional[List[str]] = None, + output_component_file: Optional[str] = None, + install_kfp_package: bool = True, + kfp_package_path: Optional[str] = None, +) -> python_component.PythonComponent: + 
"""Implementation for the @component decorator. + + The decorator is defined under component_decorator.py. See the + decorator for the canonical documentation for this function. + """ + packages_to_install = packages_to_install or [] + + if install_kfp_package and target_image is None: + if kfp_package_path is None: + kfp_package_path = _get_default_kfp_package_path() + packages_to_install.append(kfp_package_path) + + packages_to_install_command = _get_packages_to_install_command( + package_list=packages_to_install, pip_index_urls=pip_index_urls) + + command = [] + args = [] + if base_image is None: + base_image = _DEFAULT_BASE_IMAGE + + component_image = base_image + + if target_image: + component_image = target_image + command, args = _get_command_and_args_for_containerized_component( + function_name=func.__name__,) + else: + command, args = _get_command_and_args_for_lightweight_component( + func=func) + + component_spec = extract_component_interface(func) + component_spec.implementation = structures.Implementation( + container=structures.ContainerSpecImplementation( + image=component_image, + command=packages_to_install_command + command, + args=args, + )) + + module_path = pathlib.Path(inspect.getsourcefile(func)) + module_path.resolve() + + component_name = _python_function_name_to_component_name(func.__name__) + component_info = ComponentInfo( + name=component_name, + function_name=func.__name__, + func=func, + target_image=target_image, + module_path=module_path, + component_spec=component_spec, + output_component_file=output_component_file, + base_image=base_image, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls) + + if REGISTERED_MODULES is not None: + REGISTERED_MODULES[component_name] = component_info + + if output_component_file: + component_spec.save_to_component_yaml(output_component_file) + + return python_component.PythonComponent( + component_spec=component_spec, python_func=func) + + +def make_input_for_parameterized_container_component_function( + name: str, annotation: Union[Type[List[artifact_types.Artifact]], + Type[artifact_types.Artifact]] +) -> Union[placeholders.Placeholder, container_component_artifact_channel + .ContainerComponentArtifactChannel]: + if type_annotations.is_input_artifact(annotation): + + if type_annotations.is_list_of_artifacts(annotation.__origin__): + return placeholders.InputListOfArtifactsPlaceholder(name) + else: + return container_component_artifact_channel.ContainerComponentArtifactChannel( + io_type='input', var_name=name) + + elif type_annotations.is_output_artifact(annotation): + + if type_annotations.is_list_of_artifacts(annotation.__origin__): + return placeholders.OutputListOfArtifactsPlaceholder(name) + else: + return container_component_artifact_channel.ContainerComponentArtifactChannel( + io_type='output', var_name=name) + + elif isinstance( + annotation, + (type_annotations.OutputAnnotation, type_annotations.OutputPath)): + return placeholders.OutputParameterPlaceholder(name) + + else: + placeholder = placeholders.InputValuePlaceholder(name) + # small hack to encode the runtime value's type for a custom json.dumps function + if (annotation == task_final_status.PipelineTaskFinalStatus or + type_utils.is_task_final_status_type(annotation)): + placeholder._ir_type = 'STRUCT' + else: + placeholder._ir_type = type_utils.get_parameter_type_name( + annotation) + return placeholder + + +def create_container_component_from_func( + func: Callable) -> container_component_class.ContainerComponent: + 
"""Implementation for the @container_component decorator. + + The decorator is defined under container_component_decorator.py. See + the decorator for the canonical documentation for this function. + """ + + component_spec = extract_component_interface(func, containerized=True) + signature = inspect.signature(func) + parameters = list(signature.parameters.values()) + arg_list = [] + for parameter in parameters: + parameter_type = type_annotations.maybe_strip_optional_from_annotation( + parameter.annotation) + arg_list.append( + make_input_for_parameterized_container_component_function( + parameter.name, parameter_type)) + + container_spec = func(*arg_list) + container_spec_implementation = structures.ContainerSpecImplementation.from_container_spec( + container_spec) + component_spec.implementation = structures.Implementation( + container_spec_implementation) + component_spec._validate_placeholders() + return container_component_class.ContainerComponent(component_spec, func) + + +def create_graph_component_from_func( + func: Callable, + name: Optional[str] = None, + description: Optional[str] = None, + display_name: Optional[str] = None, +) -> graph_component.GraphComponent: + """Implementation for the @pipeline decorator. + + The decorator is defined under pipeline_context.py. See the + decorator for the canonical documentation for this function. + """ + + component_spec = extract_component_interface( + func, + description=description, + name=name, + ) + return graph_component.GraphComponent( + component_spec=component_spec, + pipeline_func=func, + display_name=display_name, + ) + + +def get_pipeline_description( + decorator_description: Union[str, None], + docstring: docstring_parser.Docstring, +) -> Union[str, None]: + """Obtains the correct pipeline description from the pipeline decorator's + description argument and the parsed docstring. + + Gives precedence to the decorator argument. + """ + if decorator_description: + return decorator_description + + short_description = docstring.short_description + long_description = docstring.long_description + docstring_description = short_description + '\n' + long_description if ( + short_description and long_description) else short_description + return docstring_description.strip() if docstring_description else None diff --git a/sdk/python/kfp/dsl-test/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/component_factory_test.py rename to sdk/python/kfp/dsl/component_factory_test.py diff --git a/sdk/python/kfp/dsl/constants.py b/sdk/python/kfp/dsl/constants.py new file mode 100644 index 0000000000..44b7a16fbb --- /dev/null +++ b/sdk/python/kfp/dsl/constants.py @@ -0,0 +1,29 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Constants.""" + +# Unit constants for k8s size string. 
+_E = 10**18 # Exa +_EI = 1 << 60 # Exa: power-of-two approximate +_P = 10**15 # Peta +_PI = 1 << 50 # Peta: power-of-two approximate +# noinspection PyShadowingBuiltins +_T = 10**12 # Tera +_TI = 1 << 40 # Tera: power-of-two approximate +_G = 10**9 # Giga +_GI = 1 << 30 # Giga: power-of-two approximate +_M = 10**6 # Mega +_MI = 1 << 20 # Mega: power-of-two approximate +_K = 10**3 # Kilo +_KI = 1 << 10 # Kilo: power-of-two approximate diff --git a/sdk/python/kfp/dsl/container_component_artifact_channel.py b/sdk/python/kfp/dsl/container_component_artifact_channel.py new file mode 100644 index 0000000000..322752295f --- /dev/null +++ b/sdk/python/kfp/dsl/container_component_artifact_channel.py @@ -0,0 +1,46 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union + + +class ContainerComponentArtifactChannel: + """A class for passing in placeholders into container_component decorated + function.""" + + def __init__(self, io_type: str, var_name: str): + self._io_type = io_type + self._var_name = var_name + + def __getattr__(self, _name: str) -> Union['placeholders.Placeholder']: + # aviod circular imports + from kfp.dsl import placeholders + + attr_to_placeholder_dict = { + 'uri': { + 'input': placeholders.InputUriPlaceholder, + 'output': placeholders.OutputUriPlaceholder, + }, + 'path': { + 'input': placeholders.InputPathPlaceholder, + 'output': placeholders.OutputPathPlaceholder, + }, + 'metadata': { + 'input': placeholders.InputMetadataPlaceholder, + 'output': placeholders.OutputMetadataPlaceholder + }, + } + if _name not in ['uri', 'path', 'metadata']: + raise AttributeError(f'Cannot access artifact attribute "{_name}".') + return attr_to_placeholder_dict[_name][self._io_type](self._var_name) diff --git a/sdk/python/kfp/dsl-test/container_component_artifact_channel_test.py b/sdk/python/kfp/dsl/container_component_artifact_channel_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/container_component_artifact_channel_test.py rename to sdk/python/kfp/dsl/container_component_artifact_channel_test.py diff --git a/sdk/python/kfp/dsl/container_component_class.py b/sdk/python/kfp/dsl/container_component_class.py new file mode 100644 index 0000000000..7cd928036a --- /dev/null +++ b/sdk/python/kfp/dsl/container_component_class.py @@ -0,0 +1,40 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
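+# ContainerComponent instances are normally produced by the
+# @container_component decorator (see container_component_decorator.py)
+# rather than constructed directly, e.g. (illustrative sketch):
+#
+#     @dsl.container_component
+#     def echo():
+#         return dsl.ContainerSpec(image='alpine', command=['echo', 'hello'])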
+"""Container-based component.""" + +from typing import Callable + +from kfp.dsl import base_component +from kfp.dsl import structures + + +class ContainerComponent(base_component.BaseComponent): + """Component defined via pre-built container. + + Attribute: + pipeline_func: The function that becomes the implementation of this component. + """ + + def __init__(self, component_spec: structures.ComponentSpec, + pipeline_func: Callable) -> None: + super().__init__(component_spec=component_spec) + self.pipeline_func = pipeline_func + + self._prevent_using_output_lists_of_artifacts() + + def execute(self, **kwargs): + # ContainerComponent`: Also inherits from `BaseComponent`. + # As its name suggests, this class backs (custom) container components. + # Its `execute()` method uses `docker run` for local component execution + raise NotImplementedError diff --git a/sdk/python/kfp/dsl/container_component_decorator.py b/sdk/python/kfp/dsl/container_component_decorator.py new file mode 100644 index 0000000000..6ce43094ff --- /dev/null +++ b/sdk/python/kfp/dsl/container_component_decorator.py @@ -0,0 +1,53 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable + +from kfp.dsl import component_factory +from kfp.dsl import container_component_class + + +def container_component( + func: Callable) -> container_component_class.ContainerComponent: + """Decorator for container-based components in KFP v2. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + + Example: + :: + + from kfp.dsl import container_component, ContainerSpec, InputPath, OutputPath, Output + + @container_component + def my_component( + dataset_path: InputPath(Dataset), + model: Output[Model], + num_epochs: int, + output_parameter: OutputPath(str), + ): + return ContainerSpec( + image='gcr.io/my-image', + command=['sh', 'my_component.sh'], + args=[ + '--dataset_path', dataset_path, + '--model_path', model.path, + '--output_parameter_path', output_parameter, + ] + ) + """ + return component_factory.create_container_component_from_func(func) diff --git a/sdk/python/kfp/dsl-test/container_component_decorator_test.py b/sdk/python/kfp/dsl/container_component_decorator_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/container_component_decorator_test.py rename to sdk/python/kfp/dsl/container_component_decorator_test.py diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp/dsl/executor.py new file mode 100644 index 0000000000..db8a8a89bd --- /dev/null +++ b/sdk/python/kfp/dsl/executor.py @@ -0,0 +1,368 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect +import json +import os +from typing import Any, Callable, Dict, List, Optional, Union + +from kfp.dsl import python_component +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations + + +class Executor(): + """Executor executes v2-based Python function components.""" + + def __init__(self, executor_input: Dict, + function_to_execute: Union[Callable, + python_component.PythonComponent]): + if hasattr(function_to_execute, 'python_func'): + self._func = function_to_execute.python_func + else: + self._func = function_to_execute + + self._input = executor_input + self._input_artifacts: Dict[str, + Union[artifact_types.Artifact, + List[artifact_types.Artifact]]] = {} + self._output_artifacts: Dict[str, artifact_types.Artifact] = {} + + for name, artifacts in self._input.get('inputs', + {}).get('artifacts', {}).items(): + list_of_artifact_proto_structs = artifacts.get('artifacts') + if list_of_artifact_proto_structs: + annotation = self._func.__annotations__[name] + # InputPath has no attribute __origin__ and also should be handled as a single artifact + if type_annotations.is_Input_Output_artifact_annotation( + annotation) and type_annotations.is_list_of_artifacts( + annotation.__origin__): + self._input_artifacts[name] = [ + self.make_artifact( + msg, + name, + self._func, + ) for msg in list_of_artifact_proto_structs + ] + else: + self._input_artifacts[name] = self.make_artifact( + list_of_artifact_proto_structs[0], + name, + self._func, + ) + + for name, artifacts in self._input.get('outputs', + {}).get('artifacts', {}).items(): + list_of_artifact_proto_structs = artifacts.get('artifacts') + if list_of_artifact_proto_structs: + output_artifact = self.make_artifact( + list_of_artifact_proto_structs[0], + name, + self._func, + ) + self._output_artifacts[name] = output_artifact + self.makedirs_recursively(output_artifact.path) + + self._return_annotation = inspect.signature( + self._func).return_annotation + self._executor_output = {} + + def make_artifact( + self, + runtime_artifact: Dict, + name: str, + func: Callable, + ) -> Any: + annotation = func.__annotations__.get(name) + if isinstance(annotation, type_annotations.InputPath): + schema_title, _ = annotation.type.split('@') + if schema_title in artifact_types._SCHEMA_TITLE_TO_TYPE: + artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE[ + schema_title] + else: + raise TypeError( + f'Invalid type argument to {type_annotations.InputPath.__name__}: {annotation.type}' + ) + else: + artifact_cls = annotation + return create_artifact_instance( + runtime_artifact, artifact_cls=artifact_cls) + + def makedirs_recursively(self, path: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) + + def _get_input_artifact(self, name: str): + return self._input_artifacts.get(name) + + def _get_output_artifact(self, name: str): + return self._output_artifacts.get(name) + + def _get_input_parameter_value(self, parameter_name: str): + parameter_values = self._input.get('inputs', + {}).get('parameterValues', None) + + if parameter_values is not None: + return 
parameter_values.get(parameter_name, None) + + return None + + def _get_output_parameter_path(self, parameter_name: str): + parameter = self._input.get('outputs', + {}).get('parameters', + {}).get(parameter_name, None) + if parameter is None: + return None + + import os + path = parameter.get('outputFile', None) + if path: + os.makedirs(os.path.dirname(path), exist_ok=True) + return path + + def _get_output_artifact_path(self, artifact_name: str): + output_artifact = self._output_artifacts.get(artifact_name) + if not output_artifact: + raise ValueError( + f'Failed to get output artifact path for artifact name {artifact_name}' + ) + return output_artifact.path + + def _get_input_artifact_path(self, artifact_name: str): + input_artifact = self._input_artifacts.get(artifact_name) + if not input_artifact: + raise ValueError( + f'Failed to get input artifact path for artifact name {artifact_name}' + ) + return input_artifact.path + + def _write_output_parameter_value(self, name: str, + value: Union[str, int, float, bool, dict, + list, Dict, List]): + if isinstance(value, (float, int)): + output = str(value) + elif isinstance(value, str): + # value is already a string. + output = value + elif isinstance(value, (bool, list, dict)): + output = json.dumps(value) + else: + raise ValueError( + f'Unable to serialize unknown type `{value}` for parameter input with value `{type(value)}`' + ) + + if not self._executor_output.get('parameterValues'): + self._executor_output['parameterValues'] = {} + + self._executor_output['parameterValues'][name] = value + + def _write_output_artifact_payload(self, name: str, value: Any): + path = self._get_output_artifact_path(name) + with open(path, 'w') as f: + f.write(str(value)) + + # TODO: extract to a util + @classmethod + def _get_short_type_name(cls, type_name: str) -> str: + """Extracts the short form type name. + + This method is used for looking up serializer for a given type. + + For example: + typing.List -> List + typing.List[int] -> List + typing.Dict[str, str] -> Dict + List -> List + str -> str + + Args: + type_name: The original type name. + + Returns: + The short form type name or the original name if pattern doesn't match. + """ + import re + match = re.match('(typing\.)?(?P\w+)(?:\[.+\])?', type_name) + return match.group('type') if match else type_name + + # TODO: merge with type_utils.is_parameter_type + @classmethod + def _is_parameter(cls, annotation: Any) -> bool: + if type(annotation) == type: + return annotation in [str, int, float, bool, dict, list] + + # Annotation could be, for instance `typing.Dict[str, str]`, etc. + return cls._get_short_type_name(str(annotation)) in ['Dict', 'List'] + + @classmethod + def _is_artifact(cls, annotation: Any) -> bool: + if type(annotation) == type: + return type_annotations.is_artifact_class(annotation) + return False + + @classmethod + def _is_named_tuple(cls, annotation: Any) -> bool: + if type(annotation) == type: + return issubclass(annotation, tuple) and hasattr( + annotation, '_fields') and hasattr(annotation, + '__annotations__') + return False + + def _handle_single_return_value(self, output_name: str, + annotation_type: Any, return_value: Any): + if self._is_parameter(annotation_type): + origin_type = getattr(annotation_type, '__origin__', + None) or annotation_type + # relax float-typed return to allow both int and float. + if origin_type == float: + accepted_types = (int, float) + # TODO: relax str-typed return to allow all primitive types? 
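+            # For example, a component annotated `-> float` may return the
+            # int 2; both int and float values are accepted by the branch above.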
+ else: + accepted_types = origin_type + if not isinstance(return_value, accepted_types): + raise ValueError( + f'Function `{self._func.__name__}` returned value of type {type(return_value)}; want type {origin_type}' + ) + self._write_output_parameter_value(output_name, return_value) + elif self._is_artifact(annotation_type): + self._write_output_artifact_payload(output_name, return_value) + else: + raise RuntimeError( + f'Unknown return type: {annotation_type}. Must be one of the supported data types: https://www.kubeflow.org/docs/components/pipelines/v2/data-types/' + ) + + def _write_executor_output(self, func_output: Optional[Any] = None): + if self._output_artifacts: + self._executor_output['artifacts'] = {} + + for name, artifact in self._output_artifacts.items(): + runtime_artifact = { + 'name': artifact.name, + 'uri': artifact.uri, + 'metadata': artifact.metadata, + } + artifacts_list = {'artifacts': [runtime_artifact]} + + self._executor_output['artifacts'][name] = artifacts_list + + if func_output is not None: + if self._is_parameter(self._return_annotation) or self._is_artifact( + self._return_annotation): + # Note: single output is named `Output` in component.yaml. + self._handle_single_return_value('Output', + self._return_annotation, + func_output) + elif self._is_named_tuple(self._return_annotation): + if len(self._return_annotation._fields) != len(func_output): + raise RuntimeError( + f'Expected {len(self._return_annotation._fields)} return values from function `{self._func.__name__}`, got {len(func_output)}' + ) + for i in range(len(self._return_annotation._fields)): + field = self._return_annotation._fields[i] + field_type = self._return_annotation.__annotations__[field] + if type(func_output) == tuple: + field_value = func_output[i] + else: + field_value = getattr(func_output, field) + self._handle_single_return_value(field, field_type, + field_value) + else: + raise RuntimeError( + f'Unknown return type: {self._return_annotation}. Must be one of `str`, `int`, `float`, a subclass of `Artifact`, or a NamedTuple collection of these types.' + ) + + # This check is to ensure only one worker (in a mirrored, distributed training/compute strategy) attempts to write to the same executor output file at the same time using gcsfuse, which enforces immutability of files. + write_file = True + + CLUSTER_SPEC_ENV_VAR_NAME = 'CLUSTER_SPEC' + cluster_spec_string = os.environ.get(CLUSTER_SPEC_ENV_VAR_NAME) + if cluster_spec_string: + cluster_spec = json.loads(cluster_spec_string) + CHIEF_NODE_LABELS = {'workerpool0', 'chief', 'master'} + write_file = cluster_spec['task']['type'] in CHIEF_NODE_LABELS + + if write_file: + executor_output_path = self._input['outputs']['outputFile'] + os.makedirs(os.path.dirname(executor_output_path), exist_ok=True) + with open(executor_output_path, 'w') as f: + f.write(json.dumps(self._executor_output)) + + def execute(self): + annotations = inspect.getfullargspec(self._func).annotations + + # Function arguments. + func_kwargs = {} + + for k, v in annotations.items(): + if k == 'return': + continue + + # Annotations for parameter types could be written as, for example, + # `Optional[str]`. In this case, we need to strip off the part + # `Optional[]` to get the actual parameter type. 
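+            # e.g. `Optional[str]` becomes `str`, and
+            # `Optional[Input[Dataset]]` becomes `Input[Dataset]`.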
+ v = type_annotations.maybe_strip_optional_from_annotation(v) + + if v == task_final_status.PipelineTaskFinalStatus: + value = self._get_input_parameter_value(k) + func_kwargs[k] = task_final_status.PipelineTaskFinalStatus( + state=value.get('state'), + pipeline_job_resource_name=value.get( + 'pipelineJobResourceName'), + pipeline_task_name=value.get('pipelineTaskName'), + error_code=value.get('error').get('code', None), + error_message=value.get('error').get('message', None), + ) + + elif self._is_parameter(v): + value = self._get_input_parameter_value(k) + if value is not None: + func_kwargs[k] = value + + elif type_annotations.is_Input_Output_artifact_annotation(v): + if type_annotations.is_input_artifact(v): + func_kwargs[k] = self._get_input_artifact(k) + if type_annotations.is_output_artifact(v): + func_kwargs[k] = self._get_output_artifact(k) + + elif isinstance(v, type_annotations.OutputPath): + if self._is_parameter(v.type): + func_kwargs[k] = self._get_output_parameter_path(k) + else: + func_kwargs[k] = self._get_output_artifact_path(k) + + elif isinstance(v, type_annotations.InputPath): + func_kwargs[k] = self._get_input_artifact_path(k) + + result = self._func(**func_kwargs) + self._write_executor_output(result) + + +def create_artifact_instance( + runtime_artifact: Dict, + artifact_cls=artifact_types.Artifact, +) -> type: + """Creates an artifact class instances from a runtime artifact + dictionary.""" + schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') + + artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE.get( + schema_title, artifact_cls) + return artifact_cls._from_executor_fields( + uri=runtime_artifact.get('uri', ''), + name=runtime_artifact.get('name', ''), + metadata=runtime_artifact.get('metadata', {}), + ) if hasattr(artifact_cls, '_from_executor_fields') else artifact_cls( + uri=runtime_artifact.get('uri', ''), + name=runtime_artifact.get('name', ''), + metadata=runtime_artifact.get('metadata', {}), + ) diff --git a/sdk/python/kfp/dsl/executor_main.py b/sdk/python/kfp/dsl/executor_main.py new file mode 100644 index 0000000000..1836ea5889 --- /dev/null +++ b/sdk/python/kfp/dsl/executor_main.py @@ -0,0 +1,105 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
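+# Typical invocation (module path and function name below are illustrative;
+# the flags are defined in executor_main()):
+#
+#   python3 -m kfp.dsl.executor_main \
+#       --component_module_path "$program_path/ephemeral_component.py" \
+#       --function_to_execute my_component \
+#       --executor_input '<JSON-serialized ExecutorInput>'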
+import argparse +import json +import logging +import os +import sys + +from kfp.dsl import executor as component_executor +from kfp.dsl import kfp_config +from kfp.dsl import utils + + +def _setup_logging(): + logging_format = '[KFP Executor %(asctime)s %(levelname)s]: %(message)s' + logging.basicConfig( + stream=sys.stdout, format=logging_format, level=logging.INFO) + + +def executor_main(): + _setup_logging() + parser = argparse.ArgumentParser(description='KFP Component Executor.') + + parser.add_argument( + '--component_module_path', + type=str, + help='Path to a module containing the KFP component.') + + parser.add_argument( + '--function_to_execute', + type=str, + required=True, + help='The name of the component function in ' + '--component_module_path file that is to be executed.') + + parser.add_argument( + '--executor_input', + type=str, + help='JSON-serialized ExecutorInput from the orchestrator. ' + 'This should contain inputs and placeholders for outputs.') + + args, _ = parser.parse_known_args() + + func_name = args.function_to_execute + module_path = None + module_directory = None + module_name = None + + if args.component_module_path is not None: + logging.info( + f'Looking for component `{func_name}` in --component_module_path `{args.component_module_path}`' + ) + module_path = args.component_module_path + module_directory = os.path.dirname(args.component_module_path) + module_name = os.path.basename(args.component_module_path)[:-len('.py')] + else: + # Look for module directory using kfp_config.ini + logging.info( + f'--component_module_path is not specified. Looking for component `{func_name}` in config file `kfp_config.ini` instead' + ) + config = kfp_config.KFPConfig() + components = config.get_components() + if not components: + raise RuntimeError('No components found in `kfp_config.ini`') + try: + module_path = components[func_name] + except KeyError: + raise RuntimeError( + f'Could not find component `{func_name}` in `kfp_config.ini`. Found the following components instead:\n{components}' + ) + + module_directory = str(module_path.parent) + module_name = str(module_path.name)[:-len('.py')] + + logging.info( + f'Loading KFP component "{func_name}" from {module_path} (directory "{module_directory}" and module name "{module_name}")' + ) + + module = utils.load_module( + module_name=module_name, module_directory=module_directory) + + executor_input = json.loads(args.executor_input) + function_to_execute = getattr(module, func_name) + + logging.info(f'Got executor_input:\n{json.dumps(executor_input, indent=4)}') + + executor = component_executor.Executor( + executor_input=executor_input, function_to_execute=function_to_execute) + + executor.execute() + + +if __name__ == '__main__': + executor_main() diff --git a/sdk/python/kfp/dsl/executor_test.py b/sdk/python/kfp/dsl/executor_test.py new file mode 100644 index 0000000000..4cc5969344 --- /dev/null +++ b/sdk/python/kfp/dsl/executor_test.py @@ -0,0 +1,1333 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for kfp.dsl.executor.""" + +import json +import os +import tempfile +from typing import Callable, Dict, List, NamedTuple, Optional +import unittest +from unittest import mock + +from absl.testing import parameterized +from kfp import dsl +from kfp.dsl import executor +from kfp.dsl import Input +from kfp.dsl import Output +from kfp.dsl.task_final_status import PipelineTaskFinalStatus +from kfp.dsl.types import artifact_types +from kfp.dsl.types.artifact_types import Artifact +from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.artifact_types import Metrics +from kfp.dsl.types.artifact_types import Model +from kfp.dsl.types.type_annotations import InputPath +from kfp.dsl.types.type_annotations import OutputPath + + +class ExecutorTest(parameterized.TestCase): + + @classmethod + def setUp(cls): + cls.maxDiff = None + cls._test_dir = tempfile.mkdtemp() + artifact_types._GCS_LOCAL_MOUNT_PREFIX = cls._test_dir + '/' + artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls._test_dir + '/minio/' + artifact_types._S3_LOCAL_MOUNT_PREFIX = cls._test_dir + '/s3/' + + def execute(self, func: Callable, executor_input: str) -> None: + executor_input_dict = json.loads(executor_input % + {'test_dir': self._test_dir}) + + executor.Executor( + executor_input=executor_input_dict, + function_to_execute=func).execute() + + def execute_and_load_output_metadata(self, func: Callable, + executor_input: str) -> dict: + self.execute(func, executor_input) + with open(os.path.join(self._test_dir, 'output_metadata.json'), + 'r') as f: + return json.loads(f.read()) + + def test_input_and_output_parameters(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "input_parameter": "Hello, KFP" + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_parameter: str) -> str: + self.assertEqual(input_parameter, 'Hello, KFP') + return input_parameter + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertEqual({'parameterValues': { + 'Output': 'Hello, KFP' + }}, output_metadata) + + def test_input_artifact_custom_type(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "google.VertexDataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + class VertexDataset(dsl.Artifact): + schema_title = 'google.VertexDataset' + schema_version = '0.0.0' + + @property + def path(self) -> str: + return self.uri.replace('gs://', + artifact_types._GCS_LOCAL_MOUNT_PREFIX) + + def test_func(input_artifact_one: Input[VertexDataset]): + self.assertEqual(input_artifact_one.uri, + 'gs://some-bucket/input_artifact_one') + self.assertEqual( + input_artifact_one.path, + os.path.join(artifact_types._GCS_LOCAL_MOUNT_PREFIX, + 'some-bucket/input_artifact_one')) + self.assertEqual( + input_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) + self.assertIsInstance(input_artifact_one, VertexDataset) + + self.execute_and_load_output_metadata(test_func, executor_input) + + def 
test_input_artifact(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "google.VertexDataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_artifact_one: Input[Dataset]): + self.assertEqual(input_artifact_one.uri, + 'gs://some-bucket/input_artifact_one') + self.assertEqual( + input_artifact_one.path, + os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) + self.assertEqual( + input_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) + self.assertIsInstance(input_artifact_one, Dataset) + + self.execute_and_load_output_metadata(test_func, executor_input) + + def test_output_parameter(self): + executor_input = """\ + { + "outputs": { + "parameters": { + "output_parameter_path": { + "outputFile": "%(test_dir)s/gcs/some-bucket/some_task/nested/output_parameter" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(output_parameter_path: OutputPath(str)): + # Test that output parameters just use the passed in filename. + self.assertEqual( + output_parameter_path, self._test_dir + + '/gcs/some-bucket/some_task/nested/output_parameter') + with open(output_parameter_path, 'w') as f: + f.write('Hello, World!') + + self.execute_and_load_output_metadata(test_func, executor_input) + + def test_input_path_artifact(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact_one_path": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Dataset" + }, + "uri": "gs://some-bucket/input_artifact_one" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_artifact_one_path: InputPath('Dataset')): + self.assertEqual( + input_artifact_one_path, + os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) + + self.execute_and_load_output_metadata(test_func, executor_input) + + def test_output_path_artifact(self): + executor_input = """\ + { + "outputs": { + "artifacts": { + "output_artifact_one_path": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Model" + }, + "uri": "gs://some-bucket/output_artifact_one" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(output_artifact_one_path: OutputPath('Model')): + self.assertEqual( + output_artifact_one_path, + os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) + + self.execute_and_load_output_metadata(test_func, executor_input) + + def test_output_metadata(self): + executor_input = """\ + { + "outputs": { + "artifacts": { + "output_artifact_two": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Metrics" + }, + "uri": "gs://some-bucket/output_artifact_two" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(output_artifact_two: Output[Metrics]): + output_artifact_two.metadata['key_1'] = 'value_1' + 
output_artifact_two.metadata['key_2'] = 2 + output_artifact_two.uri = 'new-uri' + + # log_metric works here since the schema is specified as Metrics. + output_artifact_two.log_metric('metric', 0.9) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'output_artifact_two': { + 'artifacts': [{ + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'new-uri', + 'metadata': { + 'key_1': 'value_1', + 'key_2': 2, + 'metric': 0.9 + } + }] + } + } + }) + + def test_function_string_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first_message": "Hello", + "second_message": ", ", + "third_message": "World" + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func( + first_message: str, + second_message: str, + third_message: str, + ) -> str: + return first_message + second_message + third_message + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': 'Hello, World' + }, + }) + + def test_function_with_int_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": 40, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: int, second: int) -> int: + return first + second + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': 42 + }, + }) + + @parameterized.parameters( + { + 'executor_input': + """\ + { + "inputs": { + "parameterValues": { + "first": 0.0, + "second": 1.2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """, + 'expected_output_metadata': { + 'parameterValues': { + 'Output': 1.2 + }, + }, + }, + { + 'executor_input': + """\ + { + "inputs": { + "parameterValues": { + "first": 1, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """, + 'expected_output_metadata': { + 'parameterValues': { + 'Output': 3 + }, + }, + }, + ) + def test_function_with_float_output(self, executor_input, + expected_output_metadata): + + def test_func(first: float, second: float) -> float: + return first + second + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, expected_output_metadata) + + def test_function_with_list_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": 40, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: int, second: int) -> List: + return [first, second] + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': [40, 2] + }, + }) + + def 
test_function_with_dict_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": 40, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: int, second: int) -> Dict: + return {'first': first, 'second': second} + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': { + 'first': 40, + 'second': 2 + } + }, + }) + + def test_function_with_typed_list_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": 40, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: int, second: int) -> List[int]: + return [first, second] + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': [40, 2] + }, + }) + + def test_function_with_typed_dict_output(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": 40, + "second": 2 + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: int, second: int) -> Dict[str, int]: + return {'first': first, 'second': second} + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, { + 'parameterValues': { + 'Output': { + 'first': 40, + 'second': 2 + } + }, + }) + + def test_artifact_output1(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": "Hello", + "second": "World" + } + }, + "outputs": { + "artifacts": { + "output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: str, second: str, output: Output[Artifact]) -> str: + with open(output.path, 'w') as f: + f.write('artifact output') + return first + ', ' + second + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'output': { + 'artifacts': [{ + 'metadata': {}, + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output' + }] + } + }, + 'parameterValues': { + 'Output': 'Hello, World' + } + }) + + with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: + artifact_payload = f.read() + self.assertEqual(artifact_payload, 'artifact output') + + def test_artifact_output2(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first": "Hello", + "second": "World" + } + }, + "outputs": { + "artifacts": { + "Output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": 
"gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(first: str, second: str) -> Artifact: + return first + ', ' + second + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'Output': { + 'artifacts': [{ + 'metadata': {}, + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output' + }] + } + }, + }) + + with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: + artifact_payload = f.read() + self.assertEqual(artifact_payload, 'Hello, World') + + def test_output_artifact3(self): + executor_input = """\ + { + "outputs": { + "artifacts": { + "output_artifact_one": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Model" + }, + "uri": "gs://some-bucket/output_artifact_one" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(output_artifact_one: Output[Model]): + self.assertEqual(output_artifact_one.uri, + 'gs://some-bucket/output_artifact_one') + + self.assertEqual( + output_artifact_one.path, + os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) + self.assertEqual( + output_artifact_one.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' + ) + self.assertIsInstance(output_artifact_one, Model) + + self.execute_and_load_output_metadata(test_func, executor_input) + + def test_named_tuple_output(self): + executor_input = """\ + { + "outputs": { + "artifacts": { + "output_dataset": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Dataset" + }, + "uri": "gs://some-bucket/output_dataset" + } + ] + } + }, + "parameters": { + "output_int": { + "outputFile": "gs://some-bucket/output_int" + }, + "output_string": { + "outputFile": "gs://some-bucket/output_string" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + # Functions returning named tuples should work. + def func_returning_named_tuple() -> NamedTuple('Outputs', [ + ('output_dataset', Dataset), + ('output_int', int), + ('output_string', str), + ]): + from collections import namedtuple + output = namedtuple( + 'Outputs', ['output_dataset', 'output_int', 'output_string']) + return output('Dataset contents', 101, 'Some output string') + + # Functions returning plain tuples should work too. 
+ def func_returning_plain_tuple() -> NamedTuple('Outputs', [ + ('output_dataset', Dataset), + ('output_int', int), + ('output_string', str), + ]): + return ('Dataset contents', 101, 'Some output string') + + for test_func in [ + func_returning_named_tuple, func_returning_plain_tuple + ]: + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'output_dataset': { + 'artifacts': [{ + 'metadata': {}, + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://some-bucket/output_dataset' + }] + } + }, + 'parameterValues': { + 'output_int': 101, + 'output_string': 'Some output string' + }, + }) + + with open( + os.path.join(self._test_dir, 'some-bucket/output_dataset'), + 'r') as f: + artifact_payload = f.read() + self.assertEqual(artifact_payload, 'Dataset contents') + + def test_function_with_optional_inputs(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "first_message": "Hello", + "second_message": "World" + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func( + first_message: str = 'default value', + second_message: Optional[str] = None, + third_message: Optional[str] = None, + fourth_argument: str = 'abc', + fifth_argument: int = 100, + sixth_argument: float = 1.23, + seventh_argument: bool = True, + eighth_argument: list = [1, 2], + ninth_argument: dict = {'a': 1}, + ) -> str: + return (f'{first_message} ({type(first_message)}), ' + f'{second_message} ({type(second_message)}), ' + f'{third_message} ({type(third_message)}), ' + f'{fourth_argument} ({type(fourth_argument)}), ' + f'{fifth_argument} ({type(fifth_argument)}), ' + f'{sixth_argument} ({type(sixth_argument)}), ' + f'{seventh_argument} ({type(seventh_argument)}), ' + f'{eighth_argument} ({type(eighth_argument)}), ' + f'{ninth_argument} ({type(ninth_argument)}).') + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'parameterValues': { + 'Output': "Hello (), " + "World (), " + "None (), " + "abc (), " + "100 (), " + "1.23 (), " + "True (), " + "[1, 2] (), " + "{'a': 1} ()." + }, + }) + + def test_function_with_optional_input_artifact(self): + executor_input = """\ + { + "inputs": {}, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(a: Optional[Input[Artifact]] = None): + self.assertIsNone(a) + + self.execute(test_func, executor_input) + + def test_function_with_pipeline_task_final_status(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "status": {"error":{"code":9,"message":"The DAG failed because some tasks failed. 
The failed tasks are: [fail-op]."},"pipelineJobResourceName":"projects/123/locations/us-central1/pipelineJobs/pipeline-456", "pipelineTaskName": "upstream-task", "state":"FAILED"} + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(status: PipelineTaskFinalStatus) -> str: + return (f'Pipeline status: {status.state}\n' + f'Job resource name: {status.pipeline_job_resource_name}\n' + f'Pipeline task name: {status.pipeline_task_name}\n' + f'Error code: {status.error_code}\n' + f'Error message: {status.error_message}') + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'parameterValues': { + 'Output': + 'Pipeline status: FAILED\n' + 'Job resource name: projects/123/locations/us-central1/pipelineJobs/pipeline-456\n' + 'Pipeline task name: upstream-task\n' + 'Error code: 9\n' + 'Error message: The DAG failed because some tasks failed. The failed tasks are: [fail-op].' + }, + }) + + def test_component_with_input_path(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "dataset_one_path": { + "artifacts": [ + { + "name": "84085", + "type": { + "instanceSchema": "" + }, + "uri": "gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/preprocess/output_dataset_one", + "metadata": { + "display_name": "output_dataset_one" + } + } + ] + } + }, + "parameterValues": { + "input_bool": true, + "input_dict": { + "A": 1, + "B": 2 + }, + "input_list": [ + "a", + "b", + "c" + ], + "message": "here is my message", + "num_steps": 100 + } + }, + "outputs": { + "artifacts": { + "model": { + "artifacts": [ + { + "type": { + "schemaTitle": "system.Model", + "schemaVersion": "0.0.1" + }, + "uri": "gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/train/model" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + path = os.path.join( + self._test_dir, + 'mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/preprocess/output_dataset_one' + ) + os.makedirs(os.path.dirname(path)) + with open(path, 'w+') as f: + f.write('data!') + + def test_func( + # Use InputPath to get a locally accessible path for the input artifact + # of type `Dataset`. + dataset_one_path: InputPath('Dataset'), + # An input parameter of type string. + message: str, + # Use Output[T] to get a metadata-rich handle to the output artifact + # of type `Dataset`. + model: Output[Model], + # An input parameter of type bool. + input_bool: bool, + # An input parameter of type dict. + input_dict: Dict[str, int], + # An input parameter of type List[str]. + input_list: List[str], + # An input parameter of type int with a default value. 
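+                # (In this test, executor_input also supplies num_steps=100 in
+                # parameterValues, matching this default.)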
+ num_steps: int = 100, + ): + """Dummy Training step.""" + with open(dataset_one_path) as input_file: + dataset_one_contents = input_file.read() + + line = (f'dataset_one_contents: {dataset_one_contents} || ' + f'message: {message} || ' + f'input_bool: {input_bool}, type {type(input_bool)} || ' + f'input_dict: {input_dict}, type {type(input_dict)} || ' + f'input_list: {input_list}, type {type(input_list)} \n') + + with open(model.path, 'w') as output_file: + for i in range(num_steps): + output_file.write(f'Step {i}\n{line}\n=====\n') + + # model is an instance of Model artifact, which has a .metadata dictionary + # to store arbitrary metadata for the output artifact. + model.metadata['accuracy'] = 0.9 + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + self.assertEqual( + output_metadata, { + 'artifacts': { + 'model': { + 'artifacts': [{ + 'name': + '', + 'uri': + 'gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/train/model', + 'metadata': { + 'accuracy': 0.9 + } + }] + } + } + }) + + @mock.patch.dict( + os.environ, + {'CLUSTER_SPEC': json.dumps({'task': { + 'type': 'workerpool0' + }})}, + clear=True) + def test_distributed_training_strategy_write(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "input_parameter": "Hello, KFP" + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_parameter: str): + self.assertEqual(input_parameter, 'Hello, KFP') + + self.execute( + func=test_func, + executor_input=executor_input, + ) + self.assertTrue( + os.path.exists( + os.path.join(self._test_dir, 'output_metadata.json'))) + + @mock.patch.dict( + os.environ, + {'CLUSTER_SPEC': json.dumps({'task': { + 'type': 'workerpool1' + }})}, + clear=True) + def test_distributed_training_strategy_no_write(self): + executor_input = """\ + { + "inputs": { + "parameterValues": { + "input_parameter": "Hello, KFP" + } + }, + "outputs": { + "parameters": { + "Output": { + "outputFile": "gs://some-bucket/output" + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_parameter: str): + self.assertEqual(input_parameter, 'Hello, KFP') + + self.execute( + func=test_func, + executor_input=executor_input, + ) + self.assertFalse( + os.path.exists( + os.path.join(self._test_dir, 'output_metadata.json'))) + + def test_single_artifact_input(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_artifact" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_artifact: Input[Artifact]): + self.assertIsInstance(input_artifact, Artifact) + self.assertEqual( + input_artifact.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' + ) + self.assertEqual( + input_artifact.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' + ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + + def test_list_of_artifacts_input(self): + executor_input = """\ + { + "inputs": { + 
"artifacts": { + "input_list": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/0" + }, + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/1" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_list: Input[List[Artifact]]): + self.assertEqual(len(input_list), 2) + self.assertEqual( + input_list[0].name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0' + ) + self.assertEqual( + input_list[1].name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1' + ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + + +class TestDictToArtifact(parameterized.TestCase): + + @parameterized.parameters( + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Artifact' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.Artifact, + 'expected_type': artifact_types.Artifact, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Model' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.Model, + 'expected_type': artifact_types.Model, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Dataset' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.Dataset, + 'expected_type': artifact_types.Dataset, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Metrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.Metrics, + 'expected_type': artifact_types.Metrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.ClassificationMetrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.ClassificationMetrics, + 'expected_type': artifact_types.ClassificationMetrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.SlicedClassificationMetrics' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': artifact_types.SlicedClassificationMetrics, + 'expected_type': artifact_types.SlicedClassificationMetrics, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.HTML' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': None, + 'expected_type': artifact_types.HTML, + }, + { + 'runtime_artifact': { + 'metadata': {}, + 'name': 'input_artifact_one', + 'type': { + 'schemaTitle': 'system.Markdown' + }, + 'uri': 'gs://some-bucket/input_artifact_one' + }, + 'artifact_cls': None, + 'expected_type': artifact_types.Markdown, + }, + ) + def test_dict_to_artifact_kfp_artifact( + self, + runtime_artifact, + artifact_cls, + expected_type, + ): + # with artifact_cls + self.assertIsInstance( + 
executor.create_artifact_instance(
+                runtime_artifact, artifact_cls=artifact_cls), expected_type)
+
+        # without artifact_cls
+        self.assertIsInstance(
+            executor.create_artifact_instance(runtime_artifact), expected_type)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/sdk/python/kfp/dsl/for_loop.py b/sdk/python/kfp/dsl/for_loop.py
new file mode 100644
index 0000000000..5381576631
--- /dev/null
+++ b/sdk/python/kfp/dsl/for_loop.py
@@ -0,0 +1,315 @@
+# Copyright 2021 The Kubeflow Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Classes and methods that support arguments for ParallelFor."""
+
+import re
+from typing import Any, Dict, List, Optional, Union
+
+from kfp.dsl import pipeline_channel
+
+ItemList = List[Union[int, float, str, Dict[str, Any]]]
+
+
+def _get_loop_item_type(type_name: str) -> Optional[str]:
+    """Extracts the loop item type.
+
+    This method is used to extract the item type from a collection type.
+    For example:
+
+        List[str] -> str
+        typing.List[int] -> int
+        typing.Sequence[str] -> str
+        List -> None
+        str -> None
+
+    Args:
+        type_name: The collection type name, like `List`, `Sequence`, etc.
+
+    Returns:
+        The collection item type or None if no match found.
+    """
+    match = re.match('(typing\.)?(?:\w+)(?:\[(?P<item_type>.+)\])', type_name)
+    return match['item_type'].lstrip().rstrip() if match else None
+
+
+def _get_subvar_type(type_name: str) -> Optional[str]:
+    """Extracts the subvar type.
+
+    This method is used to extract the value type from a dictionary type.
+    For example:
+
+        Dict[str, int] -> int
+        typing.Mapping[str, float] -> float
+
+    Args:
+        type_name: The dictionary type.
+
+    Returns:
+        The dictionary value type or None if no match found.
+    """
+    match = re.match(
+        '(typing\.)?(?:\w+)(?:\[\s*(?:\w+)\s*,\s*(?P<value_type>.+)\])',
+        type_name)
+    return match['value_type'].lstrip().rstrip() if match else None
+
+
+class LoopArgument(pipeline_channel.PipelineParameterChannel):
+    """Represents the arguments that are looped over in a ParallelFor loop.
+
+    The class shouldn't be instantiated by the end user; rather, it is
+    created automatically by a ParallelFor ops group.
+
+    To create a LoopArgument instance, use one of its factory methods::
+
+        LoopArgument.from_pipeline_channel(...)
+        LoopArgument.from_raw_items(...)
+
+
+    Attributes:
+        items_or_pipeline_channel: The raw items or the PipelineChannel object
+            this LoopArgument is associated with.
+    """
+    LOOP_ITEM_NAME_BASE = 'loop-item'
+    LOOP_ITEM_PARAM_NAME_BASE = 'loop-item-param'
+
+    def __init__(
+        self,
+        items: Union[ItemList, pipeline_channel.PipelineChannel],
+        name_code: Optional[str] = None,
+        name_override: Optional[str] = None,
+        **kwargs,
+    ):
+        """Initializes a LoopArgument object.
+
+        Args:
+            items: List of items to loop over. If a list of dicts, then all
+                dicts must have the same keys and every key must be a legal
+                Python variable name.
+            name_code: A unique code used to identify these loop arguments.
+                Should match the code for the ParallelFor ops_group which
+                created these LoopArguments.
This prevents parameter name collisions. + name_override: The override name for PipelineChannel. + **kwargs: Any other keyword arguments passed down to PipelineChannel. + """ + if (name_code is None) == (name_override is None): + raise ValueError( + 'Expect one and only one of `name_code` and `name_override` to ' + 'be specified.') + + if name_override is None: + super().__init__(name=self._make_name(name_code), **kwargs) + else: + super().__init__(name=name_override, **kwargs) + + if not isinstance(items, + (list, tuple, pipeline_channel.PipelineChannel)): + raise TypeError( + f'Expected list, tuple, or PipelineChannel, got {items}.') + + if isinstance(items, tuple): + items = list(items) + + self.items_or_pipeline_channel = items + self.is_with_items_loop_argument = not isinstance( + items, pipeline_channel.PipelineChannel) + self._referenced_subvars: Dict[str, LoopArgumentVariable] = {} + + if isinstance(items, list) and isinstance(items[0], dict): + subvar_names = set(items[0].keys()) + # then this block creates loop_arg.variable_a and loop_arg.variable_b + for subvar_name in subvar_names: + loop_arg_var = LoopArgumentVariable( + loop_argument=self, + subvar_name=subvar_name, + ) + self._referenced_subvars[subvar_name] = loop_arg_var + setattr(self, subvar_name, loop_arg_var) + + def __getattr__(self, name: str): + # this is being overridden so that we can access subvariables of the + # LoopArgument (i.e.: item.a) without knowing the subvariable names ahead + # of time. + + return self._referenced_subvars.setdefault( + name, LoopArgumentVariable( + loop_argument=self, + subvar_name=name, + )) + + def _make_name(self, code: str): + """Makes a name for this loop argument from a unique code.""" + return f'{self.LOOP_ITEM_PARAM_NAME_BASE}-{code}' + + @classmethod + def from_pipeline_channel( + cls, + channel: pipeline_channel.PipelineChannel, + ) -> 'LoopArgument': + """Creates a LoopArgument object from a PipelineChannel object.""" + return LoopArgument( + items=channel, + name_override=channel.name + '-' + cls.LOOP_ITEM_NAME_BASE, + task_name=channel.task_name, + channel_type=_get_loop_item_type(channel.channel_type) or 'String', + ) + + @classmethod + def from_raw_items( + cls, + raw_items: ItemList, + name_code: str, + ) -> 'LoopArgument': + """Creates a LoopArgument object from raw item list.""" + if len(raw_items) == 0: + raise ValueError('Got an empty item list for loop argument.') + + return LoopArgument( + items=raw_items, + name_code=name_code, + channel_type=type(raw_items[0]).__name__, + ) + + @classmethod + def name_is_loop_argument(cls, name: str) -> bool: + """Returns True if the given channel name looks like a loop argument. + + Either it came from a withItems loop item or withParams loop + item. + """ + return ('-' + cls.LOOP_ITEM_NAME_BASE) in name \ + or (cls.LOOP_ITEM_PARAM_NAME_BASE + '-') in name + + +class LoopArgumentVariable(pipeline_channel.PipelineChannel): + """Represents a subvariable for a loop argument. + + This is used for cases where we're looping over maps, each of which contains + several variables. If the user ran: + + with dsl.ParallelFor([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]) as item: + ... + + Then there's one LoopArgumentVariable for 'a' and another for 'b'. + + Attributes: + loop_argument: The original LoopArgument object this subvariable is + attached to. + subvar_name: The subvariable name. 
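+
+    Example of the generated channel name (illustrative; the numeric loop code
+    shown is an assumed value chosen by the enclosing ParallelFor group)::
+
+        # `item.a` above is a LoopArgumentVariable whose channel name is
+        # '<loop argument name>-subvar-a', e.g. 'loop-item-param-1-subvar-a'.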
+ """ + SUBVAR_NAME_DELIMITER = '-subvar-' + LEGAL_SUBVAR_NAME_REGEX = re.compile(r'^[a-zA-Z_][0-9a-zA-Z_]*$') + + def __init__( + self, + loop_argument: LoopArgument, + subvar_name: str, + ): + """Initializes a LoopArgumentVariable instance. + + Args: + loop_argument: The LoopArgument object this subvariable is based on + a subvariable to. + subvar_name: The name of this subvariable, which is the name of the + dict key that spawned this subvariable. + + Raises: + ValueError is subvar name is illegal. + """ + if not self._subvar_name_is_legal(subvar_name): + raise ValueError( + f'Tried to create subvariable named {subvar_name}, but that is ' + 'not a legal Python variable name.') + + self.subvar_name = subvar_name + self.loop_argument = loop_argument + + super().__init__( + name=self._get_name_override( + loop_arg_name=loop_argument.name, + subvar_name=subvar_name, + ), + task_name=loop_argument.task_name, + channel_type=_get_subvar_type(loop_argument.channel_type) or + 'String', + ) + + @property + def items_or_pipeline_channel( + self) -> Union[ItemList, pipeline_channel.PipelineChannel]: + """Returns the loop argument items.""" + return self.loop_argument.items_or_pipeline_chanenl + + @property + def is_with_items_loop_argument(self) -> bool: + """Whether the loop argument is originated from raw items.""" + return self.loop_argument.is_with_items_loop_argument + + def _subvar_name_is_legal(self, proposed_variable_name: str) -> bool: + """Returns True if the subvar name is legal.""" + return re.match(self.LEGAL_SUBVAR_NAME_REGEX, + proposed_variable_name) is not None + + def _get_name_override(self, loop_arg_name: str, subvar_name: str) -> str: + """Gets the name. + + Args: + loop_arg_name: the name of the loop argument parameter that this + LoopArgumentVariable is attached to. + subvar_name: The name of this subvariable. + + Returns: + The name of this loop arg variable. + """ + return f'{loop_arg_name}{self.SUBVAR_NAME_DELIMITER}{subvar_name}' + + +class Collected(pipeline_channel.PipelineChannel): + """For collecting into a list the output from a task in dsl.ParallelFor + loops. + + Args: + output: The output of an upstream task within a dsl.ParallelFor loop. 
+ + Example: + :: + + @dsl.pipeline + def math_pipeline() -> int: + with dsl.ParallelFor([1, 2, 3]) as x: + t = double(num=x) + + return add(nums=dsl.Collected(t.output)).output + """ + + def __init__( + self, + output: pipeline_channel.PipelineChannel, + ) -> None: + self.output = output + if isinstance(output, pipeline_channel.PipelineArtifactChannel): + channel_type = output.channel_type + self.is_artifact_channel = True + # we know all dsl.Collected instances are lists, so set to true + # for type checking, which occurs before dsl.Collected is updated to + # it's "correct" channel during compilation + self.is_artifact_list = True + else: + channel_type = 'LIST' + self.is_artifact_channel = False + + super().__init__( + output.name, + channel_type=channel_type, + task_name=output.task_name, + ) diff --git a/sdk/python/kfp/dsl-test/for_loop_test.py b/sdk/python/kfp/dsl/for_loop_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/for_loop_test.py rename to sdk/python/kfp/dsl/for_loop_test.py diff --git a/sdk/python/kfp/dsl/graph_component.py b/sdk/python/kfp/dsl/graph_component.py new file mode 100644 index 0000000000..2b09927dfa --- /dev/null +++ b/sdk/python/kfp/dsl/graph_component.py @@ -0,0 +1,91 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pipeline as a component (aka graph component).""" + +import inspect +from typing import Callable, Optional +import uuid + +from kfp.compiler import pipeline_spec_builder as builder +from kfp.dsl import base_component +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import structures +from kfp.pipeline_spec import pipeline_spec_pb2 + + +class GraphComponent(base_component.BaseComponent): + """A component defined via @dsl.pipeline decorator. + + Attribute: + pipeline_func: The function that becomes the implementation of this component. 
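+
+    Example (an illustrative sketch; ``identity`` is assumed to be an existing
+    ``@dsl.component``-decorated function)::
+
+        @dsl.pipeline(name='inner-pipeline')
+        def inner_pipeline(msg: str) -> str:
+            return identity(message=msg).output
+
+        # inner_pipeline is a GraphComponent and can be called as a task
+        # inside another pipeline, just like a primitive component.
+        @dsl.pipeline(name='outer-pipeline')
+        def outer_pipeline():
+            inner_pipeline(msg='hello')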
+ """ + + def __init__( + self, + component_spec: structures.ComponentSpec, + pipeline_func: Callable, + display_name: Optional[str] = None, + ): + super().__init__(component_spec=component_spec) + self.pipeline_func = pipeline_func + + args_list = [] + signature = inspect.signature(pipeline_func) + + for arg_name in signature.parameters: + input_spec = component_spec.inputs[arg_name] + args_list.append( + pipeline_channel.create_pipeline_channel( + name=arg_name, + channel_type=input_spec.type, + is_artifact_list=input_spec.is_artifact_list, + )) + + with pipeline_context.Pipeline( + self.component_spec.name) as dsl_pipeline: + pipeline_outputs = pipeline_func(*args_list) + + if not dsl_pipeline.tasks: + raise ValueError('Task is missing from pipeline.') + + # Making the pipeline group name unique to prevent name clashes with + # templates + pipeline_group = dsl_pipeline.groups[0] + pipeline_group.name = uuid.uuid4().hex + + pipeline_spec, platform_spec = builder.create_pipeline_spec( + pipeline=dsl_pipeline, + component_spec=self.component_spec, + pipeline_outputs=pipeline_outputs, + ) + + pipeline_root = getattr(pipeline_func, 'pipeline_root', None) + if pipeline_root is not None: + pipeline_spec.default_pipeline_root = pipeline_root + if display_name is not None: + pipeline_spec.pipeline_info.display_name = display_name + if component_spec.description is not None: + pipeline_spec.pipeline_info.description = component_spec.description + + self.component_spec.implementation.graph = pipeline_spec + self.component_spec.platform_spec = platform_spec + + @property + def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Returns the pipeline spec of the component.""" + return self.component_spec.implementation.graph + + def execute(self, **kwargs): + raise RuntimeError('Graph component has no local execution mode.') diff --git a/sdk/python/kfp/dsl/importer_component.py b/sdk/python/kfp/dsl/importer_component.py new file mode 100644 index 0000000000..168c7c6f73 --- /dev/null +++ b/sdk/python/kfp/dsl/importer_component.py @@ -0,0 +1,30 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Importer-based component.""" + +from kfp.dsl import base_component +from kfp.dsl import structures + + +class ImporterComponent(base_component.BaseComponent): + """Component defined via dsl.importer.""" + + def __init__( + self, + component_spec: structures.ComponentSpec, + ): + super().__init__(component_spec=component_spec) + + def execute(self, **kwargs): + raise NotImplementedError diff --git a/sdk/python/kfp/dsl/importer_node.py b/sdk/python/kfp/dsl/importer_node.py new file mode 100644 index 0000000000..2a3e676daa --- /dev/null +++ b/sdk/python/kfp/dsl/importer_node.py @@ -0,0 +1,145 @@ +# Copyright 2020-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utility function for building Importer Node spec.""" + +from typing import Any, Dict, Mapping, Optional, Type, Union + +from kfp.dsl import importer_component +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_task +from kfp.dsl import placeholders +from kfp.dsl import structures +from kfp.dsl import utils +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_utils + +URI_KEY = 'uri' +OUTPUT_KEY = 'artifact' +METADATA_KEY = 'metadata' + + +def importer( + artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str], + artifact_class: Type[artifact_types.Artifact], + reimport: bool = False, + metadata: Optional[Mapping[str, Any]] = None, +) -> pipeline_task.PipelineTask: + """Imports an existing artifact for use in a downstream component. + + Args: + artifact_uri: The URI of the artifact to import. + artifact_class: The artifact class being imported. + reimport: Whether to reimport the artifact. + metadata: Properties of the artifact. + + Returns: + A task with the artifact accessible via its ``.output`` attribute. + + Examples:: + + @dsl.pipeline(name='pipeline-with-importer') + def pipeline_with_importer(): + + importer1 = importer( + artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt', + artifact_class=Dataset, + reimport=False) + train(dataset=importer1.output) + """ + + component_inputs: Dict[str, structures.InputSpec] = {} + call_inputs: Dict[str, Any] = {} + + def traverse_dict_and_create_metadata_inputs(d: Any) -> Any: + if isinstance(d, pipeline_channel.PipelineParameterChannel): + reversed_call_inputs = { + pipeline_param_chan: name + for name, pipeline_param_chan in call_inputs.items() + } + + # minimizes importer spec interface by not creating new + # inputspec/parameters if the same input is used multiple places + # in metadata + unique_name = reversed_call_inputs.get( + d, + utils.make_name_unique_by_adding_index( + METADATA_KEY, + list(call_inputs), + '-', + ), + ) + + call_inputs[unique_name] = d + component_inputs[unique_name] = structures.InputSpec( + type=d.channel_type) + + return placeholders.InputValuePlaceholder( + input_name=unique_name)._to_string() + + elif isinstance(d, dict): + # use this instead of list comprehension to ensure compiles are identical across Python versions + res = {} + for k, v in d.items(): + new_k = traverse_dict_and_create_metadata_inputs(k) + new_v = traverse_dict_and_create_metadata_inputs(v) + res[new_k] = new_v + return res + + elif isinstance(d, list): + return [traverse_dict_and_create_metadata_inputs(el) for el in d] + + elif isinstance(d, str): + # extract pipeline channels from f-strings, if any + pipeline_channels = pipeline_channel.extract_pipeline_channels_from_any( + d) + + # pass the channel back into the recursive function to create the placeholder, component inputs, and call inputs, then replace the channel with the placeholder + for channel in pipeline_channels: + input_placeholder = traverse_dict_and_create_metadata_inputs( + channel) + d = d.replace(channel.pattern, input_placeholder) + return d + + else: + return d + + metadata_with_placeholders = 
traverse_dict_and_create_metadata_inputs( + metadata) + + component_spec = structures.ComponentSpec( + name='importer', + implementation=structures.Implementation( + importer=structures.ImporterSpec( + artifact_uri=placeholders.InputValuePlaceholder( + URI_KEY)._to_string(), + schema_title=type_utils.create_bundled_artifact_type( + artifact_class.schema_title, artifact_class.schema_version), + schema_version=artifact_class.schema_version, + reimport=reimport, + metadata=metadata_with_placeholders)), + inputs={ + URI_KEY: structures.InputSpec(type='String'), + **component_inputs + }, + outputs={ + OUTPUT_KEY: + structures.OutputSpec( + type=type_utils.create_bundled_artifact_type( + artifact_class.schema_title, + artifact_class.schema_version)) + }, + ) + importer = importer_component.ImporterComponent( + component_spec=component_spec) + return importer(uri=artifact_uri, **call_inputs) diff --git a/sdk/python/kfp/dsl-test/importer_node_test.py b/sdk/python/kfp/dsl/importer_node_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/importer_node_test.py rename to sdk/python/kfp/dsl/importer_node_test.py diff --git a/sdk/python/kfp/dsl/kfp_config.py b/sdk/python/kfp/dsl/kfp_config.py new file mode 100644 index 0000000000..798249ed85 --- /dev/null +++ b/sdk/python/kfp/dsl/kfp_config.py @@ -0,0 +1,106 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import configparser +import pathlib +from typing import Dict, Optional +import warnings + +_KFP_CONFIG_FILE = 'kfp_config.ini' + +_COMPONENTS_SECTION = 'Components' + + +class KFPConfig(): + """Class for managing KFP component configuration. + + The configuration is .ini file named `kfp_config.ini` that can be parsed by + Python's native configparser module. Currently, this class supports a single + `Components` section, which lists components as key-value pairs. The key is + the component name (i.e. the function name), and the value is the path to + the file containing this function. The path is usually relative from the + location of the configuration file, but absolute paths should also work. + + At runtime, the KFP v2 Executor, defined in executor_main.py, will look + for this configuration file in its current working directory. If found, + it will load its contents, and use this to find the file containing the + component to execute. + + Example of the file's contents: + + [Components] + my_component_1 = my_dir_1/my_component_1.py + my_component_2 = my_dir_2/my_component_2.py + ... + """ + + def __init__(self, config_directory: Optional[pathlib.Path] = None): + """Creates a KFPConfig object. + + Loads the config from an existing `kfp_config.ini` file if found. + + Args: + config_directory: Looks for a file named `kfp_config.ini` in this + directory. Defaults to the current directory. + """ + self._config_parser = configparser.ConfigParser() + # Preserve case for keys. 
+ self._config_parser.optionxform = lambda x: x + + if config_directory is None: + self._config_filepath = pathlib.Path(_KFP_CONFIG_FILE) + else: + self._config_filepath = config_directory / _KFP_CONFIG_FILE + + try: + with open(str(self._config_filepath), 'r') as f: + self._config_parser.read_file(f) + except IOError: + warnings.warn('No existing KFP Config file found') + + if not self._config_parser.has_section(_COMPONENTS_SECTION): + self._config_parser.add_section(_COMPONENTS_SECTION) + + self._components = {} + + def add_component(self, function_name: str, path: pathlib.Path): + """Adds a KFP component. + + Args: + function_name: The name of the component function. + path: A path to the file containing the component. + """ + self._components[function_name] = str(path) + + def save(self): + """Writes out a KFP config file.""" + # Always write out components in alphabetical order for determinism, + # especially in tests. + for function_name in sorted(self._components.keys()): + self._config_parser[_COMPONENTS_SECTION][ + function_name] = self._components[function_name] + + with open(str(self._config_filepath), 'w') as f: + self._config_parser.write(f) + + def get_components(self) -> Dict[str, pathlib.Path]: + """Returns a list of known KFP components. + + Returns: + A dictionary from component name (function name) to a pathlib.Path + pointing to the Python file with this component's definition. + """ + return { + function_name: pathlib.Path(module_path) for function_name, + module_path in self._config_parser[_COMPONENTS_SECTION].items() + } diff --git a/sdk/python/kfp/dsl/pipeline_channel.py b/sdk/python/kfp/dsl/pipeline_channel.py new file mode 100644 index 0000000000..66616103fb --- /dev/null +++ b/sdk/python/kfp/dsl/pipeline_channel.py @@ -0,0 +1,379 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definition of PipelineChannel.""" + +import abc +import contextlib +import dataclasses +import json +import re +from typing import Dict, List, Optional, Union + +from kfp.dsl.types import type_utils + + +@dataclasses.dataclass +class ConditionOperator: + """Represents a condition expression to be used in dsl.Condition(). + + Attributes: + operator: The operator of the condition. + left_operand: The left operand. + right_operand: The right operand. + """ + operator: str + left_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] + right_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] + + +# The string template used to generate the placeholder of a PipelineChannel. +_PIPELINE_CHANNEL_PLACEHOLDER_TEMPLATE = ( + '{{channel:task=%s;name=%s;type=%s;}}') +# The regex for parsing PipelineChannel placeholders from a string. +_PIPELINE_CHANNEL_PLACEHOLDER_REGEX = ( + r'{{channel:task=([\w\s_-]*);name=([\w\s_-]+);type=([\w\s{}":_-]*);}}') + + +class PipelineChannel(abc.ABC): + """Represents a future value that is passed between pipeline components. 
+ + A PipelineChannel object can be used as a pipeline function argument so that + it will be a pipeline artifact or parameter that shows up in ML Pipelines + system UI. It can also represent an intermediate value passed between + components. + + Attributes: + name: The name of the pipeline channel. + channel_type: The type of the pipeline channel. + task_name: The name of the task that produces the pipeline channel. + None means it is not produced by any task, so if None, either user + constructs it directly (for providing an immediate value), or it is + a pipeline function argument. + pattern: The serialized string regex pattern this pipeline channel + created from. + """ + + @abc.abstractmethod + def __init__( + self, + name: str, + channel_type: Union[str, Dict], + task_name: Optional[str] = None, + ): + """Initializes a PipelineChannel instance. + + Args: + name: The name of the pipeline channel. The name will be sanitized + to be k8s compatible. + channel_type: The type of the pipeline channel. + task_name: Optional; The name of the task that produces the pipeline + channel. If provided, the task name will be sanitized to be k8s + compatible. + + Raises: + ValueError: If name or task_name contains invalid characters. + ValueError: If both task_name and value are set. + """ + valid_name_regex = r'^[A-Za-z][A-Za-z0-9\s_-]*$' + if not re.match(valid_name_regex, name): + raise ValueError( + f'Only letters, numbers, spaces, "_", and "-" are allowed in the name. Must begin with a letter. Got name: {name}' + ) + + self.name = name + self.channel_type = channel_type + # ensure value is None even if empty string or empty list/dict + # so that serialization and unserialization remain consistent + # (i.e. None => '' => None) + self.task_name = task_name or None + from kfp.dsl import pipeline_context + + default_pipeline = pipeline_context.Pipeline.get_default_pipeline() + if self.task_name is not None and default_pipeline is not None and default_pipeline.tasks: + self.task = pipeline_context.Pipeline.get_default_pipeline().tasks[ + self.task_name] + else: + self.task = None + + @property + def full_name(self) -> str: + """Unique name for the PipelineChannel.""" + return f'{self.task_name}-{self.name}' if self.task_name else self.name + + @property + def pattern(self) -> str: + """Unique pattern for the PipelineChannel.""" + return str(self) + + def __str__(self) -> str: + """String representation of the PipelineChannel. + + The string representation is a string identifier so we can mix + the PipelineChannel inline with other strings such as arguments. + For example, we can support: ['echo %s' % param] as the + container command and later a compiler can replace the + placeholder '{{pipeline_channel:task=%s;name=%s;type=%s}}' with + its own parameter identifier. + """ + task_name = self.task_name or '' + name = self.name + channel_type = self.channel_type or '' + if isinstance(channel_type, dict): + channel_type = json.dumps(channel_type) + return _PIPELINE_CHANNEL_PLACEHOLDER_TEMPLATE % (task_name, name, + channel_type) + + def __repr__(self) -> str: + """Representation of the PipelineChannel. + + We make repr return the placeholder string so that if someone + uses str()-based serialization of complex objects containing + `PipelineChannel`, it works properly. (e.g. 
str([1, 2, 3, + kfp.pipeline_channel.PipelineParameterChannel("aaa"), 4, 5, 6,])) + """ + return str(self) + + def __hash__(self) -> int: + """Returns the hash of a PipelineChannel.""" + return hash(self.pattern) + + def __eq__(self, other): + return ConditionOperator('==', self, other) + + def __ne__(self, other): + return ConditionOperator('!=', self, other) + + def __lt__(self, other): + return ConditionOperator('<', self, other) + + def __le__(self, other): + return ConditionOperator('<=', self, other) + + def __gt__(self, other): + return ConditionOperator('>', self, other) + + def __ge__(self, other): + return ConditionOperator('>=', self, other) + + +class PipelineParameterChannel(PipelineChannel): + """Represents a pipeline parameter channel. + + Attributes: + name: The name of the pipeline channel. + channel_type: The type of the pipeline channel. + task_name: The name of the task that produces the pipeline channel. + None means it is not produced by any task, so if None, either user + constructs it directly (for providing an immediate value), or it is a + pipeline function argument. + pattern: The serialized string regex pattern this pipeline channel created + from. + value: The actual value of the pipeline channel. If provided, the + pipeline channel is "resolved" immediately. + """ + + def __init__( + self, + name: str, + channel_type: Union[str, Dict], + task_name: Optional[str] = None, + value: Optional[type_utils.PARAMETER_TYPES] = None, + ): + """Initializes a PipelineArtifactChannel instance. + + Args: + name: The name of the pipeline channel. + channel_type: The type of the pipeline channel. + task_name: Optional; The name of the task that produces the pipeline + channel. + value: Optional; The actual value of the pipeline channel. + + Raises: + ValueError: If name or task_name contains invalid characters. + ValueError: If both task_name and value are set. + TypeError: If the channel type is not a parameter type. + """ + if task_name and value: + raise ValueError('task_name and value cannot be both set.') + + if not type_utils.is_parameter_type(channel_type): + raise TypeError(f'{channel_type} is not a parameter type.') + + self.value = value + + super(PipelineParameterChannel, self).__init__( + name=name, + channel_type=channel_type, + task_name=task_name, + ) + + +class PipelineArtifactChannel(PipelineChannel): + """Represents a pipeline artifact channel. + + Attributes: + name: The name of the pipeline channel. + channel_type: The type of the pipeline channel. + task_name: The name of the task that produces the pipeline channel. + A pipeline artifact channel is always produced by some task. + pattern: The serialized string regex pattern this pipeline channel created + from. + """ + + def __init__( + self, + name: str, + channel_type: Union[str, Dict], + task_name: Optional[str], + is_artifact_list: bool, + ): + """Initializes a PipelineArtifactChannel instance. + + Args: + name: The name of the pipeline channel. + channel_type: The type of the pipeline channel. + task_name: Optional; the name of the task that produces the pipeline + channel. + + Raises: + ValueError: If name or task_name contains invalid characters. + TypeError: If the channel type is not an artifact type. 
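+
+        Example (an illustrative sketch; the names and the bundled artifact
+        type string are assumed values)::
+
+            channel = PipelineArtifactChannel(
+                name='model',
+                channel_type='system.Model@0.0.1',
+                task_name='train',
+                is_artifact_list=False,
+            )
+            # str(channel) renders the placeholder
+            # '{{channel:task=train;name=model;type=system.Model@0.0.1;}}'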
+ """ + if type_utils.is_parameter_type(channel_type): + raise TypeError(f'{channel_type} is not an artifact type.') + + self.is_artifact_list = is_artifact_list + + super(PipelineArtifactChannel, self).__init__( + name=name, + channel_type=channel_type, + task_name=task_name, + ) + + +def create_pipeline_channel( + name: str, + channel_type: Union[str, Dict], + task_name: Optional[str] = None, + value: Optional[type_utils.PARAMETER_TYPES] = None, + is_artifact_list: bool = False, +) -> PipelineChannel: + """Creates a PipelineChannel object. + + Args: + name: The name of the channel. + channel_type: The type of the channel, which decides whether it is an + PipelineParameterChannel or PipelineArtifactChannel + task_name: Optional; the task that produced the channel. + value: Optional; the realized value for a channel. + + Returns: + A PipelineParameterChannel or PipelineArtifactChannel object. + """ + if type_utils.is_parameter_type(channel_type): + return PipelineParameterChannel( + name=name, + channel_type=channel_type, + task_name=task_name, + value=value, + ) + else: + return PipelineArtifactChannel( + name=name, + channel_type=channel_type, + task_name=task_name, + is_artifact_list=is_artifact_list, + ) + + +def extract_pipeline_channels_from_string( + payload: str) -> List[PipelineChannel]: + """Extracts a list of PipelineChannel instances from the payload string. + + Note: this function removes all duplicate matches. + + Args: + payload: A string that may contain serialized PipelineChannels. + + Returns: + A list of PipelineChannels found from the payload. + """ + matches = re.findall(_PIPELINE_CHANNEL_PLACEHOLDER_REGEX, payload) + unique_channels = set() + for match in matches: + task_name, name, channel_type = match + + # channel_type could be either a string (e.g. "Integer") or a dictionary + # (e.g.: {"custom_type": {"custom_property": "some_value"}}). + # Try loading it into dictionary, if failed, it means channel_type is a + # string. + with contextlib.suppress(json.JSONDecodeError): + channel_type = json.loads(channel_type) + + if type_utils.is_parameter_type(channel_type): + pipeline_channel = PipelineParameterChannel( + name=name, + channel_type=channel_type, + task_name=task_name, + ) + else: + pipeline_channel = PipelineArtifactChannel( + name=name, + channel_type=channel_type, + task_name=task_name, + # currently no support for getting the index from a list of artifacts (e.g., my_datasets[0].uri), so this will always be False until accessing a single artifact element is supported + is_artifact_list=False, + ) + unique_channels.add(pipeline_channel) + + return list(unique_channels) + + +def extract_pipeline_channels_from_any( + payload: Union[PipelineChannel, str, list, tuple, dict] +) -> List[PipelineChannel]: + """Recursively extract PipelineChannels from any object or list of objects. + + Args: + payload: An object that contains serialized PipelineChannels or k8 + definition objects. + + Returns: + A list of PipelineChannels found from the payload. 
+ """ + if not payload: + return [] + + if isinstance(payload, PipelineChannel): + return [payload] + + if isinstance(payload, str): + return list(set(extract_pipeline_channels_from_string(payload))) + + if isinstance(payload, (list, tuple)): + pipeline_channels = [] + for item in payload: + pipeline_channels += extract_pipeline_channels_from_any(item) + return list(set(pipeline_channels)) + + if isinstance(payload, dict): + pipeline_channels = [] + for key, value in payload.items(): + pipeline_channels += extract_pipeline_channels_from_any(key) + pipeline_channels += extract_pipeline_channels_from_any(value) + return list(set(pipeline_channels)) + + # TODO(chensun): extract PipelineChannel from v2 container spec? + + return [] diff --git a/sdk/python/kfp/dsl-test/pipeline_channel_test.py b/sdk/python/kfp/dsl/pipeline_channel_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/pipeline_channel_test.py rename to sdk/python/kfp/dsl/pipeline_channel_test.py diff --git a/sdk/python/kfp/dsl/pipeline_context.py b/sdk/python/kfp/dsl/pipeline_context.py new file mode 100644 index 0000000000..c1304c39ba --- /dev/null +++ b/sdk/python/kfp/dsl/pipeline_context.py @@ -0,0 +1,203 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definition for Pipeline.""" + +import functools +from typing import Callable, Optional + +from kfp.dsl import component_factory +from kfp.dsl import pipeline_task +from kfp.dsl import tasks_group +from kfp.dsl import utils + + +def pipeline(func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None, + pipeline_root: Optional[str] = None, + display_name: Optional[str] = None) -> Callable: + """Decorator used to construct a pipeline. + + Example + :: + + @pipeline( + name='my-pipeline', + description='My ML Pipeline.' + pipeline_root='gs://my-bucket/my-output-path' + ) + def my_pipeline(a: str, b: int): + ... + + Args: + func: The Python function that defines a pipeline. + name: The pipeline name. Defaults to a sanitized version of the + decorated function name. + description: A human-readable description of the pipeline. + pipeline_root: The root directory from which to read input and output + parameters and artifacts. + display_name: A human-readable name for the pipeline. + """ + if func is None: + return functools.partial( + pipeline, + name=name, + description=description, + pipeline_root=pipeline_root, + display_name=display_name, + ) + + if pipeline_root: + func.pipeline_root = pipeline_root + + return component_factory.create_graph_component_from_func( + func, + name=name, + description=description, + display_name=display_name, + ) + + +class Pipeline: + """A pipeline contains a list of tasks. + + This class is not supposed to be used by pipeline authors since pipeline + authors can use pipeline functions (decorated with @pipeline) to reference + their pipelines. + This class is useful for implementing a compiler. 
For example, the compiler + can use the following to get the pipeline object and its tasks: + + Example: + :: + + with Pipeline() as p: + pipeline_func(*args_list) + + traverse(p.tasks) + + Attributes: + name: + tasks: + groups: + """ + + # _default_pipeline is set when the compiler runs "with Pipeline()" + _default_pipeline = None + + @staticmethod + def get_default_pipeline(): + """Gets the default pipeline.""" + return Pipeline._default_pipeline + + def __init__(self, name: str): + """Creates a new instance of Pipeline. + + Args: + name: The name of the pipeline. + """ + self.name = name + self.tasks = {} + # Add the root group. + self.groups = [ + tasks_group.TasksGroup( + group_type=tasks_group.TasksGroupType.PIPELINE, + name=name, + is_root=True) + ] + self._group_id = 0 + + def __enter__(self): + + if Pipeline._default_pipeline: + raise Exception('Nested pipelines are not allowed.') + + Pipeline._default_pipeline = self + + def register_task_and_generate_id(task: pipeline_task.PipelineTask): + return self.add_task( + task=task, + add_to_group=not getattr(task, 'is_exit_handler', False)) + + self._old_register_task_handler = ( + pipeline_task.PipelineTask._register_task_handler) + pipeline_task.PipelineTask._register_task_handler = ( + register_task_and_generate_id) + return self + + def __exit__(self, *unused_args): + + Pipeline._default_pipeline = None + pipeline_task.PipelineTask._register_task_handler = ( + self._old_register_task_handler) + + def add_task( + self, + task: pipeline_task.PipelineTask, + add_to_group: bool, + ) -> str: + """Adds a new task. + + Args: + task: A PipelineTask instance. + add_to_group: Whether add the task into the current group. Expect + True for all tasks expect for exit handler. + + Returns: + A unique task name. + """ + # Sanitizing the task name. + # Technically this could be delayed to the compilation stage, but string + # serialization of PipelineChannels make unsanitized names problematic. + task_name = utils.maybe_rename_for_k8s(task.component_spec.name) + #If there is an existing task with this name then generate a new name. + task_name = utils.make_name_unique_by_adding_index( + task_name, list(self.tasks.keys()), '-') + if task_name == '': + task_name = utils.make_name_unique_by_adding_index( + 'task', list(self.tasks.keys()), '-') + + self.tasks[task_name] = task + if add_to_group: + task.parent_task_group = self.groups[-1] + self.groups[-1].tasks.append(task) + + return task_name + + def push_tasks_group(self, group: 'tasks_group.TasksGroup'): + """Pushes a TasksGroup into the stack. + + Args: + group: A TasksGroup. Typically it is one of ExitHandler, Condition, + and ParallelFor. + """ + self.groups[-1].groups.append(group) + self.groups.append(group) + + def pop_tasks_group(self): + """Removes the current TasksGroup from the stack.""" + del self.groups[-1] + + def remove_task_from_groups(self, task: pipeline_task.PipelineTask): + """Removes a task from the pipeline. + + This is useful for excluding exit handler from the pipeline. 
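+
+        Args:
+            task: The task to remove from every group in the pipeline.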
+ """ + for group in self.groups: + group.remove_task_recursive(task) + + def get_next_group_id(self) -> str: + """Gets the next id for a new group.""" + self._group_id += 1 + return str(self._group_id) diff --git a/sdk/python/kfp/dsl/pipeline_task.py b/sdk/python/kfp/dsl/pipeline_task.py new file mode 100644 index 0000000000..f35cdd752b --- /dev/null +++ b/sdk/python/kfp/dsl/pipeline_task.py @@ -0,0 +1,685 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pipeline task class and operations.""" + +import copy +import inspect +import itertools +import re +from typing import Any, Dict, List, Mapping, Optional, Union +import warnings + +from kfp.dsl import constants +from kfp.dsl import pipeline_channel +from kfp.dsl import placeholders +from kfp.dsl import structures +from kfp.dsl import utils +from kfp.dsl.types import type_utils +from kfp.pipeline_spec import pipeline_spec_pb2 + +_register_task_handler = lambda task: utils.maybe_rename_for_k8s( + task.component_spec.name) + + +class PipelineTask: + """Represents a pipeline task (instantiated component). + + **Note:** ``PipelineTask`` should not be constructed by pipeline authors directly, but instead obtained via an instantiated component (see example). + + Replaces ``ContainerOp`` from ``kfp`` v1. Holds operations available on a task object, such as + ``.after()``, ``.set_memory_limit()``, ``.enable_caching()``, etc. + + Args: + component_spec: The component definition. + args: The dictionary of arguments on which the component was called to instantiate this task. + + Example: + :: + + @dsl.component + def identity(message: str) -> str: + return message + + @dsl.pipeline(name='my_pipeline') + def my_pipeline(): + # task is an instance of PipelineTask + task = identity(message='my string') + """ + _register_task_handler = _register_task_handler + + # Fallback behavior for compiling a component. This should be overriden by + # pipeline `register_task_and_generate_id` if compiling a pipeline (more + # than one component). 
+ + def __init__( + self, + component_spec: structures.ComponentSpec, + args: Mapping[str, Any], + ): + """Initilizes a PipelineTask instance.""" + # import within __init__ to avoid circular import + from kfp.dsl.tasks_group import TasksGroup + + self.parent_task_group: Union[None, TasksGroup] = None + args = args or {} + + for input_name, argument_value in args.items(): + + if input_name not in component_spec.inputs: + raise ValueError( + f'Component {component_spec.name!r} got an unexpected input:' + f' {input_name!r}.') + + input_spec = component_spec.inputs[input_name] + + type_utils.verify_type_compatibility( + given_value=argument_value, + expected_spec=input_spec, + error_message_prefix=( + f'Incompatible argument passed to the input ' + f'{input_name!r} of component {component_spec.name!r}: '), + ) + + self.component_spec = component_spec + + self._task_spec = structures.TaskSpec( + name=self._register_task_handler(), + inputs=dict(args.items()), + dependent_tasks=[], + component_ref=component_spec.name, + enable_caching=True) + self._run_after: List[str] = [] + + self.importer_spec = None + self.container_spec = None + self.pipeline_spec = None + self._ignore_upstream_failure_tag = False + # platform_config for this primitive task; empty if task is for a graph component + self.platform_config = {} + + def validate_placeholder_types( + component_spec: structures.ComponentSpec) -> None: + inputs_dict = component_spec.inputs or {} + outputs_dict = component_spec.outputs or {} + for arg in itertools.chain( + (component_spec.implementation.container.command or []), + (component_spec.implementation.container.args or [])): + check_primitive_placeholder_is_used_for_correct_io_type( + inputs_dict, outputs_dict, arg) + + if component_spec.implementation.container is not None: + validate_placeholder_types(component_spec) + self.container_spec = self._extract_container_spec_and_convert_placeholders( + component_spec=component_spec) + elif component_spec.implementation.importer is not None: + self.importer_spec = component_spec.implementation.importer + self.importer_spec.artifact_uri = args['uri'] + else: + self.pipeline_spec = self.component_spec.implementation.graph + + self._outputs = { + output_name: pipeline_channel.create_pipeline_channel( + name=output_name, + channel_type=output_spec.type, + task_name=self._task_spec.name, + is_artifact_list=output_spec.is_artifact_list, + ) for output_name, output_spec in ( + component_spec.outputs or {}).items() + } + + self._inputs = args + + self._channel_inputs = [ + value for _, value in args.items() + if isinstance(value, pipeline_channel.PipelineChannel) + ] + pipeline_channel.extract_pipeline_channels_from_any([ + value for _, value in args.items() + if not isinstance(value, pipeline_channel.PipelineChannel) + ]) + + @property + def platform_spec(self) -> pipeline_spec_pb2.PlatformSpec: + """PlatformSpec for all tasks in the pipeline as task. + + Only for use on tasks created from GraphComponents. + """ + if self.pipeline_spec: + return self.component_spec.platform_spec + + # can only create primitive task platform spec at compile-time, since the executor label is not known until then + raise ValueError( + f'Can only access {".platform_spec"!r} property on a tasks created from pipelines. Use {".platform_config"!r} for tasks created from primitive components.' + ) + + @property + def name(self) -> str: + """The name of the task. + + Unique within its parent group. 
+ """ + return self._task_spec.name + + @property + def inputs( + self + ) -> List[Union[type_utils.PARAMETER_TYPES, + pipeline_channel.PipelineChannel]]: + """The list of actual inputs passed to the task.""" + return self._inputs + + @property + def channel_inputs(self) -> List[pipeline_channel.PipelineChannel]: + """The list of all channel inputs passed to the task. + + :meta private: + """ + return self._channel_inputs + + @property + def output(self) -> pipeline_channel.PipelineChannel: + """The single output of the task. + + Used when a task has exactly one output parameter. + """ + if len(self._outputs) != 1: + raise AttributeError( + 'The task has multiple outputs. Please reference the output by its name.' + ) + return list(self._outputs.values())[0] + + @property + def outputs(self) -> Mapping[str, pipeline_channel.PipelineChannel]: + """The dictionary of outputs of the task. + + Used when a task has more the one output or uses an + ``OutputPath`` or ``Output[Artifact]`` type annotation. + """ + return self._outputs + + @property + def dependent_tasks(self) -> List[str]: + """A list of the dependent task names.""" + return self._task_spec.dependent_tasks + + def _extract_container_spec_and_convert_placeholders( + self, component_spec: structures.ComponentSpec + ) -> structures.ContainerSpecImplementation: + """Extracts a ContainerSpec from a ComponentSpec and converts + placeholder objects to strings. + + Args: + component_spec: The component definition. + """ + container_spec = copy.deepcopy(component_spec.implementation.container) + if container_spec is None: + raise ValueError( + '_extract_container_spec_and_convert_placeholders used incorrectly. ComponentSpec.implementation.container is None.' + ) + container_spec.command = [ + placeholders.convert_command_line_element_to_string(e) + for e in container_spec.command or [] + ] + container_spec.args = [ + placeholders.convert_command_line_element_to_string(e) + for e in container_spec.args or [] + ] + return container_spec + + def set_caching_options(self, enable_caching: bool) -> 'PipelineTask': + """Sets caching options for the task. + + Args: + enable_caching: Whether to enable caching. + + Returns: + Self return to allow chained setting calls. + """ + self._task_spec.enable_caching = enable_caching + return self + + def _ensure_container_spec_exists(self) -> None: + """Ensures that the task has a container spec.""" + caller_method_name = inspect.stack()[1][3] + + if self.container_spec is None: + raise ValueError( + f'{caller_method_name} can only be used on single-step components, not pipelines used as components, or special components like importers.' + ) + + def _validate_cpu_request_limit(self, cpu: str) -> float: + """Validates cpu request/limit string and converts to its numeric + value. + + Args: + cpu: CPU requests or limits. This string should be a number or a + number followed by an "m" to indicate millicores (1/1000). For + more information, see `Specify a CPU Request and a CPU Limit + `_. + + Raises: + ValueError if the cpu request/limit string value is invalid. + + Returns: + The numeric value (float) of the cpu request/limit. + """ + if re.match(r'([0-9]*[.])?[0-9]+m?$', cpu) is None: + raise ValueError( + 'Invalid cpu string. Should be float or integer, or integer' + ' followed by "m".') + + return float(cpu[:-1]) / 1000 if cpu.endswith('m') else float(cpu) + + def set_cpu_request(self, cpu: str) -> 'PipelineTask': + """Sets CPU request (minimum) for the task. + + Args: + cpu: Minimum CPU requests required. 
This string should be a number + or a number followed by an "m" to indicate millicores (1/1000). + For more information, see `Specify a CPU Request and a CPU Limit + `_. + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + cpu = self._validate_cpu_request_limit(cpu) + + if self.container_spec.resources is not None: + self.container_spec.resources.cpu_request = cpu + else: + self.container_spec.resources = structures.ResourceSpec( + cpu_request=cpu) + + return self + + def set_cpu_limit(self, cpu: str) -> 'PipelineTask': + """Sets CPU limit (maximum) for the task. + + Args: + cpu: Maximum CPU requests allowed. This string should be a number + or a number followed by an "m" to indicate millicores (1/1000). + For more information, see `Specify a CPU Request and a CPU Limit + `_. + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + cpu = self._validate_cpu_request_limit(cpu) + + if self.container_spec.resources is not None: + self.container_spec.resources.cpu_limit = cpu + else: + self.container_spec.resources = structures.ResourceSpec( + cpu_limit=cpu) + + return self + + def set_accelerator_limit(self, limit: int) -> 'PipelineTask': + """Sets accelerator limit (maximum) for the task. Only applies if + accelerator type is also set via .set_accelerator_type(). + + Args: + limit: Maximum number of accelerators allowed. + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + if isinstance(limit, str): + if re.match(r'[1-9]\d*$', limit) is None: + raise ValueError(f'{"limit"!r} must be positive integer.') + limit = int(limit) + + if self.container_spec.resources is not None: + self.container_spec.resources.accelerator_count = limit + else: + self.container_spec.resources = structures.ResourceSpec( + accelerator_count=limit) + + return self + + def set_gpu_limit(self, gpu: str) -> 'PipelineTask': + """Sets GPU limit (maximum) for the task. Only applies if accelerator + type is also set via .add_accelerator_type(). + + Args: + gpu: The maximum GPU reuqests allowed. This string should be a positive integer number of GPUs. + + Returns: + Self return to allow chained setting calls. + + :meta private: + """ + warnings.warn( + f'{self.set_gpu_limit.__name__!r} is deprecated. Please use {self.set_accelerator_limit.__name__!r} instead.', + category=DeprecationWarning) + return self.set_accelerator_limit(gpu) + + def _validate_memory_request_limit(self, memory: str) -> float: + """Validates memory request/limit string and converts to its numeric + value. + + Args: + memory: Memory requests or limits. This string should be a number or + a number followed by one of "E", "Ei", "P", "Pi", "T", "Ti", "G", + "Gi", "M", "Mi", "K", or "Ki". + + Raises: + ValueError if the memory request/limit string value is invalid. + + Returns: + The numeric value (float) of the memory request/limit. + """ + if re.match(r'^[0-9]+(E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki){0,1}$', + memory) is None: + raise ValueError( + 'Invalid memory string. 
Should be a number or a number ' + 'followed by one of "E", "Ei", "P", "Pi", "T", "Ti", "G", ' + '"Gi", "M", "Mi", "K", "Ki".') + + if memory.endswith('E'): + memory = float(memory[:-1]) * constants._E / constants._G + elif memory.endswith('Ei'): + memory = float(memory[:-2]) * constants._EI / constants._G + elif memory.endswith('P'): + memory = float(memory[:-1]) * constants._P / constants._G + elif memory.endswith('Pi'): + memory = float(memory[:-2]) * constants._PI / constants._G + elif memory.endswith('T'): + memory = float(memory[:-1]) * constants._T / constants._G + elif memory.endswith('Ti'): + memory = float(memory[:-2]) * constants._TI / constants._G + elif memory.endswith('G'): + memory = float(memory[:-1]) + elif memory.endswith('Gi'): + memory = float(memory[:-2]) * constants._GI / constants._G + elif memory.endswith('M'): + memory = float(memory[:-1]) * constants._M / constants._G + elif memory.endswith('Mi'): + memory = float(memory[:-2]) * constants._MI / constants._G + elif memory.endswith('K'): + memory = float(memory[:-1]) * constants._K / constants._G + elif memory.endswith('Ki'): + memory = float(memory[:-2]) * constants._KI / constants._G + else: + # By default interpret as a plain integer, in the unit of Bytes. + memory = float(memory) / constants._G + + return memory + + def set_memory_request(self, memory: str) -> 'PipelineTask': + """Sets memory request (minimum) for the task. + + Args: + memory: The minimum memory requests required. This string should be + a number or a number followed by one of "E", "Ei", "P", "Pi", + "T", "Ti", "G", "Gi", "M", "Mi", "K", or "Ki". + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + memory = self._validate_memory_request_limit(memory) + + if self.container_spec.resources is not None: + self.container_spec.resources.memory_request = memory + else: + self.container_spec.resources = structures.ResourceSpec( + memory_request=memory) + + return self + + def set_memory_limit(self, memory: str) -> 'PipelineTask': + """Sets memory limit (maximum) for the task. + + Args: + memory: The maximum memory requests allowed. This string should be + a number or a number followed by one of "E", "Ei", "P", "Pi", + "T", "Ti", "G", "Gi", "M", "Mi", "K", or "Ki". + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + memory = self._validate_memory_request_limit(memory) + + if self.container_spec.resources is not None: + self.container_spec.resources.memory_limit = memory + else: + self.container_spec.resources = structures.ResourceSpec( + memory_limit=memory) + + return self + + def set_retry(self, + num_retries: int, + backoff_duration: Optional[str] = None, + backoff_factor: Optional[float] = None, + backoff_max_duration: Optional[str] = None) -> 'PipelineTask': + """Sets task retry parameters. + + Args: + num_retries : Number of times to retry on failure. + backoff_duration: Number of seconds to wait before triggering a retry. Defaults to ``'0s'`` (immediate retry). + backoff_factor: Exponential backoff factor applied to ``backoff_duration``. For example, if ``backoff_duration="60"`` (60 seconds) and ``backoff_factor=2``, the first retry will happen after 60 seconds, then again after 120, 240, and so on. Defaults to ``2.0``. + backoff_max_duration: Maximum duration during which the task will be retried. Maximum duration is 1 hour (3600s). Defaults to ``'3600s'``. + + Returns: + Self return to allow chained setting calls. 
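For example (reusing the hypothetical ``train_op`` component from earlier), a retry policy with exponential backoff can be attached as follows::

    task = train_op(epochs=10)
    # Retry up to 3 times, waiting 30s before the first retry, then 60s, then 120s.
    task.set_retry(num_retries=3, backoff_duration='30s', backoff_factor=2.0)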
+ """ + self._task_spec.retry_policy = structures.RetryPolicy( + max_retry_count=num_retries, + backoff_duration=backoff_duration, + backoff_factor=backoff_factor, + backoff_max_duration=backoff_max_duration, + ) + return self + + def add_node_selector_constraint(self, accelerator: str) -> 'PipelineTask': + """Sets accelerator type to use when executing this task. + + Args: + accelerator: The name of the accelerator, such as ``'NVIDIA_TESLA_K80'``, ``'TPU_V3'``, ``'nvidia.com/gpu'`` or ``'cloud-tpus.google.com/v3'``. + + Returns: + Self return to allow chained setting calls. + """ + warnings.warn( + f'{self.add_node_selector_constraint.__name__!r} is deprecated. Please use {self.set_accelerator_type.__name__!r} instead.', + category=DeprecationWarning) + return self.set_accelerator_type(accelerator) + + def set_accelerator_type(self, accelerator: str) -> 'PipelineTask': + """Sets accelerator type to use when executing this task. + + Args: + accelerator: The name of the accelerator, such as ``'NVIDIA_TESLA_K80'``, ``'TPU_V3'``, ``'nvidia.com/gpu'`` or ``'cloud-tpus.google.com/v3'``. + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + if self.container_spec.resources is not None: + self.container_spec.resources.accelerator_type = accelerator + if self.container_spec.resources.accelerator_count is None: + self.container_spec.resources.accelerator_count = 1 + else: + self.container_spec.resources = structures.ResourceSpec( + accelerator_count=1, accelerator_type=accelerator) + + return self + + def set_display_name(self, name: str) -> 'PipelineTask': + """Sets display name for the task. + + Args: + name: Display name. + + Returns: + Self return to allow chained setting calls. + """ + self._task_spec.display_name = name + return self + + def set_env_variable(self, name: str, value: str) -> 'PipelineTask': + """Sets environment variable for the task. + + Args: + name: Environment variable name. + value: Environment variable value. + + Returns: + Self return to allow chained setting calls. + """ + self._ensure_container_spec_exists() + + if self.container_spec.env is not None: + self.container_spec.env[name] = value + else: + self.container_spec.env = {name: value} + return self + + def after(self, *tasks) -> 'PipelineTask': + """Specifies an explicit dependency on other tasks by requiring this + task be executed after other tasks finish completion. + + Args: + *tasks: Tasks after which this task should be executed. + + Returns: + Self return to allow chained setting calls. + + Example: + :: + + @dsl.pipeline(name='my-pipeline') + def my_pipeline(): + task1 = my_component(text='1st task') + task2 = my_component(text='2nd task').after(task1) + """ + for task in tasks: + self._run_after.append(task.name) + self._task_spec.dependent_tasks.append(task.name) + return self + + def ignore_upstream_failure(self) -> 'PipelineTask': + """If called, the pipeline task will run when any specified upstream + tasks complete, even if unsuccessful. + + This method effectively turns the caller task into an exit task + if the caller task has upstream dependencies. + + If the task has no upstream tasks, either via data exchange or an explicit dependency via .after(), this method has no effect. + + Returns: + Self return to allow chained setting calls. 
+ + Example: + :: + + @dsl.pipeline() + def my_pipeline(text: str = 'message'): + task = fail_op(message=text) + clean_up_task = print_op( + message=task.output).ignore_upstream_failure() + """ + + for input_spec_name, input_spec in (self.component_spec.inputs or + {}).items(): + argument_value = self._inputs[input_spec_name] + if (isinstance(argument_value, pipeline_channel.PipelineChannel) + ) and (not input_spec.optional) and (argument_value.task_name + is not None): + raise ValueError( + f'Tasks can only use .ignore_upstream_failure() if all input parameters that accept arguments created by an upstream task have a default value, in case the upstream task fails to produce its output. Input parameter task {self.name!r}`s {input_spec_name!r} argument is an output of an upstream task {argument_value.task_name!r}, but {input_spec_name!r} has no default value.' + ) + + self._ignore_upstream_failure_tag = True + + return self + + +# TODO: this function should ideally be in the function kfp.dsl.structures.check_placeholder_references_valid_io_name, which does something similar, but this causes the exception to be raised at component definition time, rather than compile time. This would break tests that load v1 component YAML, even though that YAML is invalid. +def check_primitive_placeholder_is_used_for_correct_io_type( + inputs_dict: Dict[str, structures.InputSpec], + outputs_dict: Dict[str, structures.OutputSpec], + arg: Union[placeholders.CommandLineElement, Any], +): + """Validates input/output placeholders refer to an input/output with an + appropriate type for the placeholder. This should only apply to components + loaded from v1 component YAML, where the YAML is authored directly. For v2 + YAML, this is encapsulated in the DSL logic which does not permit writing + incorrect placeholders. + + Args: + inputs_dict: The existing input names. + outputs_dict: The existing output names. + arg: The command line element, which may be a placeholder. 
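A minimal sketch of the kind of mispairing this check rejects, using a hypothetical artifact-typed input::

    inputs_dict = {
        'examples': structures.InputSpec(type='system.Dataset@0.0.1'),
    }
    # A value placeholder may only reference parameter-typed inputs, so this raises
    # TypeError: Input "examples" with type "system.Dataset@0.0.1" cannot be
    # paired with InputValuePlaceholder.
    check_primitive_placeholder_is_used_for_correct_io_type(
        inputs_dict, {}, placeholders.InputValuePlaceholder('examples'))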
+ """ + + if isinstance(arg, placeholders.InputValuePlaceholder): + input_name = arg.input_name + if not type_utils.is_parameter_type(inputs_dict[input_name].type): + raise TypeError( + f'Input "{input_name}" with type ' + f'"{inputs_dict[input_name].type}" cannot be paired with ' + 'InputValuePlaceholder.') + + elif isinstance( + arg, + (placeholders.InputUriPlaceholder, placeholders.InputPathPlaceholder)): + input_name = arg.input_name + if type_utils.is_parameter_type(inputs_dict[input_name].type): + raise TypeError( + f'Input "{input_name}" with type ' + f'"{inputs_dict[input_name].type}" cannot be paired with ' + f'{arg.__class__.__name__}.') + + elif isinstance(arg, placeholders.OutputUriPlaceholder): + output_name = arg.output_name + if type_utils.is_parameter_type(outputs_dict[output_name].type): + raise TypeError( + f'Output "{output_name}" with type ' + f'"{outputs_dict[output_name].type}" cannot be paired with ' + f'{arg.__class__.__name__}.') + elif isinstance(arg, placeholders.IfPresentPlaceholder): + all_normalized_args: List[placeholders.CommandLineElement] = [] + if arg.then is None: + pass + elif isinstance(arg.then, list): + all_normalized_args.extend(arg.then) + else: + all_normalized_args.append(arg.then) + + if arg.else_ is None: + pass + elif isinstance(arg.else_, list): + all_normalized_args.extend(arg.else_) + else: + all_normalized_args.append(arg.else_) + + for arg in all_normalized_args: + check_primitive_placeholder_is_used_for_correct_io_type( + inputs_dict, outputs_dict, arg) + elif isinstance(arg, placeholders.ConcatPlaceholder): + for arg in arg.items: + check_primitive_placeholder_is_used_for_correct_io_type( + inputs_dict, outputs_dict, arg) diff --git a/sdk/python/kfp/dsl-test/pipeline_task_test.py b/sdk/python/kfp/dsl/pipeline_task_test.py similarity index 88% rename from sdk/python/kfp/dsl-test/pipeline_task_test.py rename to sdk/python/kfp/dsl/pipeline_task_test.py index cf71a4150b..6e7443fc1a 100644 --- a/sdk/python/kfp/dsl-test/pipeline_task_test.py +++ b/sdk/python/kfp/dsl/pipeline_task_test.py @@ -18,7 +18,6 @@ from absl.testing import parameterized from kfp import dsl -from kfp.components import load_yaml_utilities from kfp.dsl import pipeline_task from kfp.dsl import placeholders from kfp.dsl import structures @@ -113,8 +112,8 @@ def test_create_pipeline_task_valid(self): ) task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) self.assertEqual(task._task_spec, expected_task_spec) @@ -126,8 +125,8 @@ def test_create_pipeline_task_invalid_wrong_input(self): ValueError, "Component 'component1' got an unexpected input: 'input0'."): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={ 'input1': 'value', 'input0': 'abc', @@ -136,8 +135,8 @@ def test_create_pipeline_task_invalid_wrong_input(self): def test_set_caching_options(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_caching_options(False) @@ -164,8 +163,8 @@ def test_set_caching_options(self): def test_set_valid_cpu_request_limit(self, cpu: str, expected_cpu_number: float): task 
= pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_cpu_request(cpu) @@ -183,8 +182,8 @@ def test_set_valid_cpu_request_limit(self, cpu: str, def test_set_valid_gpu_limit(self, gpu_limit: str, expected_gpu_number: int): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) with self.assertWarnsRegex( @@ -197,8 +196,8 @@ def test_set_valid_gpu_limit(self, gpu_limit: str, def test_add_valid_node_selector_constraint(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) with self.assertWarnsRegex( @@ -221,8 +220,8 @@ def test_add_valid_node_selector_constraint(self): ) def test_set_accelerator_limit(self, limit, expected): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) @@ -286,8 +285,8 @@ def test_set_accelerator_limit(self, limit, expected): ) def test_set_memory_limit(self, memory: str, expected_memory_number: int): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_memory_request(memory) @@ -299,8 +298,8 @@ def test_set_memory_limit(self, memory: str, expected_memory_number: int): def test_set_accelerator_type_with_type_only(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_accelerator_type('NVIDIA_TESLA_K80') @@ -311,8 +310,8 @@ def test_set_accelerator_type_with_type_only(self): def test_set_accelerator_type_with_accelerator_count(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_accelerator_limit('5').set_accelerator_type('TPU_V3') @@ -323,8 +322,8 @@ def test_set_accelerator_type_with_accelerator_count(self): def test_set_env_variable(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_env_variable('env_name', 'env_value') @@ -332,8 +331,8 @@ def test_set_env_variable(self): def test_set_display_name(self): task = pipeline_task.PipelineTask( - component_spec=load_yaml_utilities - ._load_component_spec_from_yaml_documents(V2_YAML), + component_spec=structures.ComponentSpec.from_yaml_documents( + V2_YAML), args={'input1': 'value'}, ) task.set_display_name('test_name') diff --git a/sdk/python/kfp/dsl/placeholders.py b/sdk/python/kfp/dsl/placeholders.py new file mode 100644 index 0000000000..39a2617cff --- /dev/null +++ 
b/sdk/python/kfp/dsl/placeholders.py @@ -0,0 +1,458 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains data structures and functions for handling input and output +placeholders.""" + +import abc +import json +from typing import Any, Dict, List, Optional, Union + +from kfp.dsl import utils +from kfp.dsl.types import type_utils + + +class Placeholder(abc.ABC): + + @abc.abstractmethod + def _to_string(self) -> str: + raise NotImplementedError + + def __str__(self) -> str: + """Enables use of placeholders in f-strings. + + To be overridden by container placeholders ConcatPlaceholder and + IfPresentPlaceholder, which cannot be used in an f-string. + """ + return self._to_string() + + def __eq__(self, other: Any) -> bool: + """Used for comparing placeholders in tests.""" + return isinstance(other, + self.__class__) and self.__dict__ == other.__dict__ + + +class ExecutorInputPlaceholder(Placeholder): + + def _to_string(self) -> str: + return '{{$}}' + + +class InputValuePlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.input_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.inputs.parameters['{self.input_name}']}}}}" + + +class InputListOfArtifactsPlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.input_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.inputs.artifacts['{self.input_name}']}}}}" + + def __getattribute__(self, name: str) -> Any: + if name in {'name', 'uri', 'metadata', 'path'}: + raise AttributeError( + f'Cannot access an attribute on a list of artifacts in a Custom Container Component. Found reference to attribute {name!r} on {self.input_name!r}. Please pass the whole list of artifacts only.' + ) + else: + return object.__getattribute__(self, name) + + def __getitem__(self, k: int) -> None: + raise KeyError( + f'Cannot access individual artifacts in a list of artifacts. Found access to element {k} on {self.input_name!r}. Please pass the whole list of artifacts only.' + ) + + +class OutputListOfArtifactsPlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.output_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.outputs.artifacts['{self.output_name}']}}}}" + + def __getattribute__(self, name: str) -> Any: + if name in {'name', 'uri', 'metadata', 'path'}: + raise AttributeError( + f'Cannot access an attribute on a list of artifacts in a Custom Container Component. Found reference to attribute {name!r} on {self.output_name!r}. Please pass the whole list of artifacts only.' + ) + else: + return object.__getattribute__(self, name) + + def __getitem__(self, k: int) -> None: + raise KeyError( + f'Cannot access individual artifacts in a list of artifacts. Found access to element {k} on {self.output_name!r}. Please pass the whole list of artifacts only.' 
+ ) + + +class InputPathPlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.input_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.inputs.artifacts['{self.input_name}'].path}}}}" + + +class InputUriPlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.input_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.inputs.artifacts['{self.input_name}'].uri}}}}" + + +class InputMetadataPlaceholder(Placeholder): + + def __init__(self, input_name: str) -> None: + self.input_name = input_name + + def _to_string(self) -> str: + return f"{{{{$.inputs.artifacts['{self.input_name}'].metadata}}}}" + + def __getitem__(self, key: str) -> str: + return f"{{{{$.inputs.artifacts['{self.input_name}'].metadata['{key}']}}}}" + + +class OutputParameterPlaceholder(Placeholder): + + def __init__(self, output_name: str) -> None: + self.output_name = output_name + + def _to_string(self) -> str: + return f"{{{{$.outputs.parameters['{self.output_name}'].output_file}}}}" + + +class OutputPathPlaceholder(Placeholder): + + def __init__(self, output_name: str) -> None: + self.output_name = output_name + + def _to_string(self) -> str: + return f"{{{{$.outputs.artifacts['{self.output_name}'].path}}}}" + + +class OutputUriPlaceholder(Placeholder): + + def __init__(self, output_name: str) -> None: + self.output_name = output_name + + def _to_string(self) -> str: + return f"{{{{$.outputs.artifacts['{self.output_name}'].uri}}}}" + + +class OutputMetadataPlaceholder(Placeholder): + + def __init__(self, output_name: str) -> None: + self.output_name = output_name + + def _to_string(self) -> str: + return f"{{{{$.outputs.artifacts['{self.output_name}'].metadata}}}}" + + def __getitem__(self, key: str) -> str: + return f"{{{{$.outputs.artifacts['{self.output_name}'].metadata['{key}']}}}}" + + +class ConcatPlaceholder(Placeholder): + """Placeholder for concatenating multiple strings. May contain other + placeholders. + + Args: + items: Elements to concatenate. + + Examples: + :: + + @container_component + def container_with_concat_placeholder(text1: str, text2: Output[Dataset], + output_path: OutputPath(str)): + return ContainerSpec( + image='python:3.7', + command=[ + 'my_program', + ConcatPlaceholder(['prefix-', text1, text2.uri]) + ], + args=['--output_path', output_path] + ) + """ + + def __init__(self, items: List['CommandLineElement']) -> None: + for item in items: + if isinstance(item, IfPresentPlaceholder): + item._validate_then_and_else_are_only_single_element() + self.items = items + + def _to_dict(self) -> Dict[str, Any]: + return { + 'Concat': [ + convert_command_line_element_to_string_or_struct(item) + for item in self.items + ] + } + + def _to_string(self) -> str: + return json.dumps(self._to_dict()) + + def __str__(self) -> str: + raise ValueError( + f'Cannot use {self.__class__.__name__} in an f-string.') + + +class IfPresentPlaceholder(Placeholder): + """Placeholder for handling cases where an input may or may not be passed. + May contain other placeholders. + + Args: + input_name: Name of the input/output. + then: If the input/output specified in name is present, the command-line argument will be replaced at run-time by the value of then. + else_: If the input/output specified in name is not present, the command-line argument will be replaced at run-time by the value of else_. 
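Unlike the primitive placeholders earlier in this module, which render directly to runtime strings, the container placeholders (``ConcatPlaceholder`` and ``IfPresentPlaceholder``) serialize to a small JSON structure; a sketch of both renderings (``_to_string`` is the private helper used during compilation)::

    str(InputValuePlaceholder('file'))
    # "{{$.inputs.parameters['file']}}"

    ConcatPlaceholder(['gs://bucket/', InputValuePlaceholder('file')])._to_string()
    # returns the JSON string {"Concat": ["gs://bucket/", "{{$.inputs.parameters['file']}}"]}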
+ + Examples: + :: + + @container_component + def container_with_if_placeholder(output_path: OutputPath(str), + dataset: Output[Dataset], + optional_input: str = 'default'): + return ContainerSpec( + image='python:3.7', + command=[ + 'my_program', + IfPresentPlaceholder( + input_name='optional_input', + then=[optional_input], + else_=['no_input']), '--dataset', + IfPresentPlaceholder( + input_name='optional_input', then=[dataset.uri], else_=['no_dataset']) + ], + args=['--output_path', output_path] + ) + """ + + def __init__( + self, + input_name: str, + then: Union['CommandLineElement', List['CommandLineElement']], + else_: Optional[Union['CommandLineElement', + List['CommandLineElement']]] = None, + ) -> None: + self.input_name = input_name + self.then = then + self.else_ = else_ + + def _validate_then_and_else_are_only_single_element(self) -> None: + """Rercursively validate that then and else contain only a single + element. + + This method should only be called by a ConcatPlaceholder, which + cannot have an IfPresentPlaceholder with a list in either 'then' + or 'else_'. + """ + + # the illegal state + if isinstance(self.then, list) or isinstance(self.else_, list): + raise ValueError( + f'Cannot use {IfPresentPlaceholder.__name__} within {ConcatPlaceholder.__name__} when `then` and `else_` arguments to {IfPresentPlaceholder.__name__} are lists. Please use a single element for `then` and `else_` only.' + ) + + # check that there is no illegal state found recursively + if isinstance(self.then, ConcatPlaceholder): + for item in self.then.items: + if isinstance(item, IfPresentPlaceholder): + item._validate_then_and_else_are_only_single_element() + elif isinstance(self.then, IfPresentPlaceholder): + self.then._validate_then_and_else_are_only_single_element() + + if isinstance(self.else_, ConcatPlaceholder): + for item in self.else_.items: + if isinstance(item, IfPresentPlaceholder): + item._validate_then_and_else_are_only_single_element() + elif isinstance(self.else_, IfPresentPlaceholder): + self.else_._validate_then_and_else_are_only_single_element() + + def _to_dict(self) -> Dict[str, Any]: + struct = { + 'IfPresent': { + 'InputName': + self.input_name, + 'Then': [ + convert_command_line_element_to_string_or_struct(e) + for e in self.then + ] if isinstance(self.then, list) else + convert_command_line_element_to_string_or_struct( + self.then) + } + } + if self.else_: + struct['IfPresent']['Else'] = [ + convert_command_line_element_to_string_or_struct(e) + for e in self.else_ + ] if isinstance( + self.else_, + list) else convert_command_line_element_to_string_or_struct( + self.else_) + return struct + + def _to_string(self) -> str: + return json.dumps(self._to_dict()) + + def __str__(self) -> str: + raise ValueError( + f'Cannot use {self.__class__.__name__} in an f-string.') + + +_CONTAINER_PLACEHOLDERS = (IfPresentPlaceholder, ConcatPlaceholder) +PRIMITIVE_INPUT_PLACEHOLDERS = (InputValuePlaceholder, InputPathPlaceholder, + InputUriPlaceholder, InputMetadataPlaceholder, + InputListOfArtifactsPlaceholder) +PRIMITIVE_OUTPUT_PLACEHOLDERS = (OutputParameterPlaceholder, + OutputPathPlaceholder, OutputUriPlaceholder, + OutputMetadataPlaceholder, + OutputListOfArtifactsPlaceholder) + +CommandLineElement = Union[str, Placeholder] + + +def convert_command_line_element_to_string( + element: Union[str, Placeholder]) -> str: + return element._to_string() if isinstance(element, Placeholder) else element + + +def convert_command_line_element_to_string_or_struct( + element: Union[Placeholder, Any]) -> 
Any: + if isinstance(element, Placeholder): + return element._to_dict() if isinstance( + element, _CONTAINER_PLACEHOLDERS) else element._to_string() + + return element + + +def maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + arg: Dict[str, Any], + component_dict: Dict[str, Any]) -> Union[CommandLineElement, Any]: + if isinstance(arg, str): + return arg + + if not isinstance(arg, dict): + raise ValueError + + has_one_entry = len(arg) == 1 + + if not has_one_entry: + raise ValueError( + f'Got unexpected dictionary {arg}. Expected a dictionary with one entry.' + ) + + first_key = list(arg.keys())[0] + first_value = list(arg.values())[0] + if first_key == 'inputValue': + return InputValuePlaceholder( + input_name=utils.sanitize_input_name(first_value)) + + elif first_key == 'inputPath': + return InputPathPlaceholder( + input_name=utils.sanitize_input_name(first_value)) + + elif first_key == 'inputUri': + return InputUriPlaceholder( + input_name=utils.sanitize_input_name(first_value)) + + elif first_key == 'outputPath': + outputs = component_dict['outputs'] + for output in outputs: + if output['name'] == first_value: + type_ = output.get('type') + is_parameter = type_utils.is_parameter_type(type_) + if is_parameter: + return OutputParameterPlaceholder( + output_name=utils.sanitize_input_name(first_value)) + else: + return OutputPathPlaceholder( + output_name=utils.sanitize_input_name(first_value)) + raise ValueError( + f'{first_value} not found in component outputs. Could not process placeholders. Component spec: {component_dict}.' + ) + + elif first_key == 'outputUri': + return OutputUriPlaceholder( + output_name=utils.sanitize_input_name(first_value)) + + elif first_key == 'ifPresent': + structure_kwargs = arg['ifPresent'] + structure_kwargs['input_name'] = structure_kwargs.pop('inputName') + structure_kwargs['otherwise'] = structure_kwargs.pop('else') + structure_kwargs['then'] = [ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + e, component_dict=component_dict) + for e in structure_kwargs['then'] + ] + structure_kwargs['otherwise'] = [ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + e, component_dict=component_dict) + for e in structure_kwargs['otherwise'] + ] + return IfPresentPlaceholder(**structure_kwargs) + + elif first_key == 'concat': + return ConcatPlaceholder(items=[ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + e, component_dict=component_dict) for e in arg['concat'] + ]) + + elif first_key == 'executorInput': + return ExecutorInputPlaceholder() + + elif 'if' in arg: + if_ = arg['if'] + input_name = utils.sanitize_input_name(if_['cond']['isPresent']) + then = if_['then'] + else_ = if_.get('else') + + if isinstance(then, list): + then = [ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + val, component_dict=component_dict) for val in then + ] + else: + then = maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + then, component_dict=component_dict) + + if else_ is None: + pass + elif isinstance(else_, list): + else_ = [ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + val, component_dict=component_dict) for val in else_ + ] + else: + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + else_, component_dict=component_dict) + + return IfPresentPlaceholder( + input_name=input_name, then=then, else_=else_) + + elif 'concat' in arg: + + return ConcatPlaceholder(items=[ + maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + val, component_dict=component_dict) for val in arg['concat'] + ]) + else: + raise TypeError(f'Unexpected 
argument {arg} of type {type(arg)}.') diff --git a/sdk/python/kfp/dsl-test/placeholders_test.py b/sdk/python/kfp/dsl/placeholders_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/placeholders_test.py rename to sdk/python/kfp/dsl/placeholders_test.py diff --git a/sdk/python/kfp/dsl/python_component.py b/sdk/python/kfp/dsl/python_component.py new file mode 100644 index 0000000000..faa4c44740 --- /dev/null +++ b/sdk/python/kfp/dsl/python_component.py @@ -0,0 +1,44 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Python function-based component.""" + +from typing import Callable + +from kfp.dsl import base_component +from kfp.dsl import structures + + +class PythonComponent(base_component.BaseComponent): + """A component defined via Python function. + + **Note:** ``PythonComponent`` is not intended to be used to construct components directly. Use ``@kfp.dsl.component`` instead. + + Args: + component_spec: Component definition. + python_func: Python function that becomes the implementation of this component. + """ + + def __init__( + self, + component_spec: structures.ComponentSpec, + python_func: Callable, + ): + super().__init__(component_spec=component_spec) + self.python_func = python_func + + self._prevent_using_output_lists_of_artifacts() + + def execute(self, **kwargs): + """Executes the Python function that defines the component.""" + return self.python_func(**kwargs) diff --git a/sdk/python/kfp/dsl/structures.py b/sdk/python/kfp/dsl/structures.py new file mode 100644 index 0000000000..3e627617c8 --- /dev/null +++ b/sdk/python/kfp/dsl/structures.py @@ -0,0 +1,1075 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definitions for component spec.""" + +import ast +import collections +import dataclasses +import itertools +import re +from typing import Any, Dict, List, Mapping, Optional, Tuple, Union +import uuid + +from google.protobuf import json_format +import kfp +from kfp.dsl import placeholders +from kfp.dsl import utils +from kfp.dsl import v1_components +from kfp.dsl import v1_structures +from kfp.dsl.container_component_artifact_channel import \ + ContainerComponentArtifactChannel +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils +from kfp.pipeline_spec import pipeline_spec_pb2 +import yaml + + +@dataclasses.dataclass +class InputSpec: + """Component input definitions. 
+ + Attributes: + type: The type of the input. + default (optional): the default value for the input. + optional: Wether the input is optional. An input is optional when it has an explicit default value. + is_artifact_list: True if `type` represents a list of the artifact type. Only applies when `type` is an artifact. + description: Input description. + """ + type: Union[str, dict] + default: Optional[Any] = None + optional: bool = False + # This special flag for lists of artifacts allows type to be used the same way for list of artifacts and single artifacts. This is aligned with how IR represents lists of artifacts (same as for single artifacts), as well as simplifies downstream type handling/checking operations in the SDK since we don't need to parse the string `type` to determine if single artifact or list. + is_artifact_list: bool = False + description: Optional[str] = None + + def __post_init__(self) -> None: + self._validate_type() + self._validate_usage_of_optional() + + @classmethod + def from_ir_component_inputs_dict( + cls, ir_component_inputs_dict: Dict[str, Any]) -> 'InputSpec': + """Creates an InputSpec from a ComponentInputsSpec message in dict + format (pipeline_spec.components..inputDefinitions.parameters.). + + Args: + ir_component_inputs_dict (Dict[str, Any]): The ComponentInputsSpec + message in dict format. + + Returns: + InputSpec: The InputSpec object. + """ + if 'parameterType' in ir_component_inputs_dict: + type_string = ir_component_inputs_dict['parameterType'] + type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string) + if type_ is None: + raise ValueError(f'Unknown type {type_string} found in IR.') + default_value = ir_component_inputs_dict.get('defaultValue') + # fallback to checking if the parameter has a default value, + # since some IR compiled with kfp<=2.0.0b8 will have defaults + # without isOptional=True + optional = ir_component_inputs_dict.get( + 'isOptional', 'defaultValue' in ir_component_inputs_dict) + return InputSpec( + type=type_, default=default_value, optional=optional) + + else: + type_ = ir_component_inputs_dict['artifactType']['schemaTitle'] + schema_version = ir_component_inputs_dict['artifactType'][ + 'schemaVersion'] + # TODO: would be better to extract these fields from the proto + # message, as False default would be preserved + optional = ir_component_inputs_dict.get('isOptional', False) + is_artifact_list = ir_component_inputs_dict.get( + 'isArtifactList', False) + return InputSpec( + type=type_utils.create_bundled_artifact_type( + type_, schema_version), + optional=optional, + is_artifact_list=is_artifact_list) + + def __eq__(self, other: Any) -> bool: + """Equality comparison for InputSpec. Robust to different type + representations, such that it respects the maximum amount of + information possible to encode in IR. That is, because + `typing.List[str]` can only be represented a `List` in IR, + 'typing.List' == 'List' in this comparison. + + Args: + other (Any): The object to compare to InputSpec. + + Returns: + bool: True if the objects are equal, False otherwise. + """ + if isinstance(other, InputSpec): + return type_utils.get_canonical_name_for_outer_generic( + self.type) == type_utils.get_canonical_name_for_outer_generic( + other.type) and self.default == other.default + else: + return False + + def _validate_type(self) -> None: + """Type should either be a parameter or a valid bundled artifact type + by the time it gets to InputSpec. + + This allows us to perform fewer checks downstream. 
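A sketch of reconstructing a parameter input from its IR dict form, illustrating the default/optional fallback described above (the exact in-memory type name comes from ``type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE``)::

    ir_input = {'parameterType': 'NUMBER_INTEGER', 'defaultValue': 3}
    spec = InputSpec.from_ir_component_inputs_dict(ir_input)
    # Even without isOptional, the presence of defaultValue marks the input as
    # optional (back-compat with IR compiled by kfp<=2.0.0b8).
    assert spec.optional is True
    assert spec.default == 3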
+ """ + # TODO: add transformation logic so that we don't have to transform inputs at every place they are used, including v1 back compat support + if not spec_type_is_parameter(self.type): + type_utils.validate_bundled_artifact_type(self.type) + + def _validate_usage_of_optional(self) -> None: + """Validates that the optional and default properties are in consistent + states.""" + # Because None can be the default value, None cannot be used to to indicate no default. This is why we need the optional field. This check prevents users of InputSpec from setting these two values to an inconsistent state, forcing users of InputSpec to be explicit about optionality. + if self.optional is False and self.default is not None: + raise ValueError( + f'`optional` argument to {self.__class__.__name__} must be True if `default` is not None.' + ) + + +@dataclasses.dataclass +class OutputSpec: + """Component output definitions. + + Attributes: + type: The type of the output. + is_artifact_list: True if `type` represents a list of the artifact type. Only applies when `type` is an artifact. + description: Output description. + """ + type: Union[str, dict] + # This special flag for lists of artifacts allows type to be used the same way for list of artifacts and single artifacts. This is aligned with how IR represents lists of artifacts (same as for single artifacts), as well as simplifies downstream type handling/checking operations in the SDK since we don't need to parse the string `type` to determine if single artifact or list. + is_artifact_list: bool = False + description: Optional[str] = None + + def __post_init__(self) -> None: + self._validate_type() + + @classmethod + def from_ir_component_outputs_dict( + cls, ir_component_outputs_dict: Dict[str, Any]) -> 'OutputSpec': + """Creates an OutputSpec from a ComponentOutputsSpec message in dict + format (pipeline_spec.components..outputDefinitions.parameters|artifacts.). + + Args: + ir_component_outputs_dict (Dict[str, Any]): The ComponentOutputsSpec + in dict format. + + Returns: + OutputSpec: The OutputSpec object. + """ + if 'parameterType' in ir_component_outputs_dict: + type_string = ir_component_outputs_dict['parameterType'] + type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string) + if type_ is None: + raise ValueError(f'Unknown type {type_string} found in IR.') + return OutputSpec(type=type_,) + else: + type_ = ir_component_outputs_dict['artifactType']['schemaTitle'] + schema_version = ir_component_outputs_dict['artifactType'][ + 'schemaVersion'] + is_artifact_list = ir_component_outputs_dict.get( + 'isArtifactList', False) + return OutputSpec( + type=type_utils.create_bundled_artifact_type( + type_, schema_version), + is_artifact_list=is_artifact_list) + + def __eq__(self, other: Any) -> bool: + """Equality comparison for OutputSpec. Robust to different type + representations, such that it respects the maximum amount of + information possible to encode in IR. That is, because + `typing.List[str]` can only be represented a `List` in IR, + 'typing.List' == 'List' in this comparison. + + Args: + other (Any): The object to compare to OutputSpec. + + Returns: + bool: True if the objects are equal, False otherwise. 
+ """ + if isinstance(other, OutputSpec): + return type_utils.get_canonical_name_for_outer_generic( + self.type) == type_utils.get_canonical_name_for_outer_generic( + other.type) + else: + return False + + def _validate_type(self): + """Type should either be a parameter or a valid bundled artifact type + by the time it gets to OutputSpec. + + This allows us to perform fewer checks downstream. + """ + # TODO: add transformation logic so that we don't have to transform outputs at every place they are used, including v1 back compat support + if not spec_type_is_parameter(self.type): + type_utils.validate_bundled_artifact_type(self.type) + + +def spec_type_is_parameter(type_: str) -> bool: + in_memory_type = type_annotations.maybe_strip_optional_from_annotation_string( + type_utils.get_canonical_name_for_outer_generic(type_)) + + return in_memory_type in type_utils.IN_MEMORY_SPEC_TYPE_TO_IR_TYPE or in_memory_type == 'PipelineTaskFinalStatus' + + +@dataclasses.dataclass +class ResourceSpec: + """The resource requirements of a container execution. + + Attributes: + cpu_request (optional): the requirement of the number of vCPU cores. + cpu_limit (optional): the limit of the number of vCPU cores. + memory_request (optional): the memory requirement in GB. + memory_limit (optional): the memory limit in GB. + accelerator_type (optional): the type of accelerators attached to the + container. + accelerator_count (optional): the number of accelerators attached. + """ + cpu_request: Optional[float] = None + cpu_limit: Optional[float] = None + memory_request: Optional[float] = None + memory_limit: Optional[float] = None + accelerator_type: Optional[str] = None + accelerator_count: Optional[int] = None + + +@dataclasses.dataclass +class ContainerSpec: + """Container definition. + + This is only used for pipeline authors when constructing a containerized component + using @container_component decorator. 
+ + Examples: + :: + + @container_component + def container_with_artifact_output( + num_epochs: int, # built-in types are parsed as inputs + model: Output[Model], + model_config_path: OutputPath(str), + ): + return ContainerSpec( + image='gcr.io/my-image', + command=['sh', 'run.sh'], + args=[ + '--epochs', + num_epochs, + '--model_path', + model.uri, + '--model_config_path', + model_config_path, + ]) + """ + image: str + """Container image.""" + + command: Optional[List[placeholders.CommandLineElement]] = None + """Container entrypoint.""" + + args: Optional[List[placeholders.CommandLineElement]] = None + """Arguments to the container entrypoint.""" + + +@dataclasses.dataclass +class ContainerSpecImplementation: + """Container implementation definition.""" + image: str + """Container image.""" + + command: Optional[List[placeholders.CommandLineElement]] = None + """Container entrypoint.""" + + args: Optional[List[placeholders.CommandLineElement]] = None + """Arguments to the container entrypoint.""" + + env: Optional[Mapping[str, placeholders.CommandLineElement]] = None + """Environment variables to be passed to the container.""" + + resources: Optional[ResourceSpec] = None + """Specification on the resource requirements.""" + + def __post_init__(self) -> None: + self._transform_command() + self._transform_args() + self._transform_env() + + def _transform_command(self) -> None: + """Use None instead of empty list for command.""" + self.command = None if self.command == [] else self.command + + def _transform_args(self) -> None: + """Use None instead of empty list for args.""" + self.args = None if self.args == [] else self.args + + def _transform_env(self) -> None: + """Use None instead of empty dict for env.""" + self.env = None if self.env == {} else self.env + + @classmethod + def from_container_spec( + cls, + container_spec: ContainerSpec) -> 'ContainerSpecImplementation': + return ContainerSpecImplementation( + image=container_spec.image, + command=container_spec.command, + args=container_spec.args, + env=None, + resources=None) + + @classmethod + def from_container_dict( + cls, container_dict: Dict[str, + Any]) -> 'ContainerSpecImplementation': + """Creates a ContainerSpecImplementation from a PipelineContainerSpec + message in dict format + (pipeline_spec.deploymentSpec.executors..container). + + Args: + container_dict (Dict[str, Any]): PipelineContainerSpec message in dict format. + + Returns: + ContainerSpecImplementation: The ContainerSpecImplementation instance. + """ + + return ContainerSpecImplementation( + image=container_dict['image'], + command=container_dict.get('command'), + args=container_dict.get('args'), + env=container_dict.get('env'), + resources=None) # can only be set on tasks + + +@dataclasses.dataclass +class RetryPolicy: + """The retry policy of a container execution. + + Attributes: + num_retries (int): Number of times to retry on failure. + backoff_duration (int): The the number of seconds to wait before triggering a retry. + backoff_factor (float): The exponential backoff factor applied to backoff_duration. For example, if backoff_duration="60" (60 seconds) and backoff_factor=2, the first retry will happen after 60 seconds, then after 120, 240, and so on. + backoff_max_duration (int): The maximum duration during which the task will be retried. 
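A sketch of how the runtime defaults and the one-hour cap materialize in ``to_proto`` (defined just below)::

    policy = RetryPolicy(max_retry_count=2, backoff_max_duration='7200s')
    proto = policy.to_proto()
    # backoff_duration falls back to '0s', backoff_factor to 2.0, and the
    # requested 7200s maximum duration is capped at '3600s'.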
+ """ + max_retry_count: Optional[int] = None + backoff_duration: Optional[str] = None + backoff_factor: Optional[float] = None + backoff_max_duration: Optional[str] = None + + def to_proto(self) -> pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy: + # include defaults so that IR is more reflective of runtime behavior + max_retry_count = self.max_retry_count or 0 + backoff_duration = self.backoff_duration or '0s' + backoff_factor = self.backoff_factor or 2.0 + backoff_max_duration = self.backoff_max_duration or '3600s' + + # include max duration seconds cap so that IR is more reflective of runtime behavior + backoff_duration_seconds = f'{convert_duration_to_seconds(backoff_duration)}s' + backoff_max_duration_seconds = f'{min(convert_duration_to_seconds(backoff_max_duration), 3600)}s' + + return json_format.ParseDict( + { + 'max_retry_count': max_retry_count, + 'backoff_duration': backoff_duration_seconds, + 'backoff_factor': backoff_factor, + 'backoff_max_duration': backoff_max_duration_seconds, + }, pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy()) + + +@dataclasses.dataclass +class TaskSpec: + """The spec of a pipeline task. + + Attributes: + name: The name of the task. + inputs: The sources of task inputs. Constant values or PipelineParams. + dependent_tasks: The list of upstream tasks. + component_ref: The name of a component spec this task is based on. + trigger_condition (optional): an expression which will be evaluated into + a boolean value. True to trigger the task to run. + trigger_strategy (optional): when the task will be ready to be triggered. + Valid values include: "TRIGGER_STRATEGY_UNSPECIFIED", + "ALL_UPSTREAM_TASKS_SUCCEEDED", and "ALL_UPSTREAM_TASKS_COMPLETED". + iterator_items (optional): the items to iterate on. A constant value or + a PipelineParam. + iterator_item_input (optional): the name of the input which has the item + from the [items][] collection. + enable_caching (optional): whether or not to enable caching for the task. + Default is True. + display_name (optional): the display name of the task. If not specified, + the task name will be used as the display name. + """ + name: str + inputs: Mapping[str, Any] + dependent_tasks: List[str] + component_ref: str + trigger_condition: Optional[str] = None + trigger_strategy: Optional[str] = None + iterator_items: Optional[Any] = None + iterator_item_input: Optional[str] = None + enable_caching: bool = True + display_name: Optional[str] = None + retry_policy: Optional[RetryPolicy] = None + + +@dataclasses.dataclass +class ImporterSpec: + """ImporterSpec definition. + + Attributes: + artifact_uri: The URI of the artifact. + schema_title: The schema_title of the artifact. + schema_version: The schema_version of the artifact. + reimport: Whether or not import an artifact regardless it has been + imported before. + metadata (optional): the properties of the artifact. + """ + artifact_uri: str + schema_title: str + schema_version: str + reimport: bool + metadata: Optional[Mapping[str, Any]] = None + + +@dataclasses.dataclass +class Implementation: + """Implementation definition. + + Attributes: + container: container implementation details. + graph: graph implementation details. + importer: importer implementation details. + """ + container: Optional[ContainerSpecImplementation] = None + importer: Optional[ImporterSpec] = None + # Use type forward reference to skip the type validation in BaseModel. 
+ graph: Optional['pipeline_spec_pb2.PipelineSpec'] = None + + @classmethod + def from_pipeline_spec_dict(cls, pipeline_spec_dict: Dict[str, Any], + component_name: str) -> 'Implementation': + """Creates an Implementation object from a PipelineSpec message in dict + format. + + Args: + pipeline_spec_dict (Dict[str, Any]): PipelineSpec message in dict format. + component_name (str): The name of the component. + + Returns: + Implementation: An implementation object. + """ + executor_key = utils.sanitize_executor_label(component_name) + executor = pipeline_spec_dict['deploymentSpec']['executors'].get( + executor_key) + if executor is not None: + container_spec = ContainerSpecImplementation.from_container_dict( + executor['container']) if executor else None + return Implementation(container=container_spec) + else: + pipeline_spec = json_format.ParseDict( + pipeline_spec_dict, pipeline_spec_pb2.PipelineSpec()) + return Implementation(graph=pipeline_spec) + + +def check_placeholder_references_valid_io_name( + inputs_dict: Dict[str, InputSpec], + outputs_dict: Dict[str, OutputSpec], + arg: placeholders.CommandLineElement, +) -> None: + """Validates input/output placeholders refer to an existing input/output. + + Args: + valid_inputs: The existing input names. + valid_outputs: The existing output names. + arg: The placeholder argument for checking. + + Raises: + ValueError: if any placeholder references a nonexistant input or + output. + TypeError: if any argument is neither a str nor a placeholder + instance. + """ + if isinstance(arg, ContainerComponentArtifactChannel): + raise ValueError( + 'Cannot access artifact by itself in the container definition. Please use .uri or .path instead to access the artifact.' + ) + elif isinstance(arg, placeholders.PRIMITIVE_INPUT_PLACEHOLDERS): + if arg.input_name not in inputs_dict: + raise ValueError( + f'Argument "{arg.__class__.__name__}" references nonexistant input: "{arg.input_name}".' + ) + elif isinstance(arg, placeholders.PRIMITIVE_OUTPUT_PLACEHOLDERS): + if arg.output_name not in outputs_dict: + raise ValueError( + f'Argument "{arg.__class__.__name__}" references nonexistant output: "{arg.output_name}".' + ) + elif isinstance(arg, placeholders.IfPresentPlaceholder): + if arg.input_name not in inputs_dict: + raise ValueError( + f'Argument "{arg.__class__.__name__}" references nonexistant input: "{arg.input_name}".' + ) + + all_normalized_args: List[placeholders.CommandLineElement] = [] + if arg.then is None: + pass + elif isinstance(arg.then, list): + all_normalized_args.extend(arg.then) + else: + all_normalized_args.append(arg.then) + + if arg.else_ is None: + pass + elif isinstance(arg.else_, list): + all_normalized_args.extend(arg.else_) + else: + all_normalized_args.append(arg.else_) + + for arg in all_normalized_args: + check_placeholder_references_valid_io_name(inputs_dict, + outputs_dict, arg) + elif isinstance(arg, placeholders.ConcatPlaceholder): + for arg in arg.items: + check_placeholder_references_valid_io_name(inputs_dict, + outputs_dict, arg) + elif not isinstance( + arg, placeholders.ExecutorInputPlaceholder) and not isinstance( + arg, str): + raise TypeError(f'Unexpected argument "{arg}" of type {type(arg)}.') + + +@dataclasses.dataclass +class ComponentSpec: + """The definition of a component. + + Attributes: + name: The name of the component. + description (optional): the description of the component. + inputs (optional): the input definitions of the component. + outputs (optional): the output definitions of the component. 
+ implementation: The implementation of the component. Either an executor + (container, importer) or a DAG consists of other components. + """ + name: str + implementation: Implementation + description: Optional[str] = None + inputs: Optional[Dict[str, InputSpec]] = None + outputs: Optional[Dict[str, OutputSpec]] = None + platform_spec: pipeline_spec_pb2.PlatformSpec = dataclasses.field( + default_factory=pipeline_spec_pb2.PlatformSpec) + + def __post_init__(self) -> None: + self._transform_name() + self._transform_inputs() + self._transform_outputs() + self._validate_placeholders() + + def _transform_name(self) -> None: + """Converts the name to a valid name.""" + self.name = utils.maybe_rename_for_k8s(self.name) + + def _transform_inputs(self) -> None: + """Use None instead of empty list for inputs.""" + self.inputs = None if self.inputs == {} else self.inputs + + def _transform_outputs(self) -> None: + """Use None instead of empty list for outputs.""" + self.outputs = None if self.outputs == {} else self.outputs + + def _validate_placeholders(self): + """Validates that input/output placeholders refer to an existing + input/output.""" + if self.implementation.container is None: + return + + valid_inputs = {} if self.inputs is None else self.inputs + valid_outputs = {} if self.outputs is None else self.outputs + for arg in itertools.chain( + (self.implementation.container.command or []), + (self.implementation.container.args or [])): + check_placeholder_references_valid_io_name(valid_inputs, + valid_outputs, arg) + + @classmethod + def from_v1_component_spec( + cls, + v1_component_spec: v1_structures.ComponentSpec) -> 'ComponentSpec': + """Converts V1 ComponentSpec to V2 ComponentSpec. + + Args: + v1_component_spec: The V1 ComponentSpec. + + Returns: + Component spec in the form of V2 ComponentSpec. + + Raises: + ValueError: If implementation is not found. + TypeError: If any argument is neither a str nor Dict. 
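+
+        Example (an illustrative sketch; the v1 component text below is a
+        made-up minimal component, and ``ComponentSpec.from_yaml_documents``
+        is the usual entry point, dispatching to this method for v1
+        documents)::
+
+            v1_text = '''
+            name: Echo
+            inputs:
+            - {name: msg, type: String}
+            implementation:
+              container:
+                image: alpine
+                command: [echo, {inputValue: msg}]
+            '''
+            # from_yaml_documents() detects the v1 format and converts it
+            # via from_v1_component_spec() under the hood.
+            v2_spec = ComponentSpec.from_yaml_documents(v1_text)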
+ """ + component_dict = v1_component_spec.to_dict() + if component_dict.get('implementation') is None: + raise ValueError('Implementation field not found') + + if 'implementation' not in component_dict or 'container' not in component_dict[ + 'implementation']: + raise NotImplementedError('Container implementation not found.') + + container = component_dict['implementation']['container'] + command = [ + placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + command, component_dict=component_dict) + for command in container.get('command', []) + ] + args = [ + placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + command, component_dict=component_dict) + for command in container.get('args', []) + ] + env = { + key: + placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( + command, component_dict=component_dict) + for key, command in container.get('env', {}).items() + } + container_spec = ContainerSpecImplementation.from_container_dict({ + 'image': container['image'], + 'command': command, + 'args': args, + 'env': env + }) + + inputs = {} + for spec in component_dict.get('inputs', []): + type_ = spec.get('type') + optional = spec.get('optional', False) or 'default' in spec + default = spec.get('default') + default = type_utils.deserialize_v1_component_yaml_default( + type_=type_, default=default) + + if isinstance(type_, str): + type_ = type_utils.get_canonical_name_for_outer_generic(type_) + + if isinstance(type_, str) and type_ == 'PipelineTaskFinalStatus': + inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( + type=type_, optional=True) + continue + + elif isinstance(type_, str) and type_.lower( + ) in type_utils.PARAMETER_TYPES_MAPPING: + type_enum = type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] + ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( + type_enum) + in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ + ir_parameter_type_name] + inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( + type=in_memory_parameter_type_name, + default=default, + optional=optional, + ) + continue + + elif isinstance(type_, str) and re.match( + type_utils._GOOGLE_TYPES_PATTERN, type_): + schema_title = type_ + schema_version = type_utils._GOOGLE_TYPES_VERSION + + elif isinstance(type_, str) and type_.lower( + ) in type_utils._ARTIFACT_CLASSES_MAPPING: + artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[ + type_.lower()] + schema_title = artifact_class.schema_title + schema_version = artifact_class.schema_version + + elif type_ is None or isinstance(type_, dict) or type_.lower( + ) not in type_utils._ARTIFACT_CLASSES_MAPPING: + schema_title = artifact_types.Artifact.schema_title + schema_version = artifact_types.Artifact.schema_version + + else: + raise ValueError(f'Unknown input: {type_}') + + if optional: + # handles optional artifacts with no default value + inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( + type=type_utils.create_bundled_artifact_type( + schema_title, schema_version), + default=default, + optional=optional, + ) + else: + inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( + type=type_utils.create_bundled_artifact_type( + schema_title, schema_version)) + + outputs = {} + for spec in component_dict.get('outputs', []): + type_ = spec.get('type') + if isinstance(type_, str): + type_ = type_utils.get_canonical_name_for_outer_generic(type_) + + if isinstance(type_, str) and type_.lower( + ) in type_utils.PARAMETER_TYPES_MAPPING: + type_enum = 
type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] + ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( + type_enum) + in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ + ir_parameter_type_name] + outputs[utils.sanitize_input_name(spec['name'])] = OutputSpec( + type=in_memory_parameter_type_name) + continue + + elif isinstance(type_, str) and re.match( + type_utils._GOOGLE_TYPES_PATTERN, type_): + schema_title = type_ + schema_version = type_utils._GOOGLE_TYPES_VERSION + + elif isinstance(type_, str) and type_.lower( + ) in type_utils._ARTIFACT_CLASSES_MAPPING: + artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[ + type_.lower()] + schema_title = artifact_class.schema_title + schema_version = artifact_class.schema_version + + elif type_ is None or isinstance(type_, dict) or type_.lower( + ) not in type_utils._ARTIFACT_CLASSES_MAPPING: + schema_title = artifact_types.Artifact.schema_title + schema_version = artifact_types.Artifact.schema_version + + else: + raise ValueError(f'Unknown output: {type_}') + + outputs[utils.sanitize_input_name(spec['name'])] = OutputSpec( + type=type_utils.create_bundled_artifact_type( + schema_title, schema_version)) + + return ComponentSpec( + name=component_dict.get('name', 'name'), + description=component_dict.get('description'), + implementation=Implementation(container=container_spec), + inputs=inputs, + outputs=outputs, + ) + + @classmethod + def from_ir_dicts( + cls, + pipeline_spec_dict: dict, + platform_spec_dict: dict, + ) -> 'ComponentSpec': + """Creates a ComponentSpec from the PipelineSpec and PlatformSpec + messages as dicts.""" + raw_name = pipeline_spec_dict['pipelineInfo']['name'] + + def inputs_dict_from_component_spec_dict( + component_spec_dict: Dict[str, Any]) -> Dict[str, InputSpec]: + parameters = component_spec_dict.get('inputDefinitions', + {}).get('parameters', {}) + artifacts = component_spec_dict.get('inputDefinitions', + {}).get('artifacts', {}) + all_inputs = {**parameters, **artifacts} + return { + name: InputSpec.from_ir_component_inputs_dict(input_dict) + for name, input_dict in all_inputs.items() + } + + def outputs_dict_from_component_spec_dict( + components_spec_dict: Dict[str, Any]) -> Dict[str, OutputSpec]: + parameters = component_spec_dict.get('outputDefinitions', + {}).get('parameters', {}) + artifacts = components_spec_dict.get('outputDefinitions', + {}).get('artifacts', {}) + all_outputs = {**parameters, **artifacts} + return { + name: OutputSpec.from_ir_component_outputs_dict(output_dict) + for name, output_dict in all_outputs.items() + } + + def extract_description_from_command( + commands: List[str]) -> Union[str, None]: + for command in commands: + if isinstance(command, str) and 'import kfp' in command: + for node in ast.walk(ast.parse(command)): + if isinstance( + node, + (ast.FunctionDef, ast.ClassDef, ast.Module)): + docstring = ast.get_docstring(node) + if docstring: + return docstring + return None + + component_key = utils.sanitize_component_name(raw_name) + component_spec_dict = pipeline_spec_dict['components'].get( + component_key, pipeline_spec_dict['root']) + + inputs = inputs_dict_from_component_spec_dict(component_spec_dict) + outputs = outputs_dict_from_component_spec_dict(component_spec_dict) + + implementation = Implementation.from_pipeline_spec_dict( + pipeline_spec_dict, raw_name) + + description = extract_description_from_command( + implementation.container.command or + []) if implementation.container else None + + platform_spec = 
pipeline_spec_pb2.PlatformSpec() + json_format.ParseDict(platform_spec_dict, platform_spec) + + return ComponentSpec( + name=raw_name, + implementation=implementation, + description=description, + inputs=inputs, + outputs=outputs, + platform_spec=platform_spec, + ) + + @classmethod + def from_yaml_documents(cls, component_yaml: str) -> 'ComponentSpec': + """Loads V1 or V2 component YAML into a ComponentSpec. + + Args: + component_yaml: PipelineSpec and optionally PlatformSpec YAML documents as a single string. + + Returns: + ComponentSpec: The ComponentSpec object. + """ + + def extract_description(component_yaml: str) -> Union[str, None]: + heading = '# Description: ' + multi_line_description_prefix = '# ' + index_of_heading = 2 + if heading in component_yaml: + description = component_yaml.splitlines()[index_of_heading] + + # Multi line + comments = component_yaml.splitlines() + index = index_of_heading + 1 + while comments[index][:len(multi_line_description_prefix + )] == multi_line_description_prefix: + description += '\n' + comments[index][ + len(multi_line_description_prefix) + 1:] + index += 1 + + return description[len(heading):] + else: + return None + + pipeline_spec_dict, platform_spec_dict = load_documents_from_yaml( + component_yaml) + + is_v1 = 'implementation' in set(pipeline_spec_dict.keys()) + if is_v1: + v1_component = v1_components._load_component_spec_from_component_text( + component_yaml) + return cls.from_v1_component_spec(v1_component) + else: + component_spec = ComponentSpec.from_ir_dicts( + pipeline_spec_dict, platform_spec_dict) + if not component_spec.description: + component_spec.description = extract_description( + component_yaml=component_yaml) + return component_spec + + def save_to_component_yaml(self, output_file: str) -> None: + """Saves ComponentSpec into IR YAML file. + + Args: + output_file: File path to store the component yaml. + """ + from kfp.compiler import pipeline_spec_builder as builder + + pipeline_spec = self.to_pipeline_spec() + builder.write_pipeline_spec_to_file( + pipeline_spec, + None, + pipeline_spec_pb2.PlatformSpec(), + output_file, + ) + + def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Creates a pipeline instance and constructs the pipeline spec for a + single component. + + Args: + component_spec: The ComponentSpec to convert to PipelineSpec. + + Returns: + A PipelineSpec proto representing the compiled component. 
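+
+        Example (a minimal sketch, assuming ``component_yaml`` holds a
+        compiled component definition)::
+
+            spec = ComponentSpec.from_yaml_documents(component_yaml)
+            pipeline_spec = spec.to_pipeline_spec()
+            # The single component becomes the pipeline root.
+            print(pipeline_spec.pipeline_info.name)
+            print(pipeline_spec.schema_version)  # '2.1.0'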
+ """ + # import here to aviod circular module dependency + from kfp.compiler import compiler_utils + from kfp.compiler import pipeline_spec_builder as builder + from kfp.dsl import pipeline_channel + from kfp.dsl import pipeline_task + from kfp.dsl import tasks_group + + args_dict = {} + pipeline_inputs = self.inputs or {} + + for arg_name, input_spec in pipeline_inputs.items(): + args_dict[arg_name] = pipeline_channel.create_pipeline_channel( + name=arg_name, + channel_type=input_spec.type, + is_artifact_list=input_spec.is_artifact_list) + + task = pipeline_task.PipelineTask(self, args_dict) + + # instead of constructing a pipeline with pipeline_context.Pipeline, + # just build the single task group + group = tasks_group.TasksGroup( + group_type=tasks_group.TasksGroupType.PIPELINE) + group.tasks.append(task) + + group.name = uuid.uuid4().hex + + pipeline_name = self.name + task_group = group + + pipeline_outputs = {} + pipeline_output_spec = self.outputs or {} + + for arg_name, output_spec in pipeline_output_spec.items(): + pipeline_outputs[ + arg_name] = pipeline_channel.create_pipeline_channel( + name=arg_name, + channel_type=output_spec.type, + task_name=task.name) + + utils.validate_pipeline_name(pipeline_name) + + pipeline_spec = pipeline_spec_pb2.PipelineSpec() + pipeline_spec.pipeline_info.name = pipeline_name + pipeline_spec.sdk_version = f'kfp-{kfp.__version__}' + # Schema version 2.1.0 is required for kfp-pipeline-spec>0.1.13 + pipeline_spec.schema_version = '2.1.0' + + # if we decide to surface component outputs to pipeline level, + # can just assign the component_spec_proto directly to .root + component_spec_proto = builder._build_component_spec_from_component_spec_structure( + self) + pipeline_spec.root.CopyFrom(component_spec_proto) + + builder._build_dag_outputs( + component_spec=pipeline_spec.root, dag_outputs=pipeline_outputs) + + deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig() + root_group = task_group + + task_name_to_parent_groups, group_name_to_parent_groups = compiler_utils.get_parent_groups( + root_group) + + def get_inputs(task_group: tasks_group.TasksGroup, + task_name_to_parent_groups): + inputs = collections.defaultdict(set) + if len(task_group.tasks) != 1: + raise ValueError( + f'Error compiling component. Expected one task in task group, got {len(task_group.tasks)}.' + ) + only_task = task_group.tasks[0] + if only_task.channel_inputs: + for group_name in task_name_to_parent_groups[only_task.name]: + inputs[group_name].add((only_task.channel_inputs[-1], None)) + return inputs + + inputs = get_inputs(task_group, task_name_to_parent_groups) + + builder.build_spec_by_group( + pipeline_spec=pipeline_spec, + deployment_config=deployment_config, + group=root_group, + inputs=inputs, + outputs=collections.defaultdict( + dict), # empty -- no sub-DAG outputs to surface + dependencies={}, # no dependencies for single-component pipeline + rootgroup_name=root_group.name, + task_name_to_parent_groups=task_name_to_parent_groups, + group_name_to_parent_groups=group_name_to_parent_groups, + name_to_for_loop_group={}, # no for loop in single-component pipeline + platform_spec=pipeline_spec_pb2.PlatformSpec( + ), # no PlatformSpec single-component pipeline + is_compiled_component=True, + ) + + return pipeline_spec + + +def normalize_time_string(duration: str) -> str: + """Normalizes a time string. 
+ Examples: + - '1 hour' -> '1h' + - '2 hours' -> '2h' + - '2hours' -> '2h' + - '2 w' -> '2w' + - '2w' -> '2w' + Args: + duration (str): The unnormalized duration string. + Returns: + str: The normalized duration string. + """ + no_ws_duration = duration.replace(' ', '') + duration_split = [el for el in re.split(r'(\D+)', no_ws_duration) if el] + + if len(duration_split) != 2: + raise ValueError( + f"Invalid duration string: '{duration}'. Expected one value (as integer in string) and one unit, such as '1 hour'." + ) + + value = duration_split[0] + unit = duration_split[1] + + first_letter_of_unit = unit[0] + return value + first_letter_of_unit + + +def convert_duration_to_seconds(duration: str) -> int: + """Converts a duration string to seconds. + + Args: + duration (str): The unnormalized duration string. (e.g. '1h', '1 hour', '2 + hours', '2w', '2 weeks', '2d', etc.) + Raises: + ValueError: If the time unit is not one of seconds, minutes, hours, days, + or weeks. + Returns: + int: The number of seconds in the duration. + """ + duration = normalize_time_string(duration) + seconds_per_unit = {'s': 1, 'm': 60, 'h': 3_600, 'd': 86_400, 'w': 604_800} + if duration[-1] not in seconds_per_unit.keys(): + raise ValueError( + f"Unsupported duration unit: '{duration[-1]}' for '{duration}'.") + return int(duration[:-1]) * seconds_per_unit[duration[-1]] + + +def load_documents_from_yaml(component_yaml: str) -> Tuple[dict, dict]: + """Loads up to two YAML documents from a YAML string. + + First document must always be present. If second document is + present, it is returned as a dict, else an empty dict. + """ + documents = list(yaml.safe_load_all(component_yaml)) + num_docs = len(documents) + if num_docs == 1: + pipeline_spec_dict = documents[0] + platform_spec_dict = {} + elif num_docs == 2: + pipeline_spec_dict = documents[0] + platform_spec_dict = documents[1] + else: + raise ValueError( + f'Expected one or two YAML documents in the IR YAML file. Got: {num_docs}.' 
+ ) + return pipeline_spec_dict, platform_spec_dict diff --git a/sdk/python/kfp/dsl-test/structures_test.py b/sdk/python/kfp/dsl/structures_test.py similarity index 94% rename from sdk/python/kfp/dsl-test/structures_test.py rename to sdk/python/kfp/dsl/structures_test.py index d36a34e57f..ad6274d931 100644 --- a/sdk/python/kfp/dsl-test/structures_test.py +++ b/sdk/python/kfp/dsl/structures_test.py @@ -22,7 +22,6 @@ from kfp import compiler from kfp import components from kfp import dsl -from kfp.components import load_yaml_utilities from kfp.dsl import component_factory from kfp.dsl import placeholders from kfp.dsl import structures @@ -264,7 +263,7 @@ def test_simple_component_spec_save_to_component_yaml(self): # test that it can be read back correctly with open(output_path, 'r') as f: contents = f.read() - new_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + new_component_spec = structures.ComponentSpec.from_yaml_documents( contents) self.assertEqual(original_component_spec, new_component_spec) @@ -319,7 +318,7 @@ def test_simple_component_spec_load_from_v2_component_yaml(self): sdkVersion: kfp-2.0.0-alpha.2 """) - generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + generated_spec = structures.ComponentSpec.from_yaml_documents( component_yaml_v2) expected_spec = structures.ComponentSpec( @@ -360,8 +359,7 @@ def test_simple_component_spec_load_from_v2_component_yaml(self): ) def test_component_spec_placeholder_load_from_v2_component_yaml( self, yaml, expected_component): - generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( - yaml) + generated_spec = structures.ComponentSpec.from_yaml_documents(yaml) self.assertEqual(generated_spec, expected_component) def test_component_spec_load_from_v1_component_yaml(self): @@ -390,7 +388,7 @@ def test_component_spec_load_from_v1_component_yaml(self): - {outputPath: Output 2} """) - generated_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + generated_spec = structures.ComponentSpec.from_yaml_documents( component_yaml_v1) expected_spec = structures.ComponentSpec( @@ -641,7 +639,7 @@ def test_from_ir_component_outputs_dict(self): class TestReadInComponent(parameterized.TestCase): def test_read_v1(self): - component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + component_spec = structures.ComponentSpec.from_yaml_documents( V1_YAML_IF_PLACEHOLDER) self.assertEqual(component_spec.name, 'component-if') self.assertEqual(component_spec.implementation.container.image, @@ -696,7 +694,7 @@ def test_simple_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + loaded_component_spec = structures.ComponentSpec.from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='component1', @@ -764,7 +762,7 @@ def test_if_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + loaded_component_spec = structures.ComponentSpec.from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='if', @@ -835,7 +833,7 @@ def test_concat_placeholder(self): parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2""") - loaded_component_spec = load_yaml_utilities._load_component_spec_from_yaml_documents( + loaded_component_spec = 
structures.ComponentSpec.from_yaml_documents( compiled_yaml) component_spec = structures.ComponentSpec( name='concat', @@ -1115,5 +1113,47 @@ def test_load_noncanonical_v1_generic_types(self): self.assertEqual(outputs['output4'].type, 'Dict') +class TestLoadDocumentsFromYAML(unittest.TestCase): + + def test_no_documents(self): + with self.assertRaisesRegex( + ValueError, + r'Expected one or two YAML documents in the IR YAML file\. Got\: 0\.' + ): + structures.load_documents_from_yaml('') + + def test_one_document(self): + doc1, doc2 = structures.load_documents_from_yaml( + textwrap.dedent("""\ + key1: value1 + """)) + self.assertEqual(doc1, {'key1': 'value1'}) + self.assertEqual(doc2, {}) + + def test_two_documents(self): + doc1, doc2 = structures.load_documents_from_yaml( + textwrap.dedent("""\ + key1: value1 + --- + key2: value2 + """)) + self.assertEqual(doc1, {'key1': 'value1'}) + self.assertEqual(doc2, {'key2': 'value2'}) + + def test_three_documents(self): + with self.assertRaisesRegex( + ValueError, + r'Expected one or two YAML documents in the IR YAML file\. Got\: 3\.' + ): + structures.load_documents_from_yaml( + textwrap.dedent("""\ + key3: value3 + --- + key3: value3 + --- + key3: value3 + """)) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/task_final_status.py b/sdk/python/kfp/dsl/task_final_status.py new file mode 100644 index 0000000000..bd2386d2d6 --- /dev/null +++ b/sdk/python/kfp/dsl/task_final_status.py @@ -0,0 +1,55 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definition for PipelineTaskFinalStatus.""" + +import dataclasses +from typing import Optional + + +@dataclasses.dataclass +class PipelineTaskFinalStatus: + """A final status of a pipeline task. Annotate a component parameter with + this class to obtain a handle to a task's status (see example). + + This is the Python representation of the proto message `PipelineTaskFinalStatus `_. + + Examples: + :: + + @dsl.component + def task_status(user_input: str, status: PipelineTaskFinalStatus): + print('Pipeline status: ', status.state) + print('Job resource name: ', status.pipeline_job_resource_name) + print('Pipeline task name: ', status.pipeline_task_name) + print('Error code: ', status.error_code) + print('Error message: ', status.error_message) + + @dsl.pipeline(name='my_pipeline') + def my_pipeline(): + task = task_status(user_input='my_input') + """ + state: str + """Final state of the task. The value could be one of ``'SUCCEEDED'``, ``'FAILED'`` or ``'CANCELLED'``.""" + + pipeline_job_resource_name: str + """Pipeline job resource name, in the format of ``projects/{project}/locations/{location}/pipelineJobs/{pipeline_job}``.""" + + pipeline_task_name: str + """Name of the task that produced this status.""" + + error_code: Optional[int] + """The `google.rpc.Code `_ in case of error. If state is ``'SUCCEEDED'``, this is ``None``.""" + + error_message: Optional[str] + """In case of error, the detailed error message. 
If state is ``'SUCCEEDED'``, this is ``None``.""" diff --git a/sdk/python/kfp/dsl/tasks_group.py b/sdk/python/kfp/dsl/tasks_group.py new file mode 100644 index 0000000000..42d1446a9d --- /dev/null +++ b/sdk/python/kfp/dsl/tasks_group.py @@ -0,0 +1,230 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definition for TasksGroup.""" + +import enum +from typing import Optional, Union + +from kfp.dsl import for_loop +from kfp.dsl import pipeline_channel +from kfp.dsl import pipeline_context +from kfp.dsl import pipeline_task + + +class TasksGroupType(str, enum.Enum): + """Types of TasksGroup.""" + PIPELINE = 'pipeline' + CONDITION = 'condition' + FOR_LOOP = 'for-loop' + EXIT_HANDLER = 'exit-handler' + + +class TasksGroup: + """Represents a logical group of tasks and groups of TasksGroups. + + This class is the base class for groups of tasks, such as tasks + sharing an exit handler, a condition branch, or a loop. This class + is not supposed to be used by pipeline authors. It is useful for + implementing a compiler. + + Attributes: + group_type: The type of the TasksGroup. + tasks: A list of all PipelineTasks in this group. + groups: A list of TasksGroups in this group. + display_name: The optional user given name of the group. + dependencies: A list of tasks or groups this group depends on. + is_root: If TasksGroup is root group. + """ + + def __init__( + self, + group_type: TasksGroupType, + name: Optional[str] = None, + is_root: bool = False, + ): + """Create a new instance of TasksGroup. + + Args: + group_type: The type of the group. + name: The name of the group. Used as display name in UI. + """ + self.group_type = group_type + self.tasks = [] + self.groups = [] + self.display_name = name + self.dependencies = [] + self.is_root = is_root + + def __enter__(self): + if not pipeline_context.Pipeline.get_default_pipeline(): + raise ValueError('Default pipeline not defined.') + + self._make_name_unique() + + pipeline_context.Pipeline.get_default_pipeline().push_tasks_group(self) + return self + + def __exit__(self, *unused_args): + pipeline_context.Pipeline.get_default_pipeline().pop_tasks_group() + + def _make_name_unique(self): + """Generates a unique TasksGroup name in the pipeline.""" + if not pipeline_context.Pipeline.get_default_pipeline(): + raise ValueError('Default pipeline not defined.') + + group_id = pipeline_context.Pipeline.get_default_pipeline( + ).get_next_group_id() + self.name = f'{self.group_type.value}-{group_id}' + self.name = self.name.replace('_', '-') + + def remove_task_recursive(self, task: pipeline_task.PipelineTask): + """Removes a task from the group recursively.""" + if self.tasks and task in self.tasks: + self.tasks.remove(task) + for group in self.groups or []: + group.remove_task_recursive(task) + + +class ExitHandler(TasksGroup): + """A class for setting an exit handler task that is invoked upon exiting a + group of other tasks. + + Args: + exit_task: The task that is invoked after exiting a group of other tasks. 
+ name: The name of the exit handler group. + + Example: + :: + + exit_task = ExitComponent(...) + with ExitHandler(exit_task): + task1 = my_component1(...) + task2 = my_component2(...) + """ + + def __init__( + self, + exit_task: pipeline_task.PipelineTask, + name: Optional[str] = None, + ): + """Initializes a Condition task group.""" + super().__init__( + group_type=TasksGroupType.EXIT_HANDLER, + name=name, + is_root=False, + ) + + if exit_task.dependent_tasks: + raise ValueError('exit_task cannot depend on any other tasks.') + + # Removing exit_task form any group + pipeline_context.Pipeline.get_default_pipeline( + ).remove_task_from_groups(exit_task) + + # Set is_exit_handler since the compiler might be using this attribute. + exit_task.is_exit_handler = True + + self.exit_task = exit_task + + +class Condition(TasksGroup): + """A class for creating conditional control flow within a pipeline + definition. + + Args: + condition: A comparative expression that evaluates to True or False. At least one of the operands must be an output from an upstream task or a pipeline parameter. + name: The name of the condition group. + + Example: + :: + + task1 = my_component1(...) + with Condition(task1.output=='pizza', 'pizza-condition'): + task2 = my_component2(...) + """ + + def __init__( + self, + condition: pipeline_channel.ConditionOperator, + name: Optional[str] = None, + ): + """Initializes a conditional task group.""" + super().__init__( + group_type=TasksGroupType.CONDITION, + name=name, + is_root=False, + ) + self.condition = condition + + +class ParallelFor(TasksGroup): + """A class for creating parallelized for loop control flow over a static + set of items within a pipeline definition. + + Args: + items: The items to loop over. It can be either a constant Python list or a list output from an upstream task. + name: The name of the for loop group. + parallelism: The maximum number of concurrent iterations that can be scheduled for execution. A value of 0 represents unconstrained parallelism (default is unconstrained). + + Example: + :: + + with dsl.ParallelFor( + items=[{'a': 1, 'b': 10}, {'a': 2, 'b': 20}], + parallelism=1 + ) as item: + task1 = my_component(..., number=item.a) + task2 = my_component(..., number=item.b) + + In the example, the group of tasks containing ``task1`` and ``task2`` would + be executed twice, once with case ``args=[{'a': 1, 'b': 10}]`` and once with + case ``args=[{'a': 2, 'b': 20}]``. The ``parallelism=1`` setting causes only + 1 execution to be scheduled at a time. + """ + + def __init__( + self, + items: Union[for_loop.ItemList, pipeline_channel.PipelineChannel], + name: Optional[str] = None, + parallelism: Optional[int] = None, + ): + """Initializes a for loop task group.""" + parallelism = parallelism or 0 + if parallelism < 0: + raise ValueError( + f'ParallelFor parallelism must be >= 0. 
Got: {parallelism}.') + + super().__init__( + group_type=TasksGroupType.FOR_LOOP, + name=name, + is_root=False, + ) + + if isinstance(items, pipeline_channel.PipelineChannel): + self.loop_argument = for_loop.LoopArgument.from_pipeline_channel( + items) + self.items_is_pipeline_channel = True + else: + self.loop_argument = for_loop.LoopArgument.from_raw_items( + raw_items=items, + name_code=pipeline_context.Pipeline.get_default_pipeline() + .get_next_group_id(), + ) + self.items_is_pipeline_channel = False + + self.parallelism_limit = parallelism + + def __enter__(self) -> for_loop.LoopArgument: + super().__enter__() + return self.loop_argument diff --git a/sdk/python/kfp/dsl-test/tasks_group_test.py b/sdk/python/kfp/dsl/tasks_group_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/tasks_group_test.py rename to sdk/python/kfp/dsl/tasks_group_test.py diff --git a/test/presubmit-test-kfp-dsl-runtime-code.sh b/sdk/python/kfp/dsl/types/__init__.py old mode 100755 new mode 100644 similarity index 62% rename from test/presubmit-test-kfp-dsl-runtime-code.sh rename to sdk/python/kfp/dsl/types/__init__.py index d8c94dc06b..b4447dd583 --- a/test/presubmit-test-kfp-dsl-runtime-code.sh +++ b/sdk/python/kfp/dsl/types/__init__.py @@ -1,5 +1,4 @@ -#!/bin/bash -ex -# Copyright 2023 Kubeflow Pipelines contributors +# Copyright 2021 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -source_root=$(pwd) - -pip install --upgrade pip -pip install -e $source_root/sdk/python/kfp-dsl -pip install pyyaml -pip install $(grep 'absl-py==' sdk/python/requirements-dev.txt) -pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) - -pytest sdk/python/kfp-dsl diff --git a/sdk/python/kfp/dsl/types/artifact_types.py b/sdk/python/kfp/dsl/types/artifact_types.py new file mode 100644 index 0000000000..2c6999c2d8 --- /dev/null +++ b/sdk/python/kfp/dsl/types/artifact_types.py @@ -0,0 +1,472 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes for input/output Artifacts in KFP SDK.""" + +from typing import Dict, List, Optional, Type + +_GCS_LOCAL_MOUNT_PREFIX = '/gcs/' +_MINIO_LOCAL_MOUNT_PREFIX = '/minio/' +_S3_LOCAL_MOUNT_PREFIX = '/s3/' + + +class Artifact: + """Represents a generic machine learning artifact. + + This class and all artifact classes store the name, uri, and metadata for a machine learning artifact. Use this artifact type when an artifact does not fit into another more specific artifact type (e.g., ``Model``, ``Dataset``). + + Args: + name: Name of the artifact. + uri: The artifact's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the artifact. 
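+
+    The ``path`` property exposes ``uri`` as a local filesystem path by
+    mapping the ``gs://``, ``minio://`` and ``s3://`` schemes to the
+    ``/gcs/``, ``/minio/`` and ``/s3/`` mount prefixes. For illustration::
+
+        a = Artifact(uri='gs://my-bucket/data.csv')
+        a.path                                 # '/gcs/my-bucket/data.csv'
+        a.path = '/minio/my-bucket/data.csv'
+        a.uri                                  # 'minio://my-bucket/data.csv'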
+ + Example: + :: + + from kfp import dsl + from kfp.dsl import Output, Artifact, Input + + + @dsl.component + def create_artifact( + data: str, + output_artifact: Output[Artifact], + ): + with open(output_artifact.path, 'w') as f: + f.write(data) + + + @dsl.component + def use_artifact(input_artifact: Input[Artifact]): + with open(input_artifact.path) as input_file: + artifact_contents = input_file.read() + print(artifact_contents) + + + @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my/storage') + def my_pipeline(): + create_task = create_artifact(data='my data') + use_artifact(input_artifact=create_task.outputs['output_artifact']) + + Note: Other artifacts are used similarly to the usage of ``Artifact`` in the example above (within ``Input[]`` and ``Output[]``). + """ + schema_title = 'system.Artifact' + schema_version = '0.0.1' + + def __init__(self, + name: Optional[str] = None, + uri: Optional[str] = None, + metadata: Optional[Dict] = None) -> None: + """Initializes the Artifact with the given name, URI and metadata.""" + self.uri = uri or '' + self.name = name or '' + self.metadata = metadata or {} + + @property + def path(self) -> str: + return self._get_path() + + @path.setter + def path(self, path: str) -> None: + self._set_path(path) + + def _get_path(self) -> Optional[str]: + if self.uri.startswith('gs://'): + return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):] + elif self.uri.startswith('minio://'): + return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):] + elif self.uri.startswith('s3://'): + return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):] + return None + + def _set_path(self, path: str) -> None: + if path.startswith(_GCS_LOCAL_MOUNT_PREFIX): + path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):] + elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX): + path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):] + elif path.startswith(_S3_LOCAL_MOUNT_PREFIX): + path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):] + self.uri = path + + +class Model(Artifact): + """An artifact representing a machine learning model. + + Args: + name: Name of the model. + uri: The model's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the model. + """ + schema_title = 'system.Model' + + @property + def framework(self) -> str: + return self._get_framework() + + def _get_framework(self) -> str: + return self.metadata.get('framework', '') + + @framework.setter + def framework(self, framework: str) -> None: + self._set_framework(framework) + + def _set_framework(self, framework: str) -> None: + self.metadata['framework'] = framework + + +class Dataset(Artifact): + """An artifact representing a machine learning dataset. + + Args: + name: Name of the dataset. + uri: The dataset's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the dataset. + """ + schema_title = 'system.Dataset' + + +class Metrics(Artifact): + """An artifact for storing key-value scalar metrics. + + Args: + name: Name of the metrics artifact. + uri: The metrics artifact's location on disk or cloud storage. + metadata: Key-value scalar metrics. + """ + schema_title = 'system.Metrics' + + def log_metric(self, metric: str, value: float) -> None: + """Sets a custom scalar metric in the artifact's metadata. + + Args: + metric: The metric key. + value: The metric value. + """ + self.metadata[metric] = value + + +class ClassificationMetrics(Artifact): + """An artifact for storing classification metrics. + + Args: + name: Name of the metrics artifact. 
+ uri: The metrics artifact's location on disk or cloud storage. + metadata: The key-value scalar metrics. + """ + schema_title = 'system.ClassificationMetrics' + + def log_roc_data_point(self, fpr: float, tpr: float, + threshold: float) -> None: + """Logs a single data point in the ROC curve to metadata. + + Args: + fpr: False positive rate value of the data point. + tpr: True positive rate value of the data point. + threshold: Threshold value for the data point. + """ + + roc_reading = { + 'confidenceThreshold': threshold, + 'recall': tpr, + 'falsePositiveRate': fpr + } + if 'confidenceMetrics' not in self.metadata.keys(): + self.metadata['confidenceMetrics'] = [] + + self.metadata['confidenceMetrics'].append(roc_reading) + + def log_roc_curve(self, fpr: List[float], tpr: List[float], + threshold: List[float]) -> None: + """Logs an ROC curve to metadata. + + Args: + fpr: List of false positive rate values. + tpr: List of true positive rate values. + threshold: List of threshold values. + + Raises: + ValueError: If the lists ``fpr``, ``tpr`` and ``threshold`` are not the same length. + """ + if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len( + tpr) != len(threshold): + raise ValueError( + f'Length of fpr, tpr and threshold must be the same. Got lengths {len(fpr)}, {len(tpr)} and {len(threshold)} respectively.' + ) + + for i in range(len(fpr)): + self.log_roc_data_point( + fpr=fpr[i], tpr=tpr[i], threshold=threshold[i]) + + def set_confusion_matrix_categories(self, categories: List[str]) -> None: + """Stores confusion matrix categories to metadata. + + Args: + categories: List of strings specifying the categories. + """ + + self._categories = [] + annotation_specs = [] + for category in categories: + annotation_spec = {'displayName': category} + self._categories.append(category) + annotation_specs.append(annotation_spec) + + self._matrix = [] + for row in range(len(self._categories)): + self._matrix.append({'row': [0] * len(self._categories)}) + + self._confusion_matrix = { + 'annotationSpecs': annotation_specs, + 'rows': self._matrix + } + + self.metadata['confusionMatrix'] = self._confusion_matrix + + def log_confusion_matrix_row(self, row_category: str, + row: List[float]) -> None: + """Logs a confusion matrix row to metadata. + + Args: + row_category: Category to which the row belongs. + row: List of integers specifying the values for the row. + + Raises: + ValueError: If ``row_category`` is not in the list of categories + set in ``set_categories`` call. + """ + if row_category not in self._categories: + raise ValueError( + f'Invalid category: {row_category} passed. Expected one of: {self._categories}' + ) + + if len(row) != len(self._categories): + raise ValueError( + f'Invalid row. Expected size: {len(self._categories)} got: {len(row)}' + ) + + self._matrix[self._categories.index(row_category)] = {'row': row} + self.metadata['confusionMatrix'] = self._confusion_matrix + + def log_confusion_matrix_cell(self, row_category: str, col_category: str, + value: int) -> None: + """Logs a cell in the confusion matrix to metadata. + + Args: + row_category: String representing the name of the row category. + col_category: String representing the name of the column category. + value: Value of the cell. + + Raises: + ValueError: If ``row_category`` or ``col_category`` is not in the list of + categories set in ``set_categories``. + """ + if row_category not in self._categories: + raise ValueError( + f'Invalid category: {row_category} passed. 
Expected one of: {self._categories}' + ) + + if col_category not in self._categories: + raise ValueError( + f'Invalid category: {row_category} passed. Expected one of: {self._categories}' + ) + + self._matrix[self._categories.index(row_category)]['row'][ + self._categories.index(col_category)] = value + self.metadata['confusionMatrix'] = self._confusion_matrix + + def log_confusion_matrix(self, categories: List[str], + matrix: List[List[int]]) -> None: + """Logs a confusion matrix to metadata. + + Args: + categories: List of the category names. + matrix: Complete confusion matrix. + + Raises: + ValueError: If the length of ``categories`` does not match number of rows or columns of ``matrix``. + """ + self.set_confusion_matrix_categories(categories) + + if len(matrix) != len(categories): + raise ValueError( + f'Invalid matrix: {matrix} passed for categories: {categories}') + + for index in range(len(categories)): + if len(matrix[index]) != len(categories): + raise ValueError( + f'Invalid matrix: {matrix} passed for categories: {categories}' + ) + + self.log_confusion_matrix_row(categories[index], matrix[index]) + + self.metadata['confusionMatrix'] = self._confusion_matrix + + +class SlicedClassificationMetrics(Artifact): + """An artifact for storing sliced classification metrics. + + Similar to ``ClassificationMetrics``, tasks using this class are + expected to use log methods of the class to log metrics with the + difference being each log method takes a slice to associate the + ``ClassificationMetrics``. + + Args: + name: Name of the metrics artifact. + uri: The metrics artifact's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the metrics artifact. + """ + + schema_title = 'system.SlicedClassificationMetrics' + + def _upsert_classification_metrics_for_slice(self, slice: str) -> None: + """Upserts the classification metrics instance for a slice.""" + if slice not in self._sliced_metrics: + self._sliced_metrics[slice] = ClassificationMetrics() + + def _update_metadata(self, slice: str) -> None: + """Updates metadata to adhere to the metrics schema.""" + self.metadata = {'evaluationSlices': []} + for slice in self._sliced_metrics.keys(): + slice_metrics = { + 'slice': + slice, + 'sliceClassificationMetrics': + self._sliced_metrics[slice].metadata + } + self.metadata['evaluationSlices'].append(slice_metrics) + + def log_roc_reading(self, slice: str, threshold: float, tpr: float, + fpr: float) -> None: + """Logs a single data point in the ROC curve of a slice to metadata. + + Args: + slice: String representing slice label. + threshold: Thresold value for the data point. + tpr: True positive rate value of the data point. + fpr: False positive rate value of the data point. + """ + + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr) + self._update_metadata(slice) + + def load_roc_readings(self, slice: str, + readings: List[List[float]]) -> None: + """Bulk loads ROC curve readings for a slice. + + Args: + slice: String representing slice label. + readings: A 2-dimensional list providing ROC curve data points. The expected order of the data points is: threshold, true positive rate, false positive rate. 
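+
+        For illustration, ``readings`` is expected to look like::
+
+            readings = [
+                # [threshold, true positive rate, false positive rate]
+                [0.9, 0.10, 0.01],
+                [0.5, 0.75, 0.20],
+                [0.1, 0.95, 0.60],
+            ]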
+ """ + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].load_roc_readings(readings) + self._update_metadata(slice) + + def set_confusion_matrix_categories(self, slice: str, + categories: List[str]) -> None: + """Logs confusion matrix categories for a slice to metadata. + + Categories are stored in the internal ``metrics_utils.ConfusionMatrix`` + instance of the slice. + + Args: + slice: String representing slice label. + categories: List of strings specifying the categories. + """ + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].set_confusion_matrix_categories(categories) + self._update_metadata(slice) + + def log_confusion_matrix_row(self, slice: str, row_category: str, + row: List[int]) -> None: + """Logs a confusion matrix row for a slice to metadata. + + Row is updated on the internal ``metrics_utils.ConfusionMatrix`` + instance of the slice. + + Args: + slice: String representing slice label. + row_category: Category to which the row belongs. + row: List of integers specifying the values for the row. + """ + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row) + self._update_metadata(slice) + + def log_confusion_matrix_cell(self, slice: str, row_category: str, + col_category: str, value: int) -> None: + """Logs a confusion matrix cell for a slice to metadata. + + Cell is updated on the internal ``metrics_utils.ConfusionMatrix`` + instance of the slice. + + Args: + slice: String representing slice label. + row_category: String representing the name of the row category. + col_category: String representing the name of the column category. + value: Value of the cell. + """ + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].log_confusion_matrix_cell( + row_category, col_category, value) + self._update_metadata(slice) + + def load_confusion_matrix(self, slice: str, categories: List[str], + matrix: List[List[int]]) -> None: + """Bulk loads the whole confusion matrix for a slice. + + Args: + slice: String representing slice label. + categories: List of the category names. + matrix: Complete confusion matrix. + """ + self._upsert_classification_metrics_for_slice(slice) + self._sliced_metrics[slice].log_confusion_matrix_cell( + categories, matrix) + self._update_metadata(slice) + + +class HTML(Artifact): + """An artifact representing an HTML file. + + Args: + name: Name of the HTML file. + uri: The HTML file's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the HTML file. + """ + schema_title = 'system.HTML' + + +class Markdown(Artifact): + """An artifact representing a markdown file. + + Args: + name: Name of the markdown file. + uri: The markdown file's location on disk or cloud storage. + metadata: Arbitrary key-value pairs about the markdown file. 
+ """ + schema_title = 'system.Markdown' + + +_SCHEMA_TITLE_TO_TYPE: Dict[str, Type[Artifact]] = { + x.schema_title: x for x in [ + Artifact, + Model, + Dataset, + Metrics, + ClassificationMetrics, + SlicedClassificationMetrics, + HTML, + Markdown, + ] +} diff --git a/sdk/python/kfp/dsl-test/types/artifact_types_test.py b/sdk/python/kfp/dsl/types/artifact_types_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/types/artifact_types_test.py rename to sdk/python/kfp/dsl/types/artifact_types_test.py diff --git a/sdk/python/kfp/dsl/types/custom_artifact_types.py b/sdk/python/kfp/dsl/types/custom_artifact_types.py new file mode 100644 index 0000000000..484dfa6508 --- /dev/null +++ b/sdk/python/kfp/dsl/types/custom_artifact_types.py @@ -0,0 +1,191 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ast +import inspect +from typing import Callable, Dict, List, Union + +from kfp.dsl import component_factory +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils + +RETURN_PREFIX = 'return-' + + +def get_custom_artifact_type_import_statements(func: Callable) -> List[str]: + """Gets a list of custom artifact type import statements from a lightweight + Python component function.""" + artifact_imports = get_custom_artifact_import_items_from_function(func) + imports_source = [] + for obj_str in artifact_imports: + if '.' in obj_str: + path, name = obj_str.rsplit('.', 1) + imports_source.append(f'from {path} import {name}') + else: + imports_source.append(f'import {obj_str}') + return imports_source + + +def get_param_to_custom_artifact_class(func: Callable) -> Dict[str, type]: + """Gets a map of parameter names to custom artifact classes. + + Return key is 'return-' for normal returns and 'return-' for + typing.NamedTuple returns. 
+ """ + param_to_artifact_cls: Dict[str, type] = {} + kfp_artifact_classes = set(type_utils._ARTIFACT_CLASSES_MAPPING.values()) + + signature = inspect.signature(func) + for name, param in signature.parameters.items(): + annotation = param.annotation + if type_annotations.is_Input_Output_artifact_annotation(annotation): + artifact_class = type_annotations.get_io_artifact_class(annotation) + if artifact_class not in kfp_artifact_classes: + param_to_artifact_cls[name] = artifact_class + elif type_annotations.is_artifact_class(annotation): + param_to_artifact_cls[name] = annotation + if artifact_class not in kfp_artifact_classes: + param_to_artifact_cls[name] = artifact_class + + return_annotation = signature.return_annotation + + if return_annotation is inspect.Signature.empty: + pass + + elif type_utils.is_typed_named_tuple_annotation(return_annotation): + for name, annotation in return_annotation.__annotations__.items(): + if type_annotations.is_artifact_class( + annotation) and annotation not in kfp_artifact_classes: + param_to_artifact_cls[f'{RETURN_PREFIX}{name}'] = annotation + + elif type_annotations.is_artifact_class( + return_annotation + ) and return_annotation not in kfp_artifact_classes: + param_to_artifact_cls[RETURN_PREFIX] = return_annotation + + return param_to_artifact_cls + + +def get_full_qualname_for_artifact(obj: type) -> str: + """Gets the fully qualified name for an object. For example, for class Foo + in module bar.baz, this function returns bar.baz.Foo. + + Note: typing.get_type_hints purports to do the same thing, but it behaves + differently when executed within the scope of a test, so preferring this + approach instead. + + Args: + obj: The class or module for which to get the fully qualified name. + + Returns: + The fully qualified name for the class. + """ + module = obj.__module__ + name = obj.__qualname__ + if module is not None: + name = module + '.' + name + return name + + +def get_symbol_import_path(artifact_class_base_symbol: str, + qualname: str) -> str: + """Gets the fully qualified name of the symbol that must be imported for + the custom artifact type annotation to be referenced successfully. + + Args: + artifact_class_base_symbol: The base symbol from which the artifact class is referenced (e.g., aiplatform for aiplatform.VertexDataset). + qualname: The fully qualified type annotation name as a string. + + Returns: + The fully qualified names of the module or type to import. + """ + split_qualname = qualname.split('.') + if artifact_class_base_symbol in split_qualname: + name_to_import = '.'.join( + split_qualname[:split_qualname.index(artifact_class_base_symbol) + + 1]) + else: + raise TypeError( + f"Module or type name aliases are not supported. You appear to be using an alias in your type annotation: '{qualname}'. This may be due to use of an 'as' statement in an import statement or a reassignment of a module or type to a new name. Reference the module and/or type using the name as defined in the source from which the module or type is imported." 
+ ) + return name_to_import + + +def traverse_ast_node_values_to_get_id(obj: Union[ast.Slice, None]) -> str: + while not hasattr(obj, 'id'): + obj = getattr(obj, 'value') + return obj.id + + +def get_custom_artifact_base_symbol_for_parameter(func: Callable, + arg_name: str) -> str: + """Gets the symbol required for the custom artifact type annotation to be + referenced correctly.""" + module_node = ast.parse( + component_factory._get_function_source_definition(func)) + args = module_node.body[0].args.args + args = {arg.arg: arg for arg in args} + annotation = args[arg_name].annotation + return traverse_ast_node_values_to_get_id(annotation.slice) + + +def get_custom_artifact_base_symbol_for_return(func: Callable, + return_name: str) -> str: + """Gets the symbol required for the custom artifact type return annotation + to be referenced correctly.""" + module_node = ast.parse( + component_factory._get_function_source_definition(func)) + return_ann = module_node.body[0].returns + + if return_name == RETURN_PREFIX: + if isinstance(return_ann, (ast.Name, ast.Attribute)): + return traverse_ast_node_values_to_get_id(return_ann) + elif isinstance(return_ann, ast.Call): + func = return_ann.func + # handles NamedTuple and typing.NamedTuple + if (isinstance(func, ast.Attribute) and func.value.id == 'typing' and + func.attr == 'NamedTuple') or (isinstance(func, ast.Name) and + func.id == 'NamedTuple'): + nt_field_list = return_ann.args[1].elts + for el in nt_field_list: + if f'{RETURN_PREFIX}{el.elts[0].s}' == return_name: + return traverse_ast_node_values_to_get_id(el.elts[1]) + + raise TypeError(f"Unexpected type annotation '{return_ann}' for {func}.") + + +def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: + """Gets the fully qualified name of the symbol that must be imported for + the custom artifact type annotation to be referenced successfully from a + component function.""" + + param_to_ann_obj = get_param_to_custom_artifact_class(func) + import_items = [] + for param_name, artifact_class in param_to_ann_obj.items(): + + base_symbol = get_custom_artifact_base_symbol_for_return( + func, param_name + ) if param_name.startswith( + RETURN_PREFIX) else get_custom_artifact_base_symbol_for_parameter( + func, param_name) + artifact_qualname = get_full_qualname_for_artifact(artifact_class) + symbol_import_path = get_symbol_import_path(base_symbol, + artifact_qualname) + + # could use set here, but want to be have deterministic import ordering + # in compilation + if symbol_import_path not in import_items: + import_items.append(symbol_import_path) + + return import_items diff --git a/sdk/python/kfp/dsl-test/types/custom_artifact_types_test.py b/sdk/python/kfp/dsl/types/custom_artifact_types_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/types/custom_artifact_types_test.py rename to sdk/python/kfp/dsl/types/custom_artifact_types_test.py diff --git a/sdk/python/kfp/dsl-test/types/test_data/expected_bulk_loaded_confusion_matrix.json b/sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json similarity index 100% rename from sdk/python/kfp/dsl-test/types/test_data/expected_bulk_loaded_confusion_matrix.json rename to sdk/python/kfp/dsl/types/test_data/expected_bulk_loaded_confusion_matrix.json diff --git a/sdk/python/kfp/dsl-test/types/test_data/expected_confusion_matrix.json b/sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json similarity index 100% rename from 
sdk/python/kfp/dsl-test/types/test_data/expected_confusion_matrix.json rename to sdk/python/kfp/dsl/types/test_data/expected_confusion_matrix.json diff --git a/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_bulk_load_classification_metrics.json b/sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json similarity index 100% rename from sdk/python/kfp/dsl-test/types/test_data/expected_io_types_bulk_load_classification_metrics.json rename to sdk/python/kfp/dsl/types/test_data/expected_io_types_bulk_load_classification_metrics.json diff --git a/sdk/python/kfp/dsl-test/types/test_data/expected_io_types_classification_metrics.json b/sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json similarity index 100% rename from sdk/python/kfp/dsl-test/types/test_data/expected_io_types_classification_metrics.json rename to sdk/python/kfp/dsl/types/test_data/expected_io_types_classification_metrics.json diff --git a/sdk/python/kfp/dsl/types/type_annotations.py b/sdk/python/kfp/dsl/types/type_annotations.py new file mode 100644 index 0000000000..aa39d2002e --- /dev/null +++ b/sdk/python/kfp/dsl/types/type_annotations.py @@ -0,0 +1,245 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes for input/output type annotations in KFP SDK. + +These are only compatible with v2 Pipelines. +""" + +import re +from typing import List, Type, TypeVar, Union + +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations +from kfp.dsl.types import type_utils + + +class OutputPath: + """Type annotation used in component definitions for indicating a parameter + is a path to an output. The path parameter typed with this annotation can + be treated as a locally accessible filepath within the component body. + + The argument typed with this annotation is provided at runtime by the executing backend and does not need to be passed as an input by the pipeline author (see example). + + + Args: + type: The type of the value written to the output path. + + Example: + :: + + @dsl.component + def create_parameter( + message: str, + output_parameter_path: OutputPath(str), + ): + with open(output_parameter_path, 'w') as f: + f.write(message) + + + @dsl.component + def consume_parameter(message: str): + print(message) + + + @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my-bucket') + def my_pipeline(message: str = 'default message'): + create_param_op = create_parameter(message=message) + consume_parameter(message=create_param_op.outputs['output_parameter_path']) + """ + + def __init__(self, type=None): + self.type = construct_type_for_inputpath_or_outputpath(type) + + def __eq__(self, other): + return isinstance(other, OutputPath) and self.type == other.type + + +class InputPath: + """Type annotation used in component definitions for indicating a parameter + is a path to an input. 
+ + Example: + :: + + @dsl.component + def create_dataset(dataset_path: OutputPath('Dataset'),): + import json + dataset = {'my_dataset': [[1, 2, 3], [4, 5, 6]]} + with open(dataset_path, 'w') as f: + json.dump(dataset, f) + + + @dsl.component + def consume_dataset(dataset: InputPath('Dataset')): + print(dataset) + + + @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my-bucket') + def my_pipeline(): + create_dataset_op = create_dataset() + consume_dataset(dataset=create_dataset_op.outputs['dataset_path']) + """ + + def __init__(self, type=None): + self.type = construct_type_for_inputpath_or_outputpath(type) + + def __eq__(self, other): + return isinstance(other, InputPath) and self.type == other.type + + +def construct_type_for_inputpath_or_outputpath( + type_: Union[str, Type, None]) -> Union[str, None]: + if type_annotations.is_artifact_class(type_): + return type_utils.create_bundled_artifact_type(type_.schema_title, + type_.schema_version) + elif isinstance( + type_, + str) and type_.lower() in type_utils._ARTIFACT_CLASSES_MAPPING: + # v1 artifact backward compat, e.g. dsl.OutputPath('Dataset') + return type_utils.create_bundled_artifact_type( + type_utils._ARTIFACT_CLASSES_MAPPING[type_.lower()].schema_title) + elif type_utils.get_parameter_type(type_): + return type_ + else: + # v1 unknown type dsl.OutputPath('MyCustomType') + return type_utils.create_bundled_artifact_type( + artifact_types.Artifact.schema_title) + + +class InputAnnotation: + """Marker type for input artifacts.""" + + +class OutputAnnotation: + """Marker type for output artifacts.""" + + +def is_Input_Output_artifact_annotation(typ) -> bool: + if not hasattr(typ, '__metadata__'): + return False + + if typ.__metadata__[0] not in [InputAnnotation, OutputAnnotation]: + return False + + return True + + +def is_input_artifact(typ) -> bool: + """Returns True if typ is of type Input[T].""" + if not is_Input_Output_artifact_annotation(typ): + return False + + return typ.__metadata__[0] == InputAnnotation + + +def is_output_artifact(typ) -> bool: + """Returns True if typ is of type Output[T].""" + if not is_Input_Output_artifact_annotation(typ): + return False + + return typ.__metadata__[0] == OutputAnnotation + + +def get_io_artifact_class(typ): + from kfp.dsl import Input + from kfp.dsl import Output + if not is_Input_Output_artifact_annotation(typ): + return None + if typ == Input or typ == Output: + return None + + # extract inner type from list of artifacts + inner = typ.__args__[0] + if hasattr(inner, '__origin__') and inner.__origin__ == list: + return inner.__args__[0] + + return inner + + +def get_io_artifact_annotation(typ): + if not is_Input_Output_artifact_annotation(typ): + return None + + return typ.__metadata__[0] + + +T = TypeVar('T') + + +def maybe_strip_optional_from_annotation(annotation: T) -> T: + """Strips 'Optional' from 'Optional[]' if applicable. + + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + + Args: + annotation: The original type annotation which may or may not has + `Optional`. + + Returns: + The type inside Optional[] if Optional exists, otherwise the original type. 
+ """ + if getattr(annotation, '__origin__', + None) is Union and annotation.__args__[1] is type(None): + return annotation.__args__[0] + return annotation + + + def maybe_strip_optional_from_annotation_string(annotation: str) -> str: + if annotation.startswith('Optional[') and annotation.endswith(']'): + return annotation.lstrip('Optional[').rstrip(']') + return annotation + + + def get_short_type_name(type_name: str) -> str: + """Extracts the short form type name. + + This method is used for looking up serializer for a given type. + + For example:: + typing.List -> List + typing.List[int] -> List + typing.Dict[str, str] -> Dict + List -> List + str -> str + + Args: + type_name: The original type name. + + Returns: + The short form type name or the original name if pattern doesn't match. + """ + match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name) + return match['type'] if match else type_name + + + def is_artifact_class(artifact_class_or_instance: Type) -> bool: + # we do not yet support non-pre-registered custom artifact types with instance_schema attribute + return hasattr(artifact_class_or_instance, 'schema_title') and hasattr( + artifact_class_or_instance, 'schema_version') + + + def is_list_of_artifacts( + type_var: Union[Type[List[artifact_types.Artifact]], + Type[artifact_types.Artifact]] + ) -> bool: + # the type annotation for this function's `type_var` parameter may not actually be a subclass of the KFP SDK's Artifact class for custom artifact types + is_list_or_list_generic = getattr(type_var, '__origin__', None) == list + # in >= python3.9, List won't have .__args__ if it's used as `-> List` with no inner type argument + contains_artifact = hasattr( + type_var, '__args__') and type_annotations.is_artifact_class( + type_var.__args__[0]) + return is_list_or_list_generic and contains_artifact diff --git a/sdk/python/kfp/dsl-test/types/type_annotations_test.py b/sdk/python/kfp/dsl/types/type_annotations_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/types/type_annotations_test.py rename to sdk/python/kfp/dsl/types/type_annotations_test.py diff --git a/sdk/python/kfp/dsl/types/type_utils.py b/sdk/python/kfp/dsl/types/type_utils.py new file mode 100644 index 0000000000..40723f4f1f --- /dev/null +++ b/sdk/python/kfp/dsl/types/type_utils.py @@ -0,0 +1,543 @@ +# Copyright 2020-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities for component I/O type mapping.""" + +from distutils import util +import inspect +import json +from typing import Any, Callable, Dict, Optional, Type, Union +import warnings + +import kfp +from kfp.dsl import structures +from kfp.dsl import task_final_status +from kfp.dsl.types import artifact_types +from kfp.dsl.types import type_annotations + +DEFAULT_ARTIFACT_SCHEMA_VERSION = '0.0.1' +PARAMETER_TYPES = Union[str, int, float, bool, dict, list] + +# ComponentSpec I/O types to DSL ontology artifact classes mapping.
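Editorial aside, not part of the patch: before the artifact-class mapping below, a minimal sketch of how the string helpers above normalize annotation names before they are looked up (it assumes the corrected named-group regex and a post-patch kfp install).

from kfp.dsl.types import type_annotations

# Outer generic names are reduced to their short form for serializer lookup.
print(type_annotations.get_short_type_name('typing.List[int]'))  # List
print(type_annotations.get_short_type_name('Dict[str, str]'))    # Dict
# Optional[...] wrappers are stripped from string annotations.
print(type_annotations.maybe_strip_optional_from_annotation_string('Optional[str]'))  # str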
+_ARTIFACT_CLASSES_MAPPING = { + 'artifact': artifact_types.Artifact, + 'model': artifact_types.Model, + 'dataset': artifact_types.Dataset, + 'metrics': artifact_types.Metrics, + 'classificationmetrics': artifact_types.ClassificationMetrics, + 'slicedclassificationmetrics': artifact_types.SlicedClassificationMetrics, + 'html': artifact_types.HTML, + 'markdown': artifact_types.Markdown, +} + +_GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$' +_GOOGLE_TYPES_VERSION = DEFAULT_ARTIFACT_SCHEMA_VERSION + +# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping. +# The keys are normalized (lowercased). These are types viewed as Parameters. +# The values are the corresponding IR parameter primitive types. + +# pipeline_spec_pb2.ParameterType enum values +NUMBER_DOUBLE = 1 +NUMBER_INTEGER = 2 +STRING = 3 +BOOLEAN = 4 +LIST = 5 +STRUCT = 6 +PARAMETER_TYPES_MAPPING = { + 'integer': 2, + 'int': NUMBER_INTEGER, + 'double': NUMBER_DOUBLE, + 'float': NUMBER_DOUBLE, + 'string': STRING, + 'str': STRING, + 'text': STRING, + 'bool': BOOLEAN, + 'boolean': BOOLEAN, + 'dict': STRUCT, + 'list': LIST, + 'jsonobject': STRUCT, + 'jsonarray': LIST, +} + + +def bool_cast_fn(default: Union[str, bool]) -> bool: + if isinstance(default, str): + default = util.strtobool(default) == 1 + return default + + +def try_loading_json(default: str) -> Union[dict, list, str]: + try: + return json.loads(default) + except: + return default + + +_V1_DEFAULT_DESERIALIZER_MAPPING: Dict[str, Callable] = { + 'integer': int, + 'int': int, + 'double': float, + 'float': float, + 'string': str, + 'str': str, + 'text': str, + 'bool': bool_cast_fn, + 'boolean': bool_cast_fn, + 'dict': try_loading_json, + 'list': try_loading_json, + 'jsonobject': try_loading_json, + 'jsonarray': try_loading_json, +} + + +def deserialize_v1_component_yaml_default(type_: str, default: Any) -> Any: + """Deserializes v1 default values to correct in-memory types. + + Typecasts for primitive types. Tries to load JSON for arrays and + structs. + """ + if default is None: + return default + if isinstance(type_, str): + cast_fn = _V1_DEFAULT_DESERIALIZER_MAPPING.get(type_.lower(), + lambda x: x) + return cast_fn(default) + return default + + +def is_task_final_status_type(type_name: Optional[Union[str, dict]]) -> bool: + """Check if a ComponentSpec I/O type is PipelineTaskFinalStatus. + + Args: + type_name: type name of the ComponentSpec I/O type. + + Returns: + True if the type name is 'PipelineTaskFinalStatus'. + """ + return isinstance(type_name, str) and ( + type_name == task_final_status.PipelineTaskFinalStatus.__name__) + + +def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool: + """Check if a ComponentSpec I/O type is considered as a parameter type. + + Args: + type_name: type name of the ComponentSpec I/O type. + + Returns: + True if the type name maps to a parameter type else False. 
+ """ + if isinstance(type_name, str): + type_name = type_annotations.get_short_type_name(type_name) + elif isinstance(type_name, dict): + type_name = list(type_name.keys())[0] + else: + return False + + return type_name.lower( + ) in PARAMETER_TYPES_MAPPING or is_task_final_status_type(type_name) + + + def bundled_artifact_to_artifact_proto( + bundled_artifact_str: str) -> 'pipeline_spec_pb2.ArtifactTypeSchema': + """Gets the IR ArtifactTypeSchema proto for a bundled artifact in form + `<namespace>.<name>@x.x.x` (e.g., system.Artifact@0.0.1).""" + bundled_artifact_str, schema_version = bundled_artifact_str.split('@') + + from kfp.pipeline_spec import pipeline_spec_pb2 + + return pipeline_spec_pb2.ArtifactTypeSchema( + schema_title=bundled_artifact_str, + schema_version=schema_version, + ) + + + def get_parameter_type( + param_type: Optional[Union[Type, str, dict]] + ) -> 'pipeline_spec_pb2.ParameterType': + """Get the IR I/O parameter type for the given ComponentSpec I/O type. + + Args: + param_type: type of the ComponentSpec I/O type. Can be a primitive Python + builtin type or a type name. + + Returns: + The enum value of the mapped IR I/O primitive type. + + Raises: + AttributeError: if type_name is not a string type. + """ + # Special handling for PipelineTaskFinalStatus, treat it as Dict type. + if is_task_final_status_type(param_type): + param_type = 'dict' + if type(param_type) == type: + type_name = param_type.__name__ + elif isinstance(param_type, dict): + type_name = list(param_type.keys())[0] + else: + type_name = type_annotations.get_short_type_name(str(param_type)) + return PARAMETER_TYPES_MAPPING.get(type_name.lower()) + + + def get_parameter_type_name( + param_type: Optional[Union[Type, str, dict]]) -> str: + """Gets the parameter type name.""" + + from kfp.pipeline_spec import pipeline_spec_pb2 + + return pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( + get_parameter_type(param_type)) + + + class InconsistentTypeException(Exception): + """InconsistentTypeException is raised when two types are not + consistent.""" + + + class InconsistentTypeWarning(Warning): + """InconsistentTypeWarning is issued when two types are not consistent.""" + + + def _get_type_string_from_component_argument( + argument_value: Union['pipeline_channel.PipelineChannel', str, bool, int, + float, dict, list] + ) -> str: + # avoid circular imports + from kfp.dsl import pipeline_channel + if isinstance(argument_value, pipeline_channel.PipelineChannel): + return argument_value.channel_type + + # argument is a constant + argument_type = type(argument_value) + if argument_type in _TYPE_TO_TYPE_NAME: + return _TYPE_TO_TYPE_NAME[argument_type] + + raise ValueError( + f'Constant argument inputs must be one of type {list(_TYPE_TO_TYPE_NAME.values())} Got: {argument_value!r} of type {type(argument_value)!r}.' + ) + + + def verify_type_compatibility( + given_value: Union['pipeline_channel.PipelineChannel', str, bool, int, + float, dict, list], + expected_spec: Union[structures.InputSpec, structures.OutputSpec], + error_message_prefix: str, + checks_input: bool = True, + raise_on_error: bool = True, + ) -> bool: + """Verifies the given argument type is compatible with the expected type. + + Args: + given_value: The channel or constant provided as an argument. + expected_spec: The InputSpec or OutputSpec that describes the expected type of given_value. + error_message_prefix: The prefix for the error message. + checks_input: True if checks an argument (given_value) against a component/pipeline input type (expected_spec).
False if checks a component output (argument_value) against the pipeline output type (expected_spec). + raise_on_error: Whether to raise on type compatibility error. Should be passed kfp.TYPE_CHECK. + + Returns: + True if types are compatible, and False if otherwise. + + Raises: + InconsistentTypeException if raise_on_error=True. + """ + # extract and normalize types + expected_type = expected_spec.type + given_type = _get_type_string_from_component_argument(given_value) + + given_is_param = is_parameter_type(str(given_type)) + if given_is_param: + given_type = get_parameter_type_name(given_type) + given_is_artifact_list = False + else: + given_is_artifact_list = given_value.is_artifact_list + + expected_is_param = is_parameter_type(expected_type) + if expected_is_param: + expected_type = get_parameter_type_name(expected_type) + expected_is_artifact_list = False + else: + expected_is_artifact_list = expected_spec.is_artifact_list + + # compare the normalized types + if given_is_param != expected_is_param: + types_are_compatible = False + elif given_is_param and expected_is_param: + types_are_compatible = check_parameter_type_compatibility( + given_type, expected_type) + else: + types_are_compatible = check_artifact_type_compatibility( + given_type=given_type, + given_is_artifact_list=given_is_artifact_list, + expected_type=expected_type, + expected_is_artifact_list=expected_is_artifact_list) + + # maybe raise, maybe warn, return bool + if not types_are_compatible: + # update the types for lists of artifacts for error message + given_type = f'List[{given_type}]' if given_is_artifact_list else given_type + expected_type = f'List[{expected_type}]' if expected_is_artifact_list else expected_type + if checks_input: + error_message_suffix = f'Argument type {given_type!r} is incompatible with the input type {expected_type!r}' + else: + error_message_suffix = f'Output of type {given_type!r} cannot be surfaced as pipeline output type {expected_type!r}' + error_text = error_message_prefix + error_message_suffix + if raise_on_error: + raise InconsistentTypeException(error_text) + else: + warnings.warn(InconsistentTypeWarning(error_text)) + + return types_are_compatible + + +def check_artifact_type_compatibility(given_type: str, + given_is_artifact_list: bool, + expected_type: str, + expected_is_artifact_list: bool) -> bool: + given_schema_title, given_schema_version = given_type.split('@') + expected_schema_title, expected_schema_version = expected_type.split('@') + same_list_of_artifacts_status = expected_is_artifact_list == given_is_artifact_list + if not same_list_of_artifacts_status: + return False + elif artifact_types.Artifact.schema_title in { + given_schema_title, expected_schema_title + }: + return True + else: + schema_title_compatible = given_schema_title == expected_schema_title + schema_version_compatible = given_schema_version.split( + '.')[0] == expected_schema_version.split('.')[0] + + return schema_title_compatible and schema_version_compatible + + +def check_parameter_type_compatibility(given_type: str, + expected_type: str) -> bool: + if isinstance(given_type, str) and isinstance(expected_type, str): + return given_type == expected_type + else: + return check_v1_struct_parameter_type_compatibility( + given_type, expected_type) + + +def check_v1_struct_parameter_type_compatibility( + given_type: Union[str, dict], + expected_type: Union[str, dict], +) -> bool: + if isinstance(given_type, str): + given_type = {given_type: {}} + if isinstance(expected_type, str): + expected_type = 
{expected_type: {}} + return _check_dict_types(given_type, expected_type) + + +def _check_dict_types( + given_type: dict, + expected_type: dict, +) -> bool: + given_type_name, _ = list(given_type.items())[0] + expected_type_name, _ = list(expected_type.items())[0] + if given_type_name == '' or expected_type_name == '': + # If the type name is empty, it matches any types + return True + if given_type_name != expected_type_name: + print('type name ' + str(given_type_name) + + ' is different from expected: ' + str(expected_type_name)) + return False + type_name = given_type_name + for type_property in given_type[type_name]: + if type_property not in expected_type[type_name]: + print(type_name + ' has a property ' + str(type_property) + + ' that the latter does not.') + return False + if given_type[type_name][type_property] != expected_type[type_name][ + type_property]: + print(type_name + ' has a property ' + str(type_property) + + ' with value: ' + str(given_type[type_name][type_property]) + + ' and ' + str(expected_type[type_name][type_property])) + return False + return True + + +_TYPE_TO_TYPE_NAME = { + str: 'String', + int: 'Integer', + float: 'Float', + bool: 'Boolean', + list: 'List', + dict: 'Dict', +} + + +def get_canonical_type_name_for_type(typ: Type) -> Optional[str]: + """Find the canonical type name for a given type. + + Args: + typ: The type to search for. + + Returns: + The canonical name of the type found. + """ + return _TYPE_TO_TYPE_NAME.get(typ, None) + + +class TypeCheckManager: + """Context manager to set a type check mode within context, then restore + mode to original value upon exiting the context.""" + + def __init__(self, enable: bool) -> None: + """TypeCheckManager constructor. + + Args: + enable: Type check mode used within context. + """ + self._enable = enable + + def __enter__(self) -> 'TypeCheckManager': + """Set type check mode to self._enable. + + Returns: + TypeCheckManager: Returns itself. + """ + self._prev = kfp.TYPE_CHECK + kfp.TYPE_CHECK = self._enable + return self + + def __exit__(self, *unused_args) -> None: + """Restore type check mode to its previous state.""" + kfp.TYPE_CHECK = self._prev + + +# for reading in IR back to in-memory data structures +IR_TYPE_TO_IN_MEMORY_SPEC_TYPE = { + 'STRING': 'String', + 'NUMBER_INTEGER': 'Integer', + 'NUMBER_DOUBLE': 'Float', + 'LIST': 'List', + 'STRUCT': 'Dict', + 'BOOLEAN': 'Boolean', + 'TASK_FINAL_STATUS': task_final_status.PipelineTaskFinalStatus.__name__, +} + +IR_TYPE_TO_COMMENT_TYPE_STRING = { + 'STRING': str.__name__, + 'NUMBER_INTEGER': int.__name__, + 'NUMBER_DOUBLE': float.__name__, + 'LIST': list.__name__, + 'STRUCT': dict.__name__, + 'BOOLEAN': bool.__name__, + 'TASK_FINAL_STATUS': task_final_status.PipelineTaskFinalStatus.__name__, +} + +IN_MEMORY_SPEC_TYPE_TO_IR_TYPE = { + v: k for k, v in IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.items() +} + + +def get_canonical_name_for_outer_generic(type_name: Any) -> str: + """Maps a complex/nested type name back to a canonical type. + + E.g. + get_canonical_name_for_outer_generic('typing.List[str]') + 'List' + + get_canonical_name_for_outer_generic('typing.Dict[typing.List[str], str]') + 'Dict' + + Args: + type_name (Any): The type. Returns input if not a string. + + Returns: + str: The canonical type. 
+ """ + if not isinstance(type_name, str): + return type_name + + if type_name.startswith('typing.'): + type_name = type_name.lstrip('typing.') + + if type_name.lower().startswith('list') or type_name.lower().startswith( + 'dict'): + return type_name.split('[')[0] + + else: + return type_name + + +def create_bundled_artifact_type(schema_title: str, + schema_version: Optional[str] = None) -> str: + if not isinstance(schema_title, str): + raise ValueError + return schema_title + '@' + ( + schema_version or DEFAULT_ARTIFACT_SCHEMA_VERSION) + + +def validate_schema_version(schema_version: str) -> None: + split_schema_version = schema_version.split('.') + if len(split_schema_version) != 3: + raise TypeError( + f'Artifact schema_version must use three-part semantic versioning. Got: {schema_version}' + ) + + +def validate_schema_title(schema_title: str) -> None: + split_schema_title = schema_title.split('.') + if len(split_schema_title) != 2: + raise TypeError( + f'Artifact schema_title must have both a namespace and a name, separated by a `.`. Got: {schema_title}' + ) + namespace, _ = split_schema_title + if namespace not in {'system', 'google'}: + raise TypeError( + f'Artifact schema_title must belong to `system` or `google` namespace. Got: {schema_title}' + ) + + +def validate_bundled_artifact_type(type_: str) -> None: + split_type = type_.split('@') + # two parts and neither are empty strings + if len(split_type) != 2 or not all(split_type): + raise TypeError( + f'Artifacts must have both a schema_title and a schema_version, separated by `@`. Got: {type_}' + ) + schema_title, schema_version = split_type + validate_schema_title(schema_title) + validate_schema_version(schema_version) + + +def _annotation_to_type_struct(annotation): + if not annotation or annotation == inspect.Parameter.empty: + return None + if hasattr(annotation, 'to_dict'): + annotation = annotation.to_dict() + if isinstance(annotation, dict): + return annotation + if isinstance(annotation, type): + type_struct = get_canonical_type_name_for_type(annotation) + if type_struct: + return type_struct + elif type_annotations.is_artifact_class(annotation): + schema_title = annotation.schema_title + else: + schema_title = str(annotation.__name__) + elif hasattr(annotation, '__forward_arg__'): + schema_title = str(annotation.__forward_arg__) + else: + schema_title = str(annotation) + type_struct = get_canonical_type_name_for_type(schema_title) + return type_struct or schema_title + + +def is_typed_named_tuple_annotation(annotation: Any) -> bool: + return hasattr(annotation, '_fields') and hasattr(annotation, + '__annotations__') diff --git a/sdk/python/kfp/dsl-test/types/type_utils_test.py b/sdk/python/kfp/dsl/types/type_utils_test.py similarity index 99% rename from sdk/python/kfp/dsl-test/types/type_utils_test.py rename to sdk/python/kfp/dsl/types/type_utils_test.py index 9706fc4ec6..ee2cf16180 100644 --- a/sdk/python/kfp/dsl-test/types/type_utils_test.py +++ b/sdk/python/kfp/dsl/types/type_utils_test.py @@ -727,7 +727,6 @@ def test_verify_type_compatibility( given_value=argument_value, expected_spec=parameter_input_spec, error_message_prefix='', - raise_on_error=kfp.TYPE_CHECK, )) else: with self.assertRaises(InconsistentTypeException): @@ -735,7 +734,6 @@ def test_verify_type_compatibility( given_value=argument_value, expected_spec=parameter_input_spec, error_message_prefix='', - raise_on_error=kfp.TYPE_CHECK, ) def test_list_of_artifacts_across_compilation_valid(self): diff --git a/sdk/python/kfp/dsl/utils.py 
b/sdk/python/kfp/dsl/utils.py new file mode 100644 index 0000000000..781ddd0de5 --- /dev/null +++ b/sdk/python/kfp/dsl/utils.py @@ -0,0 +1,128 @@ +# Copyright 2021 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Definitions of utils methods.""" + +import importlib +import os +import re +import sys +import types +from typing import List + +_COMPONENT_NAME_PREFIX = 'comp-' +_EXECUTOR_LABEL_PREFIX = 'exec-' + + +def load_module(module_name: str, module_directory: str) -> types.ModuleType: + """Dynamically imports the Python module with the given name and package + path. + + E.g., Assuming there is a file called `my_module.py` under + `/some/directory/my_module`, we can use:: + + load_module('my_module', '/some/directory') + + to effectively `import mymodule`. + + Args: + module_name: The name of the module. + package_path: The package under which the specified module resides. + """ + module_spec = importlib.util.spec_from_file_location( + name=module_name, + location=os.path.join(module_directory, f'{module_name}.py')) + module = importlib.util.module_from_spec(module_spec) + sys.modules[module_spec.name] = module + sys.path.insert(0, str(module_directory)) + module_spec.loader.exec_module(module) + return module + + +def maybe_rename_for_k8s(name: str) -> str: + """Cleans and converts a name to be k8s compatible. + + Args: + name: The original name. + + Returns: + A sanitized name. + """ + return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', + name.lower())).lstrip('-').rstrip('-') + + +def sanitize_input_name(name: str) -> str: + """Sanitizes input name.""" + return re.sub('[^_0-9a-z]+', '_', name.lower()).lstrip('_').rstrip('_') + + +def sanitize_component_name(name: str) -> str: + """Sanitizes component name.""" + return _COMPONENT_NAME_PREFIX + maybe_rename_for_k8s(name) + + +def sanitize_task_name(name: str) -> str: + """Sanitizes task name.""" + return maybe_rename_for_k8s(name) + + +def sanitize_executor_label(label: str) -> str: + """Sanitizes executor label.""" + return _EXECUTOR_LABEL_PREFIX + maybe_rename_for_k8s(label) + + +def make_name_unique_by_adding_index( + name: str, + collection: List[str], + delimiter: str, +) -> str: + """Makes a unique name by adding index. + + The index starts from 2 and increase by 1 until we find a unique name. + + Args: + name: The original name. + collection: The collection of existing names. + delimiter: The delimiter to connect the original name and an index. + + Returns: + A unique name composed of name+delimiter+next index + """ + unique_name = name + if unique_name in collection: + for i in range(2, sys.maxsize**10): + unique_name = name + delimiter + str(i) + if unique_name not in collection: + break + return unique_name + + +def validate_pipeline_name(name: str) -> None: + """Validate pipeline name. + + A valid pipeline name should match ^[a-z0-9][a-z0-9-]{0,127}$. + + Args: + name: The pipeline name. + + Raises: + ValueError if the pipeline name doesn't conform to the regular expression. 
+ """ + pattern = re.compile(r'^[a-z0-9][a-z0-9-]{0,127}$') + if not pattern.match(name): + raise ValueError( + 'Invalid pipeline name: %s.\n' + 'Please specify a pipeline name that matches the regular ' + 'expression "^[a-z0-9][a-z0-9-]{0,127}$" using ' + '`dsl.pipeline(name=...)` decorator.' % name) diff --git a/sdk/python/kfp/dsl-test/utils_test.py b/sdk/python/kfp/dsl/utils_test.py similarity index 100% rename from sdk/python/kfp/dsl-test/utils_test.py rename to sdk/python/kfp/dsl/utils_test.py diff --git a/sdk/python/kfp/dsl/v1_components.py b/sdk/python/kfp/dsl/v1_components.py new file mode 100644 index 0000000000..9714d56eef --- /dev/null +++ b/sdk/python/kfp/dsl/v1_components.py @@ -0,0 +1,44 @@ +# Copyright 2018-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import hashlib +import warnings + +from kfp.dsl import v1_structures +import yaml + + +def _load_component_spec_from_component_text( + text) -> v1_structures.ComponentSpec: + component_dict = yaml.safe_load(text) + component_spec = v1_structures.ComponentSpec.from_dict(component_dict) + + if isinstance(component_spec.implementation, + v1_structures.ContainerImplementation) and ( + component_spec.implementation.container.command is None): + warnings.warn( + 'Container component must specify command to be compatible with KFP ' + 'v2 compatible mode and emissary executor, which will be the default' + ' executor for KFP v2.' + 'https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/', + category=FutureWarning, + ) + + # Calculating hash digest for the component + data = text if isinstance(text, bytes) else text.encode('utf-8') + data = data.replace(b'\r\n', b'\n') # Normalizing line endings + digest = hashlib.sha256(data).hexdigest() + component_spec._digest = digest + + return component_spec diff --git a/sdk/python/kfp/dsl/v1_modelbase.py b/sdk/python/kfp/dsl/v1_modelbase.py new file mode 100644 index 0000000000..c1facf6c8e --- /dev/null +++ b/sdk/python/kfp/dsl/v1_modelbase.py @@ -0,0 +1,379 @@ +# Copyright 2018-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
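Editorial aside, not part of the patch: a small sketch of how the v1 component loader in v1_components.py above might be exercised; the inline YAML is a made-up example for illustration, not a component taken from the repo.

from kfp.dsl import v1_components

# A tiny, hypothetical v1 container component definition (illustration only).
component_text = """
name: Print text
inputs:
- {name: text, type: String}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: text}]
"""

spec = v1_components._load_component_spec_from_component_text(component_text)
print(spec.name)     # Print text
print(spec._digest)  # sha256 digest of the normalized component text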
+ +from collections import abc +from collections import OrderedDict +import inspect +from typing import (Any, cast, Dict, get_type_hints, List, Mapping, + MutableMapping, MutableSequence, Sequence, Type, TypeVar, + Union) + +T = TypeVar('T') + + +def verify_object_against_type(x: Any, typ: Type[T]) -> T: + """Verifies that the object is compatible to the specified type (types from + the typing package can be used).""" + #TODO: Merge with parse_object_from_struct_based_on_type which has almost the same code + if typ is type(None): + if x is None: + return x + else: + raise TypeError(f'Error: Object "{x}" is not None.') + + if typ is Any or type(typ) is TypeVar: + return x + + try: #isinstance can fail for generics + if isinstance(x, typ): + return cast(typ, x) + except Exception: + pass + + if hasattr(typ, '__origin__'): #Handling generic types + if typ.__origin__ is Union: #Optional == Union + exception_map = {} + possible_types = typ.__args__ + if type( + None + ) in possible_types and x is None: #Shortcut for Optional[] tests. Can be removed, but the exceptions will be more noisy. + return x + for possible_type in possible_types: + try: + verify_object_against_type(x, possible_type) + return x + except Exception as ex: + exception_map[possible_type] = ex + #exception_lines = ['Exception for type {}: {}.'.format(t, e) for t, e in exception_map.items()] + exception_lines = [str(e) for t, e in exception_map.items()] + exception_lines.append( + f'Error: Object "{x}" is incompatible with type "{typ}".') + raise TypeError('\n'.join(exception_lines)) + + #not Union => not None + if x is None: + raise TypeError( + f'Error: None object is incompatible with type {typ}') + + generic_type = typ.__origin__ + if generic_type in [ + list, List, abc.Sequence, abc.MutableSequence, Sequence, + MutableSequence + ] and type(x) is not str: #! str is also Sequence + if not isinstance(x, generic_type): + raise TypeError( + f'Error: Object "{x}" is incompatible with type "{typ}"') + + # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts + type_args = typ.__args__ if getattr( + typ, '__args__', None) is not None else (Any, Any) + inner_type = type_args[0] + for item in x: + verify_object_against_type(item, inner_type) + return x + + elif generic_type in [ + dict, Dict, abc.Mapping, abc.MutableMapping, Mapping, + MutableMapping, OrderedDict + ]: + if not isinstance(x, generic_type): + raise TypeError( + f'Error: Object "{x}" is incompatible with type "{typ}"') + + # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts + type_args = typ.__args__ if getattr( + typ, '__args__', None) is not None else (Any, Any) + inner_key_type = type_args[0] + inner_value_type = type_args[1] + for k, v in x.items(): + verify_object_against_type(k, inner_key_type) + verify_object_against_type(v, inner_value_type) + return x + + else: + raise TypeError( + f'Error: Unsupported generic type "{typ}". type.__origin__ or type.__extra__ == "{generic_type}"' + ) + + raise TypeError(f'Error: Object "{x}" is incompatible with type "{typ}"') + + +def parse_object_from_struct_based_on_type(struct: Any, typ: Type[T]) -> T: + """Constructs an object from structure (usually dict) based on type. + + Supports list and dict types from the typing package plus Optional[] + and Union[] types. If some type is a class that has .from_dict class + method, that method is used for object construction. 
+ """ + if typ is type(None): + if struct is None: + return None + else: + raise TypeError(f'Error: Structure "{struct}" is not None.') + + if typ is Any or type(typ) is TypeVar: + return struct + + try: #isinstance can fail for generics + #if (isinstance(struct, typ) + # and not (typ is Sequence and type(struct) is str) #! str is also Sequence + # and not (typ is int and type(struct) is bool) #! bool is int + #): + if type(struct) is typ: + return struct + except: + pass + if hasattr(typ, 'from_dict'): + try: #More informative errors + return typ.from_dict(struct) + except Exception as ex: + raise TypeError( + f'Error: {typ.__name__}.from_dict(struct={struct}) failed with exception:\n{str(ex)}' + ) + if hasattr(typ, '__origin__'): #Handling generic types + if typ.__origin__ is Union: #Optional == Union + results = {} + exception_map = {} + # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts + # Union without subscripts seems useless, but semantically it should be the same as Any. + possible_types = list(getattr(typ, '__args__', [Any])) + #if type(None) in possible_types and struct is None: #Shortcut for Optional[] tests. Can be removed, but the exceptions will be more noisy. + # return None + + for possible_type in possible_types: + try: + obj = parse_object_from_struct_based_on_type( + struct, possible_type) + results[possible_type] = obj + except Exception as ex: + if isinstance(ex, TypeError): + exception_map[possible_type] = ex + else: + exception_map[ + possible_type] = f'Unexpected exception when trying to convert structure "{struct}" to type "{typ}": {type(ex)}: {ex}' + + #Single successful parsing. + if len(results) == 1: + return list(results.values())[0] + + if len(results) > 1: + raise TypeError( + f'Error: Structure "{struct}" is ambiguous. It can be parsed to multiple types: {list(results.keys())}.' + ) + + exception_lines = [str(e) for t, e in exception_map.items()] + exception_lines.append( + f'Error: Structure "{struct}" is incompatible with type "{typ}" - none of the types in Union are compatible.' + ) + raise TypeError('\n'.join(exception_lines)) + #not Union => not None + if struct is None: + raise TypeError( + f'Error: None structure is incompatible with type {typ}') + + generic_type = typ.__origin__ + if generic_type in [ + list, List, abc.Sequence, abc.MutableSequence, Sequence, + MutableSequence + ] and type(struct) is not str: #! str is also Sequence + if not isinstance(struct, generic_type): + raise TypeError( + f'Error: Structure "{struct}" is incompatible with type "{typ}" - it does not have list type.' + ) + + # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts + type_args = typ.__args__ if getattr( + typ, '__args__', None) is not None else (Any, Any) + inner_type = type_args[0] + return [ + parse_object_from_struct_based_on_type(item, inner_type) + for item in struct + ] + + elif generic_type in [ + dict, Dict, abc.Mapping, abc.MutableMapping, Mapping, + MutableMapping, OrderedDict + ]: + if not isinstance(struct, generic_type): + raise TypeError( + f'Error: Structure "{struct}" is incompatible with type "{typ}" - it does not have dict type.' 
+ ) + + # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts + type_args = typ.__args__ if getattr( + typ, '__args__', None) is not None else (Any, Any) + inner_key_type = type_args[0] + inner_value_type = type_args[1] + return { + parse_object_from_struct_based_on_type(k, inner_key_type): + parse_object_from_struct_based_on_type(v, inner_value_type) + for k, v in struct.items() + } + + else: + raise TypeError( + f'Error: Unsupported generic type "{typ}". type.__origin__ or type.__extra__ == "{generic_type}"' + ) + + raise TypeError( + f'Error: Structure "{struct}" is incompatible with type "{typ}". Structure is not the instance of the type, the type does not have .from_dict method and is not generic.' + ) + + +def convert_object_to_struct(obj, serialized_names: Mapping[str, str] = {}): + """Converts an object to structure (usually a dict). + + Serializes all properties that do not start with underscores. If the + type of some property is a class that has .to_dict class method, + that method is used for conversion. Used by the ModelBase class. + """ + signature = inspect.signature(obj.__init__) #Needed for default values + result = {} + for python_name in signature.parameters: #TODO: Make it possible to specify the field ordering regardless of the presence of default values + value = getattr(obj, python_name) + if python_name.startswith('_'): + continue + attr_name = serialized_names.get(python_name, python_name) + if hasattr(value, 'to_dict'): + result[attr_name] = value.to_dict() + elif isinstance(value, list): + result[attr_name] = [ + (x.to_dict() if hasattr(x, 'to_dict') else x) for x in value + ] + elif isinstance(value, dict): + result[attr_name] = { + k: (v.to_dict() if hasattr(v, 'to_dict') else v) + for k, v in value.items() + } + else: + param = signature.parameters.get(python_name, None) + if param is None or param.default == inspect.Parameter.empty or value != param.default: + result[attr_name] = value + + return result + + +def parse_object_from_struct_based_on_class_init( + cls: Type[T], + struct: Mapping, + serialized_names: Mapping[str, str] = {}) -> T: + """Constructs an object of specified class from structure (usually dict) + using the class.__init__ method. Converts all constructor arguments to + appropriate types based on the __init__ type hints. Used by the ModelBase + class. + + Arguments: + + serialized_names: specifies the mapping between __init__ parameter names and the structure key names for cases where these names are different (due to language syntax clashes or style differences). + """ + parameter_types = get_type_hints( + cls.__init__) #Properlty resolves forward references + + serialized_names_to_pythonic = {v: k for k, v in serialized_names.items()} + #If a pythonic name has a different original name, we forbid the pythonic name in the structure. Otherwise, this function would accept "python-styled" structures that should be invalid + forbidden_struct_keys = set( + serialized_names_to_pythonic.values()).difference( + serialized_names_to_pythonic.keys()) + args = {} + for original_name, value in struct.items(): + if original_name in forbidden_struct_keys: + raise ValueError( + f'Use "{serialized_names[original_name]}" key instead of pythonic key "{original_name}" in the structure: {struct}.' 
+ ) + python_name = serialized_names_to_pythonic.get(original_name, + original_name) + param_type = parameter_types.get(python_name, None) + if param_type is not None: + args[python_name] = parse_object_from_struct_based_on_type( + value, param_type) + else: + args[python_name] = value + + return cls(**args) + + +class ModelBase: + """Base class for types that can be converted to JSON-like dict structures + or constructed from such structures. The object fields, their types and + default values are taken from the __init__ method arguments. Override the + _serialized_names mapping to control the key names of the serialized + structures. + + The derived class objects will have the .from_dict and .to_dict methods for conversion to or from structure. The base class constructor accepts the arguments map, checks the argument types and sets the object field values. + + Example derived class: + + class TaskSpec(ModelBase): + _serialized_names = { + 'component_ref': 'componentRef', + 'is_enabled': 'isEnabled', + } + + def __init__(self, + component_ref: ComponentReference, + arguments: Optional[Mapping[str, ArgumentType]] = None, + is_enabled: Optional[Union[ArgumentType, EqualsPredicate, NotEqualsPredicate]] = None, #Optional property with default value + ): + super().__init__(locals()) #Calling the ModelBase constructor to check the argument types and set the object field values. + + task_spec = TaskSpec.from_dict("{'componentRef': {...}, 'isEnabled: {'and': {...}}}") # = instance of TaskSpec + task_struct = task_spec.to_dict() #= "{'componentRef': {...}, 'isEnabled: {'and': {...}}}" + """ + _serialized_names = {} + + def __init__(self, args): + parameter_types = get_type_hints(self.__class__.__init__) + field_values = { + k: v + for k, v in args.items() + if k != 'self' and not k.startswith('_') + } + for k, v in field_values.items(): + parameter_type = parameter_types.get(k, None) + if parameter_type is not None: + try: + verify_object_against_type(v, parameter_type) + except Exception as e: + raise TypeError( + f'Argument for {k} is not compatible with type "{parameter_type}". Exception: {e}' + ) + self.__dict__.update(field_values) + + @classmethod + def from_dict(cls: Type[T], struct: Mapping) -> T: + return parse_object_from_struct_based_on_class_init( + cls, struct, serialized_names=cls._serialized_names) + + def to_dict(self) -> Dict[str, Any]: + return convert_object_to_struct( + self, serialized_names=self._serialized_names) + + def _get_field_names(self): + return list(inspect.signature(self.__init__).parameters) + + def __repr__(self): + return self.__class__.__name__ + '(' + ', '.join( + param + '=' + repr(getattr(self, param)) + for param in self._get_field_names()) + ')' + + def __eq__(self, other): + return self.__class__ == other.__class__ and { + k: getattr(self, k) for k in self._get_field_names() + } == {k: getattr(other, k) for k in other._get_field_names()} + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(repr(self)) diff --git a/sdk/python/kfp/dsl/v1_structures.py b/sdk/python/kfp/dsl/v1_structures.py new file mode 100644 index 0000000000..661cef196f --- /dev/null +++ b/sdk/python/kfp/dsl/v1_structures.py @@ -0,0 +1,851 @@ +# Copyright 2018-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections import OrderedDict +from typing import Any, Dict, List, Mapping, Optional, Union + +from kfp.dsl.v1_modelbase import ModelBase +import yaml + +PrimitiveTypes = Union[str, int, float, bool] +PrimitiveTypesIncludingNone = Optional[PrimitiveTypes] + +TypeSpecType = Union[str, Dict, List] + + +class InputSpec(ModelBase): + """Describes the component input specification.""" + + def __init__( + self, + name: str, + type: Optional[TypeSpecType] = None, + description: Optional[str] = None, + default: Optional[PrimitiveTypes] = None, + optional: Optional[bool] = False, + annotations: Optional[Dict[str, Any]] = None, + ): + super().__init__(locals()) + + +class OutputSpec(ModelBase): + """Describes the component output specification.""" + + def __init__( + self, + name: str, + type: Optional[TypeSpecType] = None, + description: Optional[str] = None, + annotations: Optional[Dict[str, Any]] = None, + ): + super().__init__(locals()) + + +class InputValuePlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by the input argument value.""" + _serialized_names = { + 'input_name': 'inputValue', + } + + def __init__( + self, + input_name: str, + ): + super().__init__(locals()) + + +class InputPathPlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by a local file path pointing to a file containing the input + argument value.""" + _serialized_names = { + 'input_name': 'inputPath', + } + + def __init__( + self, + input_name: str, + ): + super().__init__(locals()) + + +class OutputPathPlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by a local file path pointing to a file where the program + should write its output data.""" + _serialized_names = { + 'output_name': 'outputPath', + } + + def __init__( + self, + output_name: str, + ): + super().__init__(locals()) + + +class InputUriPlaceholder(ModelBase): # Non-standard attr names + """Represents a placeholder for the URI of an input artifact. + + Represents the command-line argument placeholder that will be + replaced at run-time by the URI of the input artifact argument. + """ + _serialized_names = { + 'input_name': 'inputUri', + } + + def __init__( + self, + input_name: str, + ): + super().__init__(locals()) + + +class OutputUriPlaceholder(ModelBase): # Non-standard attr names + """Represents a placeholder for the URI of an output artifact. + + Represents the command-line argument placeholder that will be + replaced at run-time by a URI of the output artifac where the + program should write its output data. + """ + _serialized_names = { + 'output_name': 'outputUri', + } + + def __init__( + self, + output_name: str, + ): + super().__init__(locals()) + + +class InputMetadataPlaceholder(ModelBase): # Non-standard attr names + """Represents the file path to an input artifact metadata. 
+ + During runtime, this command-line argument placeholder will be + replaced by the path where the metadata file associated with this + artifact has been written to. Currently only supported in v2 + components. + """ + _serialized_names = { + 'input_name': 'inputMetadata', + } + + def __init__(self, input_name: str): + super().__init__(locals()) + + +class InputOutputPortNamePlaceholder(ModelBase): # Non-standard attr names + """Represents the output port name of an input artifact. + + During compile time, this command-line argument placeholder will be + replaced by the actual output port name used by the producer task. + Currently only supported in v2 components. + """ + _serialized_names = { + 'input_name': 'inputOutputPortName', + } + + def __init__(self, input_name: str): + super().__init__(locals()) + + +class OutputMetadataPlaceholder(ModelBase): # Non-standard attr names + """Represents the output metadata JSON file location of this task. + + This file will encode the metadata information produced by this task: + - Artifacts metadata, but not the content of the artifact, and + - output parameters. + + Only supported in v2 components. + """ + _serialized_names = { + 'output_metadata': 'outputMetadata', + } + + def __init__(self, output_metadata: type(None) = None): + if output_metadata: + raise RuntimeError( + 'Output metadata placeholder cannot be associated with key') + super().__init__(locals()) + + def to_dict(self) -> Mapping[str, Any]: + # Override parent implementation. Otherwise it always returns {}. + return {'outputMetadata': None} + + +class ExecutorInputPlaceholder(ModelBase): # Non-standard attr names + """Represents the serialized ExecutorInput message at runtime. + + This placeholder will be replaced by a serialized + [ExecutorInput](https://github.com/kubeflow/pipelines/blob/61f9c2c328d245d89c9d9b8c923f24dbbd08cdc9/api/v2alpha1/pipeline_spec.proto#L730) + proto message at runtime, which includes parameters of the task, artifact + URIs and metadata. + """ + _serialized_names = { + 'executor_input': 'executorInput', + } + + def __init__(self, executor_input: type(None) = None): + if executor_input: + raise RuntimeError( + f'Executor input placeholder cannot be associated with input key. Got {executor_input}' + ) + super().__init__(locals()) + + def to_dict(self) -> Mapping[str, Any]: + # Override parent implementation. Otherwise it always returns {}. 
+ return {'executorInput': None} + + +CommandlineArgumentType = Union[str, InputValuePlaceholder, + InputPathPlaceholder, OutputPathPlaceholder, + InputUriPlaceholder, OutputUriPlaceholder, + InputMetadataPlaceholder, + InputOutputPortNamePlaceholder, + OutputMetadataPlaceholder, + ExecutorInputPlaceholder, 'ConcatPlaceholder', + 'IfPlaceholder',] + + +class ConcatPlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by the concatenated values of its items.""" + _serialized_names = { + 'items': 'concat', + } + + def __init__( + self, + items: List[CommandlineArgumentType], + ): + super().__init__(locals()) + + +class IsPresentPlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by a boolean value specifying whether the caller has passed an + argument for the specified optional input.""" + _serialized_names = { + 'input_name': 'isPresent', + } + + def __init__( + self, + input_name: str, + ): + super().__init__(locals()) + + +IfConditionArgumentType = Union[bool, str, IsPresentPlaceholder, + InputValuePlaceholder] + + +class IfPlaceholderStructure(ModelBase): #Non-standard attr names + '''Used in by the IfPlaceholder - the command-line argument placeholder that will be replaced at run-time by the expanded value of either "then_value" or "else_value" depending on the submissio-time resolved value of the "cond" predicate.''' + _serialized_names = { + 'condition': 'cond', + 'then_value': 'then', + 'else_value': 'else', + } + + def __init__( + self, + condition: IfConditionArgumentType, + then_value: Union[CommandlineArgumentType, + List[CommandlineArgumentType]], + else_value: Optional[Union[CommandlineArgumentType, + List[CommandlineArgumentType]]] = None, + ): + super().__init__(locals()) + + +class IfPlaceholder(ModelBase): #Non-standard attr names + """Represents the command-line argument placeholder that will be replaced + at run-time by the expanded value of either "then_value" or "else_value" + depending on the submissio-time resolved value of the "cond" predicate.""" + _serialized_names = { + 'if_structure': 'if', + } + + def __init__( + self, + if_structure: IfPlaceholderStructure, + ): + super().__init__(locals()) + + +class ContainerSpec(ModelBase): + """Describes the container component implementation.""" + _serialized_names = { + 'file_outputs': + 'fileOutputs', #TODO: rename to something like legacy_unconfigurable_output_paths + } + + def __init__( + self, + image: str, + command: Optional[List[CommandlineArgumentType]] = None, + args: Optional[List[CommandlineArgumentType]] = None, + env: Optional[Mapping[str, str]] = None, + file_outputs: + Optional[Mapping[ + str, + str]] = None, #TODO: rename to something like legacy_unconfigurable_output_paths + ): + super().__init__(locals()) + + +class ContainerImplementation(ModelBase): + """Represents the container component implementation.""" + + def __init__( + self, + container: ContainerSpec, + ): + super().__init__(locals()) + + +ImplementationType = Union[ContainerImplementation, 'GraphImplementation'] + + +class MetadataSpec(ModelBase): + + def __init__( + self, + annotations: Optional[Dict[str, str]] = None, + labels: Optional[Dict[str, str]] = None, + ): + super().__init__(locals()) + + +class ComponentSpec(ModelBase): + """Component specification. 
+ + Describes the metadata (name, description, annotations and labels), + the interface (inputs and outputs) and the implementation of the + component. + """ + + def __init__( + self, + name: Optional[str] = None, #? Move to metadata? + description: Optional[str] = None, #? Move to metadata? + metadata: Optional[MetadataSpec] = None, + inputs: Optional[List[InputSpec]] = None, + outputs: Optional[List[OutputSpec]] = None, + implementation: Optional[ImplementationType] = None, + version: Optional[str] = 'google.com/cloud/pipelines/component/v1', + #tags: Optional[Set[str]] = None, + ): + super().__init__(locals()) + self._post_init() + + def _post_init(self): + #Checking input names for uniqueness + self._inputs_dict = {} + if self.inputs: + for input in self.inputs: + if input.name in self._inputs_dict: + raise ValueError(f'Non-unique input name "{input.name}"') + self._inputs_dict[input.name] = input + + #Checking output names for uniqueness + self._outputs_dict = {} + if self.outputs: + for output in self.outputs: + if output.name in self._outputs_dict: + raise ValueError(f'Non-unique output name "{output.name}"') + self._outputs_dict[output.name] = output + + if isinstance(self.implementation, ContainerImplementation): + container = self.implementation.container + + if container.file_outputs: + for output_name, path in container.file_outputs.items(): + if output_name not in self._outputs_dict: + raise TypeError( + 'Unconfigurable output entry "{}" references non-existing output.' + .format({output_name: path})) + + def verify_arg(arg): + if arg is None: + pass + elif isinstance( + arg, (str, int, float, bool, OutputMetadataPlaceholder, + ExecutorInputPlaceholder)): + pass + elif isinstance(arg, list): + for arg2 in arg: + verify_arg(arg2) + elif isinstance( + arg, + (InputUriPlaceholder, InputValuePlaceholder, + InputPathPlaceholder, IsPresentPlaceholder, + InputMetadataPlaceholder, InputOutputPortNamePlaceholder)): + if arg.input_name not in self._inputs_dict: + raise TypeError( + f'Argument "{arg}" references non-existing input.') + elif isinstance(arg, + (OutputUriPlaceholder, OutputPathPlaceholder)): + if arg.output_name not in self._outputs_dict: + raise TypeError( + f'Argument "{arg}" references non-existing output.') + elif isinstance(arg, ConcatPlaceholder): + for arg2 in arg.items: + verify_arg(arg2) + elif isinstance(arg, IfPlaceholder): + verify_arg(arg.if_structure.condition) + verify_arg(arg.if_structure.then_value) + verify_arg(arg.if_structure.else_value) + else: + raise TypeError(f'Unexpected argument "{arg}"') + + verify_arg(container.command) + verify_arg(container.args) + + if isinstance(self.implementation, GraphImplementation): + graph = self.implementation.graph + + if graph.output_values is not None: + for output_name, argument in graph.output_values.items(): + if output_name not in self._outputs_dict: + raise TypeError( + 'Graph output argument entry "{}" references non-existing output.' + .format({output_name: argument})) + + if graph.tasks is not None: + for task in graph.tasks.values(): + if task.arguments is not None: + for argument in task.arguments.values(): + if isinstance( + argument, GraphInputArgument + ) and argument.graph_input.input_name not in self._inputs_dict: + raise TypeError( + f'Argument "{argument}" references non-existing input.' + ) + + def save(self, file_path: str): + """Saves the component definition to file. + + It can be shared online and later loaded using the + load_component function. 
+ """ + + component_yaml = yaml.dump(self.to_dict(), sort_keys=True) + with open(file_path, 'w') as f: + f.write(component_yaml) + + +class ComponentReference(ModelBase): + """Component reference. + + Contains information that can be used to locate and load a component + by name, digest or URL + """ + + def __init__( + self, + name: Optional[str] = None, + digest: Optional[str] = None, + tag: Optional[str] = None, + url: Optional[str] = None, + spec: Optional[ComponentSpec] = None, + ): + super().__init__(locals()) + self._post_init() + + def _post_init(self) -> None: + if not any([self.name, self.digest, self.tag, self.url, self.spec]): + raise TypeError('Need at least one argument.') + + +class GraphInputReference(ModelBase): + """References the input of the graph (the scope is a single graph).""" + _serialized_names = { + 'input_name': 'inputName', + } + + def __init__( + self, + input_name: str, + type: + Optional[ + TypeSpecType] = None, # Can be used to override the reference data type + ): + super().__init__(locals()) + + def as_argument(self) -> 'GraphInputArgument': + return GraphInputArgument(graph_input=self) + + def with_type(self, type_spec: TypeSpecType) -> 'GraphInputReference': + return GraphInputReference( + input_name=self.input_name, + type=type_spec, + ) + + def without_type(self) -> 'GraphInputReference': + return self.with_type(None) + + +class GraphInputArgument(ModelBase): + """Represents the component argument value that comes from the graph + component input.""" + _serialized_names = { + 'graph_input': 'graphInput', + } + + def __init__( + self, + graph_input: GraphInputReference, + ): + super().__init__(locals()) + + +class TaskOutputReference(ModelBase): + """References the output of some task (the scope is a single graph).""" + _serialized_names = { + 'task_id': 'taskId', + 'output_name': 'outputName', + } + + def __init__( + self, + output_name: str, + task_id: + Optional[ + str] = None, # Used for linking to the upstream task in serialized component file. + task: + Optional[ + 'TaskSpec'] = None, # Used for linking to the upstream task in runtime since Task does not have an ID until inserted into a graph. 
+ type: + Optional[ + TypeSpecType] = None, # Can be used to override the reference data type + ): + super().__init__(locals()) + if self.task_id is None and self.task is None: + raise TypeError('task_id and task cannot be None at the same time.') + + def with_type(self, type_spec: TypeSpecType) -> 'TaskOutputReference': + return TaskOutputReference( + output_name=self.output_name, + task_id=self.task_id, + task=self.task, + type=type_spec, + ) + + def without_type(self) -> 'TaskOutputReference': + return self.with_type(None) + + +class TaskOutputArgument(ModelBase + ): #Has additional constructor for convenience + """Represents the component argument value that comes from the output of + another task.""" + _serialized_names = { + 'task_output': 'taskOutput', + } + + def __init__( + self, + task_output: TaskOutputReference, + ): + super().__init__(locals()) + + @staticmethod + def construct( + task_id: str, + output_name: str, + ) -> 'TaskOutputArgument': + return TaskOutputArgument( + TaskOutputReference( + task_id=task_id, + output_name=output_name, + )) + + def with_type(self, type_spec: TypeSpecType) -> 'TaskOutputArgument': + return TaskOutputArgument( + task_output=self.task_output.with_type(type_spec),) + + def without_type(self) -> 'TaskOutputArgument': + return self.with_type(None) + + +ArgumentType = Union[PrimitiveTypes, GraphInputArgument, TaskOutputArgument] + + +class TwoOperands(ModelBase): + + def __init__( + self, + op1: ArgumentType, + op2: ArgumentType, + ): + super().__init__(locals()) + + +class BinaryPredicate(ModelBase): #abstract base type + + def __init__(self, operands: TwoOperands): + super().__init__(locals()) + + +class EqualsPredicate(BinaryPredicate): + """Represents the "equals" comparison predicate.""" + _serialized_names = {'operands': '=='} + + +class NotEqualsPredicate(BinaryPredicate): + """Represents the "not equals" comparison predicate.""" + _serialized_names = {'operands': '!='} + + +class GreaterThanPredicate(BinaryPredicate): + """Represents the "greater than" comparison predicate.""" + _serialized_names = {'operands': '>'} + + +class GreaterThanOrEqualPredicate(BinaryPredicate): + """Represents the "greater than or equal" comparison predicate.""" + _serialized_names = {'operands': '>='} + + +class LessThenPredicate(BinaryPredicate): + """Represents the "less than" comparison predicate.""" + _serialized_names = {'operands': '<'} + + +class LessThenOrEqualPredicate(BinaryPredicate): + """Represents the "less than or equal" comparison predicate.""" + _serialized_names = {'operands': '<='} + + +PredicateType = Union[ArgumentType, EqualsPredicate, NotEqualsPredicate, + GreaterThanPredicate, GreaterThanOrEqualPredicate, + LessThenPredicate, LessThenOrEqualPredicate, + 'NotPredicate', 'AndPredicate', 'OrPredicate',] + + +class TwoBooleanOperands(ModelBase): + + def __init__( + self, + op1: PredicateType, + op2: PredicateType, + ): + super().__init__(locals()) + + +class NotPredicate(ModelBase): + """Represents the "not" logical operation.""" + _serialized_names = {'operand': 'not'} + + def __init__(self, operand: PredicateType): + super().__init__(locals()) + + +class AndPredicate(ModelBase): + """Represents the "and" logical operation.""" + _serialized_names = {'operands': 'and'} + + def __init__(self, operands: TwoBooleanOperands): + super().__init__(locals()) + + +class OrPredicate(ModelBase): + """Represents the "or" logical operation.""" + _serialized_names = {'operands': 'or'} + + def __init__(self, operands: TwoBooleanOperands): + 
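+        # Captured via locals(); ModelBase serializes the operands under the 'or' key per _serialized_names.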
super().__init__(locals()) + + +class RetryStrategySpec(ModelBase): + _serialized_names = { + 'max_retries': 'maxRetries', + } + + def __init__( + self, + max_retries: int, + ): + super().__init__(locals()) + + +class CachingStrategySpec(ModelBase): + _serialized_names = { + 'max_cache_staleness': 'maxCacheStaleness', + } + + def __init__( + self, + max_cache_staleness: Optional[ + str] = None, # RFC3339 compliant duration: P30DT1H22M3S + ): + super().__init__(locals()) + + +class ExecutionOptionsSpec(ModelBase): + _serialized_names = { + 'retry_strategy': 'retryStrategy', + 'caching_strategy': 'cachingStrategy', + } + + def __init__( + self, + retry_strategy: Optional[RetryStrategySpec] = None, + caching_strategy: Optional[CachingStrategySpec] = None, + ): + super().__init__(locals()) + + +class TaskSpec(ModelBase): + """Task specification. + + Task is a "configured" component - a component supplied with arguments and other applied configuration changes. + """ + _serialized_names = { + 'component_ref': 'componentRef', + 'is_enabled': 'isEnabled', + 'execution_options': 'executionOptions' + } + + def __init__( + self, + component_ref: ComponentReference, + arguments: Optional[Mapping[str, ArgumentType]] = None, + is_enabled: Optional[PredicateType] = None, + execution_options: Optional[ExecutionOptionsSpec] = None, + annotations: Optional[Dict[str, Any]] = None, + ): + super().__init__(locals()) + #TODO: If component_ref is resolved to component spec, then check that the arguments correspond to the inputs + + def _init_outputs(self): + #Adding output references to the task + if self.component_ref.spec is None: + return + task_outputs = OrderedDict() + for output in self.component_ref.spec.outputs or []: + task_output_ref = TaskOutputReference( + output_name=output.name, + task=self, + type=output. + type, # TODO: Resolve type expressions. E.g. type: {TypeOf: Input 1} + ) + task_output_arg = TaskOutputArgument(task_output=task_output_ref) + task_outputs[output.name] = task_output_arg + + self.outputs = task_outputs + if len(task_outputs) == 1: + self.output = list(task_outputs.values())[0] + + +class GraphSpec(ModelBase): + """Describes the graph component implementation. + + It represents a graph of component tasks connected to the upstream + sources of data using the argument specifications. It also describes + the sources of graph output values. + """ + _serialized_names = { + 'output_values': 'outputValues', + } + + def __init__( + self, + tasks: Mapping[str, TaskSpec], + output_values: Mapping[str, ArgumentType] = None, + ): + super().__init__(locals()) + self._post_init() + + def _post_init(self): + #Checking task output references and preparing the dependency table + task_dependencies = {} + for task_id, task in self.tasks.items(): + dependencies = set() + task_dependencies[task_id] = dependencies + if task.arguments is not None: + for argument in task.arguments.values(): + if isinstance(argument, TaskOutputArgument): + dependencies.add(argument.task_output.task_id) + if argument.task_output.task_id not in self.tasks: + raise TypeError( + f'Argument "{argument}" references non-existing task.' 
+ ) + + #Topologically sorting tasks to detect cycles + task_dependents = {k: set() for k in task_dependencies.keys()} + for task_id, dependencies in task_dependencies.items(): + for dependency in dependencies: + task_dependents[dependency].add(task_id) + task_number_of_remaining_dependencies = { + k: len(v) for k, v in task_dependencies.items() + } + sorted_tasks = OrderedDict() + + def process_task(task_id): + if task_number_of_remaining_dependencies[ + task_id] == 0 and task_id not in sorted_tasks: + sorted_tasks[task_id] = self.tasks[task_id] + for dependent_task in task_dependents[task_id]: + task_number_of_remaining_dependencies[ + dependent_task] = task_number_of_remaining_dependencies[ + dependent_task] - 1 + process_task(dependent_task) + + for task_id in task_dependencies.keys(): + process_task(task_id) + if len(sorted_tasks) != len(task_dependencies): + tasks_with_unsatisfied_dependencies = { + k: v + for k, v in task_number_of_remaining_dependencies.items() + if v > 0 + } + task_wth_minimal_number_of_unsatisfied_dependencies = min( + tasks_with_unsatisfied_dependencies.keys(), + key=lambda task_id: tasks_with_unsatisfied_dependencies[task_id] + ) + raise ValueError( + f'Task "{task_wth_minimal_number_of_unsatisfied_dependencies}" has cyclical dependency.' + ) + + self._toposorted_tasks = sorted_tasks + + +class GraphImplementation(ModelBase): + """Represents the graph component implementation.""" + + def __init__( + self, + graph: GraphSpec, + ): + super().__init__(locals()) + + +class PipelineRunSpec(ModelBase): + """The object that can be sent to the backend to start a new Run.""" + _serialized_names = { + 'root_task': 'rootTask', + #'on_exit_task': 'onExitTask', + } + + def __init__( + self, + root_task: TaskSpec, + #on_exit_task: Optional[TaskSpec] = None, + ): + super().__init__(locals()) diff --git a/sdk/python/kfp/dsl/yaml_component.py b/sdk/python/kfp/dsl/yaml_component.py new file mode 100644 index 0000000000..187fa533f2 --- /dev/null +++ b/sdk/python/kfp/dsl/yaml_component.py @@ -0,0 +1,54 @@ +# Copyright 2021-2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Component loaded from YAML.""" + +from google.protobuf import json_format +from kfp.dsl import base_component +from kfp.dsl import structures +from kfp.pipeline_spec import pipeline_spec_pb2 + + +class YamlComponent(base_component.BaseComponent): + """A component loaded from a YAML file. + + **Note:** ``YamlComponent`` is not intended to be used to construct components directly. Use ``kfp.components.load_component_from_*()`` instead. + + Attribute: + component_spec: Component definition. + component_yaml: The yaml string that this component is loaded from. 
+ """ + + def __init__( + self, + component_spec: structures.ComponentSpec, + component_yaml: str, + ): + super().__init__(component_spec=component_spec) + self.component_yaml = component_yaml + + @property + def pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec: + """Returns the pipeline spec of the component.""" + component_dict = structures.load_documents_from_yaml( + self.component_yaml)[0] + is_v1 = 'implementation' in set(component_dict.keys()) + if is_v1: + return self.component_spec.to_pipeline_spec() + else: + return json_format.ParseDict(component_dict, + pipeline_spec_pb2.PipelineSpec()) + + def execute(self, *args, **kwargs): + """Not implemented.""" + raise NotImplementedError diff --git a/sdk/python/requirements.in b/sdk/python/requirements.in index 27604e8a18..975568b8b7 100644 --- a/sdk/python/requirements.in +++ b/sdk/python/requirements.in @@ -10,7 +10,6 @@ google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0 google-auth>=1.6.1,<3 # https://github.com/googleapis/python-storage/blob/main/CHANGELOG.md#221-2022-03-15 google-cloud-storage>=2.2.1,<3 -kfp-dsl==2.1.2 # pin kfp-pipeline-spec to an exact version, since this is the contract between a given KFP SDK version and the BE. we don't want old version of the SDK to write new fields and to have the BE reject the new unsupported field (even if the new field backward compatible from a proto perspective) kfp-pipeline-spec==0.2.2 # Update the upper version whenever a new major version of the diff --git a/sdk/python/test_data/components/add_numbers.yaml b/sdk/python/test_data/components/add_numbers.yaml index 3b717513fa..5b5486da36 100644 --- a/sdk/python/test_data/components/add_numbers.yaml +++ b/sdk/python/test_data/components/add_numbers.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/component_with_metadata_fields.yaml b/sdk/python/test_data/components/component_with_metadata_fields.yaml index 039b24833f..61a41867cf 100644 --- a/sdk/python/test_data/components/component_with_metadata_fields.yaml +++ b/sdk/python/test_data/components/component_with_metadata_fields.yaml @@ -48,7 +48,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/component_with_pip_install.yaml b/sdk/python/test_data/components/component_with_pip_install.yaml index 24b66764ce..4e4335a204 100644 --- a/sdk/python/test_data/components/component_with_pip_install.yaml +++ b/sdk/python/test_data/components/component_with_pip_install.yaml @@ -19,7 +19,7 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/components/component_with_task_final_status.yaml b/sdk/python/test_data/components/component_with_task_final_status.yaml index aed58ffea2..ac138f7055 100644 --- a/sdk/python/test_data/components/component_with_task_final_status.yaml +++ b/sdk/python/test_data/components/component_with_task_final_status.yaml @@ -24,7 +24,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/concat_message.yaml b/sdk/python/test_data/components/concat_message.yaml index 8dd970f199..5dc62f9620 100644 --- a/sdk/python/test_data/components/concat_message.yaml +++ b/sdk/python/test_data/components/concat_message.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/dict_input.yaml b/sdk/python/test_data/components/dict_input.yaml index 4a2d083e3d..977103a338 100644 --- a/sdk/python/test_data/components/dict_input.yaml +++ b/sdk/python/test_data/components/dict_input.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/identity.yaml b/sdk/python/test_data/components/identity.yaml index 7ec3ce6ea0..b8a4551a9f 100644 --- a/sdk/python/test_data/components/identity.yaml +++ b/sdk/python/test_data/components/identity.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/input_artifact.yaml b/sdk/python/test_data/components/input_artifact.yaml index 638ad9935b..e029dd8161 100644 --- a/sdk/python/test_data/components/input_artifact.yaml +++ b/sdk/python/test_data/components/input_artifact.yaml @@ -25,7 +25,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/nested_return.yaml b/sdk/python/test_data/components/nested_return.yaml index 705cf55da3..810215dcf3 100644 --- a/sdk/python/test_data/components/nested_return.yaml +++ b/sdk/python/test_data/components/nested_return.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/output_metrics.yaml b/sdk/python/test_data/components/output_metrics.yaml index c093bf9bdc..6a18a32d0b 100644 --- a/sdk/python/test_data/components/output_metrics.yaml +++ b/sdk/python/test_data/components/output_metrics.yaml @@ -27,7 +27,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/components/preprocess.yaml b/sdk/python/test_data/components/preprocess.yaml index 66c6c07f06..03c46dbdac 100644 --- a/sdk/python/test_data/components/preprocess.yaml +++ b/sdk/python/test_data/components/preprocess.yaml @@ -56,7 +56,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index a359b5cc07..f53f6ae05d 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index 79ca63434f..59ebc83433 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -19,7 +19,7 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index 14c23f61ec..5bcf95a08e 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -155,7 +155,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index f12b9d071c..abc9a2995d 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -78,7 +78,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -130,7 +130,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index c696d315c5..b7525f874c 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -108,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -135,7 +135,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -162,7 +162,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml index c193905629..ad5e32ce02 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml @@ -285,7 +285,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -315,7 +315,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -345,7 +345,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -375,7 +375,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -403,7 +403,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml index 28b8a5bb76..55f5c8ae24 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml @@ -90,7 +90,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -136,7 +136,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index 2768287ce7..c2d8aae620 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -158,7 +158,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index 5cb736de7a..af4379d557 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -150,7 +150,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -177,7 +177,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -229,7 +229,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index 422002351c..b76f1ad5b6 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -251,7 +251,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -277,7 +277,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -303,7 +303,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -330,7 +330,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -357,7 +357,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -383,7 +383,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index 05cb6b5a25..9bc16ff5b2 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -75,7 +75,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -111,7 +111,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index 4faf1f4e64..18fc3aa052 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index 1dd3e2eb57..42c88e3a68 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,7 +129,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -156,7 +156,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -183,7 +183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -210,7 +210,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index e37138b305..9c8f5e0993 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index 380de8c413..63ce9aceb0 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -188,7 +188,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index 70ff1cb7f3..ab7d67cac7 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index 9672d77fbc..5eed3984a5 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -88,7 +88,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -116,7 +116,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -144,7 +144,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 85507fd6f5..6443b13909 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -94,7 +94,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index ccdd19d965..789a1e975d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -41,7 +41,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index 9cd106e04b..b1c6091fe2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,7 +65,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -92,7 +92,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -119,7 +119,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index 11291ec534..6753ae29a0 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -57,7 +57,7 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp-dsl==2.0.1' 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -90,7 +90,7 @@ deploymentSpec: - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp-dsl==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index b6e7bfa5d2..a7678237f6 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -127,7 +127,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index 68d26df39d..13999d852c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -198,7 +198,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -250,7 +250,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -276,7 +276,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -302,7 +302,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -328,7 +328,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -354,7 +354,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index a63192501f..fbf6dd967b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,7 +602,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -631,7 +631,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -660,7 +660,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -688,7 +688,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -714,7 +714,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -741,7 +741,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -768,7 +768,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -795,7 +795,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -822,7 +822,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -849,7 +849,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -876,7 +876,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -903,7 +903,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -930,7 +930,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 24e6cd0c44..1aa009e344 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -95,7 +95,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index 3f136829e2..d2091815bf 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -89,7 +89,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index 2777957763..3bbec7526c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,7 +125,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -233,7 +233,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -260,7 +260,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -287,7 +287,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index 5a21d018f5..e81a303531 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,7 +147,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -175,7 +175,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -259,7 +259,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -286,7 +286,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -313,7 +313,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -340,7 +340,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index f92791b406..9b601893ed 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -145,7 +145,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -172,7 +172,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -199,7 +199,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index b35286eeb9..1cba4dd0a2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,7 +104,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -131,7 +131,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index cbcb9c86cc..f1f3a5fa23 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -205,7 +205,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -231,7 +231,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -257,7 +257,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -283,7 +283,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index 39778a63a2..6f31bc7deb 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -128,7 +128,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index ca1787d667..5a313c4ed4 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,7 +55,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -107,7 +107,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -133,7 +133,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -159,7 +159,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index d3b92f441f..34c474435b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,7 +30,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index 86ad841a3d..e53e19ac60 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -99,7 +99,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index e688046946..385cb4a1d4 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,7 +35,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -62,7 +62,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ \ && \"$0\" \"$@\"\n" - sh - -ec diff --git a/test/presubmit-component-yaml.sh b/test/presubmit-component-yaml.sh index 91aad1ba79..62a4f403ed 100755 --- a/test/presubmit-component-yaml.sh +++ b/test/presubmit-component-yaml.sh @@ -16,7 +16,7 @@ source_root=$(pwd) python3 -m pip install --upgrade pip -source sdk/python/install_from_source.sh +python3 -m pip install sdk/python # Test loading all component.yaml definitions "$source_root/components/test_load_all_components.sh" diff --git a/test/presubmit-test-run-all-gcpc-modules.sh b/test/presubmit-test-run-all-gcpc-modules.sh index a8a1e05226..81389e128a 100755 --- a/test/presubmit-test-run-all-gcpc-modules.sh +++ b/test/presubmit-test-run-all-gcpc-modules.sh @@ -16,7 +16,7 @@ source_root=$(pwd) pip install --upgrade pip -source $source_root/sdk/python/install_from_source.sh +pip install $source_root/sdk/python pip install components/google-cloud pip install $(grep 'pytest==' sdk/python/requirements-dev.txt) diff --git a/test/presubmit-test-sdk-upgrade.sh b/test/presubmit-test-sdk-upgrade.sh index 1ed8d923ee..eb93ded09f 100755 --- a/test/presubmit-test-sdk-upgrade.sh +++ b/test/presubmit-test-sdk-upgrade.sh @@ -18,14 +18,14 @@ set -ex python3 -m pip install --upgrade pip python3 -m pip install kfp -LATEST_KFP_SDK_RELEASE=$(pip show kfp | grep "Version:" | awk '{print $2}' | awk '{$1=$1};1') +LATEST_KFP_SDK_RELEASE=$(python3 -m pip show kfp | grep "Version:" | awk '{print $2}' | awk '{$1=$1};1') echo "Installed latest KFP SDK version: $LATEST_KFP_SDK_RELEASE" # install in normal mode, not editable mode, to emulate typical user upgrade behavior -pip3 install sdk/python/kfp-dsl -pip3 install sdk/python -HEAD_KFP_SDK_VERSION=$(pip show kfp | grep "Version:" | awk '{print $2}') +python3 -m pip install sdk/python +# HEAD will only be different than latest for a release PR +HEAD_KFP_SDK_VERSION=$(python3 -m pip show kfp | grep "Version:" | awk '{print $2}') echo "Successfully upgraded to KFP SDK version @ HEAD: $HEAD_KFP_SDK_VERSION" -python -c 'import kfp' 
+python3 -c 'import kfp' echo "Successfully ran 'import kfp' @ HEAD: $HEAD_KFP_SDK_VERSION" diff --git a/test/presubmit-tests-sdk.sh b/test/presubmit-tests-sdk.sh index 215f3e07b0..7ce14e8bb5 100755 --- a/test/presubmit-tests-sdk.sh +++ b/test/presubmit-tests-sdk.sh @@ -24,7 +24,7 @@ python3 -m pip install $(grep 'pytest-xdist==' sdk/python/requirements-dev.txt) python3 -m pip install $(grep 'pytest-cov==' sdk/python/requirements-dev.txt) python3 -m pip install --upgrade protobuf -source $source_root/sdk/python/install_from_source.sh +python3 -m pip install sdk/python # TODO: remove deprecated dependency; then remove --ignore arg pytest sdk/python/kfp --ignore=sdk/python/kfp/deprecated --cov=kfp diff --git a/test/presubmit-tests-tfx.sh b/test/presubmit-tests-tfx.sh index ecf6517711..e26a8f9fa5 100755 --- a/test/presubmit-tests-tfx.sh +++ b/test/presubmit-tests-tfx.sh @@ -30,7 +30,7 @@ pip3 install junit_xml # Using Argo to lint all compiled workflows "${source_root}/test/install-argo-cli.sh" -source $source_root/sdk/python/install_from_source.sh +python3 -m pip install sdk/python # Test against TFX # Compile and setup bazel for compiling the protos From 3e3a747b582abf21d95149502343d8efdfa5fc62 Mon Sep 17 00:00:00 2001 From: Junggil Lee Date: Wed, 16 Aug 2023 23:37:03 +0900 Subject: [PATCH 093/253] fix(samples): Update execution_order, loop_output samples to v2 pipelines (#9867) * Update execution_order, loop_output samples to v2 pipelines * Update execution_order, loop_output samples to v2 pipelines --- .../core/execution_order/execution_order.py | 20 ++++----- .../execution_order/execution_order_test.py | 4 +- samples/core/loop_output/loop_output.py | 20 +++++---- samples/core/loop_output/loop_output_test.py | 13 ++---- samples/core/loop_output/loop_output_v2.py | 41 ------------------- 5 files changed, 29 insertions(+), 69 deletions(-) delete mode 100644 samples/core/loop_output/loop_output_v2.py diff --git a/samples/core/execution_order/execution_order.py b/samples/core/execution_order/execution_order.py index 229364ccba..260ab14ab5 100755 --- a/samples/core/execution_order/execution_order.py +++ b/samples/core/execution_order/execution_order.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2019 The Kubeflow Authors +# Copyright 2019-2023 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,29 +14,29 @@ # limitations under the License. -from kfp.deprecated import dsl, compiler -import kfp.deprecated.components as comp +from kfp import compiler +from kfp import dsl -@comp.create_component_from_func +@dsl.component def echo1_op(text1: str): print(text1) -@comp.create_component_from_func +@dsl.component def echo2_op(text2: str): print(text2) @dsl.pipeline( - name='execution-order-pipeline', - description='A pipeline to demonstrate execution order management.' + name='execution-order-pipeline', + description='A pipeline to demonstrate execution order management.' 
) def execution_order_pipeline(text1: str='message 1', text2: str='message 2'): """A two step pipeline with an explicitly defined execution order.""" - step1_task = echo1_op(text1) - step2_task = echo2_op(text2) + step1_task = echo1_op(text1=text1) + step2_task = echo2_op(text2=text2) step2_task.after(step1_task) if __name__ == '__main__': - compiler.Compiler().compile(execution_order_pipeline, __file__ + '.yaml') \ No newline at end of file + compiler.Compiler().compile(execution_order_pipeline, __file__ + '.yaml') diff --git a/samples/core/execution_order/execution_order_test.py b/samples/core/execution_order/execution_order_test.py index 64da9040c9..cc05c533e0 100644 --- a/samples/core/execution_order/execution_order_test.py +++ b/samples/core/execution_order/execution_order_test.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import kfp.deprecated as kfp +import kfp from kfp.samples.test.utils import TestCase, relative_path, run_pipeline_func run_pipeline_func([ TestCase( pipeline_file=relative_path(__file__, 'execution_order.py'), - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, + mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, ), ]) diff --git a/samples/core/loop_output/loop_output.py b/samples/core/loop_output/loop_output.py index 2ff75e5fa9..b6e6da4863 100644 --- a/samples/core/loop_output/loop_output.py +++ b/samples/core/loop_output/loop_output.py @@ -12,22 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kfp.deprecated import components -from kfp.deprecated import dsl +from kfp import compiler, dsl -@components.create_component_from_func +@dsl.component def args_generator_op() -> str: return '[1.1, 1.2, 1.3]' -@components.create_component_from_func -def print_op(s: float): +# TODO(Bobgy): how can we make this component with type float? 
+# got error: kfp.components.types.type_utils.InconsistentTypeException: +# Incompatible argument passed to the input "s" of component "Print op": Argument +# type "STRING" is incompatible with the input type "NUMBER_DOUBLE" +@dsl.component +def print_op(s: str): print(s) -@dsl.pipeline(name='pipeline-with-loop-output') +@dsl.pipeline(name='pipeline-with-loop-output-v2') def my_pipeline(): args_generator = args_generator_op() with dsl.ParallelFor(args_generator.output) as item: - print_op(item) + print_op(s=item) + +if __name__ == '__main__': + compiler.Compiler().compile(my_pipeline, __file__ + '.yaml') diff --git a/samples/core/loop_output/loop_output_test.py b/samples/core/loop_output/loop_output_test.py index b9b49ae9ca..d2648cdc9f 100644 --- a/samples/core/loop_output/loop_output_test.py +++ b/samples/core/loop_output/loop_output_test.py @@ -15,11 +15,10 @@ from __future__ import annotations import unittest -import kfp.deprecated as kfp +import kfp import kfp_server_api -from ml_metadata.proto import Execution -from .loop_output import my_pipeline -from .loop_output_v2 import my_pipeline as my_pipeline_v2 +from ml_metadata.proto.metadata_store_pb2 import Execution +from loop_output import my_pipeline from kfp.samples.test.utils import KfpTask, run_pipeline_func, TestCase @@ -76,12 +75,8 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun, run_pipeline_func([ TestCase( - pipeline_func=my_pipeline_v2, + pipeline_func=my_pipeline, mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE, verify_func=verify, ), - TestCase( - pipeline_func=my_pipeline, - mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY, - ), ]) diff --git a/samples/core/loop_output/loop_output_v2.py b/samples/core/loop_output/loop_output_v2.py deleted file mode 100644 index c7dd4332d5..0000000000 --- a/samples/core/loop_output/loop_output_v2.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from kfp import dsl - -# In tests, we install a KFP package from the PR under test. Users should not -# normally need to specify `kfp_package_path` in their component definitions. -_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') - - -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def args_generator_op() -> str: - return '[1.1, 1.2, 1.3]' - - -# TODO(Bobgy): how can we make this component with type float? 
-# got error: kfp.components.types.type_utils.InconsistentTypeException: -# Incompatible argument passed to the input "s" of component "Print op": Argument -# type "STRING" is incompatible with the input type "NUMBER_DOUBLE" -@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH) -def print_op(s: str): - print(s) - - -@dsl.pipeline(name='pipeline-with-loop-output-v2') -def my_pipeline(): - args_generator = args_generator_op() - with dsl.ParallelFor(args_generator.output) as item: - print_op(s=item) From f6d077217562a7f29a9da1ca5c8d4089a6fbad9f Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 16 Aug 2023 09:47:04 -0700 Subject: [PATCH 094/253] chore(sdk): remove kfp-dsl source code (#9881) --- sdk/python/kfp-dsl/README.md | 25 - sdk/python/kfp-dsl/build.sh | 30 - sdk/python/kfp-dsl/kfp/dsl/__init__.py | 251 ---- sdk/python/kfp-dsl/kfp/dsl/base_component.py | 148 -- .../kfp-dsl/kfp/dsl/component_decorator.py | 127 -- .../kfp-dsl/kfp/dsl/component_factory.py | 645 -------- sdk/python/kfp-dsl/kfp/dsl/constants.py | 29 - .../container_component_artifact_channel.py | 46 - .../kfp/dsl/container_component_class.py | 40 - .../kfp/dsl/container_component_decorator.py | 53 - sdk/python/kfp-dsl/kfp/dsl/executor.py | 364 ----- sdk/python/kfp-dsl/kfp/dsl/executor_main.py | 105 -- sdk/python/kfp-dsl/kfp/dsl/for_loop.py | 315 ---- sdk/python/kfp-dsl/kfp/dsl/graph_component.py | 95 -- .../kfp-dsl/kfp/dsl/importer_component.py | 30 - sdk/python/kfp-dsl/kfp/dsl/importer_node.py | 145 -- sdk/python/kfp-dsl/kfp/dsl/kfp_config.py | 106 -- .../kfp-dsl/kfp/dsl/pipeline_channel.py | 379 ----- .../kfp-dsl/kfp/dsl/pipeline_context.py | 203 --- sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py | 686 --------- sdk/python/kfp-dsl/kfp/dsl/placeholders.py | 458 ------ .../kfp-dsl/kfp/dsl/python_component.py | 44 - sdk/python/kfp-dsl/kfp/dsl/structures.py | 1049 ------------- .../kfp-dsl/kfp/dsl/task_final_status.py | 55 - sdk/python/kfp-dsl/kfp/dsl/tasks_group.py | 230 --- sdk/python/kfp-dsl/kfp/dsl/types/__init__.py | 13 - .../kfp-dsl/kfp/dsl/types/artifact_types.py | 472 ------ .../kfp/dsl/types/custom_artifact_types.py | 191 --- .../kfp-dsl/kfp/dsl/types/type_annotations.py | 245 --- .../kfp-dsl/kfp/dsl/types/type_utils.py | 558 ------- sdk/python/kfp-dsl/kfp/dsl/utils.py | 128 -- sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py | 379 ----- sdk/python/kfp-dsl/kfp/dsl/v1_structures.py | 839 ----------- sdk/python/kfp-dsl/kfp/dsl/yaml_component.py | 60 - .../execute_commands_args_test.py | 144 -- .../kfp-dsl/runtime_tests/executor_test.py | 1333 ----------------- .../runtime_tests/import_objects_test.py | 21 - .../pipeline_with_task_final_status.py | 58 - .../pipeline_with_task_final_status.yaml | 183 --- sdk/python/kfp-dsl/setup.py | 29 - 40 files changed, 10311 deletions(-) delete mode 100644 sdk/python/kfp-dsl/README.md delete mode 100755 sdk/python/kfp-dsl/build.sh delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/__init__.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/base_component.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/component_decorator.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/component_factory.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/constants.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/container_component_class.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/executor.py delete mode 100644 
sdk/python/kfp-dsl/kfp/dsl/executor_main.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/for_loop.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/graph_component.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/importer_component.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/importer_node.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/kfp_config.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/placeholders.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/python_component.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/structures.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/task_final_status.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/tasks_group.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/types/__init__.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/utils.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/v1_structures.py delete mode 100644 sdk/python/kfp-dsl/kfp/dsl/yaml_component.py delete mode 100644 sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py delete mode 100644 sdk/python/kfp-dsl/runtime_tests/executor_test.py delete mode 100644 sdk/python/kfp-dsl/runtime_tests/import_objects_test.py delete mode 100644 sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py delete mode 100644 sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml delete mode 100644 sdk/python/kfp-dsl/setup.py diff --git a/sdk/python/kfp-dsl/README.md b/sdk/python/kfp-dsl/README.md deleted file mode 100644 index bf898f9757..0000000000 --- a/sdk/python/kfp-dsl/README.md +++ /dev/null @@ -1,25 +0,0 @@ -## kfp-dsl package - -`kfp-dsl` is a subpackage of the KFP SDK that is released separately in order to provide a minimal dependency runtime package for Lightweight Python Components. **`kfp-dsl` should not be installed and used directly.** - -`kfp-dsl` enables the KFP runtime code and objects to be installed at Lightweight Python Component runtime without needing to install the full KFP SDK package. - -### Release -`kfp-dsl` should be released immediately prior to each full `kfp` release. The version of `kfp-dsl` should match the version of `kfp` that depends on it. - -### Development -To develop on `kfp` with a version of `kfp-dsl` built from source, run the following from the repository root: - -```sh -source sdk/python/install_from_source.sh -``` - -**Note:** Modules in the `kfp-dsl` package are only permitted to have *top-level* imports from the Python standard library, the `typing-extensions` package, and the `kfp-dsl` package itself. Imports from other subpackages of the main `kfp` package or its transitive dependencies must be nested within functions to avoid runtime import errors when only `kfp-dsl` is installed. - -### Testing -The `kfp-dsl` code is tested alongside the full KFP SDK in `sdk/python/kfp/dsl-test`. This is because many of the DSL tests require the full KFP SDK to be installed (e.g., requires creating and compiling a component/pipeline). 
- -There are also dedicated `kfp-dsl` tests `./sdk/python/kfp-dsl/runtime_tests/` which test the dedicated runtime code in `kfp-dsl` and should *not* be run with the full KFP SDK installed. Specifically, these tests ensure: -* That KFP runtime logic is correct -* That `kfp-dsl` specifies all of its dependencies (i.e., no module not found errors from missing `kfp-dsl` dependencies) -* That `kfp-dsl` dependencies on the main `kfp` package have associated imports nested inside function calls (i.e., no module not found errors from missing `kfp` dependencies) diff --git a/sdk/python/kfp-dsl/build.sh b/sdk/python/kfp-dsl/build.sh deleted file mode 100755 index 6ec5cc49c4..0000000000 --- a/sdk/python/kfp-dsl/build.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -ex -# -# Copyright 2018 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# The scripts creates the Kubeflow Pipelines python SDK package. -# -# Usage: -# ./build.sh [output_file] - - -target_archive_file=$1 - -pushd "$(dirname "$0")" -dist_dir=$(mktemp -d) -python3 setup.py sdist --format=gztar --dist-dir "$dist_dir" -cp "$dist_dir"/*.tar.gz "$target_archive_file" -popd diff --git a/sdk/python/kfp-dsl/kfp/dsl/__init__.py b/sdk/python/kfp-dsl/kfp/dsl/__init__.py deleted file mode 100644 index e8f89b6254..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/__init__.py +++ /dev/null @@ -1,251 +0,0 @@ -"""The `kfp.dsl` module contains domain-specific language objects used to -compose pipelines.""" -# Copyright 2020 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = [ - 'component', - 'container_component', - 'pipeline', - 'importer', - 'ContainerSpec', - 'Condition', - 'ExitHandler', - 'ParallelFor', - 'Collected', - 'Input', - 'Output', - 'InputPath', - 'OutputPath', - 'IfPresentPlaceholder', - 'ConcatPlaceholder', - 'PipelineTaskFinalStatus', - 'PIPELINE_JOB_NAME_PLACEHOLDER', - 'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER', - 'PIPELINE_JOB_ID_PLACEHOLDER', - 'PIPELINE_TASK_NAME_PLACEHOLDER', - 'PIPELINE_TASK_ID_PLACEHOLDER', - 'PIPELINE_ROOT_PLACEHOLDER', - 'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER', - 'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER', - 'Artifact', - 'ClassificationMetrics', - 'Dataset', - 'HTML', - 'Markdown', - 'Metrics', - 'Model', - 'SlicedClassificationMetrics', - 'PipelineTask', -] - -_kfp_dsl_import_error_msg = 'It looks like only `kfp-dsl` is installed. Please install the full KFP SDK using `pip install kfp`.' 
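A minimal sketch of the nested-import pattern that the removed README above requires (the helper name here is hypothetical and not part of this patch): only standard-library, `typing-extensions`, and `kfp-dsl` imports may sit at module level, while anything that needs the full `kfp` package is imported inside the function that uses it, so an environment with only `kfp-dsl` installed can still import the module.

    import json  # stdlib import: allowed at module level under the README's rule

    def compile_component(component, output_path: str):
        # Nested import: evaluated only when this code path runs, so the
        # failure surfaces lazily with the message defined in
        # `_kfp_dsl_import_error_msg` above rather than at module load time.
        try:
            from kfp import compiler  # requires the full KFP SDK
        except ImportError as e:
            raise ImportError(_kfp_dsl_import_error_msg) from e
        compiler.Compiler().compile(component, output_path)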
- -try: - from typing import Annotated -except ImportError: - from typing_extensions import Annotated - -from typing import TypeVar - -from kfp.dsl.component_decorator import component -from kfp.dsl.container_component_decorator import container_component -from kfp.dsl.for_loop import Collected -from kfp.dsl.importer_node import importer -from kfp.dsl.pipeline_context import pipeline -from kfp.dsl.pipeline_task import PipelineTask -from kfp.dsl.placeholders import ConcatPlaceholder -from kfp.dsl.placeholders import IfPresentPlaceholder -from kfp.dsl.structures import ContainerSpec -from kfp.dsl.task_final_status import PipelineTaskFinalStatus -from kfp.dsl.tasks_group import Condition -from kfp.dsl.tasks_group import ExitHandler -from kfp.dsl.tasks_group import ParallelFor -from kfp.dsl.types.artifact_types import Artifact -from kfp.dsl.types.artifact_types import ClassificationMetrics -from kfp.dsl.types.artifact_types import Dataset -from kfp.dsl.types.artifact_types import HTML -from kfp.dsl.types.artifact_types import Markdown -from kfp.dsl.types.artifact_types import Metrics -from kfp.dsl.types.artifact_types import Model -from kfp.dsl.types.artifact_types import SlicedClassificationMetrics -from kfp.dsl.types.type_annotations import InputAnnotation -from kfp.dsl.types.type_annotations import InputPath -from kfp.dsl.types.type_annotations import OutputAnnotation -from kfp.dsl.types.type_annotations import OutputPath - -# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py - -PIPELINE_JOB_NAME_PLACEHOLDER = '{{$.pipeline_job_name}}' -"""A placeholder used to obtain a pipeline job name within a task at pipeline runtime. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Job name:', - value=dsl.PIPELINE_JOB_NAME_PLACEHOLDER, - ) -""" - -PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER = '{{$.pipeline_job_resource_name}}' -"""A placeholder used to obtain a pipeline job resource name within a task at pipeline runtime. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Job resource name:', - value=dsl.PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER, - ) -""" - -PIPELINE_JOB_ID_PLACEHOLDER = '{{$.pipeline_job_uuid}}' -"""A placeholder used to obtain a pipeline job ID within a task at pipeline runtime. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Job ID:', - value=dsl.PIPELINE_JOB_ID_PLACEHOLDER, - ) -""" - -PIPELINE_TASK_NAME_PLACEHOLDER = '{{$.pipeline_task_name}}' -"""A placeholder used to obtain a task name within a task at pipeline runtime. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Task name:', - value=dsl.PIPELINE_TASK_NAME_PLACEHOLDER, - ) -""" - -PIPELINE_TASK_ID_PLACEHOLDER = '{{$.pipeline_task_uuid}}' -"""A placeholder used to obtain a task ID within a task at pipeline runtime. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Task ID:', - value=dsl.PIPELINE_TASK_ID_PLACEHOLDER, - ) -""" - -PIPELINE_ROOT_PLACEHOLDER = '{{$.pipeline_root}}' -"""A placeholder used to obtain the pipeline root. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - store_model( - tmp_dir=dsl.PIPELINE_ROOT_PLACEHOLDER+'/tmp', - ) -""" - -PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER = '{{$.pipeline_job_create_time_utc}}' -"""A placeholder used to obtain the time that a pipeline job was created. 
- - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Job created at:', - value=dsl.PIPELINE_JOB_CREATE_TIME_UTC, - ) -""" -PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER = '{{$.pipeline_job_schedule_time_utc}}' -"""A placeholder used to obtain the time for which a pipeline job is scheduled. - - Example: - :: - - @dsl.pipeline - def my_pipeline(): - print_op( - msg='Job scheduled at:', - value=dsl.PIPELINE_JOB_SCHEDULE_TIME_UTC, - ) -""" - -T = TypeVar('T') -Input = Annotated[T, InputAnnotation] -"""Type generic used to represent an input artifact of type ``T``, where ``T`` is an artifact class. - -Use ``Input[Artifact]`` or ``Output[Artifact]`` to indicate whether the enclosed artifact is a component input or output. - -Args: - T: The type of the input artifact. - -Example: - :: - - @dsl.component - def artifact_producer(model: Output[Artifact]): - with open(model.path, 'w') as f: - f.write('my model') - - @dsl.component - def artifact_consumer(model: Input[Artifact]): - print(model) - - @dsl.pipeline - def my_pipeline(): - producer_task = artifact_producer() - artifact_consumer(model=producer_task.output) -""" - -Output = Annotated[T, OutputAnnotation] -"""A type generic used to represent an output artifact of type ``T``, where ``T`` is an artifact class. The argument typed with this annotation is provided at runtime by the executing backend and does not need to be passed as an input by the pipeline author (see example). - -Use ``Input[Artifact]`` or ``Output[Artifact]`` to indicate whether the enclosed artifact is a component input or output. - -Args: - T: The type of the output artifact. - -Example: - :: - - @dsl.component - def artifact_producer(model: Output[Artifact]): - with open(model.path, 'w') as f: - f.write('my model') - - @dsl.component - def artifact_consumer(model: Input[Artifact]): - print(model) - - @dsl.pipeline - def my_pipeline(): - producer_task = artifact_producer() - artifact_consumer(model=producer_task.output) -""" diff --git a/sdk/python/kfp-dsl/kfp/dsl/base_component.py b/sdk/python/kfp-dsl/kfp/dsl/base_component.py deleted file mode 100644 index 1e8e561b2c..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/base_component.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Base class for KFP components.""" - -import abc -from typing import List - -from kfp.dsl import pipeline_task -from kfp.dsl import structures -from kfp.dsl.types import type_utils - - -class BaseComponent(abc.ABC): - """Base class for a component. - - **Note:** ``BaseComponent`` is not intended to be used to construct components directly. Use ``@kfp.dsl.component`` or ``kfp.components.load_component_from_*()`` instead. - - Attributes: - name: Name of the component. - component_spec: Component definition. - """ - - def __init__(self, component_spec: structures.ComponentSpec): - """Init function for BaseComponent. - - Args: - component_spec: The component definition. 
- """ - self.component_spec = component_spec - self.name = component_spec.name - self.description = component_spec.description or None - - # Arguments typed as PipelineTaskFinalStatus are special arguments that - # do not count as user inputs. Instead, they are reserved to for the - # (backend) system to pass a value. - self._component_inputs = { - input_name for input_name, input_spec in ( - self.component_spec.inputs or {}).items() - if not type_utils.is_task_final_status_type(input_spec.type) - } - - def _prevent_using_output_lists_of_artifacts(self): - """This method should be called at the end of __init__ for - PythonComponent and ContainerComponent subclasses to temporarily block - outputting lists of artifacts from a component.""" - # TODO: remove when output lists of artifacts from primitive components is supported - for output_name, output_spec in (self.component_spec.outputs or - {}).items(): - if output_spec.is_artifact_list: - raise ValueError( - f'Output lists of artifacts are only supported for pipelines. Got output list of artifacts for output parameter {output_name!r} of component {self.name!r}.' - ) - - def __call__(self, *args, **kwargs) -> pipeline_task.PipelineTask: - """Creates a PipelineTask object. - - The arguments are generated on the fly based on component input - definitions. - """ - task_inputs = {} - - if args: - raise TypeError( - 'Components must be instantiated using keyword arguments. Positional ' - f'parameters are not allowed (found {len(args)} such parameters for ' - f'component "{self.name}").') - - for k, v in kwargs.items(): - if k not in self._component_inputs: - raise TypeError( - f'{self.name}() got an unexpected keyword argument "{k}".') - task_inputs[k] = v - - # Skip optional inputs and arguments typed as PipelineTaskFinalStatus. - missing_arguments = [ - arg for arg in self.required_inputs if arg not in kwargs - ] - if missing_arguments: - argument_or_arguments = 'argument' if len( - missing_arguments) == 1 else 'arguments' - arguments = ', '.join( - arg_name.replace('-', '_') for arg_name in missing_arguments) - - raise TypeError( - f'{self.name}() missing {len(missing_arguments)} required ' - f'{argument_or_arguments}: {arguments}.') - - return pipeline_task.PipelineTask( - component_spec=self.component_spec, - args=task_inputs, - ) - - @property - def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': - """Returns the pipeline spec of the component.""" - with BlockPipelineTaskRegistration(): - return self.component_spec.to_pipeline_spec() - - @property - def platform_spec(self) -> 'pipeline_spec_pb2.PlatformSpec': - """Returns the PlatformSpec of the component. - - Useful when the component is a GraphComponent, else will be - empty per component_spec.platform_spec default. - """ - return self.component_spec.platform_spec - - @abc.abstractmethod - def execute(self, **kwargs): - """Executes the component locally if implemented by the inheriting - subclass.""" - - @property - def required_inputs(self) -> List[str]: - return [ - input_name for input_name, input_spec in ( - self.component_spec.inputs or {}).items() - if not input_spec.optional - ] - - -class BlockPipelineTaskRegistration: - """Temporarily stop registering tasks to the default pipeline. - - Handles special, uncommon functions that decorate and mutate a - component, possibly by using the component's .pipeline_spec - attribute. This is exhibited in the version of - google_cloud_pipeline_components compatible with KFP SDK v2. 
- """ - - # TODO: this handles the special case of a compiled component (when compiled inside a pipeline), which should not have any concept of a default pipeline. Perhaps there is a way to unify component/pipeline compilation concepts to remove this workaround? - - def __enter__(self): - self.task_handler, pipeline_task.PipelineTask._register_task_handler = pipeline_task.PipelineTask._register_task_handler, pipeline_task._register_task_handler - - def __exit__(self, *args): - pipeline_task.PipelineTask._register_task_handler = self.task_handler diff --git a/sdk/python/kfp-dsl/kfp/dsl/component_decorator.py b/sdk/python/kfp-dsl/kfp/dsl/component_decorator.py deleted file mode 100644 index 7c6589589d..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/component_decorator.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import functools -from typing import Callable, List, Optional -import warnings - -from kfp.dsl import component_factory - - -def component(func: Optional[Callable] = None, - *, - base_image: Optional[str] = None, - target_image: Optional[str] = None, - packages_to_install: List[str] = None, - pip_index_urls: Optional[List[str]] = None, - output_component_file: Optional[str] = None, - install_kfp_package: bool = True, - kfp_package_path: Optional[str] = None): - """Decorator for Python-function based components. - - A KFP component can either be a lightweight component or a containerized - component. - - If ``target_image`` is not specified, this function creates a lightweight - component. A lightweight component is a self-contained Python function that - includes all necessary imports and dependencies. In lightweight components, - ``packages_to_install`` will be used to install dependencies at runtime. The - parameters ``install_kfp_package`` and ``kfp_package_path`` can be used to control - how and from where KFP should be installed when the lightweight component is executed. - - If ``target_image`` is specified, this function creates a component definition - based around the ``target_image``. The assumption is that the function in ``func`` - will be packaged by KFP into this ``target_image``. You can use the KFP CLI's ``build`` - command to package the function into ``target_image``. - - Args: - func: Python function from which to create a component. The function - should have type annotations for all its arguments, indicating how - each argument is intended to be used (e.g. as an input/output artifact, - a plain parameter, or a path to a file). - base_image: Image to use when executing the Python function. It should - contain a default Python interpreter that is compatible with KFP. - target_image: Image to when creating containerized components. - packages_to_install: List of packages to install before - executing the Python function. These will always be installed at component runtime. - pip_index_urls: Python Package Index base URLs from which to - install ``packages_to_install``. 
Defaults to installing from only PyPI - (``'https://pypi.org/simple'``). For more information, see `pip install docs `_. - output_component_file: If specified, this function will write a - shareable/loadable version of the component spec into this file. - - **Warning:** This compilation approach is deprecated. - install_kfp_package: Specifies if the KFP SDK should add the ``kfp`` Python package to - ``packages_to_install``. Lightweight Python functions always require - an installation of KFP in ``base_image`` to work. If you specify - a ``base_image`` that already contains KFP, you can set this to ``False``. - This flag is ignored when ``target_image`` is specified, which implies - a choice to build a containerized component. Containerized components - will always install KFP as part of the build process. - kfp_package_path: Specifies the location from which to install KFP. By - default, this will try to install from PyPI using the same version - as that used when this component was created. Component authors can - choose to override this to point to a GitHub pull request or - other pip-compatible package server. - - Returns: - A component task factory that can be used in pipeline definitions. - - Example: - :: - - from kfp import dsl - - @dsl.component - def my_function_one(input: str, output: Output[Model]): - ... - - @dsl.component( - base_image='python:3.9', - output_component_file='my_function.yaml' - ) - def my_function_two(input: Input[Mode])): - ... - - @dsl.pipeline(name='my-pipeline', pipeline_root='...') - def pipeline(): - my_function_one_task = my_function_one(input=...) - my_function_two_task = my_function_two(input=my_function_one_task.outputs) - """ - if output_component_file is not None: - warnings.warn( - 'output_component_file parameter is deprecated and will eventually be removed. Please use `Compiler().compile()` to compile a component instead.', - DeprecationWarning, - stacklevel=2) - - if func is None: - return functools.partial( - component, - base_image=base_image, - target_image=target_image, - packages_to_install=packages_to_install, - pip_index_urls=pip_index_urls, - output_component_file=output_component_file, - install_kfp_package=install_kfp_package, - kfp_package_path=kfp_package_path) - - return component_factory.create_component_from_func( - func, - base_image=base_image, - target_image=target_image, - packages_to_install=packages_to_install, - pip_index_urls=pip_index_urls, - output_component_file=output_component_file, - install_kfp_package=install_kfp_package, - kfp_package_path=kfp_package_path) diff --git a/sdk/python/kfp-dsl/kfp/dsl/component_factory.py b/sdk/python/kfp-dsl/kfp/dsl/component_factory.py deleted file mode 100644 index 9c0d1dac51..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/component_factory.py +++ /dev/null @@ -1,645 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
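The decorator docstring above describes `packages_to_install` and `pip_index_urls`, but its example does not exercise them; a minimal usage sketch, assuming a hypothetical package pin and a hypothetical private index URL, might look like:

    from kfp import dsl

    @dsl.component(
        base_image='python:3.9',
        packages_to_install=['pandas==2.0.3'],  # installed at component runtime
        pip_index_urls=['https://pypi.example.com/simple'],  # used as --index-url instead of the PyPI default
    )
    def count_rows(path: str) -> int:
        import pandas as pd  # available because of packages_to_install
        return len(pd.read_csv(path))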
-import dataclasses -import inspect -import itertools -import pathlib -import re -import textwrap -from typing import Callable, List, Mapping, Optional, Tuple, Type, Union -import warnings - -from kfp import dsl -from kfp.dsl import container_component_artifact_channel -from kfp.dsl import container_component_class -from kfp.dsl import graph_component -from kfp.dsl import placeholders -from kfp.dsl import python_component -from kfp.dsl import structures -from kfp.dsl import task_final_status -from kfp.dsl.types import artifact_types -from kfp.dsl.types import custom_artifact_types -from kfp.dsl.types import type_annotations -from kfp.dsl.types import type_utils - -_DEFAULT_BASE_IMAGE = 'python:3.7' - - -@dataclasses.dataclass -class ComponentInfo(): - """A dataclass capturing registered components. - - This will likely be subsumed/augmented with BaseComponent. - """ - name: str - function_name: str - func: Callable - target_image: str - module_path: pathlib.Path - component_spec: structures.ComponentSpec - output_component_file: Optional[str] = None - base_image: str = _DEFAULT_BASE_IMAGE - packages_to_install: Optional[List[str]] = None - pip_index_urls: Optional[List[str]] = None - - -# A map from function_name to components. This is always populated when a -# module containing KFP components is loaded. Primarily used by KFP CLI -# component builder to package components in a file into containers. -REGISTERED_MODULES = None - - -def _python_function_name_to_component_name(name): - name_with_spaces = re.sub(' +', ' ', name.replace('_', ' ')).strip(' ') - return name_with_spaces[0].upper() + name_with_spaces[1:] - - -def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str: - """Generates index url options for pip install command based on provided - pip_index_urls. - - Args: - pip_index_urls: Optional list of pip index urls - - Returns: - - Empty string if pip_index_urls is empty/None. - - '--index-url url --trusted-host url ' if pip_index_urls contains 1 - url - - the above followed by '--extra-index-url url --trusted-host url ' - for - each next url in pip_index_urls if pip_index_urls contains more than 1 - url - - Note: In case pip_index_urls is not empty, the returned string will - contain space at the end. - """ - if not pip_index_urls: - return '' - - index_url = pip_index_urls[0] - extra_index_urls = pip_index_urls[1:] - - options = [f'--index-url {index_url} --trusted-host {index_url}'] - options.extend( - f'--extra-index-url {extra_index_url} --trusted-host {extra_index_url}' - for extra_index_url in extra_index_urls) - - return ' '.join(options) + ' ' - - -_install_python_packages_script_template = ''' -if ! 
[ -x "$(command -v pip)" ]; then - python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip -fi - -PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \ - --no-warn-script-location {index_url_options}{concat_package_list} && "$0" "$@" -''' - - -def _get_packages_to_install_command( - package_list: Optional[List[str]] = None, - pip_index_urls: Optional[List[str]] = None) -> List[str]: - - if not package_list: - return [] - - concat_package_list = ' '.join( - [repr(str(package)) for package in package_list]) - index_url_options = make_index_url_options(pip_index_urls) - install_python_packages_script = _install_python_packages_script_template.format( - index_url_options=index_url_options, - concat_package_list=concat_package_list) - return ['sh', '-c', install_python_packages_script] - - -def _get_kfp_dsl_requirement() -> str: - import kfp - return f'kfp-dsl=={kfp.__version__}' - - -def _get_function_source_definition(func: Callable) -> str: - func_code = inspect.getsource(func) - - # Function might be defined in some indented scope (e.g. in another - # function). We need to handle this and properly dedent the function source - # code - func_code = textwrap.dedent(func_code) - func_code_lines = func_code.split('\n') - - # Removing possible decorators (can be multiline) until the function - # definition is found - func_code_lines = itertools.dropwhile(lambda x: not x.startswith('def'), - func_code_lines) - - if not func_code_lines: - raise ValueError( - f'Failed to dedent and clean up the source of function "{func.__name__}". It is probably not properly indented.' - ) - - return '\n'.join(func_code_lines) - - -def _maybe_make_unique(name: str, names: List[str]): - if name not in names: - return name - - for i in range(2, 100): - unique_name = f'{name}_{i}' - if unique_name not in names: - return unique_name - - raise RuntimeError(f'Too many arguments with the name {name}') - - -def extract_component_interface( - func: Callable, - containerized: bool = False, - description: Optional[str] = None, - name: Optional[str] = None, -) -> structures.ComponentSpec: - single_output_name_const = 'Output' - - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - - original_docstring = inspect.getdoc(func) - - try: - import docstring_parser - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - parsed_docstring = docstring_parser.parse(original_docstring) - - inputs = {} - outputs = {} - - input_names = set() - output_names = set() - for parameter in parameters: - parameter_type = type_annotations.maybe_strip_optional_from_annotation( - parameter.annotation) - passing_style = None - io_name = parameter.name - is_artifact_list = False - - if type_annotations.is_Input_Output_artifact_annotation(parameter_type): - # passing_style is either type_annotations.InputAnnotation or - # type_annotations.OutputAnnotation. 
- passing_style = type_annotations.get_io_artifact_annotation( - parameter_type) - - # parameter_type is a type like typing_extensions.Annotated[kfp.dsl.types.artifact_types.Artifact, ] OR typing_extensions.Annotated[typing.List[kfp.dsl.types.artifact_types.Artifact], ] - - is_artifact_list = type_annotations.is_list_of_artifacts( - parameter_type.__origin__) - - parameter_type = type_annotations.get_io_artifact_class( - parameter_type) - if not type_annotations.is_artifact_class(parameter_type): - raise ValueError( - f'Input[T] and Output[T] are only supported when T is an artifact or list of artifacts. Found `{io_name} with type {parameter_type}`' - ) - - if parameter.default is not inspect.Parameter.empty: - if passing_style in [ - type_annotations.OutputAnnotation, - type_annotations.OutputPath, - ]: - raise ValueError( - 'Default values for Output artifacts are not supported.' - ) - elif parameter.default is not None: - raise ValueError( - f'Optional Input artifacts may only have default value None. Got: {parameter.default}.' - ) - - elif isinstance( - parameter_type, - (type_annotations.InputPath, type_annotations.OutputPath)): - passing_style = type(parameter_type) - parameter_type = parameter_type.type - if parameter.default is not inspect.Parameter.empty and not ( - passing_style == type_annotations.InputPath and - parameter.default is None): - raise ValueError( - 'Path inputs only support default values of None. Default' - ' values for outputs are not supported.') - - type_struct = type_utils._annotation_to_type_struct(parameter_type) - if type_struct is None: - raise TypeError( - f'Missing type annotation for argument: {parameter.name}') - - if passing_style in [ - type_annotations.OutputAnnotation, type_annotations.OutputPath - ]: - if io_name == single_output_name_const: - raise ValueError( - f'"{single_output_name_const}" is an invalid parameter name.' - ) - io_name = _maybe_make_unique(io_name, output_names) - output_names.add(io_name) - if type_annotations.is_artifact_class(parameter_type): - schema_version = parameter_type.schema_version - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - type_struct, schema_version), - is_artifact_list=is_artifact_list) - else: - output_spec = structures.OutputSpec(type=type_struct) - outputs[io_name] = output_spec - else: - io_name = _maybe_make_unique(io_name, input_names) - input_names.add(io_name) - type_ = type_utils.create_bundled_artifact_type( - type_struct, parameter_type.schema_version - ) if type_annotations.is_artifact_class( - parameter_type) else type_struct - default = None if parameter.default == inspect.Parameter.empty or type_annotations.is_artifact_class( - parameter_type) else parameter.default - optional = parameter.default is not inspect.Parameter.empty or type_utils.is_task_final_status_type( - type_struct) - input_spec = structures.InputSpec( - type=type_, - default=default, - optional=optional, - is_artifact_list=is_artifact_list, - ) - - inputs[io_name] = input_spec - - #Analyzing the return type annotations. - return_ann = signature.return_annotation - if not containerized: - if hasattr(return_ann, '_fields'): #NamedTuple - # Getting field type annotations. 
- # __annotations__ does not exist in python 3.5 and earlier - # _field_types does not exist in python 3.9 and later - field_annotations = getattr(return_ann, '__annotations__', - None) or getattr( - return_ann, '_field_types', None) - for field_name in return_ann._fields: - output_name = _maybe_make_unique(field_name, output_names) - output_names.add(output_name) - type_var = field_annotations.get(field_name) - if type_annotations.is_list_of_artifacts(type_var): - artifact_cls = type_var.__args__[0] - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - artifact_cls.schema_title, - artifact_cls.schema_version), - is_artifact_list=True) - elif type_annotations.is_artifact_class(type_var): - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - type_var.schema_title, type_var.schema_version)) - else: - type_struct = type_utils._annotation_to_type_struct( - type_var) - output_spec = structures.OutputSpec(type=type_struct) - outputs[output_name] = output_spec - # Deprecated dict-based way of declaring multiple outputs. Was only used by - # the @component decorator - elif isinstance(return_ann, dict): - warnings.warn( - 'The ability to specify multiple outputs using the dict syntax' - ' has been deprecated. It will be removed soon after release' - ' 0.1.32. Please use typing.NamedTuple to declare multiple' - ' outputs.') - for output_name, output_type_annotation in return_ann.items(): - output_type_struct = type_utils._annotation_to_type_struct( - output_type_annotation) - output_spec = structures.OutputSpec(type=output_type_struct) - outputs[name] = output_spec - elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty: - output_name = _maybe_make_unique(single_output_name_const, - output_names) - # Fixes exotic, but possible collision: - # `def func(output_path: OutputPath()) -> str: ...` - output_names.add(output_name) - return_ann = signature.return_annotation - if type_annotations.is_list_of_artifacts(return_ann): - artifact_cls = return_ann.__args__[0] - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - artifact_cls.schema_title, artifact_cls.schema_version), - is_artifact_list=True) - elif type_annotations.is_artifact_class(return_ann): - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - return_ann.schema_title, return_ann.schema_version), - is_artifact_list=False) - else: - type_struct = type_utils._annotation_to_type_struct(return_ann) - output_spec = structures.OutputSpec(type=type_struct) - - outputs[output_name] = output_spec - elif return_ann != inspect.Parameter.empty and return_ann != structures.ContainerSpec: - raise TypeError( - 'Return annotation should be either ContainerSpec or omitted for container components.' 
- ) - - component_name = name or _python_function_name_to_component_name( - func.__name__) - - def assign_descriptions( - inputs_or_outputs: Mapping[str, Union[structures.InputSpec, - structures.OutputSpec]], - docstring_params: List[docstring_parser.DocstringParam], - ) -> None: - """Assigns descriptions to InputSpec or OutputSpec for each component - input/output found in the parsed docstring parameters.""" - docstring_inputs = {param.arg_name: param for param in docstring_params} - for name, spec in inputs_or_outputs.items(): - if name in docstring_inputs: - spec.description = docstring_inputs[name].description - - def parse_docstring_with_return_as_args( - docstring: Union[str, - None]) -> Union[docstring_parser.Docstring, None]: - """Modifies docstring so that a return section can be treated as an - args section, then parses the docstring.""" - if docstring is None: - return None - - # Returns and Return are the only two keywords docstring_parser uses for returns - # use newline to avoid replacements that aren't in the return section header - return_keywords = ['Returns:\n', 'Returns\n', 'Return:\n', 'Return\n'] - for keyword in return_keywords: - if keyword in docstring: - modified_docstring = docstring.replace(keyword.strip(), 'Args:') - return docstring_parser.parse(modified_docstring) - - return None - - assign_descriptions(inputs, parsed_docstring.params) - - modified_parsed_docstring = parse_docstring_with_return_as_args( - original_docstring) - if modified_parsed_docstring is not None: - assign_descriptions(outputs, modified_parsed_docstring.params) - - description = get_pipeline_description( - decorator_description=description, - docstring=parsed_docstring, - ) - - return structures.ComponentSpec( - name=component_name, - description=description, - inputs=inputs or None, - outputs=outputs or None, - implementation=structures.Implementation(), - ) - - -def _get_command_and_args_for_lightweight_component( - func: Callable) -> Tuple[List[str], List[str]]: - imports_source = [ - 'import kfp', - 'from kfp import dsl', - 'from kfp.dsl import *', - 'from typing import *', - ] + custom_artifact_types.get_custom_artifact_type_import_statements(func) - - func_source = _get_function_source_definition(func) - source = textwrap.dedent(''' - {imports_source} - - {func_source}\n''').format( - imports_source='\n'.join(imports_source), func_source=func_source) - command = [ - 'sh', - '-ec', - textwrap.dedent('''\ - program_path=$(mktemp -d) - printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main \ - --component_module_path \ - "$program_path/ephemeral_component.py" \ - "$@" - '''), - source, - ] - - args = [ - '--executor_input', - placeholders.ExecutorInputPlaceholder(), - '--function_to_execute', - func.__name__, - ] - - return command, args - - -def _get_command_and_args_for_containerized_component( - function_name: str) -> Tuple[List[str], List[str]]: - command = [ - 'python3', - '-m', - 'kfp.dsl.executor_main', - ] - - args = [ - '--executor_input', - placeholders.ExecutorInputPlaceholder()._to_string(), - '--function_to_execute', - function_name, - ] - return command, args - - -def create_component_from_func( - func: Callable, - base_image: Optional[str] = None, - target_image: Optional[str] = None, - packages_to_install: List[str] = None, - pip_index_urls: Optional[List[str]] = None, - output_component_file: Optional[str] = None, - install_kfp_package: bool = True, - kfp_package_path: Optional[str] = None, -) -> python_component.PythonComponent: - 
"""Implementation for the @component decorator. - - The decorator is defined under component_decorator.py. See the - decorator for the canonical documentation for this function. - """ - packages_to_install = packages_to_install or [] - - if install_kfp_package and target_image is None: - if kfp_package_path is None: - kfp_package_path = _get_kfp_dsl_requirement() - packages_to_install.append(kfp_package_path) - - packages_to_install_command = _get_packages_to_install_command( - package_list=packages_to_install, pip_index_urls=pip_index_urls) - - command = [] - args = [] - if base_image is None: - base_image = _DEFAULT_BASE_IMAGE - - component_image = base_image - - if target_image: - component_image = target_image - command, args = _get_command_and_args_for_containerized_component( - function_name=func.__name__,) - else: - command, args = _get_command_and_args_for_lightweight_component( - func=func) - - component_spec = extract_component_interface(func) - component_spec.implementation = structures.Implementation( - container=structures.ContainerSpecImplementation( - image=component_image, - command=packages_to_install_command + command, - args=args, - )) - - module_path = pathlib.Path(inspect.getsourcefile(func)) - module_path.resolve() - - component_name = _python_function_name_to_component_name(func.__name__) - component_info = ComponentInfo( - name=component_name, - function_name=func.__name__, - func=func, - target_image=target_image, - module_path=module_path, - component_spec=component_spec, - output_component_file=output_component_file, - base_image=base_image, - packages_to_install=packages_to_install, - pip_index_urls=pip_index_urls) - - if REGISTERED_MODULES is not None: - REGISTERED_MODULES[component_name] = component_info - - if output_component_file: - component_spec.save_to_component_yaml(output_component_file) - - return python_component.PythonComponent( - component_spec=component_spec, python_func=func) - - -def make_input_for_parameterized_container_component_function( - name: str, annotation: Union[Type[List[artifact_types.Artifact]], - Type[artifact_types.Artifact]] -) -> Union[placeholders.Placeholder, container_component_artifact_channel - .ContainerComponentArtifactChannel]: - if type_annotations.is_input_artifact(annotation): - - if type_annotations.is_list_of_artifacts(annotation.__origin__): - return placeholders.InputListOfArtifactsPlaceholder(name) - else: - return container_component_artifact_channel.ContainerComponentArtifactChannel( - io_type='input', var_name=name) - - elif type_annotations.is_output_artifact(annotation): - - if type_annotations.is_list_of_artifacts(annotation.__origin__): - return placeholders.OutputListOfArtifactsPlaceholder(name) - else: - return container_component_artifact_channel.ContainerComponentArtifactChannel( - io_type='output', var_name=name) - - elif isinstance( - annotation, - (type_annotations.OutputAnnotation, type_annotations.OutputPath)): - return placeholders.OutputParameterPlaceholder(name) - - else: - placeholder = placeholders.InputValuePlaceholder(name) - # small hack to encode the runtime value's type for a custom json.dumps function - if (annotation == task_final_status.PipelineTaskFinalStatus or - type_utils.is_task_final_status_type(annotation)): - placeholder._ir_type = 'STRUCT' - else: - placeholder._ir_type = type_utils.get_parameter_type_name( - annotation) - return placeholder - - -def create_container_component_from_func( - func: Callable) -> container_component_class.ContainerComponent: - """Implementation 
for the @container_component decorator. - - The decorator is defined under container_component_decorator.py. See - the decorator for the canonical documentation for this function. - """ - - component_spec = extract_component_interface(func, containerized=True) - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - arg_list = [] - for parameter in parameters: - parameter_type = type_annotations.maybe_strip_optional_from_annotation( - parameter.annotation) - arg_list.append( - make_input_for_parameterized_container_component_function( - parameter.name, parameter_type)) - - container_spec = func(*arg_list) - container_spec_implementation = structures.ContainerSpecImplementation.from_container_spec( - container_spec) - component_spec.implementation = structures.Implementation( - container_spec_implementation) - component_spec._validate_placeholders() - return container_component_class.ContainerComponent(component_spec, func) - - -def create_graph_component_from_func( - func: Callable, - name: Optional[str] = None, - description: Optional[str] = None, - display_name: Optional[str] = None, -) -> graph_component.GraphComponent: - """Implementation for the @pipeline decorator. - - The decorator is defined under pipeline_context.py. See the - decorator for the canonical documentation for this function. - """ - - component_spec = extract_component_interface( - func, - description=description, - name=name, - ) - return graph_component.GraphComponent( - component_spec=component_spec, - pipeline_func=func, - display_name=display_name, - ) - - -def get_pipeline_description( - decorator_description: Union[str, None], - docstring: 'docstring_parser.Docstring', -) -> Union[str, None]: - """Obtains the correct pipeline description from the pipeline decorator's - description argument and the parsed docstring. - - Gives precedence to the decorator argument. - """ - if decorator_description: - return decorator_description - - short_description = docstring.short_description - long_description = docstring.long_description - docstring_description = short_description + '\n' + long_description if ( - short_description and long_description) else short_description - return docstring_description.strip() if docstring_description else None diff --git a/sdk/python/kfp-dsl/kfp/dsl/constants.py b/sdk/python/kfp-dsl/kfp/dsl/constants.py deleted file mode 100644 index 44b7a16fbb..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/constants.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Constants.""" - -# Unit constants for k8s size string. 
-_E = 10**18 # Exa -_EI = 1 << 60 # Exa: power-of-two approximate -_P = 10**15 # Peta -_PI = 1 << 50 # Peta: power-of-two approximate -# noinspection PyShadowingBuiltins -_T = 10**12 # Tera -_TI = 1 << 40 # Tera: power-of-two approximate -_G = 10**9 # Giga -_GI = 1 << 30 # Giga: power-of-two approximate -_M = 10**6 # Mega -_MI = 1 << 20 # Mega: power-of-two approximate -_K = 10**3 # Kilo -_KI = 1 << 10 # Kilo: power-of-two approximate diff --git a/sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py deleted file mode 100644 index 322752295f..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/container_component_artifact_channel.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Union - - -class ContainerComponentArtifactChannel: - """A class for passing in placeholders into container_component decorated - function.""" - - def __init__(self, io_type: str, var_name: str): - self._io_type = io_type - self._var_name = var_name - - def __getattr__(self, _name: str) -> Union['placeholders.Placeholder']: - # aviod circular imports - from kfp.dsl import placeholders - - attr_to_placeholder_dict = { - 'uri': { - 'input': placeholders.InputUriPlaceholder, - 'output': placeholders.OutputUriPlaceholder, - }, - 'path': { - 'input': placeholders.InputPathPlaceholder, - 'output': placeholders.OutputPathPlaceholder, - }, - 'metadata': { - 'input': placeholders.InputMetadataPlaceholder, - 'output': placeholders.OutputMetadataPlaceholder - }, - } - if _name not in ['uri', 'path', 'metadata']: - raise AttributeError(f'Cannot access artifact attribute "{_name}".') - return attr_to_placeholder_dict[_name][self._io_type](self._var_name) diff --git a/sdk/python/kfp-dsl/kfp/dsl/container_component_class.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_class.py deleted file mode 100644 index 7cd928036a..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/container_component_class.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Container-based component.""" - -from typing import Callable - -from kfp.dsl import base_component -from kfp.dsl import structures - - -class ContainerComponent(base_component.BaseComponent): - """Component defined via pre-built container. - - Attribute: - pipeline_func: The function that becomes the implementation of this component. 
- """ - - def __init__(self, component_spec: structures.ComponentSpec, - pipeline_func: Callable) -> None: - super().__init__(component_spec=component_spec) - self.pipeline_func = pipeline_func - - self._prevent_using_output_lists_of_artifacts() - - def execute(self, **kwargs): - # ContainerComponent`: Also inherits from `BaseComponent`. - # As its name suggests, this class backs (custom) container components. - # Its `execute()` method uses `docker run` for local component execution - raise NotImplementedError diff --git a/sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py b/sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py deleted file mode 100644 index 6ce43094ff..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/container_component_decorator.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Callable - -from kfp.dsl import component_factory -from kfp.dsl import container_component_class - - -def container_component( - func: Callable) -> container_component_class.ContainerComponent: - """Decorator for container-based components in KFP v2. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - - Example: - :: - - from kfp.dsl import container_component, ContainerSpec, InputPath, OutputPath, Output - - @container_component - def my_component( - dataset_path: InputPath(Dataset), - model: Output[Model], - num_epochs: int, - output_parameter: OutputPath(str), - ): - return ContainerSpec( - image='gcr.io/my-image', - command=['sh', 'my_component.sh'], - args=[ - '--dataset_path', dataset_path, - '--model_path', model.path, - '--output_parameter_path', output_parameter, - ] - ) - """ - return component_factory.create_container_component_from_func(func) diff --git a/sdk/python/kfp-dsl/kfp/dsl/executor.py b/sdk/python/kfp-dsl/kfp/dsl/executor.py deleted file mode 100644 index cc87f34b0a..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/executor.py +++ /dev/null @@ -1,364 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import inspect -import json -import os -from typing import Any, Callable, Dict, List, Optional, Union - -from kfp.dsl import python_component -from kfp.dsl import task_final_status -from kfp.dsl.types import artifact_types -from kfp.dsl.types import type_annotations - - -class Executor(): - """Executor executes v2-based Python function components.""" - - def __init__(self, executor_input: Dict, - function_to_execute: Union[Callable, - python_component.PythonComponent]): - if hasattr(function_to_execute, 'python_func'): - self._func = function_to_execute.python_func - else: - self._func = function_to_execute - - self._input = executor_input - self._input_artifacts: Dict[str, - Union[artifact_types.Artifact, - List[artifact_types.Artifact]]] = {} - self._output_artifacts: Dict[str, artifact_types.Artifact] = {} - - for name, artifacts in self._input.get('inputs', - {}).get('artifacts', {}).items(): - list_of_artifact_proto_structs = artifacts.get('artifacts') - if list_of_artifact_proto_structs: - annotation = self._func.__annotations__[name] - # InputPath has no attribute __origin__ and also should be handled as a single artifact - if type_annotations.is_Input_Output_artifact_annotation( - annotation) and type_annotations.is_list_of_artifacts( - annotation.__origin__): - self._input_artifacts[name] = [ - self.make_artifact( - msg, - name, - self._func, - ) for msg in list_of_artifact_proto_structs - ] - else: - self._input_artifacts[name] = self.make_artifact( - list_of_artifact_proto_structs[0], - name, - self._func, - ) - - for name, artifacts in self._input.get('outputs', - {}).get('artifacts', {}).items(): - list_of_artifact_proto_structs = artifacts.get('artifacts') - if list_of_artifact_proto_structs: - output_artifact = self.make_artifact( - list_of_artifact_proto_structs[0], - name, - self._func, - ) - self._output_artifacts[name] = output_artifact - self.makedirs_recursively(output_artifact.path) - - self._return_annotation = inspect.signature( - self._func).return_annotation - self._executor_output = {} - - def make_artifact( - self, - runtime_artifact: Dict, - name: str, - func: Callable, - ) -> Any: - annotation = func.__annotations__.get(name) - if isinstance(annotation, type_annotations.InputPath): - schema_title, _ = annotation.type.split('@') - if schema_title in artifact_types._SCHEMA_TITLE_TO_TYPE: - artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE[ - schema_title] - else: - raise TypeError( - f'Invalid type argument to {type_annotations.InputPath.__name__}: {annotation.type}' - ) - else: - artifact_cls = annotation - return create_artifact_instance( - runtime_artifact, artifact_cls=artifact_cls) - - def makedirs_recursively(self, path: str) -> None: - os.makedirs(os.path.dirname(path), exist_ok=True) - - def _get_input_artifact(self, name: str): - return self._input_artifacts.get(name) - - def _get_output_artifact(self, name: str): - return self._output_artifacts.get(name) - - def _get_input_parameter_value(self, parameter_name: str): - parameter_values = self._input.get('inputs', - {}).get('parameterValues', None) - - if parameter_values is not None: - return parameter_values.get(parameter_name, None) - - return None - - def _get_output_parameter_path(self, parameter_name: str): - parameter = self._input.get('outputs', - {}).get('parameters', - {}).get(parameter_name, None) - if parameter is None: - return None - - import os - path = parameter.get('outputFile', None) - if path: - os.makedirs(os.path.dirname(path), exist_ok=True) - return path - - def 
_get_output_artifact_path(self, artifact_name: str): - output_artifact = self._output_artifacts.get(artifact_name) - if not output_artifact: - raise ValueError( - f'Failed to get output artifact path for artifact name {artifact_name}' - ) - return output_artifact.path - - def _get_input_artifact_path(self, artifact_name: str): - input_artifact = self._input_artifacts.get(artifact_name) - if not input_artifact: - raise ValueError( - f'Failed to get input artifact path for artifact name {artifact_name}' - ) - return input_artifact.path - - def _write_output_parameter_value(self, name: str, - value: Union[str, int, float, bool, dict, - list, Dict, List]): - if isinstance(value, (float, int)): - output = str(value) - elif isinstance(value, str): - # value is already a string. - output = value - elif isinstance(value, (bool, list, dict)): - output = json.dumps(value) - else: - raise ValueError( - f'Unable to serialize unknown type `{value}` for parameter input with value `{type(value)}`' - ) - - if not self._executor_output.get('parameterValues'): - self._executor_output['parameterValues'] = {} - - self._executor_output['parameterValues'][name] = value - - def _write_output_artifact_payload(self, name: str, value: Any): - path = self._get_output_artifact_path(name) - with open(path, 'w') as f: - f.write(str(value)) - - # TODO: extract to a util - @classmethod - def _get_short_type_name(cls, type_name: str) -> str: - """Extracts the short form type name. - - This method is used for looking up serializer for a given type. - - For example: - typing.List -> List - typing.List[int] -> List - typing.Dict[str, str] -> Dict - List -> List - str -> str - - Args: - type_name: The original type name. - - Returns: - The short form type name or the original name if pattern doesn't match. - """ - import re - match = re.match('(typing\.)?(?P\w+)(?:\[.+\])?', type_name) - return match.group('type') if match else type_name - - # TODO: merge with type_utils.is_parameter_type - @classmethod - def _is_parameter(cls, annotation: Any) -> bool: - if type(annotation) == type: - return annotation in [str, int, float, bool, dict, list] - - # Annotation could be, for instance `typing.Dict[str, str]`, etc. - return cls._get_short_type_name(str(annotation)) in ['Dict', 'List'] - - @classmethod - def _is_artifact(cls, annotation: Any) -> bool: - if type(annotation) == type: - return type_annotations.is_artifact_class(annotation) - return False - - @classmethod - def _is_named_tuple(cls, annotation: Any) -> bool: - if type(annotation) == type: - return issubclass(annotation, tuple) and hasattr( - annotation, '_fields') and hasattr(annotation, - '__annotations__') - return False - - def _handle_single_return_value(self, output_name: str, - annotation_type: Any, return_value: Any): - if self._is_parameter(annotation_type): - origin_type = getattr(annotation_type, '__origin__', - None) or annotation_type - # relax float-typed return to allow both int and float. - if origin_type == float: - accepted_types = (int, float) - # TODO: relax str-typed return to allow all primitive types? 
- else: - accepted_types = origin_type - if not isinstance(return_value, accepted_types): - raise ValueError( - f'Function `{self._func.__name__}` returned value of type {type(return_value)}; want type {origin_type}' - ) - self._write_output_parameter_value(output_name, return_value) - elif self._is_artifact(annotation_type): - self._write_output_artifact_payload(output_name, return_value) - else: - raise RuntimeError( - f'Unknown return type: {annotation_type}. Must be one of the supported data types: https://www.kubeflow.org/docs/components/pipelines/v2/data-types/' - ) - - def _write_executor_output(self, func_output: Optional[Any] = None): - if self._output_artifacts: - self._executor_output['artifacts'] = {} - - for name, artifact in self._output_artifacts.items(): - runtime_artifact = { - 'name': artifact.name, - 'uri': artifact.uri, - 'metadata': artifact.metadata, - } - artifacts_list = {'artifacts': [runtime_artifact]} - - self._executor_output['artifacts'][name] = artifacts_list - - if func_output is not None: - if self._is_parameter(self._return_annotation) or self._is_artifact( - self._return_annotation): - # Note: single output is named `Output` in component.yaml. - self._handle_single_return_value('Output', - self._return_annotation, - func_output) - elif self._is_named_tuple(self._return_annotation): - if len(self._return_annotation._fields) != len(func_output): - raise RuntimeError( - f'Expected {len(self._return_annotation._fields)} return values from function `{self._func.__name__}`, got {len(func_output)}' - ) - for i in range(len(self._return_annotation._fields)): - field = self._return_annotation._fields[i] - field_type = self._return_annotation.__annotations__[field] - if type(func_output) == tuple: - field_value = func_output[i] - else: - field_value = getattr(func_output, field) - self._handle_single_return_value(field, field_type, - field_value) - else: - raise RuntimeError( - f'Unknown return type: {self._return_annotation}. Must be one of `str`, `int`, `float`, a subclass of `Artifact`, or a NamedTuple collection of these types.' - ) - - # This check is to ensure only one worker (in a mirrored, distributed training/compute strategy) attempts to write to the same executor output file at the same time using gcsfuse, which enforces immutability of files. - write_file = True - - CLUSTER_SPEC_ENV_VAR_NAME = 'CLUSTER_SPEC' - cluster_spec_string = os.environ.get(CLUSTER_SPEC_ENV_VAR_NAME) - if cluster_spec_string: - cluster_spec = json.loads(cluster_spec_string) - CHIEF_NODE_LABELS = {'workerpool0', 'chief', 'master'} - write_file = cluster_spec['task']['type'] in CHIEF_NODE_LABELS - - if write_file: - executor_output_path = self._input['outputs']['outputFile'] - os.makedirs(os.path.dirname(executor_output_path), exist_ok=True) - with open(executor_output_path, 'w') as f: - f.write(json.dumps(self._executor_output)) - - def execute(self): - annotations = inspect.getfullargspec(self._func).annotations - - # Function arguments. - func_kwargs = {} - - for k, v in annotations.items(): - if k == 'return': - continue - - # Annotations for parameter types could be written as, for example, - # `Optional[str]`. In this case, we need to strip off the part - # `Optional[]` to get the actual parameter type. 
- v = type_annotations.maybe_strip_optional_from_annotation(v) - - if v == task_final_status.PipelineTaskFinalStatus: - value = self._get_input_parameter_value(k) - func_kwargs[k] = task_final_status.PipelineTaskFinalStatus( - state=value.get('state'), - pipeline_job_resource_name=value.get( - 'pipelineJobResourceName'), - pipeline_task_name=value.get('pipelineTaskName'), - error_code=value.get('error').get('code', None), - error_message=value.get('error').get('message', None), - ) - - elif self._is_parameter(v): - value = self._get_input_parameter_value(k) - if value is not None: - func_kwargs[k] = value - - elif type_annotations.is_Input_Output_artifact_annotation(v): - if type_annotations.is_input_artifact(v): - func_kwargs[k] = self._get_input_artifact(k) - if type_annotations.is_output_artifact(v): - func_kwargs[k] = self._get_output_artifact(k) - - elif isinstance(v, type_annotations.OutputPath): - if self._is_parameter(v.type): - func_kwargs[k] = self._get_output_parameter_path(k) - else: - func_kwargs[k] = self._get_output_artifact_path(k) - - elif isinstance(v, type_annotations.InputPath): - func_kwargs[k] = self._get_input_artifact_path(k) - - result = self._func(**func_kwargs) - self._write_executor_output(result) - - -def create_artifact_instance( - runtime_artifact: Dict, - artifact_cls=artifact_types.Artifact, -) -> type: - """Creates an artifact class instances from a runtime artifact - dictionary.""" - schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '') - - artifact_cls = artifact_types._SCHEMA_TITLE_TO_TYPE.get( - schema_title, artifact_cls) - return artifact_cls( - uri=runtime_artifact.get('uri', ''), - name=runtime_artifact.get('name', ''), - metadata=runtime_artifact.get('metadata', {}), - ) diff --git a/sdk/python/kfp-dsl/kfp/dsl/executor_main.py b/sdk/python/kfp-dsl/kfp/dsl/executor_main.py deleted file mode 100644 index 1836ea5889..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/executor_main.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import argparse -import json -import logging -import os -import sys - -from kfp.dsl import executor as component_executor -from kfp.dsl import kfp_config -from kfp.dsl import utils - - -def _setup_logging(): - logging_format = '[KFP Executor %(asctime)s %(levelname)s]: %(message)s' - logging.basicConfig( - stream=sys.stdout, format=logging_format, level=logging.INFO) - - -def executor_main(): - _setup_logging() - parser = argparse.ArgumentParser(description='KFP Component Executor.') - - parser.add_argument( - '--component_module_path', - type=str, - help='Path to a module containing the KFP component.') - - parser.add_argument( - '--function_to_execute', - type=str, - required=True, - help='The name of the component function in ' - '--component_module_path file that is to be executed.') - - parser.add_argument( - '--executor_input', - type=str, - help='JSON-serialized ExecutorInput from the orchestrator. 
' - 'This should contain inputs and placeholders for outputs.') - - args, _ = parser.parse_known_args() - - func_name = args.function_to_execute - module_path = None - module_directory = None - module_name = None - - if args.component_module_path is not None: - logging.info( - f'Looking for component `{func_name}` in --component_module_path `{args.component_module_path}`' - ) - module_path = args.component_module_path - module_directory = os.path.dirname(args.component_module_path) - module_name = os.path.basename(args.component_module_path)[:-len('.py')] - else: - # Look for module directory using kfp_config.ini - logging.info( - f'--component_module_path is not specified. Looking for component `{func_name}` in config file `kfp_config.ini` instead' - ) - config = kfp_config.KFPConfig() - components = config.get_components() - if not components: - raise RuntimeError('No components found in `kfp_config.ini`') - try: - module_path = components[func_name] - except KeyError: - raise RuntimeError( - f'Could not find component `{func_name}` in `kfp_config.ini`. Found the following components instead:\n{components}' - ) - - module_directory = str(module_path.parent) - module_name = str(module_path.name)[:-len('.py')] - - logging.info( - f'Loading KFP component "{func_name}" from {module_path} (directory "{module_directory}" and module name "{module_name}")' - ) - - module = utils.load_module( - module_name=module_name, module_directory=module_directory) - - executor_input = json.loads(args.executor_input) - function_to_execute = getattr(module, func_name) - - logging.info(f'Got executor_input:\n{json.dumps(executor_input, indent=4)}') - - executor = component_executor.Executor( - executor_input=executor_input, function_to_execute=function_to_execute) - - executor.execute() - - -if __name__ == '__main__': - executor_main() diff --git a/sdk/python/kfp-dsl/kfp/dsl/for_loop.py b/sdk/python/kfp-dsl/kfp/dsl/for_loop.py deleted file mode 100644 index 5381576631..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/for_loop.py +++ /dev/null @@ -1,315 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Classes and methods that supports argument for ParallelFor.""" - -import re -from typing import Any, Dict, List, Optional, Union - -from kfp.dsl import pipeline_channel - -ItemList = List[Union[int, float, str, Dict[str, Any]]] - - -def _get_loop_item_type(type_name: str) -> Optional[str]: - """Extracts the loop item type. - - This method is used for extract the item type from a collection type. - For example: - - List[str] -> str - typing.List[int] -> int - typing.Sequence[str] -> str - List -> None - str -> None - - Args: - type_name: The collection type name, like `List`, Sequence`, etc. - - Returns: - The collection item type or None if no match found. 
- """ - match = re.match('(typing\.)?(?:\w+)(?:\[(?P.+)\])', type_name) - return match['item_type'].lstrip().rstrip() if match else None - - -def _get_subvar_type(type_name: str) -> Optional[str]: - """Extracts the subvar type. - - This method is used for extract the value type from a dictionary type. - For example: - - Dict[str, int] -> int - typing.Mapping[str, float] -> float - - Args: - type_name: The dictionary type. - - Returns: - The dictionary value type or None if no match found. - """ - match = re.match( - '(typing\.)?(?:\w+)(?:\[\s*(?:\w+)\s*,\s*(?P.+)\])', - type_name) - return match['value_type'].lstrip().rstrip() if match else None - - -class LoopArgument(pipeline_channel.PipelineParameterChannel): - """Represents the argument that are looped over in a ParallelFor loop. - - The class shouldn't be instantiated by the end user, rather it is - created automatically by a ParallelFor ops group. - - To create a LoopArgument instance, use one of its factory methods:: - - LoopArgument.from_pipeline_channel(...) - LoopArgument.from_raw_items(...) - - - Attributes: - items_or_pipeline_channel: The raw items or the PipelineChannel object - this LoopArgument is associated to. - """ - LOOP_ITEM_NAME_BASE = 'loop-item' - LOOP_ITEM_PARAM_NAME_BASE = 'loop-item-param' - - def __init__( - self, - items: Union[ItemList, pipeline_channel.PipelineChannel], - name_code: Optional[str] = None, - name_override: Optional[str] = None, - **kwargs, - ): - """Initializes a LoopArguments object. - - Args: - items: List of items to loop over. If a list of dicts then, all - dicts must have the same keys and every key must be a legal - Python variable name. - name_code: A unique code used to identify these loop arguments. - Should match the code for the ParallelFor ops_group which created - these LoopArguments. This prevents parameter name collisions. - name_override: The override name for PipelineChannel. - **kwargs: Any other keyword arguments passed down to PipelineChannel. - """ - if (name_code is None) == (name_override is None): - raise ValueError( - 'Expect one and only one of `name_code` and `name_override` to ' - 'be specified.') - - if name_override is None: - super().__init__(name=self._make_name(name_code), **kwargs) - else: - super().__init__(name=name_override, **kwargs) - - if not isinstance(items, - (list, tuple, pipeline_channel.PipelineChannel)): - raise TypeError( - f'Expected list, tuple, or PipelineChannel, got {items}.') - - if isinstance(items, tuple): - items = list(items) - - self.items_or_pipeline_channel = items - self.is_with_items_loop_argument = not isinstance( - items, pipeline_channel.PipelineChannel) - self._referenced_subvars: Dict[str, LoopArgumentVariable] = {} - - if isinstance(items, list) and isinstance(items[0], dict): - subvar_names = set(items[0].keys()) - # then this block creates loop_arg.variable_a and loop_arg.variable_b - for subvar_name in subvar_names: - loop_arg_var = LoopArgumentVariable( - loop_argument=self, - subvar_name=subvar_name, - ) - self._referenced_subvars[subvar_name] = loop_arg_var - setattr(self, subvar_name, loop_arg_var) - - def __getattr__(self, name: str): - # this is being overridden so that we can access subvariables of the - # LoopArgument (i.e.: item.a) without knowing the subvariable names ahead - # of time. 
- - return self._referenced_subvars.setdefault( - name, LoopArgumentVariable( - loop_argument=self, - subvar_name=name, - )) - - def _make_name(self, code: str): - """Makes a name for this loop argument from a unique code.""" - return f'{self.LOOP_ITEM_PARAM_NAME_BASE}-{code}' - - @classmethod - def from_pipeline_channel( - cls, - channel: pipeline_channel.PipelineChannel, - ) -> 'LoopArgument': - """Creates a LoopArgument object from a PipelineChannel object.""" - return LoopArgument( - items=channel, - name_override=channel.name + '-' + cls.LOOP_ITEM_NAME_BASE, - task_name=channel.task_name, - channel_type=_get_loop_item_type(channel.channel_type) or 'String', - ) - - @classmethod - def from_raw_items( - cls, - raw_items: ItemList, - name_code: str, - ) -> 'LoopArgument': - """Creates a LoopArgument object from raw item list.""" - if len(raw_items) == 0: - raise ValueError('Got an empty item list for loop argument.') - - return LoopArgument( - items=raw_items, - name_code=name_code, - channel_type=type(raw_items[0]).__name__, - ) - - @classmethod - def name_is_loop_argument(cls, name: str) -> bool: - """Returns True if the given channel name looks like a loop argument. - - Either it came from a withItems loop item or withParams loop - item. - """ - return ('-' + cls.LOOP_ITEM_NAME_BASE) in name \ - or (cls.LOOP_ITEM_PARAM_NAME_BASE + '-') in name - - -class LoopArgumentVariable(pipeline_channel.PipelineChannel): - """Represents a subvariable for a loop argument. - - This is used for cases where we're looping over maps, each of which contains - several variables. If the user ran: - - with dsl.ParallelFor([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]) as item: - ... - - Then there's one LoopArgumentVariable for 'a' and another for 'b'. - - Attributes: - loop_argument: The original LoopArgument object this subvariable is - attached to. - subvar_name: The subvariable name. - """ - SUBVAR_NAME_DELIMITER = '-subvar-' - LEGAL_SUBVAR_NAME_REGEX = re.compile(r'^[a-zA-Z_][0-9a-zA-Z_]*$') - - def __init__( - self, - loop_argument: LoopArgument, - subvar_name: str, - ): - """Initializes a LoopArgumentVariable instance. - - Args: - loop_argument: The LoopArgument object this subvariable is based on - a subvariable to. - subvar_name: The name of this subvariable, which is the name of the - dict key that spawned this subvariable. - - Raises: - ValueError is subvar name is illegal. 
- """ - if not self._subvar_name_is_legal(subvar_name): - raise ValueError( - f'Tried to create subvariable named {subvar_name}, but that is ' - 'not a legal Python variable name.') - - self.subvar_name = subvar_name - self.loop_argument = loop_argument - - super().__init__( - name=self._get_name_override( - loop_arg_name=loop_argument.name, - subvar_name=subvar_name, - ), - task_name=loop_argument.task_name, - channel_type=_get_subvar_type(loop_argument.channel_type) or - 'String', - ) - - @property - def items_or_pipeline_channel( - self) -> Union[ItemList, pipeline_channel.PipelineChannel]: - """Returns the loop argument items.""" - return self.loop_argument.items_or_pipeline_chanenl - - @property - def is_with_items_loop_argument(self) -> bool: - """Whether the loop argument is originated from raw items.""" - return self.loop_argument.is_with_items_loop_argument - - def _subvar_name_is_legal(self, proposed_variable_name: str) -> bool: - """Returns True if the subvar name is legal.""" - return re.match(self.LEGAL_SUBVAR_NAME_REGEX, - proposed_variable_name) is not None - - def _get_name_override(self, loop_arg_name: str, subvar_name: str) -> str: - """Gets the name. - - Args: - loop_arg_name: the name of the loop argument parameter that this - LoopArgumentVariable is attached to. - subvar_name: The name of this subvariable. - - Returns: - The name of this loop arg variable. - """ - return f'{loop_arg_name}{self.SUBVAR_NAME_DELIMITER}{subvar_name}' - - -class Collected(pipeline_channel.PipelineChannel): - """For collecting into a list the output from a task in dsl.ParallelFor - loops. - - Args: - output: The output of an upstream task within a dsl.ParallelFor loop. - - Example: - :: - - @dsl.pipeline - def math_pipeline() -> int: - with dsl.ParallelFor([1, 2, 3]) as x: - t = double(num=x) - - return add(nums=dsl.Collected(t.output)).output - """ - - def __init__( - self, - output: pipeline_channel.PipelineChannel, - ) -> None: - self.output = output - if isinstance(output, pipeline_channel.PipelineArtifactChannel): - channel_type = output.channel_type - self.is_artifact_channel = True - # we know all dsl.Collected instances are lists, so set to true - # for type checking, which occurs before dsl.Collected is updated to - # it's "correct" channel during compilation - self.is_artifact_list = True - else: - channel_type = 'LIST' - self.is_artifact_channel = False - - super().__init__( - output.name, - channel_type=channel_type, - task_name=output.task_name, - ) diff --git a/sdk/python/kfp-dsl/kfp/dsl/graph_component.py b/sdk/python/kfp-dsl/kfp/dsl/graph_component.py deleted file mode 100644 index d7ddffc65a..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/graph_component.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Pipeline as a component (aka graph component).""" - -import inspect -from typing import Callable, Optional -import uuid - -from kfp import dsl -from kfp.dsl import base_component -from kfp.dsl import pipeline_channel -from kfp.dsl import pipeline_context -from kfp.dsl import structures - - -class GraphComponent(base_component.BaseComponent): - """A component defined via @dsl.pipeline decorator. - - Attribute: - pipeline_func: The function that becomes the implementation of this component. - """ - - def __init__( - self, - component_spec: structures.ComponentSpec, - pipeline_func: Callable, - display_name: Optional[str] = None, - ): - super().__init__(component_spec=component_spec) - self.pipeline_func = pipeline_func - - args_list = [] - signature = inspect.signature(pipeline_func) - - for arg_name in signature.parameters: - input_spec = component_spec.inputs[arg_name] - args_list.append( - pipeline_channel.create_pipeline_channel( - name=arg_name, - channel_type=input_spec.type, - is_artifact_list=input_spec.is_artifact_list, - )) - - with pipeline_context.Pipeline( - self.component_spec.name) as dsl_pipeline: - pipeline_outputs = pipeline_func(*args_list) - - if not dsl_pipeline.tasks: - raise ValueError('Task is missing from pipeline.') - - # Making the pipeline group name unique to prevent name clashes with - # templates - pipeline_group = dsl_pipeline.groups[0] - pipeline_group.name = uuid.uuid4().hex - - try: - from kfp.compiler import pipeline_spec_builder as builder - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - pipeline_spec, platform_spec = builder.create_pipeline_spec( - pipeline=dsl_pipeline, - component_spec=self.component_spec, - pipeline_outputs=pipeline_outputs, - ) - - pipeline_root = getattr(pipeline_func, 'pipeline_root', None) - if pipeline_root is not None: - pipeline_spec.default_pipeline_root = pipeline_root - if display_name is not None: - pipeline_spec.pipeline_info.display_name = display_name - if component_spec.description is not None: - pipeline_spec.pipeline_info.description = component_spec.description - - self.component_spec.implementation.graph = pipeline_spec - self.component_spec.platform_spec = platform_spec - - @property - def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': - """Returns the pipeline spec of the component.""" - return self.component_spec.implementation.graph - - def execute(self, **kwargs): - raise RuntimeError('Graph component has no local execution mode.') diff --git a/sdk/python/kfp-dsl/kfp/dsl/importer_component.py b/sdk/python/kfp-dsl/kfp/dsl/importer_component.py deleted file mode 100644 index 168c7c6f73..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/importer_component.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Importer-based component.""" - -from kfp.dsl import base_component -from kfp.dsl import structures - - -class ImporterComponent(base_component.BaseComponent): - """Component defined via dsl.importer.""" - - def __init__( - self, - component_spec: structures.ComponentSpec, - ): - super().__init__(component_spec=component_spec) - - def execute(self, **kwargs): - raise NotImplementedError diff --git a/sdk/python/kfp-dsl/kfp/dsl/importer_node.py b/sdk/python/kfp-dsl/kfp/dsl/importer_node.py deleted file mode 100644 index 2a3e676daa..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/importer_node.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2020-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utility function for building Importer Node spec.""" - -from typing import Any, Dict, Mapping, Optional, Type, Union - -from kfp.dsl import importer_component -from kfp.dsl import pipeline_channel -from kfp.dsl import pipeline_task -from kfp.dsl import placeholders -from kfp.dsl import structures -from kfp.dsl import utils -from kfp.dsl.types import artifact_types -from kfp.dsl.types import type_utils - -URI_KEY = 'uri' -OUTPUT_KEY = 'artifact' -METADATA_KEY = 'metadata' - - -def importer( - artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str], - artifact_class: Type[artifact_types.Artifact], - reimport: bool = False, - metadata: Optional[Mapping[str, Any]] = None, -) -> pipeline_task.PipelineTask: - """Imports an existing artifact for use in a downstream component. - - Args: - artifact_uri: The URI of the artifact to import. - artifact_class: The artifact class being imported. - reimport: Whether to reimport the artifact. - metadata: Properties of the artifact. - - Returns: - A task with the artifact accessible via its ``.output`` attribute. 
- - Examples:: - - @dsl.pipeline(name='pipeline-with-importer') - def pipeline_with_importer(): - - importer1 = importer( - artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt', - artifact_class=Dataset, - reimport=False) - train(dataset=importer1.output) - """ - - component_inputs: Dict[str, structures.InputSpec] = {} - call_inputs: Dict[str, Any] = {} - - def traverse_dict_and_create_metadata_inputs(d: Any) -> Any: - if isinstance(d, pipeline_channel.PipelineParameterChannel): - reversed_call_inputs = { - pipeline_param_chan: name - for name, pipeline_param_chan in call_inputs.items() - } - - # minimizes importer spec interface by not creating new - # inputspec/parameters if the same input is used multiple places - # in metadata - unique_name = reversed_call_inputs.get( - d, - utils.make_name_unique_by_adding_index( - METADATA_KEY, - list(call_inputs), - '-', - ), - ) - - call_inputs[unique_name] = d - component_inputs[unique_name] = structures.InputSpec( - type=d.channel_type) - - return placeholders.InputValuePlaceholder( - input_name=unique_name)._to_string() - - elif isinstance(d, dict): - # use this instead of list comprehension to ensure compiles are identical across Python versions - res = {} - for k, v in d.items(): - new_k = traverse_dict_and_create_metadata_inputs(k) - new_v = traverse_dict_and_create_metadata_inputs(v) - res[new_k] = new_v - return res - - elif isinstance(d, list): - return [traverse_dict_and_create_metadata_inputs(el) for el in d] - - elif isinstance(d, str): - # extract pipeline channels from f-strings, if any - pipeline_channels = pipeline_channel.extract_pipeline_channels_from_any( - d) - - # pass the channel back into the recursive function to create the placeholder, component inputs, and call inputs, then replace the channel with the placeholder - for channel in pipeline_channels: - input_placeholder = traverse_dict_and_create_metadata_inputs( - channel) - d = d.replace(channel.pattern, input_placeholder) - return d - - else: - return d - - metadata_with_placeholders = traverse_dict_and_create_metadata_inputs( - metadata) - - component_spec = structures.ComponentSpec( - name='importer', - implementation=structures.Implementation( - importer=structures.ImporterSpec( - artifact_uri=placeholders.InputValuePlaceholder( - URI_KEY)._to_string(), - schema_title=type_utils.create_bundled_artifact_type( - artifact_class.schema_title, artifact_class.schema_version), - schema_version=artifact_class.schema_version, - reimport=reimport, - metadata=metadata_with_placeholders)), - inputs={ - URI_KEY: structures.InputSpec(type='String'), - **component_inputs - }, - outputs={ - OUTPUT_KEY: - structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - artifact_class.schema_title, - artifact_class.schema_version)) - }, - ) - importer = importer_component.ImporterComponent( - component_spec=component_spec) - return importer(uri=artifact_uri, **call_inputs) diff --git a/sdk/python/kfp-dsl/kfp/dsl/kfp_config.py b/sdk/python/kfp-dsl/kfp/dsl/kfp_config.py deleted file mode 100644 index 798249ed85..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/kfp_config.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
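
A brief sketch of the importer factory defined above with metadata that references pipeline parameters, which is the case handled by traverse_dict_and_create_metadata_inputs. The URI, parameter names, and the consume component are illustrative placeholders::

    from kfp import dsl
    from kfp.dsl import Dataset, Input


    @dsl.component
    def consume(dataset: Input[Dataset]) -> str:
        return dataset.uri


    @dsl.pipeline(name='importer-metadata-example')
    def importer_pipeline(uri: str, owner: str) -> str:
        imported = dsl.importer(
            artifact_uri=uri,
            artifact_class=Dataset,
            reimport=False,
            # Pipeline channels inside metadata (including ones embedded in
            # f-strings) are replaced with input placeholders at compile time.
            metadata={'owner': owner, 'note': f'imported from {uri}'},
        )
        return consume(dataset=imported.output).output
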
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import configparser -import pathlib -from typing import Dict, Optional -import warnings - -_KFP_CONFIG_FILE = 'kfp_config.ini' - -_COMPONENTS_SECTION = 'Components' - - -class KFPConfig(): - """Class for managing KFP component configuration. - - The configuration is .ini file named `kfp_config.ini` that can be parsed by - Python's native configparser module. Currently, this class supports a single - `Components` section, which lists components as key-value pairs. The key is - the component name (i.e. the function name), and the value is the path to - the file containing this function. The path is usually relative from the - location of the configuration file, but absolute paths should also work. - - At runtime, the KFP v2 Executor, defined in executor_main.py, will look - for this configuration file in its current working directory. If found, - it will load its contents, and use this to find the file containing the - component to execute. - - Example of the file's contents: - - [Components] - my_component_1 = my_dir_1/my_component_1.py - my_component_2 = my_dir_2/my_component_2.py - ... - """ - - def __init__(self, config_directory: Optional[pathlib.Path] = None): - """Creates a KFPConfig object. - - Loads the config from an existing `kfp_config.ini` file if found. - - Args: - config_directory: Looks for a file named `kfp_config.ini` in this - directory. Defaults to the current directory. - """ - self._config_parser = configparser.ConfigParser() - # Preserve case for keys. - self._config_parser.optionxform = lambda x: x - - if config_directory is None: - self._config_filepath = pathlib.Path(_KFP_CONFIG_FILE) - else: - self._config_filepath = config_directory / _KFP_CONFIG_FILE - - try: - with open(str(self._config_filepath), 'r') as f: - self._config_parser.read_file(f) - except IOError: - warnings.warn('No existing KFP Config file found') - - if not self._config_parser.has_section(_COMPONENTS_SECTION): - self._config_parser.add_section(_COMPONENTS_SECTION) - - self._components = {} - - def add_component(self, function_name: str, path: pathlib.Path): - """Adds a KFP component. - - Args: - function_name: The name of the component function. - path: A path to the file containing the component. - """ - self._components[function_name] = str(path) - - def save(self): - """Writes out a KFP config file.""" - # Always write out components in alphabetical order for determinism, - # especially in tests. - for function_name in sorted(self._components.keys()): - self._config_parser[_COMPONENTS_SECTION][ - function_name] = self._components[function_name] - - with open(str(self._config_filepath), 'w') as f: - self._config_parser.write(f) - - def get_components(self) -> Dict[str, pathlib.Path]: - """Returns a list of known KFP components. - - Returns: - A dictionary from component name (function name) to a pathlib.Path - pointing to the Python file with this component's definition. 
- """ - return { - function_name: pathlib.Path(module_path) for function_name, - module_path in self._config_parser[_COMPONENTS_SECTION].items() - } diff --git a/sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py deleted file mode 100644 index 66616103fb..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/pipeline_channel.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Definition of PipelineChannel.""" - -import abc -import contextlib -import dataclasses -import json -import re -from typing import Dict, List, Optional, Union - -from kfp.dsl.types import type_utils - - -@dataclasses.dataclass -class ConditionOperator: - """Represents a condition expression to be used in dsl.Condition(). - - Attributes: - operator: The operator of the condition. - left_operand: The left operand. - right_operand: The right operand. - """ - operator: str - left_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] - right_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] - - -# The string template used to generate the placeholder of a PipelineChannel. -_PIPELINE_CHANNEL_PLACEHOLDER_TEMPLATE = ( - '{{channel:task=%s;name=%s;type=%s;}}') -# The regex for parsing PipelineChannel placeholders from a string. -_PIPELINE_CHANNEL_PLACEHOLDER_REGEX = ( - r'{{channel:task=([\w\s_-]*);name=([\w\s_-]+);type=([\w\s{}":_-]*);}}') - - -class PipelineChannel(abc.ABC): - """Represents a future value that is passed between pipeline components. - - A PipelineChannel object can be used as a pipeline function argument so that - it will be a pipeline artifact or parameter that shows up in ML Pipelines - system UI. It can also represent an intermediate value passed between - components. - - Attributes: - name: The name of the pipeline channel. - channel_type: The type of the pipeline channel. - task_name: The name of the task that produces the pipeline channel. - None means it is not produced by any task, so if None, either user - constructs it directly (for providing an immediate value), or it is - a pipeline function argument. - pattern: The serialized string regex pattern this pipeline channel - created from. - """ - - @abc.abstractmethod - def __init__( - self, - name: str, - channel_type: Union[str, Dict], - task_name: Optional[str] = None, - ): - """Initializes a PipelineChannel instance. - - Args: - name: The name of the pipeline channel. The name will be sanitized - to be k8s compatible. - channel_type: The type of the pipeline channel. - task_name: Optional; The name of the task that produces the pipeline - channel. If provided, the task name will be sanitized to be k8s - compatible. - - Raises: - ValueError: If name or task_name contains invalid characters. - ValueError: If both task_name and value are set. 
- """ - valid_name_regex = r'^[A-Za-z][A-Za-z0-9\s_-]*$' - if not re.match(valid_name_regex, name): - raise ValueError( - f'Only letters, numbers, spaces, "_", and "-" are allowed in the name. Must begin with a letter. Got name: {name}' - ) - - self.name = name - self.channel_type = channel_type - # ensure value is None even if empty string or empty list/dict - # so that serialization and unserialization remain consistent - # (i.e. None => '' => None) - self.task_name = task_name or None - from kfp.dsl import pipeline_context - - default_pipeline = pipeline_context.Pipeline.get_default_pipeline() - if self.task_name is not None and default_pipeline is not None and default_pipeline.tasks: - self.task = pipeline_context.Pipeline.get_default_pipeline().tasks[ - self.task_name] - else: - self.task = None - - @property - def full_name(self) -> str: - """Unique name for the PipelineChannel.""" - return f'{self.task_name}-{self.name}' if self.task_name else self.name - - @property - def pattern(self) -> str: - """Unique pattern for the PipelineChannel.""" - return str(self) - - def __str__(self) -> str: - """String representation of the PipelineChannel. - - The string representation is a string identifier so we can mix - the PipelineChannel inline with other strings such as arguments. - For example, we can support: ['echo %s' % param] as the - container command and later a compiler can replace the - placeholder '{{pipeline_channel:task=%s;name=%s;type=%s}}' with - its own parameter identifier. - """ - task_name = self.task_name or '' - name = self.name - channel_type = self.channel_type or '' - if isinstance(channel_type, dict): - channel_type = json.dumps(channel_type) - return _PIPELINE_CHANNEL_PLACEHOLDER_TEMPLATE % (task_name, name, - channel_type) - - def __repr__(self) -> str: - """Representation of the PipelineChannel. - - We make repr return the placeholder string so that if someone - uses str()-based serialization of complex objects containing - `PipelineChannel`, it works properly. (e.g. str([1, 2, 3, - kfp.pipeline_channel.PipelineParameterChannel("aaa"), 4, 5, 6,])) - """ - return str(self) - - def __hash__(self) -> int: - """Returns the hash of a PipelineChannel.""" - return hash(self.pattern) - - def __eq__(self, other): - return ConditionOperator('==', self, other) - - def __ne__(self, other): - return ConditionOperator('!=', self, other) - - def __lt__(self, other): - return ConditionOperator('<', self, other) - - def __le__(self, other): - return ConditionOperator('<=', self, other) - - def __gt__(self, other): - return ConditionOperator('>', self, other) - - def __ge__(self, other): - return ConditionOperator('>=', self, other) - - -class PipelineParameterChannel(PipelineChannel): - """Represents a pipeline parameter channel. - - Attributes: - name: The name of the pipeline channel. - channel_type: The type of the pipeline channel. - task_name: The name of the task that produces the pipeline channel. - None means it is not produced by any task, so if None, either user - constructs it directly (for providing an immediate value), or it is a - pipeline function argument. - pattern: The serialized string regex pattern this pipeline channel created - from. - value: The actual value of the pipeline channel. If provided, the - pipeline channel is "resolved" immediately. 
- """ - - def __init__( - self, - name: str, - channel_type: Union[str, Dict], - task_name: Optional[str] = None, - value: Optional[type_utils.PARAMETER_TYPES] = None, - ): - """Initializes a PipelineArtifactChannel instance. - - Args: - name: The name of the pipeline channel. - channel_type: The type of the pipeline channel. - task_name: Optional; The name of the task that produces the pipeline - channel. - value: Optional; The actual value of the pipeline channel. - - Raises: - ValueError: If name or task_name contains invalid characters. - ValueError: If both task_name and value are set. - TypeError: If the channel type is not a parameter type. - """ - if task_name and value: - raise ValueError('task_name and value cannot be both set.') - - if not type_utils.is_parameter_type(channel_type): - raise TypeError(f'{channel_type} is not a parameter type.') - - self.value = value - - super(PipelineParameterChannel, self).__init__( - name=name, - channel_type=channel_type, - task_name=task_name, - ) - - -class PipelineArtifactChannel(PipelineChannel): - """Represents a pipeline artifact channel. - - Attributes: - name: The name of the pipeline channel. - channel_type: The type of the pipeline channel. - task_name: The name of the task that produces the pipeline channel. - A pipeline artifact channel is always produced by some task. - pattern: The serialized string regex pattern this pipeline channel created - from. - """ - - def __init__( - self, - name: str, - channel_type: Union[str, Dict], - task_name: Optional[str], - is_artifact_list: bool, - ): - """Initializes a PipelineArtifactChannel instance. - - Args: - name: The name of the pipeline channel. - channel_type: The type of the pipeline channel. - task_name: Optional; the name of the task that produces the pipeline - channel. - - Raises: - ValueError: If name or task_name contains invalid characters. - TypeError: If the channel type is not an artifact type. - """ - if type_utils.is_parameter_type(channel_type): - raise TypeError(f'{channel_type} is not an artifact type.') - - self.is_artifact_list = is_artifact_list - - super(PipelineArtifactChannel, self).__init__( - name=name, - channel_type=channel_type, - task_name=task_name, - ) - - -def create_pipeline_channel( - name: str, - channel_type: Union[str, Dict], - task_name: Optional[str] = None, - value: Optional[type_utils.PARAMETER_TYPES] = None, - is_artifact_list: bool = False, -) -> PipelineChannel: - """Creates a PipelineChannel object. - - Args: - name: The name of the channel. - channel_type: The type of the channel, which decides whether it is an - PipelineParameterChannel or PipelineArtifactChannel - task_name: Optional; the task that produced the channel. - value: Optional; the realized value for a channel. - - Returns: - A PipelineParameterChannel or PipelineArtifactChannel object. - """ - if type_utils.is_parameter_type(channel_type): - return PipelineParameterChannel( - name=name, - channel_type=channel_type, - task_name=task_name, - value=value, - ) - else: - return PipelineArtifactChannel( - name=name, - channel_type=channel_type, - task_name=task_name, - is_artifact_list=is_artifact_list, - ) - - -def extract_pipeline_channels_from_string( - payload: str) -> List[PipelineChannel]: - """Extracts a list of PipelineChannel instances from the payload string. - - Note: this function removes all duplicate matches. - - Args: - payload: A string that may contain serialized PipelineChannels. - - Returns: - A list of PipelineChannels found from the payload. 
- """ - matches = re.findall(_PIPELINE_CHANNEL_PLACEHOLDER_REGEX, payload) - unique_channels = set() - for match in matches: - task_name, name, channel_type = match - - # channel_type could be either a string (e.g. "Integer") or a dictionary - # (e.g.: {"custom_type": {"custom_property": "some_value"}}). - # Try loading it into dictionary, if failed, it means channel_type is a - # string. - with contextlib.suppress(json.JSONDecodeError): - channel_type = json.loads(channel_type) - - if type_utils.is_parameter_type(channel_type): - pipeline_channel = PipelineParameterChannel( - name=name, - channel_type=channel_type, - task_name=task_name, - ) - else: - pipeline_channel = PipelineArtifactChannel( - name=name, - channel_type=channel_type, - task_name=task_name, - # currently no support for getting the index from a list of artifacts (e.g., my_datasets[0].uri), so this will always be False until accessing a single artifact element is supported - is_artifact_list=False, - ) - unique_channels.add(pipeline_channel) - - return list(unique_channels) - - -def extract_pipeline_channels_from_any( - payload: Union[PipelineChannel, str, list, tuple, dict] -) -> List[PipelineChannel]: - """Recursively extract PipelineChannels from any object or list of objects. - - Args: - payload: An object that contains serialized PipelineChannels or k8 - definition objects. - - Returns: - A list of PipelineChannels found from the payload. - """ - if not payload: - return [] - - if isinstance(payload, PipelineChannel): - return [payload] - - if isinstance(payload, str): - return list(set(extract_pipeline_channels_from_string(payload))) - - if isinstance(payload, (list, tuple)): - pipeline_channels = [] - for item in payload: - pipeline_channels += extract_pipeline_channels_from_any(item) - return list(set(pipeline_channels)) - - if isinstance(payload, dict): - pipeline_channels = [] - for key, value in payload.items(): - pipeline_channels += extract_pipeline_channels_from_any(key) - pipeline_channels += extract_pipeline_channels_from_any(value) - return list(set(pipeline_channels)) - - # TODO(chensun): extract PipelineChannel from v2 container spec? - - return [] diff --git a/sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py deleted file mode 100644 index c1304c39ba..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/pipeline_context.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Definition for Pipeline.""" - -import functools -from typing import Callable, Optional - -from kfp.dsl import component_factory -from kfp.dsl import pipeline_task -from kfp.dsl import tasks_group -from kfp.dsl import utils - - -def pipeline(func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None, - pipeline_root: Optional[str] = None, - display_name: Optional[str] = None) -> Callable: - """Decorator used to construct a pipeline. 
- - Example - :: - - @pipeline( - name='my-pipeline', - description='My ML Pipeline.' - pipeline_root='gs://my-bucket/my-output-path' - ) - def my_pipeline(a: str, b: int): - ... - - Args: - func: The Python function that defines a pipeline. - name: The pipeline name. Defaults to a sanitized version of the - decorated function name. - description: A human-readable description of the pipeline. - pipeline_root: The root directory from which to read input and output - parameters and artifacts. - display_name: A human-readable name for the pipeline. - """ - if func is None: - return functools.partial( - pipeline, - name=name, - description=description, - pipeline_root=pipeline_root, - display_name=display_name, - ) - - if pipeline_root: - func.pipeline_root = pipeline_root - - return component_factory.create_graph_component_from_func( - func, - name=name, - description=description, - display_name=display_name, - ) - - -class Pipeline: - """A pipeline contains a list of tasks. - - This class is not supposed to be used by pipeline authors since pipeline - authors can use pipeline functions (decorated with @pipeline) to reference - their pipelines. - This class is useful for implementing a compiler. For example, the compiler - can use the following to get the pipeline object and its tasks: - - Example: - :: - - with Pipeline() as p: - pipeline_func(*args_list) - - traverse(p.tasks) - - Attributes: - name: - tasks: - groups: - """ - - # _default_pipeline is set when the compiler runs "with Pipeline()" - _default_pipeline = None - - @staticmethod - def get_default_pipeline(): - """Gets the default pipeline.""" - return Pipeline._default_pipeline - - def __init__(self, name: str): - """Creates a new instance of Pipeline. - - Args: - name: The name of the pipeline. - """ - self.name = name - self.tasks = {} - # Add the root group. - self.groups = [ - tasks_group.TasksGroup( - group_type=tasks_group.TasksGroupType.PIPELINE, - name=name, - is_root=True) - ] - self._group_id = 0 - - def __enter__(self): - - if Pipeline._default_pipeline: - raise Exception('Nested pipelines are not allowed.') - - Pipeline._default_pipeline = self - - def register_task_and_generate_id(task: pipeline_task.PipelineTask): - return self.add_task( - task=task, - add_to_group=not getattr(task, 'is_exit_handler', False)) - - self._old_register_task_handler = ( - pipeline_task.PipelineTask._register_task_handler) - pipeline_task.PipelineTask._register_task_handler = ( - register_task_and_generate_id) - return self - - def __exit__(self, *unused_args): - - Pipeline._default_pipeline = None - pipeline_task.PipelineTask._register_task_handler = ( - self._old_register_task_handler) - - def add_task( - self, - task: pipeline_task.PipelineTask, - add_to_group: bool, - ) -> str: - """Adds a new task. - - Args: - task: A PipelineTask instance. - add_to_group: Whether add the task into the current group. Expect - True for all tasks expect for exit handler. - - Returns: - A unique task name. - """ - # Sanitizing the task name. - # Technically this could be delayed to the compilation stage, but string - # serialization of PipelineChannels make unsanitized names problematic. - task_name = utils.maybe_rename_for_k8s(task.component_spec.name) - #If there is an existing task with this name then generate a new name. 
- task_name = utils.make_name_unique_by_adding_index( - task_name, list(self.tasks.keys()), '-') - if task_name == '': - task_name = utils.make_name_unique_by_adding_index( - 'task', list(self.tasks.keys()), '-') - - self.tasks[task_name] = task - if add_to_group: - task.parent_task_group = self.groups[-1] - self.groups[-1].tasks.append(task) - - return task_name - - def push_tasks_group(self, group: 'tasks_group.TasksGroup'): - """Pushes a TasksGroup into the stack. - - Args: - group: A TasksGroup. Typically it is one of ExitHandler, Condition, - and ParallelFor. - """ - self.groups[-1].groups.append(group) - self.groups.append(group) - - def pop_tasks_group(self): - """Removes the current TasksGroup from the stack.""" - del self.groups[-1] - - def remove_task_from_groups(self, task: pipeline_task.PipelineTask): - """Removes a task from the pipeline. - - This is useful for excluding exit handler from the pipeline. - """ - for group in self.groups: - group.remove_task_recursive(task) - - def get_next_group_id(self) -> str: - """Gets the next id for a new group.""" - self._group_id += 1 - return str(self._group_id) diff --git a/sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py b/sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py deleted file mode 100644 index 26081f75e1..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/pipeline_task.py +++ /dev/null @@ -1,686 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Pipeline task class and operations.""" - -import copy -import inspect -import itertools -import re -from typing import Any, Dict, List, Mapping, Optional, Union -import warnings - -import kfp -from kfp.dsl import constants -from kfp.dsl import pipeline_channel -from kfp.dsl import placeholders -from kfp.dsl import structures -from kfp.dsl import utils -from kfp.dsl.types import type_utils - -_register_task_handler = lambda task: utils.maybe_rename_for_k8s( - task.component_spec.name) - - -class PipelineTask: - """Represents a pipeline task (instantiated component). - - **Note:** ``PipelineTask`` should not be constructed by pipeline authors directly, but instead obtained via an instantiated component (see example). - - Replaces ``ContainerOp`` from ``kfp`` v1. Holds operations available on a task object, such as - ``.after()``, ``.set_memory_limit()``, ``.enable_caching()``, etc. - - Args: - component_spec: The component definition. - args: The dictionary of arguments on which the component was called to instantiate this task. - - Example: - :: - - @dsl.component - def identity(message: str) -> str: - return message - - @dsl.pipeline(name='my_pipeline') - def my_pipeline(): - # task is an instance of PipelineTask - task = identity(message='my string') - """ - _register_task_handler = _register_task_handler - - # Fallback behavior for compiling a component. This should be overriden by - # pipeline `register_task_and_generate_id` if compiling a pipeline (more - # than one component). 
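
Building on the PipelineTask docstring above, a small sketch of the task-level API that the rest of this class implements; the component, pipeline name, and pipeline_root bucket are illustrative::

    from kfp import dsl


    @dsl.component
    def identity(message: str) -> str:
        return message


    @dsl.pipeline(name='task-api-example', pipeline_root='gs://my-bucket/out')
    def my_pipeline(text: str = 'hello'):
        # Calling a component inside a pipeline returns a PipelineTask.
        first = identity(message=text)

        # Single-output tasks expose .output; explicit ordering uses .after().
        second = identity(message=first.output).after(first)
        second.set_display_name('second-copy')
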
- - def __init__( - self, - component_spec: structures.ComponentSpec, - args: Mapping[str, Any], - ): - """Initilizes a PipelineTask instance.""" - # import within __init__ to avoid circular import - from kfp.dsl.tasks_group import TasksGroup - - self.parent_task_group: Union[None, TasksGroup] = None - args = args or {} - - for input_name, argument_value in args.items(): - - if input_name not in component_spec.inputs: - raise ValueError( - f'Component {component_spec.name!r} got an unexpected input:' - f' {input_name!r}.') - - input_spec = component_spec.inputs[input_name] - - type_utils.verify_type_compatibility( - given_value=argument_value, - expected_spec=input_spec, - error_message_prefix=( - f'Incompatible argument passed to the input ' - f'{input_name!r} of component {component_spec.name!r}: '), - raise_on_error=kfp.TYPE_CHECK, - ) - - self.component_spec = component_spec - - self._task_spec = structures.TaskSpec( - name=self._register_task_handler(), - inputs=dict(args.items()), - dependent_tasks=[], - component_ref=component_spec.name, - enable_caching=True) - self._run_after: List[str] = [] - - self.importer_spec = None - self.container_spec = None - self.pipeline_spec = None - self._ignore_upstream_failure_tag = False - # platform_config for this primitive task; empty if task is for a graph component - self.platform_config = {} - - def validate_placeholder_types( - component_spec: structures.ComponentSpec) -> None: - inputs_dict = component_spec.inputs or {} - outputs_dict = component_spec.outputs or {} - for arg in itertools.chain( - (component_spec.implementation.container.command or []), - (component_spec.implementation.container.args or [])): - check_primitive_placeholder_is_used_for_correct_io_type( - inputs_dict, outputs_dict, arg) - - if component_spec.implementation.container is not None: - validate_placeholder_types(component_spec) - self.container_spec = self._extract_container_spec_and_convert_placeholders( - component_spec=component_spec) - elif component_spec.implementation.importer is not None: - self.importer_spec = component_spec.implementation.importer - self.importer_spec.artifact_uri = args['uri'] - else: - self.pipeline_spec = self.component_spec.implementation.graph - - self._outputs = { - output_name: pipeline_channel.create_pipeline_channel( - name=output_name, - channel_type=output_spec.type, - task_name=self._task_spec.name, - is_artifact_list=output_spec.is_artifact_list, - ) for output_name, output_spec in ( - component_spec.outputs or {}).items() - } - - self._inputs = args - - self._channel_inputs = [ - value for _, value in args.items() - if isinstance(value, pipeline_channel.PipelineChannel) - ] + pipeline_channel.extract_pipeline_channels_from_any([ - value for _, value in args.items() - if not isinstance(value, pipeline_channel.PipelineChannel) - ]) - - @property - def platform_spec(self) -> 'pipeline_spec_pb2.PlatformSpec': - """PlatformSpec for all tasks in the pipeline as task. - - Only for use on tasks created from GraphComponents. - """ - if self.pipeline_spec: - return self.component_spec.platform_spec - - # can only create primitive task platform spec at compile-time, since the executor label is not known until then - raise ValueError( - f'Can only access {".platform_spec"!r} property on a tasks created from pipelines. Use {".platform_config"!r} for tasks created from primitive components.' - ) - - @property - def name(self) -> str: - """The name of the task. - - Unique within its parent group. 
- """ - return self._task_spec.name - - @property - def inputs( - self - ) -> List[Union[type_utils.PARAMETER_TYPES, - pipeline_channel.PipelineChannel]]: - """The list of actual inputs passed to the task.""" - return self._inputs - - @property - def channel_inputs(self) -> List[pipeline_channel.PipelineChannel]: - """The list of all channel inputs passed to the task. - - :meta private: - """ - return self._channel_inputs - - @property - def output(self) -> pipeline_channel.PipelineChannel: - """The single output of the task. - - Used when a task has exactly one output parameter. - """ - if len(self._outputs) != 1: - raise AttributeError( - 'The task has multiple outputs. Please reference the output by its name.' - ) - return list(self._outputs.values())[0] - - @property - def outputs(self) -> Mapping[str, pipeline_channel.PipelineChannel]: - """The dictionary of outputs of the task. - - Used when a task has more the one output or uses an - ``OutputPath`` or ``Output[Artifact]`` type annotation. - """ - return self._outputs - - @property - def dependent_tasks(self) -> List[str]: - """A list of the dependent task names.""" - return self._task_spec.dependent_tasks - - def _extract_container_spec_and_convert_placeholders( - self, component_spec: structures.ComponentSpec - ) -> structures.ContainerSpecImplementation: - """Extracts a ContainerSpec from a ComponentSpec and converts - placeholder objects to strings. - - Args: - component_spec: The component definition. - """ - container_spec = copy.deepcopy(component_spec.implementation.container) - if container_spec is None: - raise ValueError( - '_extract_container_spec_and_convert_placeholders used incorrectly. ComponentSpec.implementation.container is None.' - ) - container_spec.command = [ - placeholders.convert_command_line_element_to_string(e) - for e in container_spec.command or [] - ] - container_spec.args = [ - placeholders.convert_command_line_element_to_string(e) - for e in container_spec.args or [] - ] - return container_spec - - def set_caching_options(self, enable_caching: bool) -> 'PipelineTask': - """Sets caching options for the task. - - Args: - enable_caching: Whether to enable caching. - - Returns: - Self return to allow chained setting calls. - """ - self._task_spec.enable_caching = enable_caching - return self - - def _ensure_container_spec_exists(self) -> None: - """Ensures that the task has a container spec.""" - caller_method_name = inspect.stack()[1][3] - - if self.container_spec is None: - raise ValueError( - f'{caller_method_name} can only be used on single-step components, not pipelines used as components, or special components like importers.' - ) - - def _validate_cpu_request_limit(self, cpu: str) -> float: - """Validates cpu request/limit string and converts to its numeric - value. - - Args: - cpu: CPU requests or limits. This string should be a number or a - number followed by an "m" to indicate millicores (1/1000). For - more information, see `Specify a CPU Request and a CPU Limit - `_. - - Raises: - ValueError if the cpu request/limit string value is invalid. - - Returns: - The numeric value (float) of the cpu request/limit. - """ - if re.match(r'([0-9]*[.])?[0-9]+m?$', cpu) is None: - raise ValueError( - 'Invalid cpu string. Should be float or integer, or integer' - ' followed by "m".') - - return float(cpu[:-1]) / 1000 if cpu.endswith('m') else float(cpu) - - def set_cpu_request(self, cpu: str) -> 'PipelineTask': - """Sets CPU request (minimum) for the task. - - Args: - cpu: Minimum CPU requests required. 
This string should be a number - or a number followed by an "m" to indicate millicores (1/1000). - For more information, see `Specify a CPU Request and a CPU Limit - `_. - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - cpu = self._validate_cpu_request_limit(cpu) - - if self.container_spec.resources is not None: - self.container_spec.resources.cpu_request = cpu - else: - self.container_spec.resources = structures.ResourceSpec( - cpu_request=cpu) - - return self - - def set_cpu_limit(self, cpu: str) -> 'PipelineTask': - """Sets CPU limit (maximum) for the task. - - Args: - cpu: Maximum CPU requests allowed. This string should be a number - or a number followed by an "m" to indicate millicores (1/1000). - For more information, see `Specify a CPU Request and a CPU Limit - `_. - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - cpu = self._validate_cpu_request_limit(cpu) - - if self.container_spec.resources is not None: - self.container_spec.resources.cpu_limit = cpu - else: - self.container_spec.resources = structures.ResourceSpec( - cpu_limit=cpu) - - return self - - def set_accelerator_limit(self, limit: int) -> 'PipelineTask': - """Sets accelerator limit (maximum) for the task. Only applies if - accelerator type is also set via .set_accelerator_type(). - - Args: - limit: Maximum number of accelerators allowed. - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - if isinstance(limit, str): - if re.match(r'[1-9]\d*$', limit) is None: - raise ValueError(f'{"limit"!r} must be positive integer.') - limit = int(limit) - - if self.container_spec.resources is not None: - self.container_spec.resources.accelerator_count = limit - else: - self.container_spec.resources = structures.ResourceSpec( - accelerator_count=limit) - - return self - - def set_gpu_limit(self, gpu: str) -> 'PipelineTask': - """Sets GPU limit (maximum) for the task. Only applies if accelerator - type is also set via .add_accelerator_type(). - - Args: - gpu: The maximum GPU reuqests allowed. This string should be a positive integer number of GPUs. - - Returns: - Self return to allow chained setting calls. - - :meta private: - """ - warnings.warn( - f'{self.set_gpu_limit.__name__!r} is deprecated. Please use {self.set_accelerator_limit.__name__!r} instead.', - category=DeprecationWarning) - return self.set_accelerator_limit(gpu) - - def _validate_memory_request_limit(self, memory: str) -> float: - """Validates memory request/limit string and converts to its numeric - value. - - Args: - memory: Memory requests or limits. This string should be a number or - a number followed by one of "E", "Ei", "P", "Pi", "T", "Ti", "G", - "Gi", "M", "Mi", "K", or "Ki". - - Raises: - ValueError if the memory request/limit string value is invalid. - - Returns: - The numeric value (float) of the memory request/limit. - """ - if re.match(r'^[0-9]+(E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki){0,1}$', - memory) is None: - raise ValueError( - 'Invalid memory string. 
Should be a number or a number ' - 'followed by one of "E", "Ei", "P", "Pi", "T", "Ti", "G", ' - '"Gi", "M", "Mi", "K", "Ki".') - - if memory.endswith('E'): - memory = float(memory[:-1]) * constants._E / constants._G - elif memory.endswith('Ei'): - memory = float(memory[:-2]) * constants._EI / constants._G - elif memory.endswith('P'): - memory = float(memory[:-1]) * constants._P / constants._G - elif memory.endswith('Pi'): - memory = float(memory[:-2]) * constants._PI / constants._G - elif memory.endswith('T'): - memory = float(memory[:-1]) * constants._T / constants._G - elif memory.endswith('Ti'): - memory = float(memory[:-2]) * constants._TI / constants._G - elif memory.endswith('G'): - memory = float(memory[:-1]) - elif memory.endswith('Gi'): - memory = float(memory[:-2]) * constants._GI / constants._G - elif memory.endswith('M'): - memory = float(memory[:-1]) * constants._M / constants._G - elif memory.endswith('Mi'): - memory = float(memory[:-2]) * constants._MI / constants._G - elif memory.endswith('K'): - memory = float(memory[:-1]) * constants._K / constants._G - elif memory.endswith('Ki'): - memory = float(memory[:-2]) * constants._KI / constants._G - else: - # By default interpret as a plain integer, in the unit of Bytes. - memory = float(memory) / constants._G - - return memory - - def set_memory_request(self, memory: str) -> 'PipelineTask': - """Sets memory request (minimum) for the task. - - Args: - memory: The minimum memory requests required. This string should be - a number or a number followed by one of "E", "Ei", "P", "Pi", - "T", "Ti", "G", "Gi", "M", "Mi", "K", or "Ki". - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - memory = self._validate_memory_request_limit(memory) - - if self.container_spec.resources is not None: - self.container_spec.resources.memory_request = memory - else: - self.container_spec.resources = structures.ResourceSpec( - memory_request=memory) - - return self - - def set_memory_limit(self, memory: str) -> 'PipelineTask': - """Sets memory limit (maximum) for the task. - - Args: - memory: The maximum memory requests allowed. This string should be - a number or a number followed by one of "E", "Ei", "P", "Pi", - "T", "Ti", "G", "Gi", "M", "Mi", "K", or "Ki". - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - memory = self._validate_memory_request_limit(memory) - - if self.container_spec.resources is not None: - self.container_spec.resources.memory_limit = memory - else: - self.container_spec.resources = structures.ResourceSpec( - memory_limit=memory) - - return self - - def set_retry(self, - num_retries: int, - backoff_duration: Optional[str] = None, - backoff_factor: Optional[float] = None, - backoff_max_duration: Optional[str] = None) -> 'PipelineTask': - """Sets task retry parameters. - - Args: - num_retries : Number of times to retry on failure. - backoff_duration: Number of seconds to wait before triggering a retry. Defaults to ``'0s'`` (immediate retry). - backoff_factor: Exponential backoff factor applied to ``backoff_duration``. For example, if ``backoff_duration="60"`` (60 seconds) and ``backoff_factor=2``, the first retry will happen after 60 seconds, then again after 120, 240, and so on. Defaults to ``2.0``. - backoff_max_duration: Maximum duration during which the task will be retried. Maximum duration is 1 hour (3600s). Defaults to ``'3600s'``. - - Returns: - Self return to allow chained setting calls. 
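
Taken together, the setters above are designed for chaining, since each returns the task. A brief sketch with illustrative resource values and a hypothetical heavy_step component::

    from kfp import dsl


    @dsl.component
    def heavy_step(n: int) -> int:
        return n * n


    @dsl.pipeline(name='resource-example')
    def resource_pipeline(n: int = 3):
        task = heavy_step(n=n)
        # Each setter returns the task, so the calls chain.
        (task
         .set_cpu_request('500m')      # half a CPU core
         .set_cpu_limit('2')           # at most two cores
         .set_memory_request('1G')
         .set_memory_limit('4G')
         .set_retry(num_retries=2, backoff_duration='30s', backoff_factor=2.0))
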
- """ - self._task_spec.retry_policy = structures.RetryPolicy( - max_retry_count=num_retries, - backoff_duration=backoff_duration, - backoff_factor=backoff_factor, - backoff_max_duration=backoff_max_duration, - ) - return self - - def add_node_selector_constraint(self, accelerator: str) -> 'PipelineTask': - """Sets accelerator type to use when executing this task. - - Args: - accelerator: The name of the accelerator, such as ``'NVIDIA_TESLA_K80'``, ``'TPU_V3'``, ``'nvidia.com/gpu'`` or ``'cloud-tpus.google.com/v3'``. - - Returns: - Self return to allow chained setting calls. - """ - warnings.warn( - f'{self.add_node_selector_constraint.__name__!r} is deprecated. Please use {self.set_accelerator_type.__name__!r} instead.', - category=DeprecationWarning) - return self.set_accelerator_type(accelerator) - - def set_accelerator_type(self, accelerator: str) -> 'PipelineTask': - """Sets accelerator type to use when executing this task. - - Args: - accelerator: The name of the accelerator, such as ``'NVIDIA_TESLA_K80'``, ``'TPU_V3'``, ``'nvidia.com/gpu'`` or ``'cloud-tpus.google.com/v3'``. - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - if self.container_spec.resources is not None: - self.container_spec.resources.accelerator_type = accelerator - if self.container_spec.resources.accelerator_count is None: - self.container_spec.resources.accelerator_count = 1 - else: - self.container_spec.resources = structures.ResourceSpec( - accelerator_count=1, accelerator_type=accelerator) - - return self - - def set_display_name(self, name: str) -> 'PipelineTask': - """Sets display name for the task. - - Args: - name: Display name. - - Returns: - Self return to allow chained setting calls. - """ - self._task_spec.display_name = name - return self - - def set_env_variable(self, name: str, value: str) -> 'PipelineTask': - """Sets environment variable for the task. - - Args: - name: Environment variable name. - value: Environment variable value. - - Returns: - Self return to allow chained setting calls. - """ - self._ensure_container_spec_exists() - - if self.container_spec.env is not None: - self.container_spec.env[name] = value - else: - self.container_spec.env = {name: value} - return self - - def after(self, *tasks) -> 'PipelineTask': - """Specifies an explicit dependency on other tasks by requiring this - task be executed after other tasks finish completion. - - Args: - *tasks: Tasks after which this task should be executed. - - Returns: - Self return to allow chained setting calls. - - Example: - :: - - @dsl.pipeline(name='my-pipeline') - def my_pipeline(): - task1 = my_component(text='1st task') - task2 = my_component(text='2nd task').after(task1) - """ - for task in tasks: - self._run_after.append(task.name) - self._task_spec.dependent_tasks.append(task.name) - return self - - def ignore_upstream_failure(self) -> 'PipelineTask': - """If called, the pipeline task will run when any specified upstream - tasks complete, even if unsuccessful. - - This method effectively turns the caller task into an exit task - if the caller task has upstream dependencies. - - If the task has no upstream tasks, either via data exchange or an explicit dependency via .after(), this method has no effect. - - Returns: - Self return to allow chained setting calls. 
- - Example: - :: - - @dsl.pipeline() - def my_pipeline(text: str = 'message'): - task = fail_op(message=text) - clean_up_task = print_op( - message=task.output).ignore_upstream_failure() - """ - - for input_spec_name, input_spec in (self.component_spec.inputs or - {}).items(): - argument_value = self._inputs[input_spec_name] - if (isinstance(argument_value, pipeline_channel.PipelineChannel) - ) and (not input_spec.optional) and (argument_value.task_name - is not None): - raise ValueError( - f'Tasks can only use .ignore_upstream_failure() if all input parameters that accept arguments created by an upstream task have a default value, in case the upstream task fails to produce its output. Input parameter task {self.name!r}`s {input_spec_name!r} argument is an output of an upstream task {argument_value.task_name!r}, but {input_spec_name!r} has no default value.' - ) - - self._ignore_upstream_failure_tag = True - - return self - - -# TODO: this function should ideally be in the function kfp.dsl.structures.check_placeholder_references_valid_io_name, which does something similar, but this causes the exception to be raised at component definition time, rather than compile time. This would break tests that load v1 component YAML, even though that YAML is invalid. -def check_primitive_placeholder_is_used_for_correct_io_type( - inputs_dict: Dict[str, structures.InputSpec], - outputs_dict: Dict[str, structures.OutputSpec], - arg: Union[placeholders.CommandLineElement, Any], -): - """Validates input/output placeholders refer to an input/output with an - appropriate type for the placeholder. This should only apply to components - loaded from v1 component YAML, where the YAML is authored directly. For v2 - YAML, this is encapsulated in the DSL logic which does not permit writing - incorrect placeholders. - - Args: - inputs_dict: The existing input names. - outputs_dict: The existing output names. - arg: The command line element, which may be a placeholder. 
- """ - - if isinstance(arg, placeholders.InputValuePlaceholder): - input_name = arg.input_name - if not type_utils.is_parameter_type(inputs_dict[input_name].type): - raise TypeError( - f'Input "{input_name}" with type ' - f'"{inputs_dict[input_name].type}" cannot be paired with ' - 'InputValuePlaceholder.') - - elif isinstance( - arg, - (placeholders.InputUriPlaceholder, placeholders.InputPathPlaceholder)): - input_name = arg.input_name - if type_utils.is_parameter_type(inputs_dict[input_name].type): - raise TypeError( - f'Input "{input_name}" with type ' - f'"{inputs_dict[input_name].type}" cannot be paired with ' - f'{arg.__class__.__name__}.') - - elif isinstance(arg, placeholders.OutputUriPlaceholder): - output_name = arg.output_name - if type_utils.is_parameter_type(outputs_dict[output_name].type): - raise TypeError( - f'Output "{output_name}" with type ' - f'"{outputs_dict[output_name].type}" cannot be paired with ' - f'{arg.__class__.__name__}.') - elif isinstance(arg, placeholders.IfPresentPlaceholder): - all_normalized_args: List[placeholders.CommandLineElement] = [] - if arg.then is None: - pass - elif isinstance(arg.then, list): - all_normalized_args.extend(arg.then) - else: - all_normalized_args.append(arg.then) - - if arg.else_ is None: - pass - elif isinstance(arg.else_, list): - all_normalized_args.extend(arg.else_) - else: - all_normalized_args.append(arg.else_) - - for arg in all_normalized_args: - check_primitive_placeholder_is_used_for_correct_io_type( - inputs_dict, outputs_dict, arg) - elif isinstance(arg, placeholders.ConcatPlaceholder): - for arg in arg.items: - check_primitive_placeholder_is_used_for_correct_io_type( - inputs_dict, outputs_dict, arg) diff --git a/sdk/python/kfp-dsl/kfp/dsl/placeholders.py b/sdk/python/kfp-dsl/kfp/dsl/placeholders.py deleted file mode 100644 index 39a2617cff..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/placeholders.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains data structures and functions for handling input and output -placeholders.""" - -import abc -import json -from typing import Any, Dict, List, Optional, Union - -from kfp.dsl import utils -from kfp.dsl.types import type_utils - - -class Placeholder(abc.ABC): - - @abc.abstractmethod - def _to_string(self) -> str: - raise NotImplementedError - - def __str__(self) -> str: - """Enables use of placeholders in f-strings. - - To be overridden by container placeholders ConcatPlaceholder and - IfPresentPlaceholder, which cannot be used in an f-string. 
- """ - return self._to_string() - - def __eq__(self, other: Any) -> bool: - """Used for comparing placeholders in tests.""" - return isinstance(other, - self.__class__) and self.__dict__ == other.__dict__ - - -class ExecutorInputPlaceholder(Placeholder): - - def _to_string(self) -> str: - return '{{$}}' - - -class InputValuePlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.input_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.inputs.parameters['{self.input_name}']}}}}" - - -class InputListOfArtifactsPlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.input_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.inputs.artifacts['{self.input_name}']}}}}" - - def __getattribute__(self, name: str) -> Any: - if name in {'name', 'uri', 'metadata', 'path'}: - raise AttributeError( - f'Cannot access an attribute on a list of artifacts in a Custom Container Component. Found reference to attribute {name!r} on {self.input_name!r}. Please pass the whole list of artifacts only.' - ) - else: - return object.__getattribute__(self, name) - - def __getitem__(self, k: int) -> None: - raise KeyError( - f'Cannot access individual artifacts in a list of artifacts. Found access to element {k} on {self.input_name!r}. Please pass the whole list of artifacts only.' - ) - - -class OutputListOfArtifactsPlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.output_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.outputs.artifacts['{self.output_name}']}}}}" - - def __getattribute__(self, name: str) -> Any: - if name in {'name', 'uri', 'metadata', 'path'}: - raise AttributeError( - f'Cannot access an attribute on a list of artifacts in a Custom Container Component. Found reference to attribute {name!r} on {self.output_name!r}. Please pass the whole list of artifacts only.' - ) - else: - return object.__getattribute__(self, name) - - def __getitem__(self, k: int) -> None: - raise KeyError( - f'Cannot access individual artifacts in a list of artifacts. Found access to element {k} on {self.output_name!r}. Please pass the whole list of artifacts only.' 
- ) - - -class InputPathPlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.input_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.inputs.artifacts['{self.input_name}'].path}}}}" - - -class InputUriPlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.input_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.inputs.artifacts['{self.input_name}'].uri}}}}" - - -class InputMetadataPlaceholder(Placeholder): - - def __init__(self, input_name: str) -> None: - self.input_name = input_name - - def _to_string(self) -> str: - return f"{{{{$.inputs.artifacts['{self.input_name}'].metadata}}}}" - - def __getitem__(self, key: str) -> str: - return f"{{{{$.inputs.artifacts['{self.input_name}'].metadata['{key}']}}}}" - - -class OutputParameterPlaceholder(Placeholder): - - def __init__(self, output_name: str) -> None: - self.output_name = output_name - - def _to_string(self) -> str: - return f"{{{{$.outputs.parameters['{self.output_name}'].output_file}}}}" - - -class OutputPathPlaceholder(Placeholder): - - def __init__(self, output_name: str) -> None: - self.output_name = output_name - - def _to_string(self) -> str: - return f"{{{{$.outputs.artifacts['{self.output_name}'].path}}}}" - - -class OutputUriPlaceholder(Placeholder): - - def __init__(self, output_name: str) -> None: - self.output_name = output_name - - def _to_string(self) -> str: - return f"{{{{$.outputs.artifacts['{self.output_name}'].uri}}}}" - - -class OutputMetadataPlaceholder(Placeholder): - - def __init__(self, output_name: str) -> None: - self.output_name = output_name - - def _to_string(self) -> str: - return f"{{{{$.outputs.artifacts['{self.output_name}'].metadata}}}}" - - def __getitem__(self, key: str) -> str: - return f"{{{{$.outputs.artifacts['{self.output_name}'].metadata['{key}']}}}}" - - -class ConcatPlaceholder(Placeholder): - """Placeholder for concatenating multiple strings. May contain other - placeholders. - - Args: - items: Elements to concatenate. - - Examples: - :: - - @container_component - def container_with_concat_placeholder(text1: str, text2: Output[Dataset], - output_path: OutputPath(str)): - return ContainerSpec( - image='python:3.7', - command=[ - 'my_program', - ConcatPlaceholder(['prefix-', text1, text2.uri]) - ], - args=['--output_path', output_path] - ) - """ - - def __init__(self, items: List['CommandLineElement']) -> None: - for item in items: - if isinstance(item, IfPresentPlaceholder): - item._validate_then_and_else_are_only_single_element() - self.items = items - - def _to_dict(self) -> Dict[str, Any]: - return { - 'Concat': [ - convert_command_line_element_to_string_or_struct(item) - for item in self.items - ] - } - - def _to_string(self) -> str: - return json.dumps(self._to_dict()) - - def __str__(self) -> str: - raise ValueError( - f'Cannot use {self.__class__.__name__} in an f-string.') - - -class IfPresentPlaceholder(Placeholder): - """Placeholder for handling cases where an input may or may not be passed. - May contain other placeholders. - - Args: - input_name: Name of the input/output. - then: If the input/output specified in name is present, the command-line argument will be replaced at run-time by the value of then. - else_: If the input/output specified in name is not present, the command-line argument will be replaced at run-time by the value of else_. 
- - Examples: - :: - - @container_component - def container_with_if_placeholder(output_path: OutputPath(str), - dataset: Output[Dataset], - optional_input: str = 'default'): - return ContainerSpec( - image='python:3.7', - command=[ - 'my_program', - IfPresentPlaceholder( - input_name='optional_input', - then=[optional_input], - else_=['no_input']), '--dataset', - IfPresentPlaceholder( - input_name='optional_input', then=[dataset.uri], else_=['no_dataset']) - ], - args=['--output_path', output_path] - ) - """ - - def __init__( - self, - input_name: str, - then: Union['CommandLineElement', List['CommandLineElement']], - else_: Optional[Union['CommandLineElement', - List['CommandLineElement']]] = None, - ) -> None: - self.input_name = input_name - self.then = then - self.else_ = else_ - - def _validate_then_and_else_are_only_single_element(self) -> None: - """Rercursively validate that then and else contain only a single - element. - - This method should only be called by a ConcatPlaceholder, which - cannot have an IfPresentPlaceholder with a list in either 'then' - or 'else_'. - """ - - # the illegal state - if isinstance(self.then, list) or isinstance(self.else_, list): - raise ValueError( - f'Cannot use {IfPresentPlaceholder.__name__} within {ConcatPlaceholder.__name__} when `then` and `else_` arguments to {IfPresentPlaceholder.__name__} are lists. Please use a single element for `then` and `else_` only.' - ) - - # check that there is no illegal state found recursively - if isinstance(self.then, ConcatPlaceholder): - for item in self.then.items: - if isinstance(item, IfPresentPlaceholder): - item._validate_then_and_else_are_only_single_element() - elif isinstance(self.then, IfPresentPlaceholder): - self.then._validate_then_and_else_are_only_single_element() - - if isinstance(self.else_, ConcatPlaceholder): - for item in self.else_.items: - if isinstance(item, IfPresentPlaceholder): - item._validate_then_and_else_are_only_single_element() - elif isinstance(self.else_, IfPresentPlaceholder): - self.else_._validate_then_and_else_are_only_single_element() - - def _to_dict(self) -> Dict[str, Any]: - struct = { - 'IfPresent': { - 'InputName': - self.input_name, - 'Then': [ - convert_command_line_element_to_string_or_struct(e) - for e in self.then - ] if isinstance(self.then, list) else - convert_command_line_element_to_string_or_struct( - self.then) - } - } - if self.else_: - struct['IfPresent']['Else'] = [ - convert_command_line_element_to_string_or_struct(e) - for e in self.else_ - ] if isinstance( - self.else_, - list) else convert_command_line_element_to_string_or_struct( - self.else_) - return struct - - def _to_string(self) -> str: - return json.dumps(self._to_dict()) - - def __str__(self) -> str: - raise ValueError( - f'Cannot use {self.__class__.__name__} in an f-string.') - - -_CONTAINER_PLACEHOLDERS = (IfPresentPlaceholder, ConcatPlaceholder) -PRIMITIVE_INPUT_PLACEHOLDERS = (InputValuePlaceholder, InputPathPlaceholder, - InputUriPlaceholder, InputMetadataPlaceholder, - InputListOfArtifactsPlaceholder) -PRIMITIVE_OUTPUT_PLACEHOLDERS = (OutputParameterPlaceholder, - OutputPathPlaceholder, OutputUriPlaceholder, - OutputMetadataPlaceholder, - OutputListOfArtifactsPlaceholder) - -CommandLineElement = Union[str, Placeholder] - - -def convert_command_line_element_to_string( - element: Union[str, Placeholder]) -> str: - return element._to_string() if isinstance(element, Placeholder) else element - - -def convert_command_line_element_to_string_or_struct( - element: Union[Placeholder, Any]) -> 
Any: - if isinstance(element, Placeholder): - return element._to_dict() if isinstance( - element, _CONTAINER_PLACEHOLDERS) else element._to_string() - - return element - - -def maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - arg: Dict[str, Any], - component_dict: Dict[str, Any]) -> Union[CommandLineElement, Any]: - if isinstance(arg, str): - return arg - - if not isinstance(arg, dict): - raise ValueError - - has_one_entry = len(arg) == 1 - - if not has_one_entry: - raise ValueError( - f'Got unexpected dictionary {arg}. Expected a dictionary with one entry.' - ) - - first_key = list(arg.keys())[0] - first_value = list(arg.values())[0] - if first_key == 'inputValue': - return InputValuePlaceholder( - input_name=utils.sanitize_input_name(first_value)) - - elif first_key == 'inputPath': - return InputPathPlaceholder( - input_name=utils.sanitize_input_name(first_value)) - - elif first_key == 'inputUri': - return InputUriPlaceholder( - input_name=utils.sanitize_input_name(first_value)) - - elif first_key == 'outputPath': - outputs = component_dict['outputs'] - for output in outputs: - if output['name'] == first_value: - type_ = output.get('type') - is_parameter = type_utils.is_parameter_type(type_) - if is_parameter: - return OutputParameterPlaceholder( - output_name=utils.sanitize_input_name(first_value)) - else: - return OutputPathPlaceholder( - output_name=utils.sanitize_input_name(first_value)) - raise ValueError( - f'{first_value} not found in component outputs. Could not process placeholders. Component spec: {component_dict}.' - ) - - elif first_key == 'outputUri': - return OutputUriPlaceholder( - output_name=utils.sanitize_input_name(first_value)) - - elif first_key == 'ifPresent': - structure_kwargs = arg['ifPresent'] - structure_kwargs['input_name'] = structure_kwargs.pop('inputName') - structure_kwargs['otherwise'] = structure_kwargs.pop('else') - structure_kwargs['then'] = [ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - e, component_dict=component_dict) - for e in structure_kwargs['then'] - ] - structure_kwargs['otherwise'] = [ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - e, component_dict=component_dict) - for e in structure_kwargs['otherwise'] - ] - return IfPresentPlaceholder(**structure_kwargs) - - elif first_key == 'concat': - return ConcatPlaceholder(items=[ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - e, component_dict=component_dict) for e in arg['concat'] - ]) - - elif first_key == 'executorInput': - return ExecutorInputPlaceholder() - - elif 'if' in arg: - if_ = arg['if'] - input_name = utils.sanitize_input_name(if_['cond']['isPresent']) - then = if_['then'] - else_ = if_.get('else') - - if isinstance(then, list): - then = [ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - val, component_dict=component_dict) for val in then - ] - else: - then = maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - then, component_dict=component_dict) - - if else_ is None: - pass - elif isinstance(else_, list): - else_ = [ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - val, component_dict=component_dict) for val in else_ - ] - else: - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - else_, component_dict=component_dict) - - return IfPresentPlaceholder( - input_name=input_name, then=then, else_=else_) - - elif 'concat' in arg: - - return ConcatPlaceholder(items=[ - maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - val, component_dict=component_dict) for val in arg['concat'] - ]) - else: - raise TypeError(f'Unexpected 
argument {arg} of type {type(arg)}.') diff --git a/sdk/python/kfp-dsl/kfp/dsl/python_component.py b/sdk/python/kfp-dsl/kfp/dsl/python_component.py deleted file mode 100644 index faa4c44740..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/python_component.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Python function-based component.""" - -from typing import Callable - -from kfp.dsl import base_component -from kfp.dsl import structures - - -class PythonComponent(base_component.BaseComponent): - """A component defined via Python function. - - **Note:** ``PythonComponent`` is not intended to be used to construct components directly. Use ``@kfp.dsl.component`` instead. - - Args: - component_spec: Component definition. - python_func: Python function that becomes the implementation of this component. - """ - - def __init__( - self, - component_spec: structures.ComponentSpec, - python_func: Callable, - ): - super().__init__(component_spec=component_spec) - self.python_func = python_func - - self._prevent_using_output_lists_of_artifacts() - - def execute(self, **kwargs): - """Executes the Python function that defines the component.""" - return self.python_func(**kwargs) diff --git a/sdk/python/kfp-dsl/kfp/dsl/structures.py b/sdk/python/kfp-dsl/kfp/dsl/structures.py deleted file mode 100644 index f3a379ea39..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/structures.py +++ /dev/null @@ -1,1049 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Definitions for component spec.""" - -import ast -import collections -import dataclasses -import itertools -import re -from typing import Any, Dict, List, Mapping, Optional, Union -import uuid - -from kfp import dsl -from kfp.dsl import placeholders -from kfp.dsl import utils -from kfp.dsl import v1_structures -from kfp.dsl.container_component_artifact_channel import \ - ContainerComponentArtifactChannel -from kfp.dsl.types import artifact_types -from kfp.dsl.types import type_annotations -from kfp.dsl.types import type_utils - - -@dataclasses.dataclass -class InputSpec: - """Component input definitions. - - Attributes: - type: The type of the input. - default (optional): the default value for the input. - optional: Wether the input is optional. An input is optional when it has an explicit default value. - is_artifact_list: True if `type` represents a list of the artifact type. Only applies when `type` is an artifact. 
- description: Input description. - """ - type: Union[str, dict] - default: Optional[Any] = None - optional: bool = False - # This special flag for lists of artifacts allows type to be used the same way for list of artifacts and single artifacts. This is aligned with how IR represents lists of artifacts (same as for single artifacts), as well as simplifies downstream type handling/checking operations in the SDK since we don't need to parse the string `type` to determine if single artifact or list. - is_artifact_list: bool = False - description: Optional[str] = None - - def __post_init__(self) -> None: - self._validate_type() - self._validate_usage_of_optional() - - @classmethod - def from_ir_component_inputs_dict( - cls, ir_component_inputs_dict: Dict[str, Any]) -> 'InputSpec': - """Creates an InputSpec from a ComponentInputsSpec message in dict - format (pipeline_spec.components..inputDefinitions.parameters.). - - Args: - ir_component_inputs_dict (Dict[str, Any]): The ComponentInputsSpec - message in dict format. - - Returns: - InputSpec: The InputSpec object. - """ - if 'parameterType' in ir_component_inputs_dict: - type_string = ir_component_inputs_dict['parameterType'] - type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string) - if type_ is None: - raise ValueError(f'Unknown type {type_string} found in IR.') - default_value = ir_component_inputs_dict.get('defaultValue') - # fallback to checking if the parameter has a default value, - # since some IR compiled with kfp<=2.0.0b8 will have defaults - # without isOptional=True - optional = ir_component_inputs_dict.get( - 'isOptional', 'defaultValue' in ir_component_inputs_dict) - return InputSpec( - type=type_, default=default_value, optional=optional) - - else: - type_ = ir_component_inputs_dict['artifactType']['schemaTitle'] - schema_version = ir_component_inputs_dict['artifactType'][ - 'schemaVersion'] - # TODO: would be better to extract these fields from the proto - # message, as False default would be preserved - optional = ir_component_inputs_dict.get('isOptional', False) - is_artifact_list = ir_component_inputs_dict.get( - 'isArtifactList', False) - return InputSpec( - type=type_utils.create_bundled_artifact_type( - type_, schema_version), - optional=optional, - is_artifact_list=is_artifact_list) - - def __eq__(self, other: Any) -> bool: - """Equality comparison for InputSpec. Robust to different type - representations, such that it respects the maximum amount of - information possible to encode in IR. That is, because - `typing.List[str]` can only be represented a `List` in IR, - 'typing.List' == 'List' in this comparison. - - Args: - other (Any): The object to compare to InputSpec. - - Returns: - bool: True if the objects are equal, False otherwise. - """ - if isinstance(other, InputSpec): - return type_utils.get_canonical_name_for_outer_generic( - self.type) == type_utils.get_canonical_name_for_outer_generic( - other.type) and self.default == other.default - else: - return False - - def _validate_type(self) -> None: - """Type should either be a parameter or a valid bundled artifact type - by the time it gets to InputSpec. - - This allows us to perform fewer checks downstream. 
- """ - # TODO: add transformation logic so that we don't have to transform inputs at every place they are used, including v1 back compat support - if not spec_type_is_parameter(self.type): - type_utils.validate_bundled_artifact_type(self.type) - - def _validate_usage_of_optional(self) -> None: - """Validates that the optional and default properties are in consistent - states.""" - # Because None can be the default value, None cannot be used to to indicate no default. This is why we need the optional field. This check prevents users of InputSpec from setting these two values to an inconsistent state, forcing users of InputSpec to be explicit about optionality. - if self.optional is False and self.default is not None: - raise ValueError( - f'`optional` argument to {self.__class__.__name__} must be True if `default` is not None.' - ) - - -@dataclasses.dataclass -class OutputSpec: - """Component output definitions. - - Attributes: - type: The type of the output. - is_artifact_list: True if `type` represents a list of the artifact type. Only applies when `type` is an artifact. - description: Output description. - """ - type: Union[str, dict] - # This special flag for lists of artifacts allows type to be used the same way for list of artifacts and single artifacts. This is aligned with how IR represents lists of artifacts (same as for single artifacts), as well as simplifies downstream type handling/checking operations in the SDK since we don't need to parse the string `type` to determine if single artifact or list. - is_artifact_list: bool = False - description: Optional[str] = None - - def __post_init__(self) -> None: - self._validate_type() - - @classmethod - def from_ir_component_outputs_dict( - cls, ir_component_outputs_dict: Dict[str, Any]) -> 'OutputSpec': - """Creates an OutputSpec from a ComponentOutputsSpec message in dict - format (pipeline_spec.components..outputDefinitions.parameters|artifacts.). - - Args: - ir_component_outputs_dict (Dict[str, Any]): The ComponentOutputsSpec - in dict format. - - Returns: - OutputSpec: The OutputSpec object. - """ - if 'parameterType' in ir_component_outputs_dict: - type_string = ir_component_outputs_dict['parameterType'] - type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string) - if type_ is None: - raise ValueError(f'Unknown type {type_string} found in IR.') - return OutputSpec(type=type_,) - else: - type_ = ir_component_outputs_dict['artifactType']['schemaTitle'] - schema_version = ir_component_outputs_dict['artifactType'][ - 'schemaVersion'] - is_artifact_list = ir_component_outputs_dict.get( - 'isArtifactList', False) - return OutputSpec( - type=type_utils.create_bundled_artifact_type( - type_, schema_version), - is_artifact_list=is_artifact_list) - - def __eq__(self, other: Any) -> bool: - """Equality comparison for OutputSpec. Robust to different type - representations, such that it respects the maximum amount of - information possible to encode in IR. That is, because - `typing.List[str]` can only be represented a `List` in IR, - 'typing.List' == 'List' in this comparison. - - Args: - other (Any): The object to compare to OutputSpec. - - Returns: - bool: True if the objects are equal, False otherwise. 
- """ - if isinstance(other, OutputSpec): - return type_utils.get_canonical_name_for_outer_generic( - self.type) == type_utils.get_canonical_name_for_outer_generic( - other.type) - else: - return False - - def _validate_type(self): - """Type should either be a parameter or a valid bundled artifact type - by the time it gets to OutputSpec. - - This allows us to perform fewer checks downstream. - """ - # TODO: add transformation logic so that we don't have to transform outputs at every place they are used, including v1 back compat support - if not spec_type_is_parameter(self.type): - type_utils.validate_bundled_artifact_type(self.type) - - -def spec_type_is_parameter(type_: str) -> bool: - in_memory_type = type_annotations.maybe_strip_optional_from_annotation_string( - type_utils.get_canonical_name_for_outer_generic(type_)) - - return in_memory_type in type_utils.IN_MEMORY_SPEC_TYPE_TO_IR_TYPE or in_memory_type == 'PipelineTaskFinalStatus' - - -@dataclasses.dataclass -class ResourceSpec: - """The resource requirements of a container execution. - - Attributes: - cpu_request (optional): the requirement of the number of vCPU cores. - cpu_limit (optional): the limit of the number of vCPU cores. - memory_request (optional): the memory requirement in GB. - memory_limit (optional): the memory limit in GB. - accelerator_type (optional): the type of accelerators attached to the - container. - accelerator_count (optional): the number of accelerators attached. - """ - cpu_request: Optional[float] = None - cpu_limit: Optional[float] = None - memory_request: Optional[float] = None - memory_limit: Optional[float] = None - accelerator_type: Optional[str] = None - accelerator_count: Optional[int] = None - - -@dataclasses.dataclass -class ContainerSpec: - """Container definition. - - This is only used for pipeline authors when constructing a containerized component - using @container_component decorator. 
- - Examples: - :: - - @container_component - def container_with_artifact_output( - num_epochs: int, # built-in types are parsed as inputs - model: Output[Model], - model_config_path: OutputPath(str), - ): - return ContainerSpec( - image='gcr.io/my-image', - command=['sh', 'run.sh'], - args=[ - '--epochs', - num_epochs, - '--model_path', - model.uri, - '--model_config_path', - model_config_path, - ]) - """ - image: str - """Container image.""" - - command: Optional[List[placeholders.CommandLineElement]] = None - """Container entrypoint.""" - - args: Optional[List[placeholders.CommandLineElement]] = None - """Arguments to the container entrypoint.""" - - -@dataclasses.dataclass -class ContainerSpecImplementation: - """Container implementation definition.""" - image: str - """Container image.""" - - command: Optional[List[placeholders.CommandLineElement]] = None - """Container entrypoint.""" - - args: Optional[List[placeholders.CommandLineElement]] = None - """Arguments to the container entrypoint.""" - - env: Optional[Mapping[str, placeholders.CommandLineElement]] = None - """Environment variables to be passed to the container.""" - - resources: Optional[ResourceSpec] = None - """Specification on the resource requirements.""" - - def __post_init__(self) -> None: - self._transform_command() - self._transform_args() - self._transform_env() - - def _transform_command(self) -> None: - """Use None instead of empty list for command.""" - self.command = None if self.command == [] else self.command - - def _transform_args(self) -> None: - """Use None instead of empty list for args.""" - self.args = None if self.args == [] else self.args - - def _transform_env(self) -> None: - """Use None instead of empty dict for env.""" - self.env = None if self.env == {} else self.env - - @classmethod - def from_container_spec( - cls, - container_spec: ContainerSpec) -> 'ContainerSpecImplementation': - return ContainerSpecImplementation( - image=container_spec.image, - command=container_spec.command, - args=container_spec.args, - env=None, - resources=None) - - @classmethod - def from_container_dict( - cls, container_dict: Dict[str, - Any]) -> 'ContainerSpecImplementation': - """Creates a ContainerSpecImplementation from a PipelineContainerSpec - message in dict format - (pipeline_spec.deploymentSpec.executors..container). - - Args: - container_dict (Dict[str, Any]): PipelineContainerSpec message in dict format. - - Returns: - ContainerSpecImplementation: The ContainerSpecImplementation instance. - """ - - return ContainerSpecImplementation( - image=container_dict['image'], - command=container_dict.get('command'), - args=container_dict.get('args'), - env=container_dict.get('env'), - resources=None) # can only be set on tasks - - -@dataclasses.dataclass -class RetryPolicy: - """The retry policy of a container execution. - - Attributes: - num_retries (int): Number of times to retry on failure. - backoff_duration (int): The the number of seconds to wait before triggering a retry. - backoff_factor (float): The exponential backoff factor applied to backoff_duration. For example, if backoff_duration="60" (60 seconds) and backoff_factor=2, the first retry will happen after 60 seconds, then after 120, 240, and so on. - backoff_max_duration (int): The maximum duration during which the task will be retried. 
- """ - max_retry_count: Optional[int] = None - backoff_duration: Optional[str] = None - backoff_factor: Optional[float] = None - backoff_max_duration: Optional[str] = None - - def to_proto(self) -> 'pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy': - # include defaults so that IR is more reflective of runtime behavior - max_retry_count = self.max_retry_count or 0 - backoff_duration = self.backoff_duration or '0s' - backoff_factor = self.backoff_factor or 2.0 - backoff_max_duration = self.backoff_max_duration or '3600s' - - # include max duration seconds cap so that IR is more reflective of runtime behavior - backoff_duration_seconds = f'{convert_duration_to_seconds(backoff_duration)}s' - backoff_max_duration_seconds = f'{min(convert_duration_to_seconds(backoff_max_duration), 3600)}s' - - try: - from google.protobuf import json_format - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - return json_format.ParseDict( - { - 'max_retry_count': max_retry_count, - 'backoff_duration': backoff_duration_seconds, - 'backoff_factor': backoff_factor, - 'backoff_max_duration': backoff_max_duration_seconds, - }, pipeline_spec_pb2.PipelineTaskSpec.RetryPolicy()) - - -@dataclasses.dataclass -class TaskSpec: - """The spec of a pipeline task. - - Attributes: - name: The name of the task. - inputs: The sources of task inputs. Constant values or PipelineParams. - dependent_tasks: The list of upstream tasks. - component_ref: The name of a component spec this task is based on. - trigger_condition (optional): an expression which will be evaluated into - a boolean value. True to trigger the task to run. - trigger_strategy (optional): when the task will be ready to be triggered. - Valid values include: "TRIGGER_STRATEGY_UNSPECIFIED", - "ALL_UPSTREAM_TASKS_SUCCEEDED", and "ALL_UPSTREAM_TASKS_COMPLETED". - iterator_items (optional): the items to iterate on. A constant value or - a PipelineParam. - iterator_item_input (optional): the name of the input which has the item - from the [items][] collection. - enable_caching (optional): whether or not to enable caching for the task. - Default is True. - display_name (optional): the display name of the task. If not specified, - the task name will be used as the display name. - """ - name: str - inputs: Mapping[str, Any] - dependent_tasks: List[str] - component_ref: str - trigger_condition: Optional[str] = None - trigger_strategy: Optional[str] = None - iterator_items: Optional[Any] = None - iterator_item_input: Optional[str] = None - enable_caching: bool = True - display_name: Optional[str] = None - retry_policy: Optional[RetryPolicy] = None - - -@dataclasses.dataclass -class ImporterSpec: - """ImporterSpec definition. - - Attributes: - artifact_uri: The URI of the artifact. - schema_title: The schema_title of the artifact. - schema_version: The schema_version of the artifact. - reimport: Whether or not import an artifact regardless it has been - imported before. - metadata (optional): the properties of the artifact. - """ - artifact_uri: str - schema_title: str - schema_version: str - reimport: bool - metadata: Optional[Mapping[str, Any]] = None - - -@dataclasses.dataclass -class Implementation: - """Implementation definition. - - Attributes: - container: container implementation details. - graph: graph implementation details. - importer: importer implementation details. 
- """ - container: Optional[ContainerSpecImplementation] = None - importer: Optional[ImporterSpec] = None - # Use type forward reference to skip the type validation in BaseModel. - graph: Optional['pipeline_spec_pb2.PipelineSpec'] = None - - @classmethod - def from_pipeline_spec_dict(cls, pipeline_spec_dict: Dict[str, Any], - component_name: str) -> 'Implementation': - """Creates an Implementation object from a PipelineSpec message in dict - format. - - Args: - pipeline_spec_dict (Dict[str, Any]): PipelineSpec message in dict format. - component_name (str): The name of the component. - - Returns: - Implementation: An implementation object. - """ - executor_key = utils.sanitize_executor_label(component_name) - executor = pipeline_spec_dict['deploymentSpec']['executors'].get( - executor_key) - if executor is not None: - container_spec = ContainerSpecImplementation.from_container_dict( - executor['container']) if executor else None - return Implementation(container=container_spec) - else: - - try: - from google.protobuf import json_format - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - pipeline_spec = json_format.ParseDict( - pipeline_spec_dict, pipeline_spec_pb2.PipelineSpec()) - return Implementation(graph=pipeline_spec) - - -def check_placeholder_references_valid_io_name( - inputs_dict: Dict[str, InputSpec], - outputs_dict: Dict[str, OutputSpec], - arg: placeholders.CommandLineElement, -) -> None: - """Validates input/output placeholders refer to an existing input/output. - - Args: - valid_inputs: The existing input names. - valid_outputs: The existing output names. - arg: The placeholder argument for checking. - - Raises: - ValueError: if any placeholder references a nonexistant input or - output. - TypeError: if any argument is neither a str nor a placeholder - instance. - """ - if isinstance(arg, ContainerComponentArtifactChannel): - raise ValueError( - 'Cannot access artifact by itself in the container definition. Please use .uri or .path instead to access the artifact.' - ) - elif isinstance(arg, placeholders.PRIMITIVE_INPUT_PLACEHOLDERS): - if arg.input_name not in inputs_dict: - raise ValueError( - f'Argument "{arg.__class__.__name__}" references nonexistant input: "{arg.input_name}".' - ) - elif isinstance(arg, placeholders.PRIMITIVE_OUTPUT_PLACEHOLDERS): - if arg.output_name not in outputs_dict: - raise ValueError( - f'Argument "{arg.__class__.__name__}" references nonexistant output: "{arg.output_name}".' - ) - elif isinstance(arg, placeholders.IfPresentPlaceholder): - if arg.input_name not in inputs_dict: - raise ValueError( - f'Argument "{arg.__class__.__name__}" references nonexistant input: "{arg.input_name}".' 
- ) - - all_normalized_args: List[placeholders.CommandLineElement] = [] - if arg.then is None: - pass - elif isinstance(arg.then, list): - all_normalized_args.extend(arg.then) - else: - all_normalized_args.append(arg.then) - - if arg.else_ is None: - pass - elif isinstance(arg.else_, list): - all_normalized_args.extend(arg.else_) - else: - all_normalized_args.append(arg.else_) - - for arg in all_normalized_args: - check_placeholder_references_valid_io_name(inputs_dict, - outputs_dict, arg) - elif isinstance(arg, placeholders.ConcatPlaceholder): - for arg in arg.items: - check_placeholder_references_valid_io_name(inputs_dict, - outputs_dict, arg) - elif not isinstance( - arg, placeholders.ExecutorInputPlaceholder) and not isinstance( - arg, str): - raise TypeError(f'Unexpected argument "{arg}" of type {type(arg)}.') - - -def _import_and_make_platform_spec() -> 'pipeline_spec_pb2.PlatformSpec': - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - return pipeline_spec_pb2.PlatformSpec() - - -@dataclasses.dataclass -class ComponentSpec: - """The definition of a component. - - Attributes: - name: The name of the component. - description (optional): the description of the component. - inputs (optional): the input definitions of the component. - outputs (optional): the output definitions of the component. - implementation: The implementation of the component. Either an executor - (container, importer) or a DAG consists of other components. - """ - name: str - implementation: Implementation - description: Optional[str] = None - inputs: Optional[Dict[str, InputSpec]] = None - outputs: Optional[Dict[str, OutputSpec]] = None - platform_spec: Optional[ - 'pipeline_spec_pb2.PlatformSpec'] = dataclasses.field( - default_factory=_import_and_make_platform_spec) - - def __post_init__(self) -> None: - self._transform_name() - self._transform_inputs() - self._transform_outputs() - self._validate_placeholders() - - def _transform_name(self) -> None: - """Converts the name to a valid name.""" - self.name = utils.maybe_rename_for_k8s(self.name) - - def _transform_inputs(self) -> None: - """Use None instead of empty list for inputs.""" - self.inputs = None if self.inputs == {} else self.inputs - - def _transform_outputs(self) -> None: - """Use None instead of empty list for outputs.""" - self.outputs = None if self.outputs == {} else self.outputs - - def _validate_placeholders(self): - """Validates that input/output placeholders refer to an existing - input/output.""" - if self.implementation.container is None: - return - - valid_inputs = {} if self.inputs is None else self.inputs - valid_outputs = {} if self.outputs is None else self.outputs - for arg in itertools.chain( - (self.implementation.container.command or []), - (self.implementation.container.args or [])): - check_placeholder_references_valid_io_name(valid_inputs, - valid_outputs, arg) - - @classmethod - def from_v1_component_spec( - cls, - v1_component_spec: v1_structures.ComponentSpec) -> 'ComponentSpec': - """Converts V1 ComponentSpec to V2 ComponentSpec. - - Args: - v1_component_spec: The V1 ComponentSpec. - - Returns: - Component spec in the form of V2 ComponentSpec. - - Raises: - ValueError: If implementation is not found. - TypeError: If any argument is neither a str nor Dict. 
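# A minimal sketch of the placeholder validation wired through
# ComponentSpec._validate_placeholders above: a command that references an
# undeclared input fails at construction time. Assumes the kfp SDK (including
# kfp-pipeline-spec) is installed.
from kfp.dsl import placeholders
from kfp.dsl import structures

try:
    structures.ComponentSpec(
        name='echo',
        implementation=structures.Implementation(
            container=structures.ContainerSpecImplementation(
                image='alpine',
                command=[
                    'echo',
                    placeholders.InputValuePlaceholder('missing'),
                ])))
except ValueError as err:
    print(err)  # the placeholder references an input that does not exist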
- """ - component_dict = v1_component_spec.to_dict() - if component_dict.get('implementation') is None: - raise ValueError('Implementation field not found') - - if 'implementation' not in component_dict or 'container' not in component_dict[ - 'implementation']: - raise NotImplementedError('Container implementation not found.') - - container = component_dict['implementation']['container'] - command = [ - placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - command, component_dict=component_dict) - for command in container.get('command', []) - ] - args = [ - placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - command, component_dict=component_dict) - for command in container.get('args', []) - ] - env = { - key: - placeholders.maybe_convert_v1_yaml_placeholder_to_v2_placeholder( - command, component_dict=component_dict) - for key, command in container.get('env', {}).items() - } - container_spec = ContainerSpecImplementation.from_container_dict({ - 'image': container['image'], - 'command': command, - 'args': args, - 'env': env - }) - - inputs = {} - - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - for spec in component_dict.get('inputs', []): - type_ = spec.get('type') - optional = spec.get('optional', False) or 'default' in spec - default = spec.get('default') - default = type_utils.deserialize_v1_component_yaml_default( - type_=type_, default=default) - - if isinstance(type_, str): - type_ = type_utils.get_canonical_name_for_outer_generic(type_) - - if isinstance(type_, str) and type_ == 'PipelineTaskFinalStatus': - inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( - type=type_, optional=True) - continue - - elif isinstance(type_, str) and type_.lower( - ) in type_utils.PARAMETER_TYPES_MAPPING: - type_enum = type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] - ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( - type_enum) - in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ - ir_parameter_type_name] - inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( - type=in_memory_parameter_type_name, - default=default, - optional=optional, - ) - continue - - elif isinstance(type_, str) and re.match( - type_utils._GOOGLE_TYPES_PATTERN, type_): - schema_title = type_ - schema_version = type_utils._GOOGLE_TYPES_VERSION - - elif isinstance(type_, str) and type_.lower( - ) in type_utils._ARTIFACT_CLASSES_MAPPING: - artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[ - type_.lower()] - schema_title = artifact_class.schema_title - schema_version = artifact_class.schema_version - - elif type_ is None or isinstance(type_, dict) or type_.lower( - ) not in type_utils._ARTIFACT_CLASSES_MAPPING: - schema_title = artifact_types.Artifact.schema_title - schema_version = artifact_types.Artifact.schema_version - - else: - raise ValueError(f'Unknown input: {type_}') - - if optional: - # handles optional artifacts with no default value - inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( - type=type_utils.create_bundled_artifact_type( - schema_title, schema_version), - default=default, - optional=optional, - ) - else: - inputs[utils.sanitize_input_name(spec['name'])] = InputSpec( - type=type_utils.create_bundled_artifact_type( - schema_title, schema_version)) - - outputs = {} - for spec in component_dict.get('outputs', []): - type_ = spec.get('type') - if isinstance(type_, str): - type_ = 
type_utils.get_canonical_name_for_outer_generic(type_) - - if isinstance(type_, str) and type_.lower( - ) in type_utils.PARAMETER_TYPES_MAPPING: - type_enum = type_utils.PARAMETER_TYPES_MAPPING[type_.lower()] - ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( - type_enum) - in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[ - ir_parameter_type_name] - outputs[utils.sanitize_input_name(spec['name'])] = OutputSpec( - type=in_memory_parameter_type_name) - continue - - elif isinstance(type_, str) and re.match( - type_utils._GOOGLE_TYPES_PATTERN, type_): - schema_title = type_ - schema_version = type_utils._GOOGLE_TYPES_VERSION - - elif isinstance(type_, str) and type_.lower( - ) in type_utils._ARTIFACT_CLASSES_MAPPING: - artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[ - type_.lower()] - schema_title = artifact_class.schema_title - schema_version = artifact_class.schema_version - - elif type_ is None or isinstance(type_, dict) or type_.lower( - ) not in type_utils._ARTIFACT_CLASSES_MAPPING: - schema_title = artifact_types.Artifact.schema_title - schema_version = artifact_types.Artifact.schema_version - - else: - raise ValueError(f'Unknown output: {type_}') - - outputs[utils.sanitize_input_name(spec['name'])] = OutputSpec( - type=type_utils.create_bundled_artifact_type( - schema_title, schema_version)) - - return ComponentSpec( - name=component_dict.get('name', 'name'), - description=component_dict.get('description'), - implementation=Implementation(container=container_spec), - inputs=inputs, - outputs=outputs, - ) - - @classmethod - def from_ir_dicts( - cls, - pipeline_spec_dict: dict, - platform_spec_dict: dict, - ) -> 'ComponentSpec': - """Creates a ComponentSpec from the PipelineSpec and PlatformSpec - messages as dicts.""" - raw_name = pipeline_spec_dict['pipelineInfo']['name'] - - def inputs_dict_from_component_spec_dict( - component_spec_dict: Dict[str, Any]) -> Dict[str, InputSpec]: - parameters = component_spec_dict.get('inputDefinitions', - {}).get('parameters', {}) - artifacts = component_spec_dict.get('inputDefinitions', - {}).get('artifacts', {}) - all_inputs = {**parameters, **artifacts} - return { - name: InputSpec.from_ir_component_inputs_dict(input_dict) - for name, input_dict in all_inputs.items() - } - - def outputs_dict_from_component_spec_dict( - components_spec_dict: Dict[str, Any]) -> Dict[str, OutputSpec]: - parameters = component_spec_dict.get('outputDefinitions', - {}).get('parameters', {}) - artifacts = components_spec_dict.get('outputDefinitions', - {}).get('artifacts', {}) - all_outputs = {**parameters, **artifacts} - return { - name: OutputSpec.from_ir_component_outputs_dict(output_dict) - for name, output_dict in all_outputs.items() - } - - def extract_description_from_command( - commands: List[str]) -> Union[str, None]: - for command in commands: - if isinstance(command, str) and 'import kfp' in command: - for node in ast.walk(ast.parse(command)): - if isinstance( - node, - (ast.FunctionDef, ast.ClassDef, ast.Module)): - docstring = ast.get_docstring(node) - if docstring: - return docstring - return None - - component_key = utils.sanitize_component_name(raw_name) - component_spec_dict = pipeline_spec_dict['components'].get( - component_key, pipeline_spec_dict['root']) - - inputs = inputs_dict_from_component_spec_dict(component_spec_dict) - outputs = outputs_dict_from_component_spec_dict(component_spec_dict) - - implementation = Implementation.from_pipeline_spec_dict( - pipeline_spec_dict, raw_name) - - 
description = extract_description_from_command( - implementation.container.command or - []) if implementation.container else None - - try: - from google.protobuf import json_format - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - platform_spec = pipeline_spec_pb2.PlatformSpec() - json_format.ParseDict(platform_spec_dict, platform_spec) - - return ComponentSpec( - name=raw_name, - implementation=implementation, - description=description, - inputs=inputs, - outputs=outputs, - platform_spec=platform_spec, - ) - - def save_to_component_yaml(self, output_file: str) -> None: - """Saves ComponentSpec into IR YAML file. - - Args: - output_file: File path to store the component yaml. - """ - from kfp.compiler import pipeline_spec_builder as builder - - pipeline_spec = self.to_pipeline_spec() - - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - builder.write_pipeline_spec_to_file( - pipeline_spec, - None, - pipeline_spec_pb2.PlatformSpec(), - output_file, - ) - - def to_pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': - """Creates a pipeline instance and constructs the pipeline spec for a - single component. - - Args: - component_spec: The ComponentSpec to convert to PipelineSpec. - - Returns: - A PipelineSpec proto representing the compiled component. - """ - # import here to aviod circular module dependency - from kfp.compiler import compiler_utils - from kfp.compiler import pipeline_spec_builder as builder - from kfp.dsl import pipeline_channel - from kfp.dsl import pipeline_task - from kfp.dsl import tasks_group - - args_dict = {} - pipeline_inputs = self.inputs or {} - - for arg_name, input_spec in pipeline_inputs.items(): - args_dict[arg_name] = pipeline_channel.create_pipeline_channel( - name=arg_name, - channel_type=input_spec.type, - is_artifact_list=input_spec.is_artifact_list) - - task = pipeline_task.PipelineTask(self, args_dict) - - # instead of constructing a pipeline with pipeline_context.Pipeline, - # just build the single task group - group = tasks_group.TasksGroup( - group_type=tasks_group.TasksGroupType.PIPELINE) - group.tasks.append(task) - - group.name = uuid.uuid4().hex - - pipeline_name = self.name - task_group = group - - pipeline_outputs = {} - pipeline_output_spec = self.outputs or {} - - for arg_name, output_spec in pipeline_output_spec.items(): - pipeline_outputs[ - arg_name] = pipeline_channel.create_pipeline_channel( - name=arg_name, - channel_type=output_spec.type, - task_name=task.name) - - utils.validate_pipeline_name(pipeline_name) - - try: - import kfp - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - pipeline_spec = pipeline_spec_pb2.PipelineSpec() - pipeline_spec.pipeline_info.name = pipeline_name - pipeline_spec.sdk_version = f'kfp-{kfp.__version__}' - # Schema version 2.1.0 is required for kfp-pipeline-spec>0.1.13 - pipeline_spec.schema_version = '2.1.0' - - # if we decide to surface component outputs to pipeline level, - # can just assign the component_spec_proto directly to .root - component_spec_proto = builder._build_component_spec_from_component_spec_structure( - self) - pipeline_spec.root.CopyFrom(component_spec_proto) - - builder._build_dag_outputs( - component_spec=pipeline_spec.root, dag_outputs=pipeline_outputs) - - deployment_config = 
pipeline_spec_pb2.PipelineDeploymentConfig() - root_group = task_group - - task_name_to_parent_groups, group_name_to_parent_groups = compiler_utils.get_parent_groups( - root_group) - - def get_inputs(task_group: tasks_group.TasksGroup, - task_name_to_parent_groups): - inputs = collections.defaultdict(set) - if len(task_group.tasks) != 1: - raise ValueError( - f'Error compiling component. Expected one task in task group, got {len(task_group.tasks)}.' - ) - only_task = task_group.tasks[0] - if only_task.channel_inputs: - for group_name in task_name_to_parent_groups[only_task.name]: - inputs[group_name].add((only_task.channel_inputs[-1], None)) - return inputs - - inputs = get_inputs(task_group, task_name_to_parent_groups) - - builder.build_spec_by_group( - pipeline_spec=pipeline_spec, - deployment_config=deployment_config, - group=root_group, - inputs=inputs, - outputs=collections.defaultdict( - dict), # empty -- no sub-DAG outputs to surface - dependencies={}, # no dependencies for single-component pipeline - rootgroup_name=root_group.name, - task_name_to_parent_groups=task_name_to_parent_groups, - group_name_to_parent_groups=group_name_to_parent_groups, - name_to_for_loop_group={}, # no for loop in single-component pipeline - platform_spec=pipeline_spec_pb2.PlatformSpec( - ), # no PlatformSpec single-component pipeline - is_compiled_component=True, - ) - - return pipeline_spec - - -def normalize_time_string(duration: str) -> str: - """Normalizes a time string. - Examples: - - '1 hour' -> '1h' - - '2 hours' -> '2h' - - '2hours' -> '2h' - - '2 w' -> '2w' - - '2w' -> '2w' - Args: - duration (str): The unnormalized duration string. - Returns: - str: The normalized duration string. - """ - no_ws_duration = duration.replace(' ', '') - duration_split = [el for el in re.split(r'(\D+)', no_ws_duration) if el] - - if len(duration_split) != 2: - raise ValueError( - f"Invalid duration string: '{duration}'. Expected one value (as integer in string) and one unit, such as '1 hour'." - ) - - value = duration_split[0] - unit = duration_split[1] - - first_letter_of_unit = unit[0] - return value + first_letter_of_unit - - -def convert_duration_to_seconds(duration: str) -> int: - """Converts a duration string to seconds. - - Args: - duration (str): The unnormalized duration string. (e.g. '1h', '1 hour', '2 - hours', '2w', '2 weeks', '2d', etc.) - Raises: - ValueError: If the time unit is not one of seconds, minutes, hours, days, - or weeks. - Returns: - int: The number of seconds in the duration. - """ - duration = normalize_time_string(duration) - seconds_per_unit = {'s': 1, 'm': 60, 'h': 3_600, 'd': 86_400, 'w': 604_800} - if duration[-1] not in seconds_per_unit.keys(): - raise ValueError( - f"Unsupported duration unit: '{duration[-1]}' for '{duration}'.") - return int(duration[:-1]) * seconds_per_unit[duration[-1]] diff --git a/sdk/python/kfp-dsl/kfp/dsl/task_final_status.py b/sdk/python/kfp-dsl/kfp/dsl/task_final_status.py deleted file mode 100644 index bd2386d2d6..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/task_final_status.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Definition for PipelineTaskFinalStatus.""" - -import dataclasses -from typing import Optional - - -@dataclasses.dataclass -class PipelineTaskFinalStatus: - """A final status of a pipeline task. Annotate a component parameter with - this class to obtain a handle to a task's status (see example). - - This is the Python representation of the proto message `PipelineTaskFinalStatus `_. - - Examples: - :: - - @dsl.component - def task_status(user_input: str, status: PipelineTaskFinalStatus): - print('Pipeline status: ', status.state) - print('Job resource name: ', status.pipeline_job_resource_name) - print('Pipeline task name: ', status.pipeline_task_name) - print('Error code: ', status.error_code) - print('Error message: ', status.error_message) - - @dsl.pipeline(name='my_pipeline') - def my_pipeline(): - task = task_status(user_input='my_input') - """ - state: str - """Final state of the task. The value could be one of ``'SUCCEEDED'``, ``'FAILED'`` or ``'CANCELLED'``.""" - - pipeline_job_resource_name: str - """Pipeline job resource name, in the format of ``projects/{project}/locations/{location}/pipelineJobs/{pipeline_job}``.""" - - pipeline_task_name: str - """Name of the task that produced this status.""" - - error_code: Optional[int] - """The `google.rpc.Code `_ in case of error. If state is ``'SUCCEEDED'``, this is ``None``.""" - - error_message: Optional[str] - """In case of error, the detailed error message. If state is ``'SUCCEEDED'``, this is ``None``.""" diff --git a/sdk/python/kfp-dsl/kfp/dsl/tasks_group.py b/sdk/python/kfp-dsl/kfp/dsl/tasks_group.py deleted file mode 100644 index 42d1446a9d..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/tasks_group.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Definition for TasksGroup.""" - -import enum -from typing import Optional, Union - -from kfp.dsl import for_loop -from kfp.dsl import pipeline_channel -from kfp.dsl import pipeline_context -from kfp.dsl import pipeline_task - - -class TasksGroupType(str, enum.Enum): - """Types of TasksGroup.""" - PIPELINE = 'pipeline' - CONDITION = 'condition' - FOR_LOOP = 'for-loop' - EXIT_HANDLER = 'exit-handler' - - -class TasksGroup: - """Represents a logical group of tasks and groups of TasksGroups. - - This class is the base class for groups of tasks, such as tasks - sharing an exit handler, a condition branch, or a loop. This class - is not supposed to be used by pipeline authors. It is useful for - implementing a compiler. - - Attributes: - group_type: The type of the TasksGroup. 
- tasks: A list of all PipelineTasks in this group. - groups: A list of TasksGroups in this group. - display_name: The optional user given name of the group. - dependencies: A list of tasks or groups this group depends on. - is_root: If TasksGroup is root group. - """ - - def __init__( - self, - group_type: TasksGroupType, - name: Optional[str] = None, - is_root: bool = False, - ): - """Create a new instance of TasksGroup. - - Args: - group_type: The type of the group. - name: The name of the group. Used as display name in UI. - """ - self.group_type = group_type - self.tasks = [] - self.groups = [] - self.display_name = name - self.dependencies = [] - self.is_root = is_root - - def __enter__(self): - if not pipeline_context.Pipeline.get_default_pipeline(): - raise ValueError('Default pipeline not defined.') - - self._make_name_unique() - - pipeline_context.Pipeline.get_default_pipeline().push_tasks_group(self) - return self - - def __exit__(self, *unused_args): - pipeline_context.Pipeline.get_default_pipeline().pop_tasks_group() - - def _make_name_unique(self): - """Generates a unique TasksGroup name in the pipeline.""" - if not pipeline_context.Pipeline.get_default_pipeline(): - raise ValueError('Default pipeline not defined.') - - group_id = pipeline_context.Pipeline.get_default_pipeline( - ).get_next_group_id() - self.name = f'{self.group_type.value}-{group_id}' - self.name = self.name.replace('_', '-') - - def remove_task_recursive(self, task: pipeline_task.PipelineTask): - """Removes a task from the group recursively.""" - if self.tasks and task in self.tasks: - self.tasks.remove(task) - for group in self.groups or []: - group.remove_task_recursive(task) - - -class ExitHandler(TasksGroup): - """A class for setting an exit handler task that is invoked upon exiting a - group of other tasks. - - Args: - exit_task: The task that is invoked after exiting a group of other tasks. - name: The name of the exit handler group. - - Example: - :: - - exit_task = ExitComponent(...) - with ExitHandler(exit_task): - task1 = my_component1(...) - task2 = my_component2(...) - """ - - def __init__( - self, - exit_task: pipeline_task.PipelineTask, - name: Optional[str] = None, - ): - """Initializes a Condition task group.""" - super().__init__( - group_type=TasksGroupType.EXIT_HANDLER, - name=name, - is_root=False, - ) - - if exit_task.dependent_tasks: - raise ValueError('exit_task cannot depend on any other tasks.') - - # Removing exit_task form any group - pipeline_context.Pipeline.get_default_pipeline( - ).remove_task_from_groups(exit_task) - - # Set is_exit_handler since the compiler might be using this attribute. - exit_task.is_exit_handler = True - - self.exit_task = exit_task - - -class Condition(TasksGroup): - """A class for creating conditional control flow within a pipeline - definition. - - Args: - condition: A comparative expression that evaluates to True or False. At least one of the operands must be an output from an upstream task or a pipeline parameter. - name: The name of the condition group. - - Example: - :: - - task1 = my_component1(...) - with Condition(task1.output=='pizza', 'pizza-condition'): - task2 = my_component2(...) 
- """ - - def __init__( - self, - condition: pipeline_channel.ConditionOperator, - name: Optional[str] = None, - ): - """Initializes a conditional task group.""" - super().__init__( - group_type=TasksGroupType.CONDITION, - name=name, - is_root=False, - ) - self.condition = condition - - -class ParallelFor(TasksGroup): - """A class for creating parallelized for loop control flow over a static - set of items within a pipeline definition. - - Args: - items: The items to loop over. It can be either a constant Python list or a list output from an upstream task. - name: The name of the for loop group. - parallelism: The maximum number of concurrent iterations that can be scheduled for execution. A value of 0 represents unconstrained parallelism (default is unconstrained). - - Example: - :: - - with dsl.ParallelFor( - items=[{'a': 1, 'b': 10}, {'a': 2, 'b': 20}], - parallelism=1 - ) as item: - task1 = my_component(..., number=item.a) - task2 = my_component(..., number=item.b) - - In the example, the group of tasks containing ``task1`` and ``task2`` would - be executed twice, once with case ``args=[{'a': 1, 'b': 10}]`` and once with - case ``args=[{'a': 2, 'b': 20}]``. The ``parallelism=1`` setting causes only - 1 execution to be scheduled at a time. - """ - - def __init__( - self, - items: Union[for_loop.ItemList, pipeline_channel.PipelineChannel], - name: Optional[str] = None, - parallelism: Optional[int] = None, - ): - """Initializes a for loop task group.""" - parallelism = parallelism or 0 - if parallelism < 0: - raise ValueError( - f'ParallelFor parallelism must be >= 0. Got: {parallelism}.') - - super().__init__( - group_type=TasksGroupType.FOR_LOOP, - name=name, - is_root=False, - ) - - if isinstance(items, pipeline_channel.PipelineChannel): - self.loop_argument = for_loop.LoopArgument.from_pipeline_channel( - items) - self.items_is_pipeline_channel = True - else: - self.loop_argument = for_loop.LoopArgument.from_raw_items( - raw_items=items, - name_code=pipeline_context.Pipeline.get_default_pipeline() - .get_next_group_id(), - ) - self.items_is_pipeline_channel = False - - self.parallelism_limit = parallelism - - def __enter__(self) -> for_loop.LoopArgument: - super().__enter__() - return self.loop_argument diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/__init__.py b/sdk/python/kfp-dsl/kfp/dsl/types/__init__.py deleted file mode 100644 index b4447dd583..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/types/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py b/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py deleted file mode 100644 index 2c6999c2d8..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/types/artifact_types.py +++ /dev/null @@ -1,472 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Classes for input/output Artifacts in KFP SDK.""" - -from typing import Dict, List, Optional, Type - -_GCS_LOCAL_MOUNT_PREFIX = '/gcs/' -_MINIO_LOCAL_MOUNT_PREFIX = '/minio/' -_S3_LOCAL_MOUNT_PREFIX = '/s3/' - - -class Artifact: - """Represents a generic machine learning artifact. - - This class and all artifact classes store the name, uri, and metadata for a machine learning artifact. Use this artifact type when an artifact does not fit into another more specific artifact type (e.g., ``Model``, ``Dataset``). - - Args: - name: Name of the artifact. - uri: The artifact's location on disk or cloud storage. - metadata: Arbitrary key-value pairs about the artifact. - - Example: - :: - - from kfp import dsl - from kfp.dsl import Output, Artifact, Input - - - @dsl.component - def create_artifact( - data: str, - output_artifact: Output[Artifact], - ): - with open(output_artifact.path, 'w') as f: - f.write(data) - - - @dsl.component - def use_artifact(input_artifact: Input[Artifact]): - with open(input_artifact.path) as input_file: - artifact_contents = input_file.read() - print(artifact_contents) - - - @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my/storage') - def my_pipeline(): - create_task = create_artifact(data='my data') - use_artifact(input_artifact=create_task.outputs['output_artifact']) - - Note: Other artifacts are used similarly to the usage of ``Artifact`` in the example above (within ``Input[]`` and ``Output[]``). - """ - schema_title = 'system.Artifact' - schema_version = '0.0.1' - - def __init__(self, - name: Optional[str] = None, - uri: Optional[str] = None, - metadata: Optional[Dict] = None) -> None: - """Initializes the Artifact with the given name, URI and metadata.""" - self.uri = uri or '' - self.name = name or '' - self.metadata = metadata or {} - - @property - def path(self) -> str: - return self._get_path() - - @path.setter - def path(self, path: str) -> None: - self._set_path(path) - - def _get_path(self) -> Optional[str]: - if self.uri.startswith('gs://'): - return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):] - elif self.uri.startswith('minio://'): - return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):] - elif self.uri.startswith('s3://'): - return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):] - return None - - def _set_path(self, path: str) -> None: - if path.startswith(_GCS_LOCAL_MOUNT_PREFIX): - path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):] - elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX): - path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):] - elif path.startswith(_S3_LOCAL_MOUNT_PREFIX): - path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):] - self.uri = path - - -class Model(Artifact): - """An artifact representing a machine learning model. - - Args: - name: Name of the model. - uri: The model's location on disk or cloud storage. - metadata: Arbitrary key-value pairs about the model. 
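# A minimal sketch of the uri <-> local path mapping implemented by
# Artifact._get_path/_set_path above, assuming Artifact is importable from
# kfp.dsl.
from kfp.dsl import Artifact

artifact = Artifact(name='example', uri='gs://my-bucket/data/file.csv')
print(artifact.path)  # /gcs/my-bucket/data/file.csv

artifact.path = '/minio/mlpipeline/data/file.csv'
print(artifact.uri)   # minio://mlpipeline/data/file.csv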
- """ - schema_title = 'system.Model' - - @property - def framework(self) -> str: - return self._get_framework() - - def _get_framework(self) -> str: - return self.metadata.get('framework', '') - - @framework.setter - def framework(self, framework: str) -> None: - self._set_framework(framework) - - def _set_framework(self, framework: str) -> None: - self.metadata['framework'] = framework - - -class Dataset(Artifact): - """An artifact representing a machine learning dataset. - - Args: - name: Name of the dataset. - uri: The dataset's location on disk or cloud storage. - metadata: Arbitrary key-value pairs about the dataset. - """ - schema_title = 'system.Dataset' - - -class Metrics(Artifact): - """An artifact for storing key-value scalar metrics. - - Args: - name: Name of the metrics artifact. - uri: The metrics artifact's location on disk or cloud storage. - metadata: Key-value scalar metrics. - """ - schema_title = 'system.Metrics' - - def log_metric(self, metric: str, value: float) -> None: - """Sets a custom scalar metric in the artifact's metadata. - - Args: - metric: The metric key. - value: The metric value. - """ - self.metadata[metric] = value - - -class ClassificationMetrics(Artifact): - """An artifact for storing classification metrics. - - Args: - name: Name of the metrics artifact. - uri: The metrics artifact's location on disk or cloud storage. - metadata: The key-value scalar metrics. - """ - schema_title = 'system.ClassificationMetrics' - - def log_roc_data_point(self, fpr: float, tpr: float, - threshold: float) -> None: - """Logs a single data point in the ROC curve to metadata. - - Args: - fpr: False positive rate value of the data point. - tpr: True positive rate value of the data point. - threshold: Threshold value for the data point. - """ - - roc_reading = { - 'confidenceThreshold': threshold, - 'recall': tpr, - 'falsePositiveRate': fpr - } - if 'confidenceMetrics' not in self.metadata.keys(): - self.metadata['confidenceMetrics'] = [] - - self.metadata['confidenceMetrics'].append(roc_reading) - - def log_roc_curve(self, fpr: List[float], tpr: List[float], - threshold: List[float]) -> None: - """Logs an ROC curve to metadata. - - Args: - fpr: List of false positive rate values. - tpr: List of true positive rate values. - threshold: List of threshold values. - - Raises: - ValueError: If the lists ``fpr``, ``tpr`` and ``threshold`` are not the same length. - """ - if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len( - tpr) != len(threshold): - raise ValueError( - f'Length of fpr, tpr and threshold must be the same. Got lengths {len(fpr)}, {len(tpr)} and {len(threshold)} respectively.' - ) - - for i in range(len(fpr)): - self.log_roc_data_point( - fpr=fpr[i], tpr=tpr[i], threshold=threshold[i]) - - def set_confusion_matrix_categories(self, categories: List[str]) -> None: - """Stores confusion matrix categories to metadata. - - Args: - categories: List of strings specifying the categories. 
- """ - - self._categories = [] - annotation_specs = [] - for category in categories: - annotation_spec = {'displayName': category} - self._categories.append(category) - annotation_specs.append(annotation_spec) - - self._matrix = [] - for row in range(len(self._categories)): - self._matrix.append({'row': [0] * len(self._categories)}) - - self._confusion_matrix = { - 'annotationSpecs': annotation_specs, - 'rows': self._matrix - } - - self.metadata['confusionMatrix'] = self._confusion_matrix - - def log_confusion_matrix_row(self, row_category: str, - row: List[float]) -> None: - """Logs a confusion matrix row to metadata. - - Args: - row_category: Category to which the row belongs. - row: List of integers specifying the values for the row. - - Raises: - ValueError: If ``row_category`` is not in the list of categories - set in ``set_categories`` call. - """ - if row_category not in self._categories: - raise ValueError( - f'Invalid category: {row_category} passed. Expected one of: {self._categories}' - ) - - if len(row) != len(self._categories): - raise ValueError( - f'Invalid row. Expected size: {len(self._categories)} got: {len(row)}' - ) - - self._matrix[self._categories.index(row_category)] = {'row': row} - self.metadata['confusionMatrix'] = self._confusion_matrix - - def log_confusion_matrix_cell(self, row_category: str, col_category: str, - value: int) -> None: - """Logs a cell in the confusion matrix to metadata. - - Args: - row_category: String representing the name of the row category. - col_category: String representing the name of the column category. - value: Value of the cell. - - Raises: - ValueError: If ``row_category`` or ``col_category`` is not in the list of - categories set in ``set_categories``. - """ - if row_category not in self._categories: - raise ValueError( - f'Invalid category: {row_category} passed. Expected one of: {self._categories}' - ) - - if col_category not in self._categories: - raise ValueError( - f'Invalid category: {row_category} passed. Expected one of: {self._categories}' - ) - - self._matrix[self._categories.index(row_category)]['row'][ - self._categories.index(col_category)] = value - self.metadata['confusionMatrix'] = self._confusion_matrix - - def log_confusion_matrix(self, categories: List[str], - matrix: List[List[int]]) -> None: - """Logs a confusion matrix to metadata. - - Args: - categories: List of the category names. - matrix: Complete confusion matrix. - - Raises: - ValueError: If the length of ``categories`` does not match number of rows or columns of ``matrix``. - """ - self.set_confusion_matrix_categories(categories) - - if len(matrix) != len(categories): - raise ValueError( - f'Invalid matrix: {matrix} passed for categories: {categories}') - - for index in range(len(categories)): - if len(matrix[index]) != len(categories): - raise ValueError( - f'Invalid matrix: {matrix} passed for categories: {categories}' - ) - - self.log_confusion_matrix_row(categories[index], matrix[index]) - - self.metadata['confusionMatrix'] = self._confusion_matrix - - -class SlicedClassificationMetrics(Artifact): - """An artifact for storing sliced classification metrics. - - Similar to ``ClassificationMetrics``, tasks using this class are - expected to use log methods of the class to log metrics with the - difference being each log method takes a slice to associate the - ``ClassificationMetrics``. - - Args: - name: Name of the metrics artifact. - uri: The metrics artifact's location on disk or cloud storage. 
- metadata: Arbitrary key-value pairs about the metrics artifact. - """ - - schema_title = 'system.SlicedClassificationMetrics' - - def _upsert_classification_metrics_for_slice(self, slice: str) -> None: - """Upserts the classification metrics instance for a slice.""" - if slice not in self._sliced_metrics: - self._sliced_metrics[slice] = ClassificationMetrics() - - def _update_metadata(self, slice: str) -> None: - """Updates metadata to adhere to the metrics schema.""" - self.metadata = {'evaluationSlices': []} - for slice in self._sliced_metrics.keys(): - slice_metrics = { - 'slice': - slice, - 'sliceClassificationMetrics': - self._sliced_metrics[slice].metadata - } - self.metadata['evaluationSlices'].append(slice_metrics) - - def log_roc_reading(self, slice: str, threshold: float, tpr: float, - fpr: float) -> None: - """Logs a single data point in the ROC curve of a slice to metadata. - - Args: - slice: String representing slice label. - threshold: Thresold value for the data point. - tpr: True positive rate value of the data point. - fpr: False positive rate value of the data point. - """ - - self._upsert_classification_metrics_for_slice(slice) - self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr) - self._update_metadata(slice) - - def load_roc_readings(self, slice: str, - readings: List[List[float]]) -> None: - """Bulk loads ROC curve readings for a slice. - - Args: - slice: String representing slice label. - readings: A 2-dimensional list providing ROC curve data points. The expected order of the data points is: threshold, true positive rate, false positive rate. - """ - self._upsert_classification_metrics_for_slice(slice) - self._sliced_metrics[slice].load_roc_readings(readings) - self._update_metadata(slice) - - def set_confusion_matrix_categories(self, slice: str, - categories: List[str]) -> None: - """Logs confusion matrix categories for a slice to metadata. - - Categories are stored in the internal ``metrics_utils.ConfusionMatrix`` - instance of the slice. - - Args: - slice: String representing slice label. - categories: List of strings specifying the categories. - """ - self._upsert_classification_metrics_for_slice(slice) - self._sliced_metrics[slice].set_confusion_matrix_categories(categories) - self._update_metadata(slice) - - def log_confusion_matrix_row(self, slice: str, row_category: str, - row: List[int]) -> None: - """Logs a confusion matrix row for a slice to metadata. - - Row is updated on the internal ``metrics_utils.ConfusionMatrix`` - instance of the slice. - - Args: - slice: String representing slice label. - row_category: Category to which the row belongs. - row: List of integers specifying the values for the row. - """ - self._upsert_classification_metrics_for_slice(slice) - self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row) - self._update_metadata(slice) - - def log_confusion_matrix_cell(self, slice: str, row_category: str, - col_category: str, value: int) -> None: - """Logs a confusion matrix cell for a slice to metadata. - - Cell is updated on the internal ``metrics_utils.ConfusionMatrix`` - instance of the slice. - - Args: - slice: String representing slice label. - row_category: String representing the name of the row category. - col_category: String representing the name of the column category. - value: Value of the cell. 
-        """
-        self._upsert_classification_metrics_for_slice(slice)
-        self._sliced_metrics[slice].log_confusion_matrix_cell(
-            row_category, col_category, value)
-        self._update_metadata(slice)
-
-    def load_confusion_matrix(self, slice: str, categories: List[str],
-                              matrix: List[List[int]]) -> None:
-        """Bulk loads the whole confusion matrix for a slice.
-
-        Args:
-            slice: String representing slice label.
-            categories: List of the category names.
-            matrix: Complete confusion matrix.
-        """
-        self._upsert_classification_metrics_for_slice(slice)
-        self._sliced_metrics[slice].log_confusion_matrix_cell(
-            categories, matrix)
-        self._update_metadata(slice)
-
-
-class HTML(Artifact):
-    """An artifact representing an HTML file.
-
-    Args:
-        name: Name of the HTML file.
-        uri: The HTML file's location on disk or cloud storage.
-        metadata: Arbitrary key-value pairs about the HTML file.
-    """
-    schema_title = 'system.HTML'
-
-
-class Markdown(Artifact):
-    """An artifact representing a markdown file.
-
-    Args:
-        name: Name of the markdown file.
-        uri: The markdown file's location on disk or cloud storage.
-        metadata: Arbitrary key-value pairs about the markdown file.
-    """
-    schema_title = 'system.Markdown'
-
-
-_SCHEMA_TITLE_TO_TYPE: Dict[str, Type[Artifact]] = {
-    x.schema_title: x for x in [
-        Artifact,
-        Model,
-        Dataset,
-        Metrics,
-        ClassificationMetrics,
-        SlicedClassificationMetrics,
-        HTML,
-        Markdown,
-    ]
-}
diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py b/sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py
deleted file mode 100644
index 484dfa6508..0000000000
--- a/sdk/python/kfp-dsl/kfp/dsl/types/custom_artifact_types.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2022 The Kubeflow Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ast
-import inspect
-from typing import Callable, Dict, List, Union
-
-from kfp.dsl import component_factory
-from kfp.dsl.types import type_annotations
-from kfp.dsl.types import type_utils
-
-RETURN_PREFIX = 'return-'
-
-
-def get_custom_artifact_type_import_statements(func: Callable) -> List[str]:
-    """Gets a list of custom artifact type import statements from a lightweight
-    Python component function."""
-    artifact_imports = get_custom_artifact_import_items_from_function(func)
-    imports_source = []
-    for obj_str in artifact_imports:
-        if '.' in obj_str:
-            path, name = obj_str.rsplit('.', 1)
-            imports_source.append(f'from {path} import {name}')
-        else:
-            imports_source.append(f'import {obj_str}')
-    return imports_source
-
-
-def get_param_to_custom_artifact_class(func: Callable) -> Dict[str, type]:
-    """Gets a map of parameter names to custom artifact classes.
-
-    Return key is 'return-' for normal returns and 'return-<field_name>' for
-    typing.NamedTuple returns.
- """ - param_to_artifact_cls: Dict[str, type] = {} - kfp_artifact_classes = set(type_utils._ARTIFACT_CLASSES_MAPPING.values()) - - signature = inspect.signature(func) - for name, param in signature.parameters.items(): - annotation = param.annotation - if type_annotations.is_Input_Output_artifact_annotation(annotation): - artifact_class = type_annotations.get_io_artifact_class(annotation) - if artifact_class not in kfp_artifact_classes: - param_to_artifact_cls[name] = artifact_class - elif type_annotations.is_artifact_class(annotation): - param_to_artifact_cls[name] = annotation - if artifact_class not in kfp_artifact_classes: - param_to_artifact_cls[name] = artifact_class - - return_annotation = signature.return_annotation - - if return_annotation is inspect.Signature.empty: - pass - - elif type_utils.is_typed_named_tuple_annotation(return_annotation): - for name, annotation in return_annotation.__annotations__.items(): - if type_annotations.is_artifact_class( - annotation) and annotation not in kfp_artifact_classes: - param_to_artifact_cls[f'{RETURN_PREFIX}{name}'] = annotation - - elif type_annotations.is_artifact_class( - return_annotation - ) and return_annotation not in kfp_artifact_classes: - param_to_artifact_cls[RETURN_PREFIX] = return_annotation - - return param_to_artifact_cls - - -def get_full_qualname_for_artifact(obj: type) -> str: - """Gets the fully qualified name for an object. For example, for class Foo - in module bar.baz, this function returns bar.baz.Foo. - - Note: typing.get_type_hints purports to do the same thing, but it behaves - differently when executed within the scope of a test, so preferring this - approach instead. - - Args: - obj: The class or module for which to get the fully qualified name. - - Returns: - The fully qualified name for the class. - """ - module = obj.__module__ - name = obj.__qualname__ - if module is not None: - name = module + '.' + name - return name - - -def get_symbol_import_path(artifact_class_base_symbol: str, - qualname: str) -> str: - """Gets the fully qualified name of the symbol that must be imported for - the custom artifact type annotation to be referenced successfully. - - Args: - artifact_class_base_symbol: The base symbol from which the artifact class is referenced (e.g., aiplatform for aiplatform.VertexDataset). - qualname: The fully qualified type annotation name as a string. - - Returns: - The fully qualified names of the module or type to import. - """ - split_qualname = qualname.split('.') - if artifact_class_base_symbol in split_qualname: - name_to_import = '.'.join( - split_qualname[:split_qualname.index(artifact_class_base_symbol) + - 1]) - else: - raise TypeError( - f"Module or type name aliases are not supported. You appear to be using an alias in your type annotation: '{qualname}'. This may be due to use of an 'as' statement in an import statement or a reassignment of a module or type to a new name. Reference the module and/or type using the name as defined in the source from which the module or type is imported." 
- ) - return name_to_import - - -def traverse_ast_node_values_to_get_id(obj: Union[ast.Slice, None]) -> str: - while not hasattr(obj, 'id'): - obj = getattr(obj, 'value') - return obj.id - - -def get_custom_artifact_base_symbol_for_parameter(func: Callable, - arg_name: str) -> str: - """Gets the symbol required for the custom artifact type annotation to be - referenced correctly.""" - module_node = ast.parse( - component_factory._get_function_source_definition(func)) - args = module_node.body[0].args.args - args = {arg.arg: arg for arg in args} - annotation = args[arg_name].annotation - return traverse_ast_node_values_to_get_id(annotation.slice) - - -def get_custom_artifact_base_symbol_for_return(func: Callable, - return_name: str) -> str: - """Gets the symbol required for the custom artifact type return annotation - to be referenced correctly.""" - module_node = ast.parse( - component_factory._get_function_source_definition(func)) - return_ann = module_node.body[0].returns - - if return_name == RETURN_PREFIX: - if isinstance(return_ann, (ast.Name, ast.Attribute)): - return traverse_ast_node_values_to_get_id(return_ann) - elif isinstance(return_ann, ast.Call): - func = return_ann.func - # handles NamedTuple and typing.NamedTuple - if (isinstance(func, ast.Attribute) and func.value.id == 'typing' and - func.attr == 'NamedTuple') or (isinstance(func, ast.Name) and - func.id == 'NamedTuple'): - nt_field_list = return_ann.args[1].elts - for el in nt_field_list: - if f'{RETURN_PREFIX}{el.elts[0].s}' == return_name: - return traverse_ast_node_values_to_get_id(el.elts[1]) - - raise TypeError(f"Unexpected type annotation '{return_ann}' for {func}.") - - -def get_custom_artifact_import_items_from_function(func: Callable) -> List[str]: - """Gets the fully qualified name of the symbol that must be imported for - the custom artifact type annotation to be referenced successfully from a - component function.""" - - param_to_ann_obj = get_param_to_custom_artifact_class(func) - import_items = [] - for param_name, artifact_class in param_to_ann_obj.items(): - - base_symbol = get_custom_artifact_base_symbol_for_return( - func, param_name - ) if param_name.startswith( - RETURN_PREFIX) else get_custom_artifact_base_symbol_for_parameter( - func, param_name) - artifact_qualname = get_full_qualname_for_artifact(artifact_class) - symbol_import_path = get_symbol_import_path(base_symbol, - artifact_qualname) - - # could use set here, but want to be have deterministic import ordering - # in compilation - if symbol_import_path not in import_items: - import_items.append(symbol_import_path) - - return import_items diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py b/sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py deleted file mode 100644 index aa39d2002e..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/types/type_annotations.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Classes for input/output type annotations in KFP SDK. 
- -These are only compatible with v2 Pipelines. -""" - -import re -from typing import List, Type, TypeVar, Union - -from kfp.dsl.types import artifact_types -from kfp.dsl.types import type_annotations -from kfp.dsl.types import type_utils - - -class OutputPath: - """Type annotation used in component definitions for indicating a parameter - is a path to an output. The path parameter typed with this annotation can - be treated as a locally accessible filepath within the component body. - - The argument typed with this annotation is provided at runtime by the executing backend and does not need to be passed as an input by the pipeline author (see example). - - - Args: - type: The type of the value written to the output path. - - Example: - :: - - @dsl.component - def create_parameter( - message: str, - output_parameter_path: OutputPath(str), - ): - with open(output_parameter_path, 'w') as f: - f.write(message) - - - @dsl.component - def consume_parameter(message: str): - print(message) - - - @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my-bucket') - def my_pipeline(message: str = 'default message'): - create_param_op = create_parameter(message=message) - consume_parameter(message=create_param_op.outputs['output_parameter_path']) - """ - - def __init__(self, type=None): - self.type = construct_type_for_inputpath_or_outputpath(type) - - def __eq__(self, other): - return isinstance(other, OutputPath) and self.type == other.type - - -class InputPath: - """Type annotation used in component definitions for indicating a parameter - is a path to an input. - - Example: - :: - - @dsl.component - def create_dataset(dataset_path: OutputPath('Dataset'),): - import json - dataset = {'my_dataset': [[1, 2, 3], [4, 5, 6]]} - with open(dataset_path, 'w') as f: - json.dump(dataset, f) - - - @dsl.component - def consume_dataset(dataset: InputPath('Dataset')): - print(dataset) - - - @dsl.pipeline(name='my-pipeline', pipeline_root='gs://my-bucket') - def my_pipeline(): - create_dataset_op = create_dataset() - consume_dataset(dataset=create_dataset_op.outputs['dataset_path']) - """ - - def __init__(self, type=None): - self.type = construct_type_for_inputpath_or_outputpath(type) - - def __eq__(self, other): - return isinstance(other, InputPath) and self.type == other.type - - -def construct_type_for_inputpath_or_outputpath( - type_: Union[str, Type, None]) -> Union[str, None]: - if type_annotations.is_artifact_class(type_): - return type_utils.create_bundled_artifact_type(type_.schema_title, - type_.schema_version) - elif isinstance( - type_, - str) and type_.lower() in type_utils._ARTIFACT_CLASSES_MAPPING: - # v1 artifact backward compat, e.g. 
dsl.OutputPath('Dataset') - return type_utils.create_bundled_artifact_type( - type_utils._ARTIFACT_CLASSES_MAPPING[type_.lower()].schema_title) - elif type_utils.get_parameter_type(type_): - return type_ - else: - # v1 unknown type dsl.OutputPath('MyCustomType') - return type_utils.create_bundled_artifact_type( - artifact_types.Artifact.schema_title) - - -class InputAnnotation: - """Marker type for input artifacts.""" - - -class OutputAnnotation: - """Marker type for output artifacts.""" - - -def is_Input_Output_artifact_annotation(typ) -> bool: - if not hasattr(typ, '__metadata__'): - return False - - if typ.__metadata__[0] not in [InputAnnotation, OutputAnnotation]: - return False - - return True - - -def is_input_artifact(typ) -> bool: - """Returns True if typ is of type Input[T].""" - if not is_Input_Output_artifact_annotation(typ): - return False - - return typ.__metadata__[0] == InputAnnotation - - -def is_output_artifact(typ) -> bool: - """Returns True if typ is of type Output[T].""" - if not is_Input_Output_artifact_annotation(typ): - return False - - return typ.__metadata__[0] == OutputAnnotation - - -def get_io_artifact_class(typ): - from kfp.dsl import Input - from kfp.dsl import Output - if not is_Input_Output_artifact_annotation(typ): - return None - if typ == Input or typ == Output: - return None - - # extract inner type from list of artifacts - inner = typ.__args__[0] - if hasattr(inner, '__origin__') and inner.__origin__ == list: - return inner.__args__[0] - - return inner - - -def get_io_artifact_annotation(typ): - if not is_Input_Output_artifact_annotation(typ): - return None - - return typ.__metadata__[0] - - -T = TypeVar('T') - - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - - Args: - annotation: The original type annotation which may or may not has - `Optional`. - - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. - """ - if getattr(annotation, '__origin__', - None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - return annotation - - -def maybe_strip_optional_from_annotation_string(annotation: str) -> str: - if annotation.startswith('Optional[') and annotation.endswith(']'): - return annotation.lstrip('Optional[').rstrip(']') - return annotation - - -def get_short_type_name(type_name: str) -> str: - """Extracts the short form type name. - - This method is used for looking up serializer for a given type. - - For example:: - typing.List -> List - typing.List[int] -> List - typing.Dict[str, str] -> Dict - List -> List - str -> str - - Args: - type_name: The original type name. - - Returns: - The short form type name or the original name if pattern doesn't match. 
-    """
-    match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
-    return match['type'] if match else type_name
-
-
-def is_artifact_class(artifact_class_or_instance: Type) -> bool:
-    # we do not yet support non-pre-registered custom artifact types with instance_schema attribute
-    return hasattr(artifact_class_or_instance, 'schema_title') and hasattr(
-        artifact_class_or_instance, 'schema_version')
-
-
-def is_list_of_artifacts(
-    type_var: Union[Type[List[artifact_types.Artifact]],
-                    Type[artifact_types.Artifact]]
-) -> bool:
-    # the type annotation for this function's `type_var` parameter may not actually be a subclass of the KFP SDK's Artifact class for custom artifact types
-    is_list_or_list_generic = getattr(type_var, '__origin__', None) == list
-    # in >= python3.9, List wont have .__args__ if it's used as `-> List` with no inner type argument
-    contains_artifact = hasattr(
-        type_var, '__args__') and type_annotations.is_artifact_class(
-            type_var.__args__[0])
-    return is_list_or_list_generic and contains_artifact
diff --git a/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py b/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py
deleted file mode 100644
index 6f07fbfcb8..0000000000
--- a/sdk/python/kfp-dsl/kfp/dsl/types/type_utils.py
+++ /dev/null
@@ -1,558 +0,0 @@
-# Copyright 2020-2022 The Kubeflow Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Utilities for component I/O type mapping."""
-
-from distutils import util
-import inspect
-import json
-from typing import Any, Callable, Dict, Optional, Type, Union
-import warnings
-
-from kfp import dsl
-from kfp.dsl import structures
-from kfp.dsl import task_final_status
-from kfp.dsl.types import artifact_types
-from kfp.dsl.types import type_annotations
-
-DEFAULT_ARTIFACT_SCHEMA_VERSION = '0.0.1'
-PARAMETER_TYPES = Union[str, int, float, bool, dict, list]
-
-# ComponentSpec I/O types to DSL ontology artifact classes mapping.
-_ARTIFACT_CLASSES_MAPPING = {
-    'artifact': artifact_types.Artifact,
-    'model': artifact_types.Model,
-    'dataset': artifact_types.Dataset,
-    'metrics': artifact_types.Metrics,
-    'classificationmetrics': artifact_types.ClassificationMetrics,
-    'slicedclassificationmetrics': artifact_types.SlicedClassificationMetrics,
-    'html': artifact_types.HTML,
-    'markdown': artifact_types.Markdown,
-}
-
-_GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$'
-_GOOGLE_TYPES_VERSION = DEFAULT_ARTIFACT_SCHEMA_VERSION
-
-# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
-# The keys are normalized (lowercased). These are types viewed as Parameters.
-# The values are the corresponding IR parameter primitive types.
-
-# pipeline_spec_pb2.ParameterType enum values
-NUMBER_DOUBLE = 1
-NUMBER_INTEGER = 2
-STRING = 3
-BOOLEAN = 4
-LIST = 5
-STRUCT = 6
-PARAMETER_TYPES_MAPPING = {
-    'integer': 2,
-    'int': NUMBER_INTEGER,
-    'double': NUMBER_DOUBLE,
-    'float': NUMBER_DOUBLE,
-    'string': STRING,
-    'str': STRING,
-    'text': STRING,
-    'bool': BOOLEAN,
-    'boolean': BOOLEAN,
-    'dict': STRUCT,
-    'list': LIST,
-    'jsonobject': STRUCT,
-    'jsonarray': LIST,
-}
-
-
-def bool_cast_fn(default: Union[str, bool]) -> bool:
-    if isinstance(default, str):
-        default = util.strtobool(default) == 1
-    return default
-
-
-def try_loading_json(default: str) -> Union[dict, list, str]:
-    try:
-        return json.loads(default)
-    except:
-        return default
-
-
-_V1_DEFAULT_DESERIALIZER_MAPPING: Dict[str, Callable] = {
-    'integer': int,
-    'int': int,
-    'double': float,
-    'float': float,
-    'string': str,
-    'str': str,
-    'text': str,
-    'bool': bool_cast_fn,
-    'boolean': bool_cast_fn,
-    'dict': try_loading_json,
-    'list': try_loading_json,
-    'jsonobject': try_loading_json,
-    'jsonarray': try_loading_json,
-}
-
-
-def deserialize_v1_component_yaml_default(type_: str, default: Any) -> Any:
-    """Deserializes v1 default values to correct in-memory types.
-
-    Typecasts for primitive types. Tries to load JSON for arrays and
-    structs.
-    """
-    if default is None:
-        return default
-    if isinstance(type_, str):
-        cast_fn = _V1_DEFAULT_DESERIALIZER_MAPPING.get(type_.lower(),
-                                                       lambda x: x)
-        return cast_fn(default)
-    return default
-
-
-def is_task_final_status_type(type_name: Optional[Union[str, dict]]) -> bool:
-    """Check if a ComponentSpec I/O type is PipelineTaskFinalStatus.
-
-    Args:
-        type_name: type name of the ComponentSpec I/O type.
-
-    Returns:
-        True if the type name is 'PipelineTaskFinalStatus'.
-    """
-    return isinstance(type_name, str) and (
-        type_name == task_final_status.PipelineTaskFinalStatus.__name__)
-
-
-def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
-    """Check if a ComponentSpec I/O type is considered as a parameter type.
-
-    Args:
-        type_name: type name of the ComponentSpec I/O type.
-
-    Returns:
-        True if the type name maps to a parameter type else False.
-    """
-    if isinstance(type_name, str):
-        type_name = type_annotations.get_short_type_name(type_name)
-    elif isinstance(type_name, dict):
-        type_name = list(type_name.keys())[0]
-    else:
-        return False
-
-    return type_name.lower(
-    ) in PARAMETER_TYPES_MAPPING or is_task_final_status_type(type_name)
-
-
-def bundled_artifact_to_artifact_proto(
-        bundled_artifact_str: str) -> 'pipeline_spec_pb2.ArtifactTypeSchema':
-    """Gets the IR ArtifactTypeSchema proto for a bundled artifact in form
-    `<namespace>.<name>@x.x.x` (e.g., system.Artifact@0.0.1)."""
-    bundled_artifact_str, schema_version = bundled_artifact_str.split('@')
-
-    try:
-        from kfp.pipeline_spec import pipeline_spec_pb2
-    except ImportError as e:
-        raise ImportError(dsl._kfp_dsl_import_error_msg) from e
-
-    return pipeline_spec_pb2.ArtifactTypeSchema(
-        schema_title=bundled_artifact_str,
-        schema_version=schema_version,
-    )
-
-
-def get_parameter_type(
-    param_type: Optional[Union[Type, str, dict]]
-) -> 'pipeline_spec_pb2.ParameterType':
-    """Get the IR I/O parameter type for the given ComponentSpec I/O type.
-
-    Args:
-        param_type: type of the ComponentSpec I/O type. Can be a primitive Python
-            builtin type or a type name.
-
-    Returns:
-        The enum value of the mapped IR I/O primitive type.
-
-    Raises:
-        AttributeError: if type_name is not a string type.
- """ - # Special handling for PipelineTaskFinalStatus, treat it as Dict type. - if is_task_final_status_type(param_type): - param_type = 'dict' - if type(param_type) == type: - type_name = param_type.__name__ - elif isinstance(param_type, dict): - type_name = list(param_type.keys())[0] - else: - type_name = type_annotations.get_short_type_name(str(param_type)) - return PARAMETER_TYPES_MAPPING.get(type_name.lower()) - - -def get_parameter_type_name( - param_type: Optional[Union[Type, str, dict]]) -> str: - """Gets the parameter type name.""" - try: - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - return pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( - get_parameter_type(param_type)) - - -class InconsistentTypeException(Exception): - """InconsistencyTypeException is raised when two types are not - consistent.""" - - -class InconsistentTypeWarning(Warning): - """InconsistentTypeWarning is issued when two types are not consistent.""" - - -def _get_type_string_from_component_argument( - argument_value: Union['pipeline_channel.PipelineChannel', str, bool, int, - float, dict, list] -) -> str: - # avoid circular imports - from kfp.dsl import pipeline_channel - if isinstance(argument_value, pipeline_channel.PipelineChannel): - return argument_value.channel_type - - # argument is a constant - argument_type = type(argument_value) - if argument_type in _TYPE_TO_TYPE_NAME: - return _TYPE_TO_TYPE_NAME[argument_type] - - raise ValueError( - f'Constant argument inputs must be one of type {list(_TYPE_TO_TYPE_NAME.values())} Got: {argument_value!r} of type {type(argument_value)!r}.' - ) - - -def verify_type_compatibility( - given_value: Union['pipeline_channel.PipelineChannel', str, bool, int, - float, dict, list], - expected_spec: Union[structures.InputSpec, structures.OutputSpec], - error_message_prefix: str, - checks_input: bool = True, - raise_on_error: bool = True, -) -> bool: - """Verifies the given argument type is compatible with the expected type. - - Args: - given_value: The channel or constant provided as an argument. - expected_spec: The InputSpec or OutputSpec that describes the expected type of given_value. - error_message_prefix: The prefix for the error message. - checks_input: True if checks an argument (given_value) against a component/pipeline input type (expected_spec). False if checks a component output (argument_value) against the pipeline output type (expected_spec). - raise_on_error: Whether to raise on type compatibility error. Should be passed kfp.TYPE_CHECK. - - Returns: - True if types are compatible, and False if otherwise. - - Raises: - InconsistentTypeException if raise_on_error=True. 
- """ - # extract and normalize types - expected_type = expected_spec.type - given_type = _get_type_string_from_component_argument(given_value) - - given_is_param = is_parameter_type(str(given_type)) - if given_is_param: - given_type = get_parameter_type_name(given_type) - given_is_artifact_list = False - else: - given_is_artifact_list = given_value.is_artifact_list - - expected_is_param = is_parameter_type(expected_type) - if expected_is_param: - expected_type = get_parameter_type_name(expected_type) - expected_is_artifact_list = False - else: - expected_is_artifact_list = expected_spec.is_artifact_list - - # compare the normalized types - if given_is_param != expected_is_param: - types_are_compatible = False - elif given_is_param and expected_is_param: - types_are_compatible = check_parameter_type_compatibility( - given_type, expected_type) - else: - types_are_compatible = check_artifact_type_compatibility( - given_type=given_type, - given_is_artifact_list=given_is_artifact_list, - expected_type=expected_type, - expected_is_artifact_list=expected_is_artifact_list) - - # maybe raise, maybe warn, return bool - if not types_are_compatible: - # update the types for lists of artifacts for error message - given_type = f'List[{given_type}]' if given_is_artifact_list else given_type - expected_type = f'List[{expected_type}]' if expected_is_artifact_list else expected_type - if checks_input: - error_message_suffix = f'Argument type {given_type!r} is incompatible with the input type {expected_type!r}' - else: - error_message_suffix = f'Output of type {given_type!r} cannot be surfaced as pipeline output type {expected_type!r}' - error_text = error_message_prefix + error_message_suffix - if raise_on_error: - raise InconsistentTypeException(error_text) - else: - warnings.warn(InconsistentTypeWarning(error_text)) - - return types_are_compatible - - -def check_artifact_type_compatibility(given_type: str, - given_is_artifact_list: bool, - expected_type: str, - expected_is_artifact_list: bool) -> bool: - given_schema_title, given_schema_version = given_type.split('@') - expected_schema_title, expected_schema_version = expected_type.split('@') - same_list_of_artifacts_status = expected_is_artifact_list == given_is_artifact_list - if not same_list_of_artifacts_status: - return False - elif artifact_types.Artifact.schema_title in { - given_schema_title, expected_schema_title - }: - return True - else: - schema_title_compatible = given_schema_title == expected_schema_title - schema_version_compatible = given_schema_version.split( - '.')[0] == expected_schema_version.split('.')[0] - - return schema_title_compatible and schema_version_compatible - - -def check_parameter_type_compatibility(given_type: str, - expected_type: str) -> bool: - if isinstance(given_type, str) and isinstance(expected_type, str): - return given_type == expected_type - else: - return check_v1_struct_parameter_type_compatibility( - given_type, expected_type) - - -def check_v1_struct_parameter_type_compatibility( - given_type: Union[str, dict], - expected_type: Union[str, dict], -) -> bool: - if isinstance(given_type, str): - given_type = {given_type: {}} - if isinstance(expected_type, str): - expected_type = {expected_type: {}} - return _check_dict_types(given_type, expected_type) - - -def _check_dict_types( - given_type: dict, - expected_type: dict, -) -> bool: - given_type_name, _ = list(given_type.items())[0] - expected_type_name, _ = list(expected_type.items())[0] - if given_type_name == '' or expected_type_name == '': - # If the type 
name is empty, it matches any types - return True - if given_type_name != expected_type_name: - print('type name ' + str(given_type_name) + - ' is different from expected: ' + str(expected_type_name)) - return False - type_name = given_type_name - for type_property in given_type[type_name]: - if type_property not in expected_type[type_name]: - print(type_name + ' has a property ' + str(type_property) + - ' that the latter does not.') - return False - if given_type[type_name][type_property] != expected_type[type_name][ - type_property]: - print(type_name + ' has a property ' + str(type_property) + - ' with value: ' + str(given_type[type_name][type_property]) + - ' and ' + str(expected_type[type_name][type_property])) - return False - return True - - -_TYPE_TO_TYPE_NAME = { - str: 'String', - int: 'Integer', - float: 'Float', - bool: 'Boolean', - list: 'List', - dict: 'Dict', -} - - -def get_canonical_type_name_for_type(typ: Type) -> Optional[str]: - """Find the canonical type name for a given type. - - Args: - typ: The type to search for. - - Returns: - The canonical name of the type found. - """ - return _TYPE_TO_TYPE_NAME.get(typ, None) - - -class TypeCheckManager: - """Context manager to set a type check mode within context, then restore - mode to original value upon exiting the context.""" - - def __init__(self, enable: bool) -> None: - """TypeCheckManager constructor. - - Args: - enable: Type check mode used within context. - """ - self._enable = enable - - def __enter__(self) -> 'TypeCheckManager': - """Set type check mode to self._enable. - - Returns: - TypeCheckManager: Returns itself. - """ - try: - import kfp - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - - self._prev = kfp.TYPE_CHECK - kfp.TYPE_CHECK = self._enable - return self - - def __exit__(self, *unused_args) -> None: - - try: - import kfp - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - """Restore type check mode to its previous state.""" - kfp.TYPE_CHECK = self._prev - - -# for reading in IR back to in-memory data structures -IR_TYPE_TO_IN_MEMORY_SPEC_TYPE = { - 'STRING': 'String', - 'NUMBER_INTEGER': 'Integer', - 'NUMBER_DOUBLE': 'Float', - 'LIST': 'List', - 'STRUCT': 'Dict', - 'BOOLEAN': 'Boolean', - 'TASK_FINAL_STATUS': task_final_status.PipelineTaskFinalStatus.__name__, -} - -IR_TYPE_TO_COMMENT_TYPE_STRING = { - 'STRING': str.__name__, - 'NUMBER_INTEGER': int.__name__, - 'NUMBER_DOUBLE': float.__name__, - 'LIST': list.__name__, - 'STRUCT': dict.__name__, - 'BOOLEAN': bool.__name__, - 'TASK_FINAL_STATUS': task_final_status.PipelineTaskFinalStatus.__name__, -} - -IN_MEMORY_SPEC_TYPE_TO_IR_TYPE = { - v: k for k, v in IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.items() -} - - -def get_canonical_name_for_outer_generic(type_name: Any) -> str: - """Maps a complex/nested type name back to a canonical type. - - E.g. - get_canonical_name_for_outer_generic('typing.List[str]') - 'List' - - get_canonical_name_for_outer_generic('typing.Dict[typing.List[str], str]') - 'Dict' - - Args: - type_name (Any): The type. Returns input if not a string. - - Returns: - str: The canonical type. 
- """ - if not isinstance(type_name, str): - return type_name - - if type_name.startswith('typing.'): - type_name = type_name.lstrip('typing.') - - if type_name.lower().startswith('list') or type_name.lower().startswith( - 'dict'): - return type_name.split('[')[0] - - else: - return type_name - - -def create_bundled_artifact_type(schema_title: str, - schema_version: Optional[str] = None) -> str: - if not isinstance(schema_title, str): - raise ValueError - return schema_title + '@' + ( - schema_version or DEFAULT_ARTIFACT_SCHEMA_VERSION) - - -def validate_schema_version(schema_version: str) -> None: - split_schema_version = schema_version.split('.') - if len(split_schema_version) != 3: - raise TypeError( - f'Artifact schema_version must use three-part semantic versioning. Got: {schema_version}' - ) - - -def validate_schema_title(schema_title: str) -> None: - split_schema_title = schema_title.split('.') - if len(split_schema_title) != 2: - raise TypeError( - f'Artifact schema_title must have both a namespace and a name, separated by a `.`. Got: {schema_title}' - ) - namespace, _ = split_schema_title - if namespace not in {'system', 'google'}: - raise TypeError( - f'Artifact schema_title must belong to `system` or `google` namespace. Got: {schema_title}' - ) - - -def validate_bundled_artifact_type(type_: str) -> None: - split_type = type_.split('@') - # two parts and neither are empty strings - if len(split_type) != 2 or not all(split_type): - raise TypeError( - f'Artifacts must have both a schema_title and a schema_version, separated by `@`. Got: {type_}' - ) - schema_title, schema_version = split_type - validate_schema_title(schema_title) - validate_schema_version(schema_version) - - -def _annotation_to_type_struct(annotation): - if not annotation or annotation == inspect.Parameter.empty: - return None - if hasattr(annotation, 'to_dict'): - annotation = annotation.to_dict() - if isinstance(annotation, dict): - return annotation - if isinstance(annotation, type): - type_struct = get_canonical_type_name_for_type(annotation) - if type_struct: - return type_struct - elif type_annotations.is_artifact_class(annotation): - schema_title = annotation.schema_title - else: - schema_title = str(annotation.__name__) - elif hasattr(annotation, '__forward_arg__'): - schema_title = str(annotation.__forward_arg__) - else: - schema_title = str(annotation) - type_struct = get_canonical_type_name_for_type(schema_title) - return type_struct or schema_title - - -def is_typed_named_tuple_annotation(annotation: Any) -> bool: - return hasattr(annotation, '_fields') and hasattr(annotation, - '__annotations__') diff --git a/sdk/python/kfp-dsl/kfp/dsl/utils.py b/sdk/python/kfp-dsl/kfp/dsl/utils.py deleted file mode 100644 index 781ddd0de5..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/utils.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Definitions of utils methods.""" - -import importlib -import os -import re -import sys -import types -from typing import List - -_COMPONENT_NAME_PREFIX = 'comp-' -_EXECUTOR_LABEL_PREFIX = 'exec-' - - -def load_module(module_name: str, module_directory: str) -> types.ModuleType: - """Dynamically imports the Python module with the given name and package - path. - - E.g., Assuming there is a file called `my_module.py` under - `/some/directory/my_module`, we can use:: - - load_module('my_module', '/some/directory') - - to effectively `import mymodule`. - - Args: - module_name: The name of the module. - package_path: The package under which the specified module resides. - """ - module_spec = importlib.util.spec_from_file_location( - name=module_name, - location=os.path.join(module_directory, f'{module_name}.py')) - module = importlib.util.module_from_spec(module_spec) - sys.modules[module_spec.name] = module - sys.path.insert(0, str(module_directory)) - module_spec.loader.exec_module(module) - return module - - -def maybe_rename_for_k8s(name: str) -> str: - """Cleans and converts a name to be k8s compatible. - - Args: - name: The original name. - - Returns: - A sanitized name. - """ - return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', - name.lower())).lstrip('-').rstrip('-') - - -def sanitize_input_name(name: str) -> str: - """Sanitizes input name.""" - return re.sub('[^_0-9a-z]+', '_', name.lower()).lstrip('_').rstrip('_') - - -def sanitize_component_name(name: str) -> str: - """Sanitizes component name.""" - return _COMPONENT_NAME_PREFIX + maybe_rename_for_k8s(name) - - -def sanitize_task_name(name: str) -> str: - """Sanitizes task name.""" - return maybe_rename_for_k8s(name) - - -def sanitize_executor_label(label: str) -> str: - """Sanitizes executor label.""" - return _EXECUTOR_LABEL_PREFIX + maybe_rename_for_k8s(label) - - -def make_name_unique_by_adding_index( - name: str, - collection: List[str], - delimiter: str, -) -> str: - """Makes a unique name by adding index. - - The index starts from 2 and increase by 1 until we find a unique name. - - Args: - name: The original name. - collection: The collection of existing names. - delimiter: The delimiter to connect the original name and an index. - - Returns: - A unique name composed of name+delimiter+next index - """ - unique_name = name - if unique_name in collection: - for i in range(2, sys.maxsize**10): - unique_name = name + delimiter + str(i) - if unique_name not in collection: - break - return unique_name - - -def validate_pipeline_name(name: str) -> None: - """Validate pipeline name. - - A valid pipeline name should match ^[a-z0-9][a-z0-9-]{0,127}$. - - Args: - name: The pipeline name. - - Raises: - ValueError if the pipeline name doesn't conform to the regular expression. - """ - pattern = re.compile(r'^[a-z0-9][a-z0-9-]{0,127}$') - if not pattern.match(name): - raise ValueError( - 'Invalid pipeline name: %s.\n' - 'Please specify a pipeline name that matches the regular ' - 'expression "^[a-z0-9][a-z0-9-]{0,127}$" using ' - '`dsl.pipeline(name=...)` decorator.' % name) diff --git a/sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py b/sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py deleted file mode 100644 index c1facf6c8e..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/v1_modelbase.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright 2018-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import abc -from collections import OrderedDict -import inspect -from typing import (Any, cast, Dict, get_type_hints, List, Mapping, - MutableMapping, MutableSequence, Sequence, Type, TypeVar, - Union) - -T = TypeVar('T') - - -def verify_object_against_type(x: Any, typ: Type[T]) -> T: - """Verifies that the object is compatible to the specified type (types from - the typing package can be used).""" - #TODO: Merge with parse_object_from_struct_based_on_type which has almost the same code - if typ is type(None): - if x is None: - return x - else: - raise TypeError(f'Error: Object "{x}" is not None.') - - if typ is Any or type(typ) is TypeVar: - return x - - try: #isinstance can fail for generics - if isinstance(x, typ): - return cast(typ, x) - except Exception: - pass - - if hasattr(typ, '__origin__'): #Handling generic types - if typ.__origin__ is Union: #Optional == Union - exception_map = {} - possible_types = typ.__args__ - if type( - None - ) in possible_types and x is None: #Shortcut for Optional[] tests. Can be removed, but the exceptions will be more noisy. - return x - for possible_type in possible_types: - try: - verify_object_against_type(x, possible_type) - return x - except Exception as ex: - exception_map[possible_type] = ex - #exception_lines = ['Exception for type {}: {}.'.format(t, e) for t, e in exception_map.items()] - exception_lines = [str(e) for t, e in exception_map.items()] - exception_lines.append( - f'Error: Object "{x}" is incompatible with type "{typ}".') - raise TypeError('\n'.join(exception_lines)) - - #not Union => not None - if x is None: - raise TypeError( - f'Error: None object is incompatible with type {typ}') - - generic_type = typ.__origin__ - if generic_type in [ - list, List, abc.Sequence, abc.MutableSequence, Sequence, - MutableSequence - ] and type(x) is not str: #! str is also Sequence - if not isinstance(x, generic_type): - raise TypeError( - f'Error: Object "{x}" is incompatible with type "{typ}"') - - # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts - type_args = typ.__args__ if getattr( - typ, '__args__', None) is not None else (Any, Any) - inner_type = type_args[0] - for item in x: - verify_object_against_type(item, inner_type) - return x - - elif generic_type in [ - dict, Dict, abc.Mapping, abc.MutableMapping, Mapping, - MutableMapping, OrderedDict - ]: - if not isinstance(x, generic_type): - raise TypeError( - f'Error: Object "{x}" is incompatible with type "{typ}"') - - # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts - type_args = typ.__args__ if getattr( - typ, '__args__', None) is not None else (Any, Any) - inner_key_type = type_args[0] - inner_value_type = type_args[1] - for k, v in x.items(): - verify_object_against_type(k, inner_key_type) - verify_object_against_type(v, inner_value_type) - return x - - else: - raise TypeError( - f'Error: Unsupported generic type "{typ}". 
type.__origin__ or type.__extra__ == "{generic_type}"' - ) - - raise TypeError(f'Error: Object "{x}" is incompatible with type "{typ}"') - - -def parse_object_from_struct_based_on_type(struct: Any, typ: Type[T]) -> T: - """Constructs an object from structure (usually dict) based on type. - - Supports list and dict types from the typing package plus Optional[] - and Union[] types. If some type is a class that has .from_dict class - method, that method is used for object construction. - """ - if typ is type(None): - if struct is None: - return None - else: - raise TypeError(f'Error: Structure "{struct}" is not None.') - - if typ is Any or type(typ) is TypeVar: - return struct - - try: #isinstance can fail for generics - #if (isinstance(struct, typ) - # and not (typ is Sequence and type(struct) is str) #! str is also Sequence - # and not (typ is int and type(struct) is bool) #! bool is int - #): - if type(struct) is typ: - return struct - except: - pass - if hasattr(typ, 'from_dict'): - try: #More informative errors - return typ.from_dict(struct) - except Exception as ex: - raise TypeError( - f'Error: {typ.__name__}.from_dict(struct={struct}) failed with exception:\n{str(ex)}' - ) - if hasattr(typ, '__origin__'): #Handling generic types - if typ.__origin__ is Union: #Optional == Union - results = {} - exception_map = {} - # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts - # Union without subscripts seems useless, but semantically it should be the same as Any. - possible_types = list(getattr(typ, '__args__', [Any])) - #if type(None) in possible_types and struct is None: #Shortcut for Optional[] tests. Can be removed, but the exceptions will be more noisy. - # return None - - for possible_type in possible_types: - try: - obj = parse_object_from_struct_based_on_type( - struct, possible_type) - results[possible_type] = obj - except Exception as ex: - if isinstance(ex, TypeError): - exception_map[possible_type] = ex - else: - exception_map[ - possible_type] = f'Unexpected exception when trying to convert structure "{struct}" to type "{typ}": {type(ex)}: {ex}' - - #Single successful parsing. - if len(results) == 1: - return list(results.values())[0] - - if len(results) > 1: - raise TypeError( - f'Error: Structure "{struct}" is ambiguous. It can be parsed to multiple types: {list(results.keys())}.' - ) - - exception_lines = [str(e) for t, e in exception_map.items()] - exception_lines.append( - f'Error: Structure "{struct}" is incompatible with type "{typ}" - none of the types in Union are compatible.' - ) - raise TypeError('\n'.join(exception_lines)) - #not Union => not None - if struct is None: - raise TypeError( - f'Error: None structure is incompatible with type {typ}') - - generic_type = typ.__origin__ - if generic_type in [ - list, List, abc.Sequence, abc.MutableSequence, Sequence, - MutableSequence - ] and type(struct) is not str: #! str is also Sequence - if not isinstance(struct, generic_type): - raise TypeError( - f'Error: Structure "{struct}" is incompatible with type "{typ}" - it does not have list type.' 
- ) - - # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts - type_args = typ.__args__ if getattr( - typ, '__args__', None) is not None else (Any, Any) - inner_type = type_args[0] - return [ - parse_object_from_struct_based_on_type(item, inner_type) - for item in struct - ] - - elif generic_type in [ - dict, Dict, abc.Mapping, abc.MutableMapping, Mapping, - MutableMapping, OrderedDict - ]: - if not isinstance(struct, generic_type): - raise TypeError( - f'Error: Structure "{struct}" is incompatible with type "{typ}" - it does not have dict type.' - ) - - # In Python 3.9 typ.__args__ does not exist when the generic type does not have subscripts - type_args = typ.__args__ if getattr( - typ, '__args__', None) is not None else (Any, Any) - inner_key_type = type_args[0] - inner_value_type = type_args[1] - return { - parse_object_from_struct_based_on_type(k, inner_key_type): - parse_object_from_struct_based_on_type(v, inner_value_type) - for k, v in struct.items() - } - - else: - raise TypeError( - f'Error: Unsupported generic type "{typ}". type.__origin__ or type.__extra__ == "{generic_type}"' - ) - - raise TypeError( - f'Error: Structure "{struct}" is incompatible with type "{typ}". Structure is not the instance of the type, the type does not have .from_dict method and is not generic.' - ) - - -def convert_object_to_struct(obj, serialized_names: Mapping[str, str] = {}): - """Converts an object to structure (usually a dict). - - Serializes all properties that do not start with underscores. If the - type of some property is a class that has .to_dict class method, - that method is used for conversion. Used by the ModelBase class. - """ - signature = inspect.signature(obj.__init__) #Needed for default values - result = {} - for python_name in signature.parameters: #TODO: Make it possible to specify the field ordering regardless of the presence of default values - value = getattr(obj, python_name) - if python_name.startswith('_'): - continue - attr_name = serialized_names.get(python_name, python_name) - if hasattr(value, 'to_dict'): - result[attr_name] = value.to_dict() - elif isinstance(value, list): - result[attr_name] = [ - (x.to_dict() if hasattr(x, 'to_dict') else x) for x in value - ] - elif isinstance(value, dict): - result[attr_name] = { - k: (v.to_dict() if hasattr(v, 'to_dict') else v) - for k, v in value.items() - } - else: - param = signature.parameters.get(python_name, None) - if param is None or param.default == inspect.Parameter.empty or value != param.default: - result[attr_name] = value - - return result - - -def parse_object_from_struct_based_on_class_init( - cls: Type[T], - struct: Mapping, - serialized_names: Mapping[str, str] = {}) -> T: - """Constructs an object of specified class from structure (usually dict) - using the class.__init__ method. Converts all constructor arguments to - appropriate types based on the __init__ type hints. Used by the ModelBase - class. - - Arguments: - - serialized_names: specifies the mapping between __init__ parameter names and the structure key names for cases where these names are different (due to language syntax clashes or style differences). - """ - parameter_types = get_type_hints( - cls.__init__) #Properlty resolves forward references - - serialized_names_to_pythonic = {v: k for k, v in serialized_names.items()} - #If a pythonic name has a different original name, we forbid the pythonic name in the structure. 
Otherwise, this function would accept "python-styled" structures that should be invalid - forbidden_struct_keys = set( - serialized_names_to_pythonic.values()).difference( - serialized_names_to_pythonic.keys()) - args = {} - for original_name, value in struct.items(): - if original_name in forbidden_struct_keys: - raise ValueError( - f'Use "{serialized_names[original_name]}" key instead of pythonic key "{original_name}" in the structure: {struct}.' - ) - python_name = serialized_names_to_pythonic.get(original_name, - original_name) - param_type = parameter_types.get(python_name, None) - if param_type is not None: - args[python_name] = parse_object_from_struct_based_on_type( - value, param_type) - else: - args[python_name] = value - - return cls(**args) - - -class ModelBase: - """Base class for types that can be converted to JSON-like dict structures - or constructed from such structures. The object fields, their types and - default values are taken from the __init__ method arguments. Override the - _serialized_names mapping to control the key names of the serialized - structures. - - The derived class objects will have the .from_dict and .to_dict methods for conversion to or from structure. The base class constructor accepts the arguments map, checks the argument types and sets the object field values. - - Example derived class: - - class TaskSpec(ModelBase): - _serialized_names = { - 'component_ref': 'componentRef', - 'is_enabled': 'isEnabled', - } - - def __init__(self, - component_ref: ComponentReference, - arguments: Optional[Mapping[str, ArgumentType]] = None, - is_enabled: Optional[Union[ArgumentType, EqualsPredicate, NotEqualsPredicate]] = None, #Optional property with default value - ): - super().__init__(locals()) #Calling the ModelBase constructor to check the argument types and set the object field values. - - task_spec = TaskSpec.from_dict("{'componentRef': {...}, 'isEnabled: {'and': {...}}}") # = instance of TaskSpec - task_struct = task_spec.to_dict() #= "{'componentRef': {...}, 'isEnabled: {'and': {...}}}" - """ - _serialized_names = {} - - def __init__(self, args): - parameter_types = get_type_hints(self.__class__.__init__) - field_values = { - k: v - for k, v in args.items() - if k != 'self' and not k.startswith('_') - } - for k, v in field_values.items(): - parameter_type = parameter_types.get(k, None) - if parameter_type is not None: - try: - verify_object_against_type(v, parameter_type) - except Exception as e: - raise TypeError( - f'Argument for {k} is not compatible with type "{parameter_type}". 
Exception: {e}' - ) - self.__dict__.update(field_values) - - @classmethod - def from_dict(cls: Type[T], struct: Mapping) -> T: - return parse_object_from_struct_based_on_class_init( - cls, struct, serialized_names=cls._serialized_names) - - def to_dict(self) -> Dict[str, Any]: - return convert_object_to_struct( - self, serialized_names=self._serialized_names) - - def _get_field_names(self): - return list(inspect.signature(self.__init__).parameters) - - def __repr__(self): - return self.__class__.__name__ + '(' + ', '.join( - param + '=' + repr(getattr(self, param)) - for param in self._get_field_names()) + ')' - - def __eq__(self, other): - return self.__class__ == other.__class__ and { - k: getattr(self, k) for k in self._get_field_names() - } == {k: getattr(other, k) for k in other._get_field_names()} - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(repr(self)) diff --git a/sdk/python/kfp-dsl/kfp/dsl/v1_structures.py b/sdk/python/kfp-dsl/kfp/dsl/v1_structures.py deleted file mode 100644 index 57cc7c6375..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/v1_structures.py +++ /dev/null @@ -1,839 +0,0 @@ -# Copyright 2018-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import OrderedDict -from typing import Any, Dict, List, Mapping, Optional, Union - -from kfp.dsl.v1_modelbase import ModelBase - -PrimitiveTypes = Union[str, int, float, bool] -PrimitiveTypesIncludingNone = Optional[PrimitiveTypes] - -TypeSpecType = Union[str, Dict, List] - - -class InputSpec(ModelBase): - """Describes the component input specification.""" - - def __init__( - self, - name: str, - type: Optional[TypeSpecType] = None, - description: Optional[str] = None, - default: Optional[PrimitiveTypes] = None, - optional: Optional[bool] = False, - annotations: Optional[Dict[str, Any]] = None, - ): - super().__init__(locals()) - - -class OutputSpec(ModelBase): - """Describes the component output specification.""" - - def __init__( - self, - name: str, - type: Optional[TypeSpecType] = None, - description: Optional[str] = None, - annotations: Optional[Dict[str, Any]] = None, - ): - super().__init__(locals()) - - -class InputValuePlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - at run-time by the input argument value.""" - _serialized_names = { - 'input_name': 'inputValue', - } - - def __init__( - self, - input_name: str, - ): - super().__init__(locals()) - - -class InputPathPlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - at run-time by a local file path pointing to a file containing the input - argument value.""" - _serialized_names = { - 'input_name': 'inputPath', - } - - def __init__( - self, - input_name: str, - ): - super().__init__(locals()) - - -class OutputPathPlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - 
at run-time by a local file path pointing to a file where the program - should write its output data.""" - _serialized_names = { - 'output_name': 'outputPath', - } - - def __init__( - self, - output_name: str, - ): - super().__init__(locals()) - - -class InputUriPlaceholder(ModelBase): # Non-standard attr names - """Represents a placeholder for the URI of an input artifact. - - Represents the command-line argument placeholder that will be - replaced at run-time by the URI of the input artifact argument. - """ - _serialized_names = { - 'input_name': 'inputUri', - } - - def __init__( - self, - input_name: str, - ): - super().__init__(locals()) - - -class OutputUriPlaceholder(ModelBase): # Non-standard attr names - """Represents a placeholder for the URI of an output artifact. - - Represents the command-line argument placeholder that will be - replaced at run-time by a URI of the output artifac where the - program should write its output data. - """ - _serialized_names = { - 'output_name': 'outputUri', - } - - def __init__( - self, - output_name: str, - ): - super().__init__(locals()) - - -class InputMetadataPlaceholder(ModelBase): # Non-standard attr names - """Represents the file path to an input artifact metadata. - - During runtime, this command-line argument placeholder will be - replaced by the path where the metadata file associated with this - artifact has been written to. Currently only supported in v2 - components. - """ - _serialized_names = { - 'input_name': 'inputMetadata', - } - - def __init__(self, input_name: str): - super().__init__(locals()) - - -class InputOutputPortNamePlaceholder(ModelBase): # Non-standard attr names - """Represents the output port name of an input artifact. - - During compile time, this command-line argument placeholder will be - replaced by the actual output port name used by the producer task. - Currently only supported in v2 components. - """ - _serialized_names = { - 'input_name': 'inputOutputPortName', - } - - def __init__(self, input_name: str): - super().__init__(locals()) - - -class OutputMetadataPlaceholder(ModelBase): # Non-standard attr names - """Represents the output metadata JSON file location of this task. - - This file will encode the metadata information produced by this task: - - Artifacts metadata, but not the content of the artifact, and - - output parameters. - - Only supported in v2 components. - """ - _serialized_names = { - 'output_metadata': 'outputMetadata', - } - - def __init__(self, output_metadata: type(None) = None): - if output_metadata: - raise RuntimeError( - 'Output metadata placeholder cannot be associated with key') - super().__init__(locals()) - - def to_dict(self) -> Mapping[str, Any]: - # Override parent implementation. Otherwise it always returns {}. - return {'outputMetadata': None} - - -class ExecutorInputPlaceholder(ModelBase): # Non-standard attr names - """Represents the serialized ExecutorInput message at runtime. - - This placeholder will be replaced by a serialized - [ExecutorInput](https://github.com/kubeflow/pipelines/blob/61f9c2c328d245d89c9d9b8c923f24dbbd08cdc9/api/v2alpha1/pipeline_spec.proto#L730) - proto message at runtime, which includes parameters of the task, artifact - URIs and metadata. - """ - _serialized_names = { - 'executor_input': 'executorInput', - } - - def __init__(self, executor_input: type(None) = None): - if executor_input: - raise RuntimeError( - f'Executor input placeholder cannot be associated with input key. 
Got {executor_input}' - ) - super().__init__(locals()) - - def to_dict(self) -> Mapping[str, Any]: - # Override parent implementation. Otherwise it always returns {}. - return {'executorInput': None} - - -CommandlineArgumentType = Union[str, InputValuePlaceholder, - InputPathPlaceholder, OutputPathPlaceholder, - InputUriPlaceholder, OutputUriPlaceholder, - InputMetadataPlaceholder, - InputOutputPortNamePlaceholder, - OutputMetadataPlaceholder, - ExecutorInputPlaceholder, 'ConcatPlaceholder', - 'IfPlaceholder',] - - -class ConcatPlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - at run-time by the concatenated values of its items.""" - _serialized_names = { - 'items': 'concat', - } - - def __init__( - self, - items: List[CommandlineArgumentType], - ): - super().__init__(locals()) - - -class IsPresentPlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - at run-time by a boolean value specifying whether the caller has passed an - argument for the specified optional input.""" - _serialized_names = { - 'input_name': 'isPresent', - } - - def __init__( - self, - input_name: str, - ): - super().__init__(locals()) - - -IfConditionArgumentType = Union[bool, str, IsPresentPlaceholder, - InputValuePlaceholder] - - -class IfPlaceholderStructure(ModelBase): #Non-standard attr names - '''Used in by the IfPlaceholder - the command-line argument placeholder that will be replaced at run-time by the expanded value of either "then_value" or "else_value" depending on the submissio-time resolved value of the "cond" predicate.''' - _serialized_names = { - 'condition': 'cond', - 'then_value': 'then', - 'else_value': 'else', - } - - def __init__( - self, - condition: IfConditionArgumentType, - then_value: Union[CommandlineArgumentType, - List[CommandlineArgumentType]], - else_value: Optional[Union[CommandlineArgumentType, - List[CommandlineArgumentType]]] = None, - ): - super().__init__(locals()) - - -class IfPlaceholder(ModelBase): #Non-standard attr names - """Represents the command-line argument placeholder that will be replaced - at run-time by the expanded value of either "then_value" or "else_value" - depending on the submissio-time resolved value of the "cond" predicate.""" - _serialized_names = { - 'if_structure': 'if', - } - - def __init__( - self, - if_structure: IfPlaceholderStructure, - ): - super().__init__(locals()) - - -class ContainerSpec(ModelBase): - """Describes the container component implementation.""" - _serialized_names = { - 'file_outputs': - 'fileOutputs', #TODO: rename to something like legacy_unconfigurable_output_paths - } - - def __init__( - self, - image: str, - command: Optional[List[CommandlineArgumentType]] = None, - args: Optional[List[CommandlineArgumentType]] = None, - env: Optional[Mapping[str, str]] = None, - file_outputs: - Optional[Mapping[ - str, - str]] = None, #TODO: rename to something like legacy_unconfigurable_output_paths - ): - super().__init__(locals()) - - -class ContainerImplementation(ModelBase): - """Represents the container component implementation.""" - - def __init__( - self, - container: ContainerSpec, - ): - super().__init__(locals()) - - -ImplementationType = Union[ContainerImplementation, 'GraphImplementation'] - - -class MetadataSpec(ModelBase): - - def __init__( - self, - annotations: Optional[Dict[str, str]] = None, - labels: Optional[Dict[str, str]] = None, - ): - super().__init__(locals()) - - -class 
ComponentSpec(ModelBase): - """Component specification. - - Describes the metadata (name, description, annotations and labels), - the interface (inputs and outputs) and the implementation of the - component. - """ - - def __init__( - self, - name: Optional[str] = None, #? Move to metadata? - description: Optional[str] = None, #? Move to metadata? - metadata: Optional[MetadataSpec] = None, - inputs: Optional[List[InputSpec]] = None, - outputs: Optional[List[OutputSpec]] = None, - implementation: Optional[ImplementationType] = None, - version: Optional[str] = 'google.com/cloud/pipelines/component/v1', - #tags: Optional[Set[str]] = None, - ): - super().__init__(locals()) - self._post_init() - - def _post_init(self): - #Checking input names for uniqueness - self._inputs_dict = {} - if self.inputs: - for input in self.inputs: - if input.name in self._inputs_dict: - raise ValueError(f'Non-unique input name "{input.name}"') - self._inputs_dict[input.name] = input - - #Checking output names for uniqueness - self._outputs_dict = {} - if self.outputs: - for output in self.outputs: - if output.name in self._outputs_dict: - raise ValueError(f'Non-unique output name "{output.name}"') - self._outputs_dict[output.name] = output - - if isinstance(self.implementation, ContainerImplementation): - container = self.implementation.container - - if container.file_outputs: - for output_name, path in container.file_outputs.items(): - if output_name not in self._outputs_dict: - raise TypeError( - 'Unconfigurable output entry "{}" references non-existing output.' - .format({output_name: path})) - - def verify_arg(arg): - if arg is None: - pass - elif isinstance( - arg, (str, int, float, bool, OutputMetadataPlaceholder, - ExecutorInputPlaceholder)): - pass - elif isinstance(arg, list): - for arg2 in arg: - verify_arg(arg2) - elif isinstance( - arg, - (InputUriPlaceholder, InputValuePlaceholder, - InputPathPlaceholder, IsPresentPlaceholder, - InputMetadataPlaceholder, InputOutputPortNamePlaceholder)): - if arg.input_name not in self._inputs_dict: - raise TypeError( - f'Argument "{arg}" references non-existing input.') - elif isinstance(arg, - (OutputUriPlaceholder, OutputPathPlaceholder)): - if arg.output_name not in self._outputs_dict: - raise TypeError( - f'Argument "{arg}" references non-existing output.') - elif isinstance(arg, ConcatPlaceholder): - for arg2 in arg.items: - verify_arg(arg2) - elif isinstance(arg, IfPlaceholder): - verify_arg(arg.if_structure.condition) - verify_arg(arg.if_structure.then_value) - verify_arg(arg.if_structure.else_value) - else: - raise TypeError(f'Unexpected argument "{arg}"') - - verify_arg(container.command) - verify_arg(container.args) - - if isinstance(self.implementation, GraphImplementation): - graph = self.implementation.graph - - if graph.output_values is not None: - for output_name, argument in graph.output_values.items(): - if output_name not in self._outputs_dict: - raise TypeError( - 'Graph output argument entry "{}" references non-existing output.' - .format({output_name: argument})) - - if graph.tasks is not None: - for task in graph.tasks.values(): - if task.arguments is not None: - for argument in task.arguments.values(): - if isinstance( - argument, GraphInputArgument - ) and argument.graph_input.input_name not in self._inputs_dict: - raise TypeError( - f'Argument "{argument}" references non-existing input.' - ) - - -class ComponentReference(ModelBase): - """Component reference. 
- - Contains information that can be used to locate and load a component - by name, digest or URL - """ - - def __init__( - self, - name: Optional[str] = None, - digest: Optional[str] = None, - tag: Optional[str] = None, - url: Optional[str] = None, - spec: Optional[ComponentSpec] = None, - ): - super().__init__(locals()) - self._post_init() - - def _post_init(self) -> None: - if not any([self.name, self.digest, self.tag, self.url, self.spec]): - raise TypeError('Need at least one argument.') - - -class GraphInputReference(ModelBase): - """References the input of the graph (the scope is a single graph).""" - _serialized_names = { - 'input_name': 'inputName', - } - - def __init__( - self, - input_name: str, - type: - Optional[ - TypeSpecType] = None, # Can be used to override the reference data type - ): - super().__init__(locals()) - - def as_argument(self) -> 'GraphInputArgument': - return GraphInputArgument(graph_input=self) - - def with_type(self, type_spec: TypeSpecType) -> 'GraphInputReference': - return GraphInputReference( - input_name=self.input_name, - type=type_spec, - ) - - def without_type(self) -> 'GraphInputReference': - return self.with_type(None) - - -class GraphInputArgument(ModelBase): - """Represents the component argument value that comes from the graph - component input.""" - _serialized_names = { - 'graph_input': 'graphInput', - } - - def __init__( - self, - graph_input: GraphInputReference, - ): - super().__init__(locals()) - - -class TaskOutputReference(ModelBase): - """References the output of some task (the scope is a single graph).""" - _serialized_names = { - 'task_id': 'taskId', - 'output_name': 'outputName', - } - - def __init__( - self, - output_name: str, - task_id: - Optional[ - str] = None, # Used for linking to the upstream task in serialized component file. - task: - Optional[ - 'TaskSpec'] = None, # Used for linking to the upstream task in runtime since Task does not have an ID until inserted into a graph. 
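            # Note: at least one of task_id or task must be supplied; the __init__ body
            # below raises TypeError when both are None.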
- type: - Optional[ - TypeSpecType] = None, # Can be used to override the reference data type - ): - super().__init__(locals()) - if self.task_id is None and self.task is None: - raise TypeError('task_id and task cannot be None at the same time.') - - def with_type(self, type_spec: TypeSpecType) -> 'TaskOutputReference': - return TaskOutputReference( - output_name=self.output_name, - task_id=self.task_id, - task=self.task, - type=type_spec, - ) - - def without_type(self) -> 'TaskOutputReference': - return self.with_type(None) - - -class TaskOutputArgument(ModelBase - ): #Has additional constructor for convenience - """Represents the component argument value that comes from the output of - another task.""" - _serialized_names = { - 'task_output': 'taskOutput', - } - - def __init__( - self, - task_output: TaskOutputReference, - ): - super().__init__(locals()) - - @staticmethod - def construct( - task_id: str, - output_name: str, - ) -> 'TaskOutputArgument': - return TaskOutputArgument( - TaskOutputReference( - task_id=task_id, - output_name=output_name, - )) - - def with_type(self, type_spec: TypeSpecType) -> 'TaskOutputArgument': - return TaskOutputArgument( - task_output=self.task_output.with_type(type_spec),) - - def without_type(self) -> 'TaskOutputArgument': - return self.with_type(None) - - -ArgumentType = Union[PrimitiveTypes, GraphInputArgument, TaskOutputArgument] - - -class TwoOperands(ModelBase): - - def __init__( - self, - op1: ArgumentType, - op2: ArgumentType, - ): - super().__init__(locals()) - - -class BinaryPredicate(ModelBase): #abstract base type - - def __init__(self, operands: TwoOperands): - super().__init__(locals()) - - -class EqualsPredicate(BinaryPredicate): - """Represents the "equals" comparison predicate.""" - _serialized_names = {'operands': '=='} - - -class NotEqualsPredicate(BinaryPredicate): - """Represents the "not equals" comparison predicate.""" - _serialized_names = {'operands': '!='} - - -class GreaterThanPredicate(BinaryPredicate): - """Represents the "greater than" comparison predicate.""" - _serialized_names = {'operands': '>'} - - -class GreaterThanOrEqualPredicate(BinaryPredicate): - """Represents the "greater than or equal" comparison predicate.""" - _serialized_names = {'operands': '>='} - - -class LessThenPredicate(BinaryPredicate): - """Represents the "less than" comparison predicate.""" - _serialized_names = {'operands': '<'} - - -class LessThenOrEqualPredicate(BinaryPredicate): - """Represents the "less than or equal" comparison predicate.""" - _serialized_names = {'operands': '<='} - - -PredicateType = Union[ArgumentType, EqualsPredicate, NotEqualsPredicate, - GreaterThanPredicate, GreaterThanOrEqualPredicate, - LessThenPredicate, LessThenOrEqualPredicate, - 'NotPredicate', 'AndPredicate', 'OrPredicate',] - - -class TwoBooleanOperands(ModelBase): - - def __init__( - self, - op1: PredicateType, - op2: PredicateType, - ): - super().__init__(locals()) - - -class NotPredicate(ModelBase): - """Represents the "not" logical operation.""" - _serialized_names = {'operand': 'not'} - - def __init__(self, operand: PredicateType): - super().__init__(locals()) - - -class AndPredicate(ModelBase): - """Represents the "and" logical operation.""" - _serialized_names = {'operands': 'and'} - - def __init__(self, operands: TwoBooleanOperands): - super().__init__(locals()) - - -class OrPredicate(ModelBase): - """Represents the "or" logical operation.""" - _serialized_names = {'operands': 'or'} - - def __init__(self, operands: TwoBooleanOperands): - 
super().__init__(locals()) - - -class RetryStrategySpec(ModelBase): - _serialized_names = { - 'max_retries': 'maxRetries', - } - - def __init__( - self, - max_retries: int, - ): - super().__init__(locals()) - - -class CachingStrategySpec(ModelBase): - _serialized_names = { - 'max_cache_staleness': 'maxCacheStaleness', - } - - def __init__( - self, - max_cache_staleness: Optional[ - str] = None, # RFC3339 compliant duration: P30DT1H22M3S - ): - super().__init__(locals()) - - -class ExecutionOptionsSpec(ModelBase): - _serialized_names = { - 'retry_strategy': 'retryStrategy', - 'caching_strategy': 'cachingStrategy', - } - - def __init__( - self, - retry_strategy: Optional[RetryStrategySpec] = None, - caching_strategy: Optional[CachingStrategySpec] = None, - ): - super().__init__(locals()) - - -class TaskSpec(ModelBase): - """Task specification. - - Task is a "configured" component - a component supplied with arguments and other applied configuration changes. - """ - _serialized_names = { - 'component_ref': 'componentRef', - 'is_enabled': 'isEnabled', - 'execution_options': 'executionOptions' - } - - def __init__( - self, - component_ref: ComponentReference, - arguments: Optional[Mapping[str, ArgumentType]] = None, - is_enabled: Optional[PredicateType] = None, - execution_options: Optional[ExecutionOptionsSpec] = None, - annotations: Optional[Dict[str, Any]] = None, - ): - super().__init__(locals()) - #TODO: If component_ref is resolved to component spec, then check that the arguments correspond to the inputs - - def _init_outputs(self): - #Adding output references to the task - if self.component_ref.spec is None: - return - task_outputs = OrderedDict() - for output in self.component_ref.spec.outputs or []: - task_output_ref = TaskOutputReference( - output_name=output.name, - task=self, - type=output. - type, # TODO: Resolve type expressions. E.g. type: {TypeOf: Input 1} - ) - task_output_arg = TaskOutputArgument(task_output=task_output_ref) - task_outputs[output.name] = task_output_arg - - self.outputs = task_outputs - if len(task_outputs) == 1: - self.output = list(task_outputs.values())[0] - - -class GraphSpec(ModelBase): - """Describes the graph component implementation. - - It represents a graph of component tasks connected to the upstream - sources of data using the argument specifications. It also describes - the sources of graph output values. - """ - _serialized_names = { - 'output_values': 'outputValues', - } - - def __init__( - self, - tasks: Mapping[str, TaskSpec], - output_values: Mapping[str, ArgumentType] = None, - ): - super().__init__(locals()) - self._post_init() - - def _post_init(self): - #Checking task output references and preparing the dependency table - task_dependencies = {} - for task_id, task in self.tasks.items(): - dependencies = set() - task_dependencies[task_id] = dependencies - if task.arguments is not None: - for argument in task.arguments.values(): - if isinstance(argument, TaskOutputArgument): - dependencies.add(argument.task_output.task_id) - if argument.task_output.task_id not in self.tasks: - raise TypeError( - f'Argument "{argument}" references non-existing task.' 
- ) - - #Topologically sorting tasks to detect cycles - task_dependents = {k: set() for k in task_dependencies.keys()} - for task_id, dependencies in task_dependencies.items(): - for dependency in dependencies: - task_dependents[dependency].add(task_id) - task_number_of_remaining_dependencies = { - k: len(v) for k, v in task_dependencies.items() - } - sorted_tasks = OrderedDict() - - def process_task(task_id): - if task_number_of_remaining_dependencies[ - task_id] == 0 and task_id not in sorted_tasks: - sorted_tasks[task_id] = self.tasks[task_id] - for dependent_task in task_dependents[task_id]: - task_number_of_remaining_dependencies[ - dependent_task] = task_number_of_remaining_dependencies[ - dependent_task] - 1 - process_task(dependent_task) - - for task_id in task_dependencies.keys(): - process_task(task_id) - if len(sorted_tasks) != len(task_dependencies): - tasks_with_unsatisfied_dependencies = { - k: v - for k, v in task_number_of_remaining_dependencies.items() - if v > 0 - } - task_wth_minimal_number_of_unsatisfied_dependencies = min( - tasks_with_unsatisfied_dependencies.keys(), - key=lambda task_id: tasks_with_unsatisfied_dependencies[task_id] - ) - raise ValueError( - f'Task "{task_wth_minimal_number_of_unsatisfied_dependencies}" has cyclical dependency.' - ) - - self._toposorted_tasks = sorted_tasks - - -class GraphImplementation(ModelBase): - """Represents the graph component implementation.""" - - def __init__( - self, - graph: GraphSpec, - ): - super().__init__(locals()) - - -class PipelineRunSpec(ModelBase): - """The object that can be sent to the backend to start a new Run.""" - _serialized_names = { - 'root_task': 'rootTask', - #'on_exit_task': 'onExitTask', - } - - def __init__( - self, - root_task: TaskSpec, - #on_exit_task: Optional[TaskSpec] = None, - ): - super().__init__(locals()) diff --git a/sdk/python/kfp-dsl/kfp/dsl/yaml_component.py b/sdk/python/kfp-dsl/kfp/dsl/yaml_component.py deleted file mode 100644 index 807ca4e0ce..0000000000 --- a/sdk/python/kfp-dsl/kfp/dsl/yaml_component.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2021-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Component loaded from YAML.""" - -from kfp import dsl -from kfp.dsl import base_component -from kfp.dsl import structures - - -class YamlComponent(base_component.BaseComponent): - """A component loaded from a YAML file. - - **Note:** ``YamlComponent`` is not intended to be used to construct components directly. Use ``kfp.components.load_component_from_*()`` instead. - - Attribute: - component_spec: Component definition. - component_yaml: The yaml string that this component is loaded from. 
- """ - - def __init__( - self, - component_spec: structures.ComponentSpec, - component_yaml: str, - ): - super().__init__(component_spec=component_spec) - self.component_yaml = component_yaml - - @property - def pipeline_spec(self) -> 'pipeline_spec_pb2.PipelineSpec': - """Returns the pipeline spec of the component.""" - try: - from google.protobuf import json_format - from kfp.components import load_yaml_utilities - from kfp.pipeline_spec import pipeline_spec_pb2 - except ImportError as e: - raise ImportError(dsl._kfp_dsl_import_error_msg) from e - component_dict = load_yaml_utilities._load_documents_from_yaml( - self.component_yaml)[0] - is_v1 = 'implementation' in set(component_dict.keys()) - if is_v1: - return self.component_spec.to_pipeline_spec() - else: - - return json_format.ParseDict(component_dict, - pipeline_spec_pb2.PipelineSpec()) - - def execute(self, *args, **kwargs): - """Not implemented.""" - raise NotImplementedError diff --git a/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py b/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py deleted file mode 100644 index 084a1f204f..0000000000 --- a/sdk/python/kfp-dsl/runtime_tests/execute_commands_args_test.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2023 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import dataclasses -import json -import os -import re -import shutil -import subprocess -import tempfile -from typing import Any, Dict - -from absl.testing import parameterized -import yaml - -TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'test_data') - - -@dataclasses.dataclass -class RuntimeTestConfig: - pipeline_file_relpath: str - executor_name: str - executor_input: Dict[str, Any] - - -TEST_CONFIGS = [ - RuntimeTestConfig( - pipeline_file_relpath=os.path.join( - TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), - executor_name='exec-print-op', - executor_input={ - 'inputs': { - 'parameterValues': { - 'message': 'Hello World!' - }, - 'parameters': { - 'message': { - 'stringValue': 'Hello World!' - } - } - }, - 'outputs': { - 'outputFile': - '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-18-50-32/print-op_-9063136771365142528/executor_output.json' - } - }, - ), - RuntimeTestConfig( - pipeline_file_relpath=os.path.join( - TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), - executor_name='exec-exit-op', - executor_input={ - 'inputs': { - 'parameterValues': { - 'status': { - 'error': { - 'code': - 9, - 'message': - 'The DAG failed because some tasks failed. The failed tasks are: [print-op, fail-op].' - }, - 'pipelineJobResourceName': - 'projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11', - 'pipelineTaskName': - 'my-pipeline', - 'state': - 'FAILED' - }, - 'user_input': 'Hello World!' - }, - 'parameters': { - 'status': { - 'stringValue': - "{\"error\":{\"code\":9,\"message\":\"The DAG failed because some tasks failed. 
The failed tasks are: [print-op, fail-op].\"},\"pipelineJobResourceName\":\"projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11\",\"pipelineTaskName\":\"my-pipeline\",\"state\":\"FAILED\"}" - }, - 'user_input': { - 'stringValue': 'Hello World!' - } - } - }, - 'outputs': { - 'outputFile': - '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-19-07-11/exit-op_-6100894116462198784/executor_output.json' - } - }, - ) -] - - -def run_commands_and_args( - config: RuntimeTestConfig, - temp_dir: str, -) -> subprocess.CompletedProcess: - with open(config.pipeline_file_relpath) as f: - pipline_spec_dict = yaml.safe_load(f) - container = pipline_spec_dict['deploymentSpec']['executors'][ - config.executor_name]['container'] - - command_and_args = container['command'] + container['args'] - command_and_args = [ - re.sub(r"'(kfp(-dsl)?)==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", - 'kfp-dsl', cmd) for cmd in command_and_args - ] - - executor_input_json = json.dumps(config.executor_input).replace( - '/gcs/', temp_dir) - command_and_args = [ - v.replace('{{$}}', executor_input_json) for v in command_and_args - ] - - return subprocess.run( - command_and_args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) - - -class TestRuntime(parameterized.TestCase): - - @classmethod - def setUp(cls): - cls.temp_dir = tempfile.mkdtemp() - - @classmethod - def tearDown(cls): - shutil.rmtree(cls.temp_dir) - - @parameterized.parameters(TEST_CONFIGS) - def test(self, config: RuntimeTestConfig): - process = run_commands_and_args( - config=config, - temp_dir=self.temp_dir, - ) - self.assertEqual(process.returncode, 0, process.stderr) diff --git a/sdk/python/kfp-dsl/runtime_tests/executor_test.py b/sdk/python/kfp-dsl/runtime_tests/executor_test.py deleted file mode 100644 index 4cc5969344..0000000000 --- a/sdk/python/kfp-dsl/runtime_tests/executor_test.py +++ /dev/null @@ -1,1333 +0,0 @@ -# Copyright 2021 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
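# A minimal, self-contained sketch of the placeholder substitution performed by
# run_commands_and_args in the deleted runtime test above: the compiled container command
# carries a literal `{{$}}` token, and the test swaps in the serialized executor input
# before launching the process. The command list below is hypothetical; only the
# substitution step mirrors the test.
import json
import subprocess

executor_input = {"inputs": {"parameterValues": {"message": "Hello World!"}}}
executor_input_json = json.dumps(executor_input)

# Hypothetical command: echo back the last argument so the resolved payload is visible.
command_and_args = ["python3", "-c", "import sys; print(sys.argv[-1])", "{{$}}"]
resolved = [part.replace("{{$}}", executor_input_json) for part in command_and_args]
subprocess.run(resolved, check=True)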
-"""Tests for kfp.dsl.executor.""" - -import json -import os -import tempfile -from typing import Callable, Dict, List, NamedTuple, Optional -import unittest -from unittest import mock - -from absl.testing import parameterized -from kfp import dsl -from kfp.dsl import executor -from kfp.dsl import Input -from kfp.dsl import Output -from kfp.dsl.task_final_status import PipelineTaskFinalStatus -from kfp.dsl.types import artifact_types -from kfp.dsl.types.artifact_types import Artifact -from kfp.dsl.types.artifact_types import Dataset -from kfp.dsl.types.artifact_types import Metrics -from kfp.dsl.types.artifact_types import Model -from kfp.dsl.types.type_annotations import InputPath -from kfp.dsl.types.type_annotations import OutputPath - - -class ExecutorTest(parameterized.TestCase): - - @classmethod - def setUp(cls): - cls.maxDiff = None - cls._test_dir = tempfile.mkdtemp() - artifact_types._GCS_LOCAL_MOUNT_PREFIX = cls._test_dir + '/' - artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls._test_dir + '/minio/' - artifact_types._S3_LOCAL_MOUNT_PREFIX = cls._test_dir + '/s3/' - - def execute(self, func: Callable, executor_input: str) -> None: - executor_input_dict = json.loads(executor_input % - {'test_dir': self._test_dir}) - - executor.Executor( - executor_input=executor_input_dict, - function_to_execute=func).execute() - - def execute_and_load_output_metadata(self, func: Callable, - executor_input: str) -> dict: - self.execute(func, executor_input) - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: - return json.loads(f.read()) - - def test_input_and_output_parameters(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "input_parameter": "Hello, KFP" - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_parameter: str) -> str: - self.assertEqual(input_parameter, 'Hello, KFP') - return input_parameter - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - self.assertEqual({'parameterValues': { - 'Output': 'Hello, KFP' - }}, output_metadata) - - def test_input_artifact_custom_type(self): - executor_input = """\ - { - "inputs": { - "artifacts": { - "input_artifact_one": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "google.VertexDataset" - }, - "uri": "gs://some-bucket/input_artifact_one" - } - ] - } - } - }, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - class VertexDataset(dsl.Artifact): - schema_title = 'google.VertexDataset' - schema_version = '0.0.0' - - @property - def path(self) -> str: - return self.uri.replace('gs://', - artifact_types._GCS_LOCAL_MOUNT_PREFIX) - - def test_func(input_artifact_one: Input[VertexDataset]): - self.assertEqual(input_artifact_one.uri, - 'gs://some-bucket/input_artifact_one') - self.assertEqual( - input_artifact_one.path, - os.path.join(artifact_types._GCS_LOCAL_MOUNT_PREFIX, - 'some-bucket/input_artifact_one')) - self.assertEqual( - input_artifact_one.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' - ) - self.assertIsInstance(input_artifact_one, VertexDataset) - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_input_artifact(self): - executor_input = """\ - { - "inputs": { - "artifacts": { - "input_artifact_one": { - 
"artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "google.VertexDataset" - }, - "uri": "gs://some-bucket/input_artifact_one" - } - ] - } - } - }, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_artifact_one: Input[Dataset]): - self.assertEqual(input_artifact_one.uri, - 'gs://some-bucket/input_artifact_one') - self.assertEqual( - input_artifact_one.path, - os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - self.assertEqual( - input_artifact_one.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' - ) - self.assertIsInstance(input_artifact_one, Dataset) - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_output_parameter(self): - executor_input = """\ - { - "outputs": { - "parameters": { - "output_parameter_path": { - "outputFile": "%(test_dir)s/gcs/some-bucket/some_task/nested/output_parameter" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(output_parameter_path: OutputPath(str)): - # Test that output parameters just use the passed in filename. - self.assertEqual( - output_parameter_path, self._test_dir + - '/gcs/some-bucket/some_task/nested/output_parameter') - with open(output_parameter_path, 'w') as f: - f.write('Hello, World!') - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_input_path_artifact(self): - executor_input = """\ - { - "inputs": { - "artifacts": { - "input_artifact_one_path": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Dataset" - }, - "uri": "gs://some-bucket/input_artifact_one" - } - ] - } - } - }, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_artifact_one_path: InputPath('Dataset')): - self.assertEqual( - input_artifact_one_path, - os.path.join(self._test_dir, 'some-bucket/input_artifact_one')) - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_output_path_artifact(self): - executor_input = """\ - { - "outputs": { - "artifacts": { - "output_artifact_one_path": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Model" - }, - "uri": "gs://some-bucket/output_artifact_one" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(output_artifact_one_path: OutputPath('Model')): - self.assertEqual( - output_artifact_one_path, - os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_output_metadata(self): - executor_input = """\ - { - "outputs": { - "artifacts": { - "output_artifact_two": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Metrics" - }, - "uri": "gs://some-bucket/output_artifact_two" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(output_artifact_two: Output[Metrics]): - output_artifact_two.metadata['key_1'] = 'value_1' - output_artifact_two.metadata['key_2'] = 2 - output_artifact_two.uri = 'new-uri' - - # log_metric works here since the schema is 
specified as Metrics. - output_artifact_two.log_metric('metric', 0.9) - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'artifacts': { - 'output_artifact_two': { - 'artifacts': [{ - 'name': - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', - 'uri': - 'new-uri', - 'metadata': { - 'key_1': 'value_1', - 'key_2': 2, - 'metric': 0.9 - } - }] - } - } - }) - - def test_function_string_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first_message": "Hello", - "second_message": ", ", - "third_message": "World" - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func( - first_message: str, - second_message: str, - third_message: str, - ) -> str: - return first_message + second_message + third_message - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': 'Hello, World' - }, - }) - - def test_function_with_int_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": 40, - "second": 2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: int, second: int) -> int: - return first + second - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': 42 - }, - }) - - @parameterized.parameters( - { - 'executor_input': - """\ - { - "inputs": { - "parameterValues": { - "first": 0.0, - "second": 1.2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """, - 'expected_output_metadata': { - 'parameterValues': { - 'Output': 1.2 - }, - }, - }, - { - 'executor_input': - """\ - { - "inputs": { - "parameterValues": { - "first": 1, - "second": 2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """, - 'expected_output_metadata': { - 'parameterValues': { - 'Output': 3 - }, - }, - }, - ) - def test_function_with_float_output(self, executor_input, - expected_output_metadata): - - def test_func(first: float, second: float) -> float: - return first + second - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, expected_output_metadata) - - def test_function_with_list_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": 40, - "second": 2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: int, second: int) -> List: - return [first, second] - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': [40, 2] - }, - }) - - def test_function_with_dict_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": 40, - "second": 2 - } - }, 
- "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: int, second: int) -> Dict: - return {'first': first, 'second': second} - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': { - 'first': 40, - 'second': 2 - } - }, - }) - - def test_function_with_typed_list_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": 40, - "second": 2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: int, second: int) -> List[int]: - return [first, second] - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': [40, 2] - }, - }) - - def test_function_with_typed_dict_output(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": 40, - "second": 2 - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: int, second: int) -> Dict[str, int]: - return {'first': first, 'second': second} - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, { - 'parameterValues': { - 'Output': { - 'first': 40, - 'second': 2 - } - }, - }) - - def test_artifact_output1(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": "Hello", - "second": "World" - } - }, - "outputs": { - "artifacts": { - "output": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Artifact" - }, - "uri": "gs://some-bucket/output" - } - ] - } - }, - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: str, second: str, output: Output[Artifact]) -> str: - with open(output.path, 'w') as f: - f.write('artifact output') - return first + ', ' + second - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'artifacts': { - 'output': { - 'artifacts': [{ - 'metadata': {}, - 'name': - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', - 'uri': - 'gs://some-bucket/output' - }] - } - }, - 'parameterValues': { - 'Output': 'Hello, World' - } - }) - - with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: - artifact_payload = f.read() - self.assertEqual(artifact_payload, 'artifact output') - - def test_artifact_output2(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first": "Hello", - "second": "World" - } - }, - "outputs": { - "artifacts": { - "Output": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Artifact" - }, - "uri": "gs://some-bucket/output" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(first: str, second: str) -> 
Artifact: - return first + ', ' + second - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'artifacts': { - 'Output': { - 'artifacts': [{ - 'metadata': {}, - 'name': - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', - 'uri': - 'gs://some-bucket/output' - }] - } - }, - }) - - with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f: - artifact_payload = f.read() - self.assertEqual(artifact_payload, 'Hello, World') - - def test_output_artifact3(self): - executor_input = """\ - { - "outputs": { - "artifacts": { - "output_artifact_one": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Model" - }, - "uri": "gs://some-bucket/output_artifact_one" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(output_artifact_one: Output[Model]): - self.assertEqual(output_artifact_one.uri, - 'gs://some-bucket/output_artifact_one') - - self.assertEqual( - output_artifact_one.path, - os.path.join(self._test_dir, 'some-bucket/output_artifact_one')) - self.assertEqual( - output_artifact_one.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123' - ) - self.assertIsInstance(output_artifact_one, Model) - - self.execute_and_load_output_metadata(test_func, executor_input) - - def test_named_tuple_output(self): - executor_input = """\ - { - "outputs": { - "artifacts": { - "output_dataset": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", - "type": { - "schemaTitle": "system.Dataset" - }, - "uri": "gs://some-bucket/output_dataset" - } - ] - } - }, - "parameters": { - "output_int": { - "outputFile": "gs://some-bucket/output_int" - }, - "output_string": { - "outputFile": "gs://some-bucket/output_string" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - # Functions returning named tuples should work. - def func_returning_named_tuple() -> NamedTuple('Outputs', [ - ('output_dataset', Dataset), - ('output_int', int), - ('output_string', str), - ]): - from collections import namedtuple - output = namedtuple( - 'Outputs', ['output_dataset', 'output_int', 'output_string']) - return output('Dataset contents', 101, 'Some output string') - - # Functions returning plain tuples should work too. 
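        # (Both variants are expected to yield identical executor outputs: the returned values
        # are matched to the NamedTuple fields by position, which the shared assertions below
        # verify for each case.)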
- def func_returning_plain_tuple() -> NamedTuple('Outputs', [ - ('output_dataset', Dataset), - ('output_int', int), - ('output_string', str), - ]): - return ('Dataset contents', 101, 'Some output string') - - for test_func in [ - func_returning_named_tuple, func_returning_plain_tuple - ]: - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'artifacts': { - 'output_dataset': { - 'artifacts': [{ - 'metadata': {}, - 'name': - 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', - 'uri': - 'gs://some-bucket/output_dataset' - }] - } - }, - 'parameterValues': { - 'output_int': 101, - 'output_string': 'Some output string' - }, - }) - - with open( - os.path.join(self._test_dir, 'some-bucket/output_dataset'), - 'r') as f: - artifact_payload = f.read() - self.assertEqual(artifact_payload, 'Dataset contents') - - def test_function_with_optional_inputs(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "first_message": "Hello", - "second_message": "World" - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func( - first_message: str = 'default value', - second_message: Optional[str] = None, - third_message: Optional[str] = None, - fourth_argument: str = 'abc', - fifth_argument: int = 100, - sixth_argument: float = 1.23, - seventh_argument: bool = True, - eighth_argument: list = [1, 2], - ninth_argument: dict = {'a': 1}, - ) -> str: - return (f'{first_message} ({type(first_message)}), ' - f'{second_message} ({type(second_message)}), ' - f'{third_message} ({type(third_message)}), ' - f'{fourth_argument} ({type(fourth_argument)}), ' - f'{fifth_argument} ({type(fifth_argument)}), ' - f'{sixth_argument} ({type(sixth_argument)}), ' - f'{seventh_argument} ({type(seventh_argument)}), ' - f'{eighth_argument} ({type(eighth_argument)}), ' - f'{ninth_argument} ({type(ninth_argument)}).') - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'parameterValues': { - 'Output': "Hello (), " - "World (), " - "None (), " - "abc (), " - "100 (), " - "1.23 (), " - "True (), " - "[1, 2] (), " - "{'a': 1} ()." - }, - }) - - def test_function_with_optional_input_artifact(self): - executor_input = """\ - { - "inputs": {}, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(a: Optional[Input[Artifact]] = None): - self.assertIsNone(a) - - self.execute(test_func, executor_input) - - def test_function_with_pipeline_task_final_status(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "status": {"error":{"code":9,"message":"The DAG failed because some tasks failed. 
The failed tasks are: [fail-op]."},"pipelineJobResourceName":"projects/123/locations/us-central1/pipelineJobs/pipeline-456", "pipelineTaskName": "upstream-task", "state":"FAILED"} - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(status: PipelineTaskFinalStatus) -> str: - return (f'Pipeline status: {status.state}\n' - f'Job resource name: {status.pipeline_job_resource_name}\n' - f'Pipeline task name: {status.pipeline_task_name}\n' - f'Error code: {status.error_code}\n' - f'Error message: {status.error_message}') - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual( - output_metadata, { - 'parameterValues': { - 'Output': - 'Pipeline status: FAILED\n' - 'Job resource name: projects/123/locations/us-central1/pipelineJobs/pipeline-456\n' - 'Pipeline task name: upstream-task\n' - 'Error code: 9\n' - 'Error message: The DAG failed because some tasks failed. The failed tasks are: [fail-op].' - }, - }) - - def test_component_with_input_path(self): - executor_input = """\ - { - "inputs": { - "artifacts": { - "dataset_one_path": { - "artifacts": [ - { - "name": "84085", - "type": { - "instanceSchema": "" - }, - "uri": "gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/preprocess/output_dataset_one", - "metadata": { - "display_name": "output_dataset_one" - } - } - ] - } - }, - "parameterValues": { - "input_bool": true, - "input_dict": { - "A": 1, - "B": 2 - }, - "input_list": [ - "a", - "b", - "c" - ], - "message": "here is my message", - "num_steps": 100 - } - }, - "outputs": { - "artifacts": { - "model": { - "artifacts": [ - { - "type": { - "schemaTitle": "system.Model", - "schemaVersion": "0.0.1" - }, - "uri": "gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/train/model" - } - ] - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - path = os.path.join( - self._test_dir, - 'mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/preprocess/output_dataset_one' - ) - os.makedirs(os.path.dirname(path)) - with open(path, 'w+') as f: - f.write('data!') - - def test_func( - # Use InputPath to get a locally accessible path for the input artifact - # of type `Dataset`. - dataset_one_path: InputPath('Dataset'), - # An input parameter of type string. - message: str, - # Use Output[T] to get a metadata-rich handle to the output artifact - # of type `Dataset`. - model: Output[Model], - # An input parameter of type bool. - input_bool: bool, - # An input parameter of type dict. - input_dict: Dict[str, int], - # An input parameter of type List[str]. - input_list: List[str], - # An input parameter of type int with a default value. 
- num_steps: int = 100, - ): - """Dummy Training step.""" - with open(dataset_one_path) as input_file: - dataset_one_contents = input_file.read() - - line = (f'dataset_one_contents: {dataset_one_contents} || ' - f'message: {message} || ' - f'input_bool: {input_bool}, type {type(input_bool)} || ' - f'input_dict: {input_dict}, type {type(input_dict)} || ' - f'input_list: {input_list}, type {type(input_list)} \n') - - with open(model.path, 'w') as output_file: - for i in range(num_steps): - output_file.write(f'Step {i}\n{line}\n=====\n') - - # model is an instance of Model artifact, which has a .metadata dictionary - # to store arbitrary metadata for the output artifact. - model.metadata['accuracy'] = 0.9 - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - self.assertEqual( - output_metadata, { - 'artifacts': { - 'model': { - 'artifacts': [{ - 'name': - '', - 'uri': - 'gs://mlpipeline/v2/artifacts/my-test-pipeline-beta/b2b0cdee-b15c-48ff-b8bc-a394ae46c854/train/model', - 'metadata': { - 'accuracy': 0.9 - } - }] - } - } - }) - - @mock.patch.dict( - os.environ, - {'CLUSTER_SPEC': json.dumps({'task': { - 'type': 'workerpool0' - }})}, - clear=True) - def test_distributed_training_strategy_write(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "input_parameter": "Hello, KFP" - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_parameter: str): - self.assertEqual(input_parameter, 'Hello, KFP') - - self.execute( - func=test_func, - executor_input=executor_input, - ) - self.assertTrue( - os.path.exists( - os.path.join(self._test_dir, 'output_metadata.json'))) - - @mock.patch.dict( - os.environ, - {'CLUSTER_SPEC': json.dumps({'task': { - 'type': 'workerpool1' - }})}, - clear=True) - def test_distributed_training_strategy_no_write(self): - executor_input = """\ - { - "inputs": { - "parameterValues": { - "input_parameter": "Hello, KFP" - } - }, - "outputs": { - "parameters": { - "Output": { - "outputFile": "gs://some-bucket/output" - } - }, - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_parameter: str): - self.assertEqual(input_parameter, 'Hello, KFP') - - self.execute( - func=test_func, - executor_input=executor_input, - ) - self.assertFalse( - os.path.exists( - os.path.join(self._test_dir, 'output_metadata.json'))) - - def test_single_artifact_input(self): - executor_input = """\ - { - "inputs": { - "artifacts": { - "input_artifact": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact", - "type": { - "schemaTitle": "system.Artifact" - }, - "uri": "gs://some-bucket/output/input_artifact" - } - ] - } - } - }, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_artifact: Input[Artifact]): - self.assertIsInstance(input_artifact, Artifact) - self.assertEqual( - input_artifact.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' - ) - self.assertEqual( - input_artifact.name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' - ) - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, {}) - - def test_list_of_artifacts_input(self): - executor_input = """\ - { - "inputs": { - 
"artifacts": { - "input_list": { - "artifacts": [ - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0", - "type": { - "schemaTitle": "system.Artifact" - }, - "uri": "gs://some-bucket/output/input_list/0" - }, - { - "metadata": {}, - "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1", - "type": { - "schemaTitle": "system.Artifact" - }, - "uri": "gs://some-bucket/output/input_list/1" - } - ] - } - } - }, - "outputs": { - "outputFile": "%(test_dir)s/output_metadata.json" - } - } - """ - - def test_func(input_list: Input[List[Artifact]]): - self.assertEqual(len(input_list), 2) - self.assertEqual( - input_list[0].name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0' - ) - self.assertEqual( - input_list[1].name, - 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1' - ) - - output_metadata = self.execute_and_load_output_metadata( - test_func, executor_input) - - self.assertDictEqual(output_metadata, {}) - - -class TestDictToArtifact(parameterized.TestCase): - - @parameterized.parameters( - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Artifact' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.Artifact, - 'expected_type': artifact_types.Artifact, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Model' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.Model, - 'expected_type': artifact_types.Model, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Dataset' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.Dataset, - 'expected_type': artifact_types.Dataset, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Metrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.Metrics, - 'expected_type': artifact_types.Metrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.ClassificationMetrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.ClassificationMetrics, - 'expected_type': artifact_types.ClassificationMetrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.SlicedClassificationMetrics' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': artifact_types.SlicedClassificationMetrics, - 'expected_type': artifact_types.SlicedClassificationMetrics, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.HTML' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': None, - 'expected_type': artifact_types.HTML, - }, - { - 'runtime_artifact': { - 'metadata': {}, - 'name': 'input_artifact_one', - 'type': { - 'schemaTitle': 'system.Markdown' - }, - 'uri': 'gs://some-bucket/input_artifact_one' - }, - 'artifact_cls': None, - 'expected_type': artifact_types.Markdown, - }, - ) - def test_dict_to_artifact_kfp_artifact( - self, - runtime_artifact, - artifact_cls, - expected_type, - ): - # with artifact_cls - self.assertIsInstance( - 
executor.create_artifact_instance( - runtime_artifact, artifact_cls=artifact_cls), expected_type) - - # without artifact_cls - self.assertIsInstance( - executor.create_artifact_instance(runtime_artifact), expected_type) - - -if __name__ == '__main__': - unittest.main() diff --git a/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py b/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py deleted file mode 100644 index a1d432cd31..0000000000 --- a/sdk/python/kfp-dsl/runtime_tests/import_objects_test.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2023 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -class TestImportObjects: - - def test(self): - # from kfp.dsl import * only allowed at module level, so emulate behavior - from kfp import dsl - for obj_name in dir(dsl): - if not obj_name.startswith('_'): - getattr(dsl, obj_name) diff --git a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py deleted file mode 100644 index 27d418a333..0000000000 --- a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Pipeline using ExitHandler with PipelineTaskFinalStatus.""" - -from kfp import compiler -from kfp import dsl -from kfp.dsl import component -from kfp.dsl import PipelineTaskFinalStatus - - -@component -def exit_op(user_input: str, status: PipelineTaskFinalStatus): - """Checks pipeline run status.""" - print('Pipeline status: ', status.state) - print('Job resource name: ', status.pipeline_job_resource_name) - print('Pipeline task name: ', status.pipeline_task_name) - print('Error code: ', status.error_code) - print('Error message: ', status.error_message) - - -@component -def print_op(message: str): - """Prints a message.""" - print(message) - - -@component -def fail_op(message: str): - """Fails.""" - import sys - print(message) - sys.exit(1) - - -@dsl.pipeline(name='pipeline-with-task-final-status') -def my_pipeline(message: str = 'Hello World!'): - exit_task = exit_op(user_input=message) - - with dsl.ExitHandler(exit_task, name='my-pipeline'): - print_op(message=message) - fail_op(message='Task failed.') - - -if __name__ == '__main__': - compiler.Compiler().compile( - pipeline_func=my_pipeline, - package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml b/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml deleted file mode 100644 index 86ad841a3d..0000000000 --- a/sdk/python/kfp-dsl/runtime_tests/test_data/pipeline_with_task_final_status.yaml +++ /dev/null @@ -1,183 +0,0 @@ -# PIPELINE DEFINITION -# Name: pipeline-with-task-final-status -# Inputs: -# message: str [Default: 'Hello World!'] -components: - comp-exit-handler-1: - dag: - tasks: - fail-op: - cachingOptions: - enableCache: true - componentRef: - name: comp-fail-op - inputs: - parameters: - message: - runtimeValue: - constant: Task failed. - taskInfo: - name: fail-op - print-op: - cachingOptions: - enableCache: true - componentRef: - name: comp-print-op - inputs: - parameters: - message: - componentInputParameter: pipelinechannel--message - taskInfo: - name: print-op - inputDefinitions: - parameters: - pipelinechannel--message: - parameterType: STRING - comp-exit-op: - executorLabel: exec-exit-op - inputDefinitions: - parameters: - status: - isOptional: true - parameterType: TASK_FINAL_STATUS - user_input: - parameterType: STRING - comp-fail-op: - executorLabel: exec-fail-op - inputDefinitions: - parameters: - message: - parameterType: STRING - comp-print-op: - executorLabel: exec-print-op - inputDefinitions: - parameters: - message: - parameterType: STRING -deploymentSpec: - executors: - exec-exit-op: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - exit_op - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef exit_op(user_input: str, status: PipelineTaskFinalStatus):\n\ - \ \"\"\"Checks pipeline run status.\"\"\"\n print('Pipeline status:\ - \ ', status.state)\n print('Job resource name: ', status.pipeline_job_resource_name)\n\ - \ print('Pipeline task name: ', status.pipeline_task_name)\n print('Error\ - \ code: ', status.error_code)\n print('Error message: ', status.error_message)\n\ - \n" - image: python:3.7 - exec-fail-op: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - fail_op - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n\ - \ print(message)\n sys.exit(1)\n\n" - image: python:3.7 - exec-print-op: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - print_op - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp-dsl==2.0.1'\ - \ && \"$0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n\ - \ print(message)\n\n" - image: python:3.7 -pipelineInfo: - name: pipeline-with-task-final-status -root: - dag: - tasks: - exit-handler-1: - componentRef: - name: comp-exit-handler-1 - inputs: - parameters: - pipelinechannel--message: - componentInputParameter: message - taskInfo: - name: my-pipeline - exit-op: - cachingOptions: - enableCache: true - componentRef: - name: comp-exit-op - dependentTasks: - - exit-handler-1 - inputs: - parameters: - status: - taskFinalStatus: - producerTask: exit-handler-1 - user_input: - componentInputParameter: message - taskInfo: - name: exit-op - triggerPolicy: - strategy: ALL_UPSTREAM_TASKS_COMPLETED - inputDefinitions: - parameters: - message: - defaultValue: Hello World! 
- isOptional: true - parameterType: STRING -schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 diff --git a/sdk/python/kfp-dsl/setup.py b/sdk/python/kfp-dsl/setup.py deleted file mode 100644 index 5c2cdfaccc..0000000000 --- a/sdk/python/kfp-dsl/setup.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2023 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import setuptools - -setuptools.setup( - name='kfp-dsl', - version='2.1.2', - description='A KFP SDK subpackage containing the DSL and runtime code.', - author='google', - author_email='kubeflow-pipelines@google.com', - url='https://github.com/kubeflow/pipelines', - packages=setuptools.find_namespace_packages(include=['kfp.*']), - python_requires='>=3.7.0', - install_requires=['typing-extensions>=3.7.4,<5; python_version<"3.9"'], - include_package_data=True, - license='Apache 2.0', -) From fe60742b000763b0d589d3124b544091a0aa29fb Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:55:32 -0700 Subject: [PATCH 095/253] fix(backend): Move ConMaxLifeTime back to DbConfig.ConMaxLifeTime. (#9873) * fix(backend): Move ConMaxLifeTime back to DbConfig.ConMaxLifeTime. * remove comma --- backend/src/apiserver/config/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/apiserver/config/config.json b/backend/src/apiserver/config/config.json index 251d22a387..aa7088ce77 100644 --- a/backend/src/apiserver/config/config.json +++ b/backend/src/apiserver/config/config.json @@ -7,7 +7,8 @@ }, "PostgreSQLConfig": { "DBName": "mlpipeline" - } + }, + "ConMaxLifeTime": "120s" }, "ObjectStoreConfig": { "AccessKey": "minio", @@ -16,7 +17,6 @@ "PipelinePath": "pipelines" }, "DBDriverName": "mysql", - "ConMaxLifeTime": "120s", "ARCHIVE_CONFIG_LOG_FILE_NAME": "main.log", "ARCHIVE_CONFIG_LOG_PATH_PREFIX": "/artifacts", "InitConnectionTimeout": "6m", From 47f11475c5b19cc99a49962527723fff5f85aa5a Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 16 Aug 2023 11:01:21 -0700 Subject: [PATCH 096/253] feat(components): Metric importing for embedding evaluation PiperOrigin-RevId: 557540363 --- .../model_evaluation/import_evaluation/component.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py index eb8e991d16..2d4976d1ed 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/import_evaluation/component.py @@ -40,6 +40,7 @@ def model_evaluation_import( summarization_metrics: Optional[Input[Metrics]] = None, explanation: Optional[Input[Metrics]] = None, feature_attributions: Optional[Input[Metrics]] = None, + embedding_metrics: 
Optional[Input[Metrics]] = None, display_name: str = "", dataset_path: str = "", dataset_paths: List[str] = [], @@ -82,6 +83,8 @@ def model_evaluation_import( component. feature_attributions: The feature attributions metrics artifact generated from the feature attribution component. + embedding_metrics: The embedding metrics artifact generated from the + embedding retrieval metrics component. display_name: The display name for the uploaded model evaluation resource. """ # fmt: on @@ -159,6 +162,13 @@ def model_evaluation_import( feature_attributions.uri, ], ), + dsl.IfPresentPlaceholder( + input_name="embedding_metrics", + then=[ + "--embedding_metrics", + embedding_metrics.uri, + ], + ), dsl.IfPresentPlaceholder( input_name="problem_type", then=[ From 90cec167c0e49e115910928b00b5c5e50eaeed7c Mon Sep 17 00:00:00 2001 From: Junggil Lee Date: Thu, 17 Aug 2023 09:21:38 +0900 Subject: [PATCH 097/253] fix(samples): Update volume_ops sample to v2 pipelines (#9877) * Update volume_ops sample to v2 pipelines * Update samples/core/kubernetes_pvc/kubernetes_pvc.py minor change on copyright range Co-authored-by: Connor McCarthy --------- Co-authored-by: Connor McCarthy --- samples/core/kubernetes_pvc/README.md | 12 +++ samples/core/kubernetes_pvc/kubernetes_pvc.py | 62 +++++++++++++++ samples/core/volume_ops/README.md | 75 ------------------- samples/core/volume_ops/volume_ops.py | 39 ---------- 4 files changed, 74 insertions(+), 114 deletions(-) create mode 100644 samples/core/kubernetes_pvc/README.md create mode 100644 samples/core/kubernetes_pvc/kubernetes_pvc.py delete mode 100644 samples/core/volume_ops/README.md delete mode 100644 samples/core/volume_ops/volume_ops.py diff --git a/samples/core/kubernetes_pvc/README.md b/samples/core/kubernetes_pvc/README.md new file mode 100644 index 0000000000..8384b2b37f --- /dev/null +++ b/samples/core/kubernetes_pvc/README.md @@ -0,0 +1,12 @@ +## Read/write to a Kubernetes PVC using kfp-kubernetes + +This sample uses [kfp-kubernetes](https://pypi.org/project/kfp-kubernetes/) to +demonstrate typical usage of a plugin library. Specifically, we will use +`kfp-kubernetes` to create a [PersistentVolumeClaim +(PVC)](https://kubernetes.io/docs/concepts/storage/persistent-volumes/), use the +PVC to pass data between tasks, and delete the PVC after using it. + +See the [kfp-kubernetes documentation](https://kfp-kubernetes.readthedocs.io/) +and [Kubeflow Pipeline +documentation](https://www.kubeflow.org/docs/components/pipelines/v2/platform-specific-features/#example-readwrite-to-a-kubernetes-pvc-using-kfp-kubernetes) +for more information. diff --git a/samples/core/kubernetes_pvc/kubernetes_pvc.py b/samples/core/kubernetes_pvc/kubernetes_pvc.py new file mode 100644 index 0000000000..1b82772f65 --- /dev/null +++ b/samples/core/kubernetes_pvc/kubernetes_pvc.py @@ -0,0 +1,62 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from kfp import dsl, compiler +from kfp import kubernetes + +@dsl.component +def make_data(): + with open('/data/file.txt', 'w') as f: + f.write('my data') + +@dsl.component +def read_data(): + with open('/reused_data/file.txt') as f: + print(f.read()) + +@dsl.pipeline( + name="kubernetes-pvc-basic", + description="A Basic Example on Kubernetes PVC Usage." +) +def my_pipeline(): + pvc1 = kubernetes.CreatePVC( + # can also use pvc_name instead of pvc_name_suffix to use a pre-existing PVC + pvc_name_suffix='-my-pvc', + access_modes=['ReadWriteOnce'], + size='5Gi', + storage_class_name='standard', + ) + + task1 = make_data() + # normally task sequencing is handled by data exchange via component inputs/outputs + # but since data is exchanged via volume, we need to call .after explicitly to sequence tasks + task2 = read_data().after(task1) + + kubernetes.mount_pvc( + task1, + pvc_name=pvc1.outputs['name'], + mount_path='/data', + ) + kubernetes.mount_pvc( + task2, + pvc_name=pvc1.outputs['name'], + mount_path='/reused_data', + ) + + # wait to delete the PVC until after task2 completes + delete_pvc1 = kubernetes.DeletePVC( + pvc_name=pvc1.outputs['name']).after(task2) + +if __name__ == '__main__': + compiler.Compiler().compile(my_pipeline, __file__ + '.yaml') diff --git a/samples/core/volume_ops/README.md b/samples/core/volume_ops/README.md deleted file mode 100644 index 7e07379046..0000000000 --- a/samples/core/volume_ops/README.md +++ /dev/null @@ -1,75 +0,0 @@ -## Simplify the creation of `PersistentVolumeClaim` instances - -**`VolumeOp`:** A specified `ResourceOp` for PVC creation. - -### Arguments: -The following arguments are an extension to the `ResourceOp` arguments. -If a `k8s_resource` is passed, then none of the following may be provided. - -* `resource_name`: The name of the resource which will be created. - This string will be prepended with the workflow name. - This may contain `PipelineParam`s. - (_required_) -* `size`: The requested size for the PVC. - This may contain `PipelineParam`s. - (_required_) -* `storage_class`: The storage class to be used. - This may contain `PipelineParam`s. - (_optional_) -* `modes`: The `accessModes` of the PVC. - Check - [this documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes) - for further information. - The user may find the following modes built-in: - * `VOLUME_MODE_RWO`: `["ReadWriteOnce"]` - * `VOLUME_MODE_RWM`: `["ReadWriteMany"]` - * `VOLUME_MODE_ROM`: `["ReadOnlyMany"]` - - Defaults to `VOLUME_MODE_RWM`. -* `annotations`: Annotations to be patched in the PVC. - These may contain `PipelineParam`s. - (_optional_) -* `data_source`: It is used to create a PVC from a `VolumeSnapshot`. - Can be either a `V1TypedLocalObjectReference` or a `string`, and may contain `PipelineParam`s. - (_Alpha feature_, _optional_) - -### Outputs -Additionally to the whole specification of the resource and its name (`ResourceOp` defaults), a -`VolumeOp` also outputs the storage size of the bounded PV (as `step.outputs["size"]`). -However, this may be empty if the storage provisioner has a `WaitForFirstConsumer` binding mode. -This value, if not empty, is always ≥ the requested size. - -### Useful attributes -1. The `VolumeOp` step has a `.volume` attribute which is a `PipelineVolume` referencing the - created PVC. - A `PipelineVolume` is essentially a `V1Volume` supplemented with an `.after()` method extending - the carried dependencies. 
- These dependencies can then be parsed properly by a `ContainerOp`, if used with the `pvolumes` - attribute, to extend the `ContainerOp`'s dependencies. -2. A `ContainerOp` has a `pvolumes` argument in its constructor. - This is a dictionary with mount paths as keys and volumes as values and functions similarly to - `file_outputs` (which can then be used as `op.outputs["key"]` or `op.output`). - For example: - ```python - vop = dsl.VolumeOp( - name="volume_creation", - resource_name="mypvc", - size="1Gi" - ) - step1 = dsl.ContainerOp( - name="step1", - ... - pvolumes={"/mnt": vop.volume} # Implies execution after vop - ) - step2 = dsl.ContainerOp( - name="step2", - ... - pvolumes={"/data": step1.pvolume, # Implies execution after step1 - "/mnt": dsl.PipelineVolume(pvc="existing-pvc")} - ) - step3 = dsl.ContainerOp( - name="step3", - ... - pvolumes={"/common": step2.pvolumes["/mnt"]} # Implies execution after step2 - ) - ``` diff --git a/samples/core/volume_ops/volume_ops.py b/samples/core/volume_ops/volume_ops.py deleted file mode 100644 index 5ce57f12cb..0000000000 --- a/samples/core/volume_ops/volume_ops.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2019-2023 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from kfp import components, dsl, compiler - -@components.create_component_from_func -def write_to_volume(): - with open("/mnt/file.txt", "w") as file: - file.write("Hello world") - - -@dsl.pipeline( - name="volumeop-basic", - description="A Basic Example on VolumeOp Usage." 
-) -def volumeop_basic(size: str="1Gi"): - vop = dsl.VolumeOp( - name="create-pvc", - resource_name="my-pvc", - modes=dsl.VOLUME_MODE_RWO, - size=size - ) - - write_to_volume().add_pvolumes({"/mnt": vop.volume}) - - -if __name__ == '__main__': - compiler.Compiler().compile(volumeop_basic, __file__ + '.yaml') From cd7e316db6d90c447f1e6c967d8d632dbe0f6993 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 17 Aug 2023 02:33:38 -0700 Subject: [PATCH 098/253] test: upgrade e2e sample tests to using sdk v2 (#9885) * install kfp sdk 2.0 in sample test * remove reference to GCSHelper from deprecated module * exclude obsolete samples * update test script * update client usage * revert recent changes to exit_handler.py --- samples/core/exit_handler/exit_handler.py | 17 +- test/e2e_test_gke_v2.yaml | 3 - test/sample-test/Dockerfile | 2 +- test/sample-test/run_sample_test.py | 145 +++---- test/sample-test/sample_test_launcher.py | 494 ++++++++++++---------- 5 files changed, 329 insertions(+), 332 deletions(-) mode change 100644 => 100755 samples/core/exit_handler/exit_handler.py diff --git a/samples/core/exit_handler/exit_handler.py b/samples/core/exit_handler/exit_handler.py old mode 100644 new mode 100755 index a02122ab4e..39bab79d72 --- a/samples/core/exit_handler/exit_handler.py +++ b/samples/core/exit_handler/exit_handler.py @@ -1,4 +1,5 @@ -# Copyright 2021 The Kubeflow Authors +#!/usr/bin/env python3 +# Copyright 2019-2023 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,22 +15,19 @@ """Pipeline using ExitHandler.""" import os -from kfp import dsl + from kfp import compiler +from kfp import dsl from kfp.dsl import component -# In tests, we install a KFP package from the PR under test. Users should not -# normally need to specify `kfp_package_path` in their component definitions. -_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH') - -@component(kfp_package_path=_KFP_PACKAGE_PATH) +@component def print_op(message: str): """Prints a message.""" print(message) -@component(kfp_package_path=_KFP_PACKAGE_PATH) +@component def fail_op(message: str): """Fails.""" import sys @@ -49,5 +47,4 @@ def pipeline_exit_handler(message: str = 'Hello World!'): if __name__ == '__main__': compiler.Compiler().compile( - pipeline_func=pipeline_exit_handler, - package_path=__file__.replace('.py', '.yaml')) + pipeline_func=pipeline_exit_handler, package_path=__file__ + '.yaml') diff --git a/test/e2e_test_gke_v2.yaml b/test/e2e_test_gke_v2.yaml index ab6a41952f..cdc79cf5ac 100644 --- a/test/e2e_test_gke_v2.yaml +++ b/test/e2e_test_gke_v2.yaml @@ -145,10 +145,7 @@ spec: value: "{{item}}" withItems: - exit_handler - - recursion - sequential - - parallel_join - - volume_ops - name: upgrade-test-preparation inputs: diff --git a/test/sample-test/Dockerfile b/test/sample-test/Dockerfile index 9f661271ab..ffc3bbd0a8 100644 --- a/test/sample-test/Dockerfile +++ b/test/sample-test/Dockerfile @@ -16,7 +16,7 @@ RUN pip3 install -r /python/src/github.com/kubeflow/pipelines/test/sample-test/r # Install python client, including DSL compiler. # COPY ./sdk/python /sdk/python # RUN pip3 install /sdk/python -RUN pip3 install kfp~=1.8 +RUN pip3 install kfp~=2.0 # Copy sample test and samples source code. 
COPY ./test/sample-test /python/src/github.com/kubeflow/pipelines/test/sample-test diff --git a/test/sample-test/run_sample_test.py b/test/sample-test/run_sample_test.py index 2ec1765217..1a77d2fd05 100644 --- a/test/sample-test/run_sample_test.py +++ b/test/sample-test/run_sample_test.py @@ -12,40 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. -import kfp +from datetime import datetime import os import tarfile import time + +from constants import CONFIG_DIR +from constants import DEFAULT_CONFIG +from constants import SCHEMA_CONFIG +import kfp +from kfp import Client import utils import yamale import yaml -from datetime import datetime -from kfp import Client -from constants import CONFIG_DIR, DEFAULT_CONFIG, SCHEMA_CONFIG class PySampleChecker(object): - def __init__( - self, - testname, - input, - output, - result, - experiment_name, - host, - namespace='kubeflow' - ): + def __init__(self, + testname, + input, + output, + result, + experiment_name, + host, + namespace='kubeflow'): """Util class for checking python sample test running results. - :param testname: test name. - :param input: The path of a pipeline file that will be submitted. - :param output: The path of the test output. - :param result: The path of the test result that will be exported. - :param host: The hostname of KFP API endpoint. - :param namespace: namespace of the deployed pipeline system. Default: kubeflow - :param experiment_name: Name of the experiment to monitor - """ + :param testname: test name. + :param input: The path of a pipeline file that will be submitted. + :param output: The path of the test output. + :param result: The path of the test result that will be exported. + :param host: The hostname of KFP API endpoint. + :param namespace: namespace of the deployed pipeline system. Default: kubeflow + :param experiment_name: Name of the experiment to monitor + """ self._testname = testname self._experiment_name = experiment_name self._input = input @@ -72,20 +73,18 @@ def run(self): self._client = Client(host=self._host) ###### Check Input File ###### - utils.add_junit_test( - self._test_cases, 'input generated yaml file', - os.path.exists(self._input), 'yaml file is not generated' - ) + utils.add_junit_test(self._test_cases, 'input generated yaml file', + os.path.exists(self._input), + 'yaml file is not generated') if not os.path.exists(self._input): - utils.write_junit_xml( - self._test_name, self._result, self._test_cases - ) + utils.write_junit_xml(self._test_name, self._result, + self._test_cases) print('Error: job not found.') exit(1) ###### Create Experiment ###### response = self._client.create_experiment(self._experiment_name) - self._experiment_id = response.id + self._experiment_id = response.experiment_id utils.add_junit_test(self._test_cases, 'create experiment', True) ###### Create Job ###### @@ -98,8 +97,8 @@ def run(self): raw_args = yaml.safe_load(f) default_config = yamale.make_data(DEFAULT_CONFIG) yamale.validate( - config_schema, default_config - ) # If fails, a ValueError will be raised. + config_schema, + default_config) # If fails, a ValueError will be raised. 
except yaml.YAMLError as yamlerr: raise RuntimeError('Illegal default config:{}'.format(yamlerr)) except OSError as ose: @@ -109,25 +108,21 @@ def run(self): self._run_pipeline = raw_args['run_pipeline'] try: - config_file = os.path.join( - CONFIG_DIR, '%s.config.yaml' % self._testname - ) + config_file = os.path.join(CONFIG_DIR, + '%s.config.yaml' % self._testname) with open(config_file, 'r') as f: raw_args = yaml.safe_load(f) test_config = yamale.make_data(config_file) yamale.validate( - config_schema, test_config - ) # If fails, a ValueError will be raised. + config_schema, + test_config) # If fails, a ValueError will be raised. except yaml.YAMLError as yamlerr: - print( - 'No legit yaml config file found, use default args:{}'. - format(yamlerr) - ) + print('No legit yaml config file found, use default args:{}'.format( + yamlerr)) except OSError as ose: print( - 'Config file with the same name not found, use default args:{}'. - format(ose) - ) + 'Config file with the same name not found, use default args:{}' + .format(ose)) else: if 'arguments' in raw_args.keys() and raw_args['arguments']: self._test_args.update(raw_args['arguments']) @@ -143,17 +138,15 @@ def run(self): if self._testname == 'parameterized_tfx_oss': self._test_args['pipeline-root'] = os.path.join( self._test_args['output'], - 'tfx_taxi_simple_' + kfp.dsl.RUN_ID_PLACEHOLDER - ) + 'tfx_taxi_simple_' + kfp.dsl.RUN_ID_PLACEHOLDER) del self._test_args['output'] # Submit for pipeline running. if self._run_pipeline: - response = self._client.run_pipeline( - self._experiment_id, self._job_name, self._input, - self._test_args - ) - self._run_id = response.id + response = self._client.run_pipeline(self._experiment_id, + self._job_name, self._input, + self._test_args) + self._run_id = response.run_id utils.add_junit_test(self._test_cases, 'create pipeline run', True) def check(self): @@ -163,54 +156,22 @@ def check(self): try: start_time = datetime.now() response = self._client.wait_for_run_completion( - self._run_id, self._test_timeout - ) - succ = (response.run.status.lower() == 'succeeded') + self._run_id, self._test_timeout) + succ = (response.state.lower() == 'succeeded') end_time = datetime.now() elapsed_time = (end_time - start_time).seconds - utils.add_junit_test( - self._test_cases, 'job completion', succ, - 'waiting for job completion failure', elapsed_time - ) + utils.add_junit_test(self._test_cases, 'job completion', succ, + 'waiting for job completion failure', + elapsed_time) finally: - ###### Output Argo Log for Debugging ###### - workflow_json = self._client._get_workflow_json(self._run_id) - workflow_id = workflow_json['metadata']['name'] - print("Argo Workflow Name: ", workflow_id) - argo_log, _ = utils.run_bash_command( - 'argo logs {} -n {}'.format( - workflow_id, self._namespace - ) - ) - print('=========Argo Workflow Log=========') - print(argo_log) + # TODO(chensun): print log for debugging + pass if not succ: - utils.write_junit_xml( - self._test_name, self._result, self._test_cases - ) + utils.write_junit_xml(self._test_name, self._result, + self._test_cases) exit(1) - ###### Validate the results for specific test cases ###### - if self._testname == 'xgboost_training_cm': - # For xgboost sample, check its confusion matrix. 
- cm_tar_path = './confusion_matrix.tar.gz' - utils.get_artifact_in_minio( - workflow_json, 'confusion-matrix', cm_tar_path, - 'mlpipeline-ui-metadata' - ) - with tarfile.open(cm_tar_path) as tar_handle: - file_handles = tar_handle.getmembers() - assert len(file_handles) == 1 - - with tar_handle.extractfile(file_handles[0]) as f: - cm_data = f.read() - utils.add_junit_test( - self._test_cases, 'confusion matrix format', - (len(cm_data) > 0), - 'the confusion matrix file is empty' - ) - ###### Delete Job ###### #TODO: add deletion when the backend API offers the interface. diff --git a/test/sample-test/sample_test_launcher.py b/test/sample-test/sample_test_launcher.py index f9bd30a27f..7b02556f5c 100644 --- a/test/sample-test/sample_test_launcher.py +++ b/test/sample-test/sample_test_launcher.py @@ -11,250 +11,292 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -This launcher module serves as the entry-point of the sample test image. It -decides which test to trigger based upon the arguments provided. +"""This launcher module serves as the entry-point of the sample test image. + +It decides which test to trigger based upon the arguments provided. """ -import fire import os -import papermill as pm +import pathlib import re import subprocess -import utils -import yamale -import yaml -import kubernetes -from constants import PAPERMILL_ERR_MSG, BASE_DIR, TEST_DIR, SCHEMA_CONFIG, CONFIG_DIR, DEFAULT_CONFIG from check_notebook_results import NoteBookChecker -from kfp.containers._gcs_helper import GCSHelper +from constants import BASE_DIR +from constants import CONFIG_DIR +from constants import DEFAULT_CONFIG +from constants import PAPERMILL_ERR_MSG +from constants import SCHEMA_CONFIG +from constants import TEST_DIR +import fire +import kubernetes +import papermill as pm from run_sample_test import PySampleChecker +import utils +import yamale +import yaml class SampleTest(object): - def __init__(self, test_name, results_gcs_dir, host='', target_image_prefix='', - namespace='kubeflow'): - """Launch a KFP sample_test provided its name. - - :param test_name: name of the corresponding sample test. - :param results_gcs_dir: gs dir to store test result. - :param host: host of KFP API endpoint, default is auto-discovery from inverse-proxy-config. - :param target_image_prefix: prefix of docker image, default is empty. - :param namespace: namespace for kfp, default is kubeflow. - """ - self._test_name = test_name - self._results_gcs_dir = results_gcs_dir - # Capture the first segment after gs:// as the project name. - self._bucket_name = results_gcs_dir.split('/')[2] - self._target_image_prefix = target_image_prefix - self._namespace = namespace - self._host = host - if self._host == '': - try: - # Get inverse proxy hostname from a config map called 'inverse-proxy-config' - # in the same namespace as KFP. + def __init__(self, + test_name, + results_gcs_dir, + host='', + target_image_prefix='', + namespace='kubeflow'): + """Launch a KFP sample_test provided its name. + + :param test_name: name of the corresponding sample test. + :param results_gcs_dir: gs dir to store test result. + :param host: host of KFP API endpoint, default is auto-discovery from inverse-proxy-config. + :param target_image_prefix: prefix of docker image, default is empty. + :param namespace: namespace for kfp, default is kubeflow. 
+ """ + self._test_name = test_name + self._results_gcs_dir = results_gcs_dir + # Capture the first segment after gs:// as the project name. + self._bucket_name = results_gcs_dir.split('/')[2] + self._target_image_prefix = target_image_prefix + self._namespace = namespace + self._host = host + if self._host == '': + try: + # Get inverse proxy hostname from a config map called 'inverse-proxy-config' + # in the same namespace as KFP. + try: + kubernetes.config.load_incluster_config() + except: + kubernetes.config.load_kube_config() + + v1 = kubernetes.client.CoreV1Api() + inverse_proxy_config = v1.read_namespaced_config_map( + name='inverse-proxy-config', namespace=self._namespace) + self._host = inverse_proxy_config.data.get('Hostname') + except Exception as err: + raise RuntimeError( + 'Failed to get inverse proxy hostname') from err + # Keep as comment here, we can also specify host in-cluster as the following, + # but we no longer use it in e2e tests, because we prefer including + # test coverage for inverse proxy. + # self._host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace + print('KFP API host is %s' % self._host) + + self._is_notebook = None + self._work_dir = os.path.join(BASE_DIR, 'samples/core/', + self._test_name) + + self._sample_test_result = 'junit_Sample%sOutput.xml' % self._test_name + self._sample_test_output = self._results_gcs_dir + + def _copy_result(self): + """Copy generated sample test result to gcs, so that Prow can pick + it.""" + + def _upload_gcs_file(local_path: str, gcs_path: str): + from google.cloud import storage + pure_path = pathlib.PurePath(gcs_path) + gcs_bucket = pure_path.parts[1] + gcs_blob = '/'.join(pure_path.parts[2:]) + client = storage.Client() + bucket = client.get_bucket(gcs_bucket) + blob = bucket.blob(gcs_blob) + blob.upload_from_filename(local_path) + + print('Copy the test results to GCS %s/' % self._results_gcs_dir) + + _upload_gcs_file( + self._sample_test_result, + os.path.join(self._results_gcs_dir, self._sample_test_result)) + + def _compile(self): + + os.chdir(self._work_dir) + print('Run the sample tests...') + + # Looking for the entry point of the test. + list_of_files = os.listdir('.') + for file in list_of_files: + m = re.match(self._test_name + '\.[a-zA-Z]+', file) + if m: + file_name, ext_name = os.path.splitext(file) + if self._is_notebook is not None: + raise (RuntimeError( + 'Multiple entry points found under sample: {}'.format( + self._test_name))) + if ext_name == '.py': + self._is_notebook = False + if ext_name == '.ipynb': + self._is_notebook = True + + if self._is_notebook is None: + raise (RuntimeError('No entry point found for sample: {}'.format( + self._test_name))) + + config_schema = yamale.make_schema(SCHEMA_CONFIG) + # Retrieve default config try: - kubernetes.config.load_incluster_config() - except: - kubernetes.config.load_kube_config() - - v1 = kubernetes.client.CoreV1Api() - inverse_proxy_config = v1.read_namespaced_config_map(name='inverse-proxy-config', namespace=self._namespace) - self._host = inverse_proxy_config.data.get('Hostname') - except Exception as err: - raise RuntimeError('Failed to get inverse proxy hostname') from err - # Keep as comment here, we can also specify host in-cluster as the following, - # but we no longer use it in e2e tests, because we prefer including - # test coverage for inverse proxy. 
- # self._host = 'ml-pipeline.%s.svc.cluster.local:8888' % self._namespace - print('KFP API host is %s' % self._host) - - self._is_notebook = None - self._work_dir = os.path.join(BASE_DIR, 'samples/core/', self._test_name) - - self._sample_test_result = 'junit_Sample%sOutput.xml' % self._test_name - self._sample_test_output = self._results_gcs_dir - - def _copy_result(self): - """ Copy generated sample test result to gcs, so that Prow can pick it. """ - print('Copy the test results to GCS %s/' % self._results_gcs_dir) - - GCSHelper.upload_gcs_file( - self._sample_test_result, - os.path.join(self._results_gcs_dir, self._sample_test_result)) - - def _compile(self): - - os.chdir(self._work_dir) - print('Run the sample tests...') - - # Looking for the entry point of the test. - list_of_files = os.listdir('.') - for file in list_of_files: - m = re.match(self._test_name + '\.[a-zA-Z]+', file) - if m: - file_name, ext_name = os.path.splitext(file) - if self._is_notebook is not None: - raise(RuntimeError('Multiple entry points found under sample: {}'.format(self._test_name))) - if ext_name == '.py': - self._is_notebook = False - if ext_name == '.ipynb': - self._is_notebook = True - - if self._is_notebook is None: - raise(RuntimeError('No entry point found for sample: {}'.format(self._test_name))) - - config_schema = yamale.make_schema(SCHEMA_CONFIG) - # Retrieve default config - try: - with open(DEFAULT_CONFIG, 'r') as f: - raw_args = yaml.safe_load(f) - default_config = yamale.make_data(DEFAULT_CONFIG) - yamale.validate(config_schema, default_config) # If fails, a ValueError will be raised. - except yaml.YAMLError as yamlerr: - raise RuntimeError('Illegal default config:{}'.format(yamlerr)) - except OSError as ose: - raise FileExistsError('Default config not found:{}'.format(ose)) - else: - self._run_pipeline = raw_args['run_pipeline'] - - # For presubmit check, do not do any image injection as for now. - # Notebook samples need to be papermilled first. - if self._is_notebook: - # Parse necessary params from config.yaml - nb_params = {} - try: - config_file = os.path.join(CONFIG_DIR, '%s.config.yaml' % self._test_name) - with open(config_file, 'r') as f: - raw_args = yaml.safe_load(f) - test_config = yamale.make_data(config_file) - yamale.validate(config_schema, test_config) # If fails, a ValueError will be raised. - except yaml.YAMLError as yamlerr: - print('No legit yaml config file found, use default args:{}'.format(yamlerr)) - except OSError as ose: - print('Config file with the same name not found, use default args:{}'.format(ose)) - else: - if 'notebook_params' in raw_args.keys(): - nb_params.update(raw_args['notebook_params']) - if 'output' in raw_args['notebook_params'].keys(): # output is a special param that has to be specified dynamically. - nb_params['output'] = self._sample_test_output - if 'run_pipeline' in raw_args.keys(): - self._run_pipeline = raw_args['run_pipeline'] - - pm.execute_notebook( - input_path='%s.ipynb' % self._test_name, - output_path='%s.ipynb' % self._test_name, - parameters=nb_params, - prepare_only=True - ) - # Convert to python script. - subprocess.call([ - 'jupyter', 'nbconvert', '--to', 'python', '%s.ipynb' % self._test_name - ]) - - else: - subprocess.call(['python3', '%s.py' % self._test_name]) - - def _injection(self): - """Inject images for pipeline components. 
- This is only valid for coimponent test - """ - pass - - def run_test(self): - self._compile() - self._injection() - - # Overriding the experiment name of pipeline runs - experiment_name = self._test_name + '-test' - os.environ['KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME'] = experiment_name - - if self._is_notebook: - nbchecker = NoteBookChecker(testname=self._test_name, - result=self._sample_test_result, - run_pipeline=self._run_pipeline, - experiment_name=experiment_name, - host=self._host, - ) - nbchecker.run() - os.chdir(TEST_DIR) - nbchecker.check() - else: - os.chdir(TEST_DIR) - input_file = os.path.join(self._work_dir, '%s.py.yaml' % self._test_name) - - pysample_checker = PySampleChecker(testname=self._test_name, - input=input_file, - output=self._sample_test_output, - result=self._sample_test_result, - host=self._host, - namespace=self._namespace, - experiment_name=experiment_name, - ) - pysample_checker.run() - pysample_checker.check() - - self._copy_result() + with open(DEFAULT_CONFIG, 'r') as f: + raw_args = yaml.safe_load(f) + default_config = yamale.make_data(DEFAULT_CONFIG) + yamale.validate( + config_schema, + default_config) # If fails, a ValueError will be raised. + except yaml.YAMLError as yamlerr: + raise RuntimeError('Illegal default config:{}'.format(yamlerr)) + except OSError as ose: + raise FileExistsError('Default config not found:{}'.format(ose)) + else: + self._run_pipeline = raw_args['run_pipeline'] + + # For presubmit check, do not do any image injection as for now. + # Notebook samples need to be papermilled first. + if self._is_notebook: + # Parse necessary params from config.yaml + nb_params = {} + try: + config_file = os.path.join(CONFIG_DIR, + '%s.config.yaml' % self._test_name) + with open(config_file, 'r') as f: + raw_args = yaml.safe_load(f) + test_config = yamale.make_data(config_file) + yamale.validate( + config_schema, + test_config) # If fails, a ValueError will be raised. + except yaml.YAMLError as yamlerr: + print('No legit yaml config file found, use default args:{}' + .format(yamlerr)) + except OSError as ose: + print( + 'Config file with the same name not found, use default args:{}' + .format(ose)) + else: + if 'notebook_params' in raw_args.keys(): + nb_params.update(raw_args['notebook_params']) + if 'output' in raw_args['notebook_params'].keys( + ): # output is a special param that has to be specified dynamically. + nb_params['output'] = self._sample_test_output + if 'run_pipeline' in raw_args.keys(): + self._run_pipeline = raw_args['run_pipeline'] + + pm.execute_notebook( + input_path='%s.ipynb' % self._test_name, + output_path='%s.ipynb' % self._test_name, + parameters=nb_params, + prepare_only=True) + # Convert to python script. + subprocess.call([ + 'jupyter', 'nbconvert', '--to', 'python', + '%s.ipynb' % self._test_name + ]) + + else: + subprocess.call(['python3', '%s.py' % self._test_name]) + + def _injection(self): + """Inject images for pipeline components. 
+ + This is only valid for coimponent test + """ + pass + + def run_test(self): + self._compile() + self._injection() + + # Overriding the experiment name of pipeline runs + experiment_name = self._test_name + '-test' + os.environ['KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME'] = experiment_name + + if self._is_notebook: + nbchecker = NoteBookChecker( + testname=self._test_name, + result=self._sample_test_result, + run_pipeline=self._run_pipeline, + experiment_name=experiment_name, + host=self._host, + ) + nbchecker.run() + os.chdir(TEST_DIR) + nbchecker.check() + else: + os.chdir(TEST_DIR) + input_file = os.path.join(self._work_dir, + '%s.py.yaml' % self._test_name) + + pysample_checker = PySampleChecker( + testname=self._test_name, + input=input_file, + output=self._sample_test_output, + result=self._sample_test_result, + host=self._host, + namespace=self._namespace, + experiment_name=experiment_name, + ) + pysample_checker.run() + pysample_checker.check() + + self._copy_result() class ComponentTest(SampleTest): - """ Launch a KFP sample test as component test provided its name. - - Currently follows the same logic as sample test for compatibility. - include xgboost_training_cm - """ - def __init__(self, test_name, results_gcs_dir, - gcp_image, - local_confusionmatrix_image, - local_roc_image, - target_image_prefix='', - namespace='kubeflow'): - super().__init__( - test_name=test_name, - results_gcs_dir=results_gcs_dir, - target_image_prefix=target_image_prefix, - namespace=namespace - ) - self._local_confusionmatrix_image = local_confusionmatrix_image - self._local_roc_image = local_roc_image - self._dataproc_gcp_image = gcp_image - - def _injection(self): - """Sample-specific image injection into yaml file.""" - subs = { # Tag can look like 1.0.0-rc.3, so we need both "-" and "." in the regex. - 'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-confusion-matrix:(\w+|[.-])+':self._local_confusionmatrix_image, - 'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-roc:(\w+|[.-])+':self._local_roc_image - } - if self._test_name == 'xgboost_training_cm': - subs.update({ - 'gcr\.io/ml-pipeline/ml-pipeline-gcp:(\w|[.-])+':self._dataproc_gcp_image - }) - - utils.file_injection('%s.py.yaml' % self._test_name, - '%s.py.yaml.tmp' % self._test_name, - subs) - else: - # Only the above sample need injection for now. - pass - utils.file_injection('%s.py.yaml' % self._test_name, - '%s.py.yaml.tmp' % self._test_name, - subs) + """Launch a KFP sample test as component test provided its name. + + Currently follows the same logic as sample test for compatibility. + include xgboost_training_cm + """ + + def __init__(self, + test_name, + results_gcs_dir, + gcp_image, + local_confusionmatrix_image, + local_roc_image, + target_image_prefix='', + namespace='kubeflow'): + super().__init__( + test_name=test_name, + results_gcs_dir=results_gcs_dir, + target_image_prefix=target_image_prefix, + namespace=namespace) + self._local_confusionmatrix_image = local_confusionmatrix_image + self._local_roc_image = local_roc_image + self._dataproc_gcp_image = gcp_image + + def _injection(self): + """Sample-specific image injection into yaml file.""" + subs = { # Tag can look like 1.0.0-rc.3, so we need both "-" and "." in the regex. 
+ 'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-confusion-matrix:(\w+|[.-])+': + self._local_confusionmatrix_image, + 'gcr\.io/ml-pipeline/ml-pipeline/ml-pipeline-local-roc:(\w+|[.-])+': + self._local_roc_image + } + if self._test_name == 'xgboost_training_cm': + subs.update({ + 'gcr\.io/ml-pipeline/ml-pipeline-gcp:(\w|[.-])+': + self._dataproc_gcp_image + }) + + utils.file_injection('%s.py.yaml' % self._test_name, + '%s.py.yaml.tmp' % self._test_name, subs) + else: + # Only the above sample need injection for now. + pass + utils.file_injection('%s.py.yaml' % self._test_name, + '%s.py.yaml.tmp' % self._test_name, subs) def main(): - """Launches either KFP sample test or component test as a command entrypoint. - - Usage: - python sample_test_launcher.py sample_test run_test arg1 arg2 to launch sample test, and - python sample_test_launcher.py component_test run_test arg1 arg2 to launch component - test. - """ - fire.Fire({ - 'sample_test': SampleTest, - 'component_test': ComponentTest - }) + """Launches either KFP sample test or component test as a command + entrypoint. + + Usage: + python sample_test_launcher.py sample_test run_test arg1 arg2 to launch sample test, and + python sample_test_launcher.py component_test run_test arg1 arg2 to launch component + test. + """ + fire.Fire({'sample_test': SampleTest, 'component_test': ComponentTest}) + if __name__ == '__main__': - main() + main() From cb18d00bbbaed9cd77fc50dce739ed62c72b2356 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Thu, 17 Aug 2023 15:34:38 +0300 Subject: [PATCH 099/253] feat(backend) Enable auth between pesistence agent and pipelineAPI (ReportServer) (#9699) * Enable auth between pesistence agent and pipelineAPI (ReportServer) 1. Add authentication and authorization logic to PipelineAPI's PeportServer & 2. Make Persistence Agent authenticate itself through Service Account Token Volume Projection. Signed-off-by: diana * Do not use MULTIUSER on report weorkflows Only Persistent agent can KFPipeline API "Report (Scheduled)Workflows" no matter the namespace workflows belongs to. 
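For context: the agent reads the projected token from /var/run/secrets/kubeflow/tokens/persistenceagent-sa-token (the SaTokenFile constant introduced below), and the concrete manifest change lives in ml-pipeline-persistenceagent-deployment.yaml later in this patch. The following is only an illustrative sketch of what such a serviceAccountToken projection generally looks like; the audience and expiry values here are assumptions for illustration, not the values used by the real manifest.

```yaml
# Illustrative sketch only; see the actual change to
# ml-pipeline-persistenceagent-deployment.yaml in this patch for real values.
spec:
  volumes:
    - name: persistenceagent-sa-token
      projected:
        sources:
          - serviceAccountToken:
              path: persistenceagent-sa-token    # file name expected by SaTokenFile
              audience: pipelines.kubeflow.org   # assumed audience
              expirationSeconds: 3600            # assumed; keep above the refresh interval
  containers:
    - name: ml-pipeline-persistenceagent
      volumeMounts:
        - name: persistenceagent-sa-token
          mountPath: /var/run/secrets/kubeflow/tokens
```

Kubernetes rotates the projected token file automatically, which is why the agent both re-reads it on a ticker and forces a refresh when the API server answers Unauthenticated with an expired-token message.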
Signed-off-by: diana * Add unit tests - unit tests added - do not stop the ticker on stopCh Signed-off-by: diana --------- Signed-off-by: diana --- .../persistence/client/pipeline_client.go | 34 +++++- .../persistence/client/token_refresher.go | 78 ++++++++++++ .../client/token_refresher_test.go | 111 ++++++++++++++++++ backend/src/agent/persistence/main.go | 16 ++- .../auth/authenticator_token_review.go | 3 +- backend/src/apiserver/common/const.go | 15 ++- backend/src/apiserver/server/report_server.go | 32 +++++ .../persistence-agent/cluster-role.yaml | 7 ++ ...-pipeline-persistenceagent-deployment.yaml | 11 ++ .../ml-pipeline-persistenceagent-role.yaml | 7 ++ 10 files changed, 304 insertions(+), 10 deletions(-) create mode 100644 backend/src/agent/persistence/client/token_refresher.go create mode 100644 backend/src/agent/persistence/client/token_refresher_test.go diff --git a/backend/src/agent/persistence/client/pipeline_client.go b/backend/src/agent/persistence/client/pipeline_client.go index 7d26056b20..e1725cc20c 100644 --- a/backend/src/agent/persistence/client/pipeline_client.go +++ b/backend/src/agent/persistence/client/pipeline_client.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "os" + "strings" "time" "github.com/kubeflow/pipelines/backend/src/apiserver/common" @@ -46,11 +47,13 @@ type PipelineClient struct { timeout time.Duration reportServiceClient api.ReportServiceClient runServiceClient api.RunServiceClient + tokenRefresher TokenRefresherInterface } func NewPipelineClient( initializeTimeout time.Duration, timeout time.Duration, + tokenRefresher TokenRefresherInterface, basePath string, mlPipelineServiceName string, mlPipelineServiceHttpPort string, @@ -71,13 +74,18 @@ func NewPipelineClient( return &PipelineClient{ initializeTimeout: initializeTimeout, timeout: timeout, + tokenRefresher: tokenRefresher, reportServiceClient: api.NewReportServiceClient(connection), runServiceClient: api.NewRunServiceClient(connection), }, nil } func (p *PipelineClient) ReportWorkflow(workflow util.ExecutionSpec) error { - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + pctx := context.Background() + pctx = metadata.AppendToOutgoingContext(pctx, "Authorization", + "Bearer "+p.tokenRefresher.GetToken()) + + ctx, cancel := context.WithTimeout(pctx, time.Minute) defer cancel() _, err := p.reportServiceClient.ReportWorkflowV1(ctx, &api.ReportWorkflowRequest{ @@ -96,6 +104,15 @@ func (p *PipelineClient) ReportWorkflow(workflow util.ExecutionSpec) error { statusCode.Message(), err.Error(), workflow.ToStringForStore()) + } else if statusCode.Code() == codes.Unauthenticated && strings.Contains(err.Error(), "service account token has expired") { + // If unauthenticated because SA token is expired, re-read/refresh the token and try again + p.tokenRefresher.RefreshToken() + return util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, + "Error while reporting workflow resource (code: %v, message: %v): %v, %+v", + statusCode.Code(), + statusCode.Message(), + err.Error(), + workflow.ToStringForStore()) } else { // Retry otherwise return util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, @@ -110,7 +127,11 @@ func (p *PipelineClient) ReportWorkflow(workflow util.ExecutionSpec) error { } func (p *PipelineClient) ReportScheduledWorkflow(swf *util.ScheduledWorkflow) error { - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + pctx := context.Background() + pctx = metadata.AppendToOutgoingContext(pctx, "Authorization", + "Bearer "+p.tokenRefresher.GetToken()) + + ctx, 
cancel := context.WithTimeout(pctx, time.Minute) defer cancel() _, err := p.reportServiceClient.ReportScheduledWorkflowV1(ctx, @@ -128,6 +149,15 @@ func (p *PipelineClient) ReportScheduledWorkflow(swf *util.ScheduledWorkflow) er statusCode.Message(), err.Error(), swf.ScheduledWorkflow) + } else if statusCode.Code() == codes.Unauthenticated && strings.Contains(err.Error(), "service account token has expired") { + // If unauthenticated because SA token is expired, re-read/refresh the token and try again + p.tokenRefresher.RefreshToken() + return util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, + "Error while reporting workflow resource (code: %v, message: %v): %v, %+v", + statusCode.Code(), + statusCode.Message(), + err.Error(), + swf.ScheduledWorkflow) } else { // Retry otherwise return util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, diff --git a/backend/src/agent/persistence/client/token_refresher.go b/backend/src/agent/persistence/client/token_refresher.go new file mode 100644 index 0000000000..2672deed71 --- /dev/null +++ b/backend/src/agent/persistence/client/token_refresher.go @@ -0,0 +1,78 @@ +package client + +import ( + log "github.com/sirupsen/logrus" + "os" + "sync" + "time" +) + +type TokenRefresherInterface interface { + GetToken() string + RefreshToken() error +} + +const SaTokenFile = "/var/run/secrets/kubeflow/tokens/persistenceagent-sa-token" + +type FileReader interface { + ReadFile(filename string) ([]byte, error) +} + +type tokenRefresher struct { + mu sync.RWMutex + seconds *time.Duration + token string + fileReader *FileReader +} + +type FileReaderImpl struct{} + +func (r *FileReaderImpl) ReadFile(filename string) ([]byte, error) { + return os.ReadFile(filename) +} + +func NewTokenRefresher(seconds time.Duration, fileReader FileReader) *tokenRefresher { + if fileReader == nil { + fileReader = &FileReaderImpl{} + } + + tokenRefresher := &tokenRefresher{ + seconds: &seconds, + fileReader: &fileReader, + } + + return tokenRefresher +} + +func (tr *tokenRefresher) StartTokenRefreshTicker() error { + err := tr.RefreshToken() + if err != nil { + return err + } + + ticker := time.NewTicker(*tr.seconds) + go func() { + for range ticker.C { + tr.RefreshToken() + } + }() + return err +} + +func (tr *tokenRefresher) GetToken() string { + tr.mu.RLock() + defer tr.mu.RUnlock() + return tr.token +} + +func (tr *tokenRefresher) RefreshToken() error { + tr.mu.Lock() + defer tr.mu.Unlock() + b, err := (*tr.fileReader).ReadFile(SaTokenFile) + if err != nil { + log.Errorf("Error reading persistence agent service account token '%s': %v", SaTokenFile, err) + return err + } + tr.token = string(b) + return nil +} diff --git a/backend/src/agent/persistence/client/token_refresher_test.go b/backend/src/agent/persistence/client/token_refresher_test.go new file mode 100644 index 0000000000..b6e50d124d --- /dev/null +++ b/backend/src/agent/persistence/client/token_refresher_test.go @@ -0,0 +1,111 @@ +package client + +import ( + "fmt" + "io/fs" + "log" + "syscall" + "testing" + "time" +) + +const refreshInterval = 2 * time.Second + +type FileReaderFake struct { + Data string + Err error + readCounter int +} + +func (m *FileReaderFake) ReadFile(filename string) ([]byte, error) { + if m.Err != nil { + return nil, m.Err + } + content := fmt.Sprintf("%s-%v", m.Data, m.readCounter) + m.readCounter++ + return []byte(content), nil +} + +func Test_token_refresher(t *testing.T) { + tests := []struct { + name string + baseToken string + wanted string + refreshedToken string + err error + }{ + { + 
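+ // Note on expected values: FileReaderFake appends an incrementing read counter to Data
+ // ("rightToken-0" on the first read, "rightToken-1" on the next), so the wanted strings
+ // below also verify how many times the token file has been read.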
name: "TestTokenRefresher_GetToken_Success", + baseToken: "rightToken", + wanted: "rightToken-0", + err: nil, + }, + { + name: "TestTokenRefresher_GetToken_Failed_PathError", + baseToken: "rightToken", + wanted: "rightToken-0", + err: &fs.PathError{Err: syscall.ENOENT}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // setup + fakeFileReader := &FileReaderFake{ + Data: tt.baseToken, + Err: tt.err, + } + tr := NewTokenRefresher(refreshInterval, fakeFileReader) + err := tr.StartTokenRefreshTicker() + if err != nil { + got, sameType := err.(*fs.PathError) + if sameType != true { + t.Errorf("%v(): got = %v, wanted %v", tt.name, got, tt.err) + } + return + } + if err != nil { + log.Fatalf("Error starting Service Account Token Refresh Ticker: %v", err) + } + + if got := tr.GetToken(); got != tt.wanted { + t.Errorf("%v(): got %v, wanted %v", tt.name, got, tt.wanted) + } + }) + } +} + +func TestTokenRefresher_GetToken_After_TickerRefresh_Success(t *testing.T) { + fakeFileReader := &FileReaderFake{ + Data: "Token", + Err: nil, + } + tr := NewTokenRefresher(1*time.Second, fakeFileReader) + err := tr.StartTokenRefreshTicker() + if err != nil { + log.Fatalf("Error starting Service Account Token Refresh Ticker: %v", err) + } + time.Sleep(1200 * time.Millisecond) + expectedToken := "Token-1" + + if got := tr.GetToken(); got != expectedToken { + t.Errorf("%v(): got %v, wanted 'refreshed baseToken' %v", t.Name(), got, expectedToken) + } +} + +func TestTokenRefresher_GetToken_After_ForceRefresh_Success(t *testing.T) { + fakeFileReader := &FileReaderFake{ + Data: "Token", + Err: nil, + } + tr := NewTokenRefresher(refreshInterval, fakeFileReader) + err := tr.StartTokenRefreshTicker() + if err != nil { + log.Fatalf("Error starting Service Account Token Refresh Ticker: %v", err) + } + tr.RefreshToken() + expectedToken := "Token-1" + + if got := tr.GetToken(); got != expectedToken { + t.Errorf("%v(): got %v, wanted 'refreshed baseToken' %v", t.Name(), got, expectedToken) + } +} diff --git a/backend/src/agent/persistence/main.go b/backend/src/agent/persistence/main.go index 297206cb88..2ea143b21e 100644 --- a/backend/src/agent/persistence/main.go +++ b/backend/src/agent/persistence/main.go @@ -43,6 +43,7 @@ var ( numWorker int clientQPS float64 clientBurst int + saTokenRefreshInterval float64 ) const ( @@ -59,10 +60,12 @@ const ( numWorkerName = "numWorker" clientQPSFlagName = "clientQPS" clientBurstFlagName = "clientBurst" + saTokenRefreshIntervalFlagName = "saTokenRefreshInterval" ) const ( - DefaultConnectionTimeout = 6 * time.Minute + DefaultConnectionTimeout = 6 * time.Minute + DefaultTokenRefresherInterval = 1 * time.Hour ) func main() { @@ -97,9 +100,16 @@ func main() { Burst: clientBurst, }) + tokenRefresher := client.NewTokenRefresher(time.Duration(saTokenRefreshInterval), nil) + err = tokenRefresher.StartTokenRefreshTicker() + if err != nil { + log.Fatalf("Error starting Service Account Token Refresh Ticker due to: %v", err) + } + pipelineClient, err := client.NewPipelineClient( initializeTimeout, timeout, + tokenRefresher, mlPipelineAPIServerBasePath, mlPipelineAPIServerName, mlPipelineServiceHttpPort, @@ -140,4 +150,8 @@ func init() { // k8s.io/client-go/rest/config.go#RESTClientFor flag.Float64Var(&clientQPS, clientQPSFlagName, 5, "The maximum QPS to the master from this client.") flag.IntVar(&clientBurst, clientBurstFlagName, 10, "Maximum burst for throttle from this client.") + // TODO use viper/config file instead. 
Sync `saTokenRefreshIntervalFlagName` with the value from manifest file by using ENV var. + flag.Float64Var(&saTokenRefreshInterval, saTokenRefreshIntervalFlagName, DefaultTokenRefresherInterval.Seconds(), "Persistence agent service account token read interval in seconds. "+ + "Defines how often `/var/run/secrets/kubeflow/tokens/kubeflow-persistent_agent-api-token` to be read") + } diff --git a/backend/src/apiserver/auth/authenticator_token_review.go b/backend/src/apiserver/auth/authenticator_token_review.go index d0f49e6b04..ddd114841d 100644 --- a/backend/src/apiserver/auth/authenticator_token_review.go +++ b/backend/src/apiserver/auth/authenticator_token_review.go @@ -92,7 +92,8 @@ func (tra *TokenReviewAuthenticator) doTokenReview(ctx context.Context, userIden if !review.Status.Authenticated { return nil, util.NewUnauthenticatedError( errors.New("Failed to authenticate token review"), - "Review.Status.Authenticated is false", + "Review.Status.Authenticated is false. Error %s", + review.Status.Error, ) } if !tra.ensureAudience(review.Status.Audiences) { diff --git a/backend/src/apiserver/common/const.go b/backend/src/apiserver/common/const.go index a50d89512b..85fd981419 100644 --- a/backend/src/apiserver/common/const.go +++ b/backend/src/apiserver/common/const.go @@ -19,12 +19,14 @@ const ( RbacPipelinesGroup = "pipelines.kubeflow.org" RbacPipelinesVersion = "v1beta1" - RbacResourceTypePipelines = "pipelines" - RbacResourceTypeExperiments = "experiments" - RbacResourceTypeRuns = "runs" - RbacResourceTypeJobs = "jobs" - RbacResourceTypeViewers = "viewers" - RbacResourceTypeVisualizations = "visualizations" + RbacResourceTypePipelines = "pipelines" + RbacResourceTypeExperiments = "experiments" + RbacResourceTypeRuns = "runs" + RbacResourceTypeJobs = "jobs" + RbacResourceTypeViewers = "viewers" + RbacResourceTypeVisualizations = "visualizations" + RbacResourceTypeScheduledWorkflows = "scheduledworkflows" + RbacResourceTypeWorkflows = "workflows" RbacResourceVerbArchive = "archive" RbacResourceVerbUpdate = "update" @@ -39,6 +41,7 @@ const ( RbacResourceVerbUnarchive = "unarchive" RbacResourceVerbReportMetrics = "reportMetrics" RbacResourceVerbReadArtifact = "readArtifact" + RbacResourceVerbReport = "report" ) const ( diff --git a/backend/src/apiserver/server/report_server.go b/backend/src/apiserver/server/report_server.go index 892e43e102..1459b0f48a 100644 --- a/backend/src/apiserver/server/report_server.go +++ b/backend/src/apiserver/server/report_server.go @@ -17,6 +17,8 @@ package server import ( "context" "encoding/json" + "github.com/kubeflow/pipelines/backend/src/apiserver/common" + authorizationv1 "k8s.io/api/authorization/v1" "github.com/golang/protobuf/ptypes/empty" apiv1beta1 "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" @@ -49,6 +51,17 @@ func (s *ReportServer) reportWorkflow(ctx context.Context, workflow string) (*em if err != nil { return nil, util.Wrap(err, "Report workflow failed") } + + executionName := (*execSpec).ExecutionName() + resourceAttributes := &authorizationv1.ResourceAttributes{ + Verb: common.RbacResourceVerbReport, + Resource: common.RbacResourceTypeWorkflows, + } + + if err := s.canAccessWorkflow(ctx, executionName, resourceAttributes); err != nil { + return nil, err + } + newExecSpec, err := s.resourceManager.ReportWorkflowResource(ctx, *execSpec) if err != nil { return nil, util.Wrap(err, "Failed to report workflow") @@ -80,6 +93,15 @@ func (s *ReportServer) reportScheduledWorkflow(ctx context.Context, swf string) if err != nil { return 
nil, util.Wrap(err, "Report scheduled workflow failed") } + resourceAttributes := &authorizationv1.ResourceAttributes{ + Verb: common.RbacResourceVerbReport, + Resource: common.RbacResourceTypeScheduledWorkflows, + } + err = s.canAccessWorkflow(ctx, string(scheduledWorkflow.UID), resourceAttributes) + if err != nil { + return nil, err + } + err = s.resourceManager.ReportScheduledWorkflowResource(scheduledWorkflow) if err != nil { return nil, err @@ -136,6 +158,16 @@ func validateReportScheduledWorkflowRequest(swfManifest string) (*util.Scheduled return swf, nil } +func (s *ReportServer) canAccessWorkflow(ctx context.Context, executionName string, resourceAttributes *authorizationv1.ResourceAttributes) error { + resourceAttributes.Group = common.RbacPipelinesGroup + resourceAttributes.Version = common.RbacPipelinesVersion + err := s.resourceManager.IsAuthorized(ctx, resourceAttributes) + if err != nil { + return util.Wrapf(err, "Failed to report %s `%s` due to authorization error.", resourceAttributes.Resource, executionName) + } + return nil +} + func NewReportServer(resourceManager *resource.ResourceManager) *ReportServer { return &ReportServer{resourceManager: resourceManager} } diff --git a/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml b/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml index cf3b34a82f..bd1a0f53df 100644 --- a/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml +++ b/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml @@ -19,6 +19,13 @@ rules: - get - list - watch +- apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report - apiGroups: - '' resources: diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml index 74c19c9d79..30bea2326a 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml @@ -36,4 +36,15 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml index 2a288092c1..077d556e10 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml @@ -19,6 +19,13 @@ rules: - get - list - watch +- apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report - apiGroups: - '' resources: From 6dfcee7fa9d0e54a4797189d1f437367d4d9f4a2 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Thu, 17 Aug 2023 19:19:38 +0300 Subject: [PATCH 100/253] Fix Persistence Agent SA Token time interval (#9892) Issue: https://github.com/kubeflow/pipelines/issues/9891 Signed-off-by: diana --- .../src/agent/persistence/client/token_refresher.go | 8 ++++---- backend/src/agent/persistence/main.go | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) 
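Background on the interval bug: Go's time.Duration is a count of nanoseconds, so time.Duration(n) treats a bare seconds value as nanoseconds and the refresh ticker fired with a nanosecond-scale period instead of the intended hourly one. The snippet below is a minimal, self-contained sketch of the conversion only (illustrative, not part of this patch; the 3600-second value is a placeholder):

    package main

    import (
    	"fmt"
    	"time"
    )

    func main() {
    	saTokenRefreshIntervalInSecs := int64(3600)

    	// Wrong: 3600 is interpreted as nanoseconds (prints 3.6µs).
    	tooShort := time.Duration(saTokenRefreshIntervalInSecs)

    	// Right: scale the seconds value explicitly, as the diff below does (prints 1h0m0s).
    	intended := time.Duration(saTokenRefreshIntervalInSecs) * time.Second

    	fmt.Println(tooShort, intended)
    }
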
diff --git a/backend/src/agent/persistence/client/token_refresher.go b/backend/src/agent/persistence/client/token_refresher.go index 2672deed71..addbbb3f54 100644 --- a/backend/src/agent/persistence/client/token_refresher.go +++ b/backend/src/agent/persistence/client/token_refresher.go @@ -20,7 +20,7 @@ type FileReader interface { type tokenRefresher struct { mu sync.RWMutex - seconds *time.Duration + interval *time.Duration token string fileReader *FileReader } @@ -31,13 +31,13 @@ func (r *FileReaderImpl) ReadFile(filename string) ([]byte, error) { return os.ReadFile(filename) } -func NewTokenRefresher(seconds time.Duration, fileReader FileReader) *tokenRefresher { +func NewTokenRefresher(interval time.Duration, fileReader FileReader) *tokenRefresher { if fileReader == nil { fileReader = &FileReaderImpl{} } tokenRefresher := &tokenRefresher{ - seconds: &seconds, + interval: &interval, fileReader: &fileReader, } @@ -50,7 +50,7 @@ func (tr *tokenRefresher) StartTokenRefreshTicker() error { return err } - ticker := time.NewTicker(*tr.seconds) + ticker := time.NewTicker(*tr.interval) go func() { for range ticker.C { tr.RefreshToken() diff --git a/backend/src/agent/persistence/main.go b/backend/src/agent/persistence/main.go index 2ea143b21e..f8c26da385 100644 --- a/backend/src/agent/persistence/main.go +++ b/backend/src/agent/persistence/main.go @@ -43,7 +43,7 @@ var ( numWorker int clientQPS float64 clientBurst int - saTokenRefreshInterval float64 + saTokenRefreshIntervalInSecs int64 ) const ( @@ -60,12 +60,12 @@ const ( numWorkerName = "numWorker" clientQPSFlagName = "clientQPS" clientBurstFlagName = "clientBurst" - saTokenRefreshIntervalFlagName = "saTokenRefreshInterval" + saTokenRefreshIntervalFlagName = "saTokenRefreshIntervalInSecs" ) const ( - DefaultConnectionTimeout = 6 * time.Minute - DefaultTokenRefresherInterval = 1 * time.Hour + DefaultConnectionTimeout = 6 * time.Minute + DefaultSATokenRefresherIntervalInSecs = 60 * 60 // 1 Hour in seconds ) func main() { @@ -100,7 +100,7 @@ func main() { Burst: clientBurst, }) - tokenRefresher := client.NewTokenRefresher(time.Duration(saTokenRefreshInterval), nil) + tokenRefresher := client.NewTokenRefresher(time.Duration(saTokenRefreshIntervalInSecs)*time.Second, nil) err = tokenRefresher.StartTokenRefreshTicker() if err != nil { log.Fatalf("Error starting Service Account Token Refresh Ticker due to: %v", err) @@ -151,7 +151,7 @@ func init() { flag.Float64Var(&clientQPS, clientQPSFlagName, 5, "The maximum QPS to the master from this client.") flag.IntVar(&clientBurst, clientBurstFlagName, 10, "Maximum burst for throttle from this client.") // TODO use viper/config file instead. Sync `saTokenRefreshIntervalFlagName` with the value from manifest file by using ENV var. - flag.Float64Var(&saTokenRefreshInterval, saTokenRefreshIntervalFlagName, DefaultTokenRefresherInterval.Seconds(), "Persistence agent service account token read interval in seconds. "+ + flag.Int64Var(&saTokenRefreshIntervalInSecs, saTokenRefreshIntervalFlagName, DefaultSATokenRefresherIntervalInSecs, "Persistence agent service account token read interval in seconds. 
"+ "Defines how often `/var/run/secrets/kubeflow/tokens/kubeflow-persistent_agent-api-token` to be read") } From 4ed8b7ceb91fe09123dca0b2f868a8e70c7ff510 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 17 Aug 2023 13:18:49 -0700 Subject: [PATCH 101/253] chore(components): add GCPC automl reference docs PiperOrigin-RevId: 557916896 --- .../docs/source/api/preview/automl/forecasting.rst | 4 ++++ .../google-cloud/docs/source/api/preview/automl/index.rst | 8 ++++++++ .../docs/source/api/preview/automl/tabular.rst | 4 ++++ components/google-cloud/docs/source/api/preview/index.rst | 1 + .../docs/source/api/v1/automl/forecasting.rst | 4 ++++ .../google-cloud/docs/source/api/v1/automl/index.rst | 2 ++ .../google-cloud/docs/source/api/v1/automl/tabular.rst | 4 ++++ 7 files changed, 27 insertions(+) create mode 100644 components/google-cloud/docs/source/api/preview/automl/forecasting.rst create mode 100644 components/google-cloud/docs/source/api/preview/automl/index.rst create mode 100644 components/google-cloud/docs/source/api/preview/automl/tabular.rst create mode 100644 components/google-cloud/docs/source/api/v1/automl/forecasting.rst create mode 100644 components/google-cloud/docs/source/api/v1/automl/tabular.rst diff --git a/components/google-cloud/docs/source/api/preview/automl/forecasting.rst b/components/google-cloud/docs/source/api/preview/automl/forecasting.rst new file mode 100644 index 0000000000..761f8d5180 --- /dev/null +++ b/components/google-cloud/docs/source/api/preview/automl/forecasting.rst @@ -0,0 +1,4 @@ +AutoML Forecasting +========================== + +.. automodule:: preview.automl.forecasting \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/preview/automl/index.rst b/components/google-cloud/docs/source/api/preview/automl/index.rst new file mode 100644 index 0000000000..1aedabd617 --- /dev/null +++ b/components/google-cloud/docs/source/api/preview/automl/index.rst @@ -0,0 +1,8 @@ +AutoML +============= + +.. toctree:: + :maxdepth: 1 + + forecasting + tabular \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/preview/automl/tabular.rst b/components/google-cloud/docs/source/api/preview/automl/tabular.rst new file mode 100644 index 0000000000..4b9d913ad2 --- /dev/null +++ b/components/google-cloud/docs/source/api/preview/automl/tabular.rst @@ -0,0 +1,4 @@ +AutoML Tabular +========================== + +.. automodule:: preview.automl.tabular \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/preview/index.rst b/components/google-cloud/docs/source/api/preview/index.rst index 20cdbf132b..3f91c093b5 100644 --- a/components/google-cloud/docs/source/api/preview/index.rst +++ b/components/google-cloud/docs/source/api/preview/index.rst @@ -4,5 +4,6 @@ Preview Components .. toctree:: :maxdepth: 1 + automl dataflow model_evaluation \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/v1/automl/forecasting.rst b/components/google-cloud/docs/source/api/v1/automl/forecasting.rst new file mode 100644 index 0000000000..de710136ca --- /dev/null +++ b/components/google-cloud/docs/source/api/v1/automl/forecasting.rst @@ -0,0 +1,4 @@ +AutoML Forecasting +========================== + +.. 
automodule:: v1.automl.forecasting \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/v1/automl/index.rst b/components/google-cloud/docs/source/api/v1/automl/index.rst index 92f20b20af..4e9c83f04a 100644 --- a/components/google-cloud/docs/source/api/v1/automl/index.rst +++ b/components/google-cloud/docs/source/api/v1/automl/index.rst @@ -4,4 +4,6 @@ AutoML .. toctree:: :maxdepth: 1 + forecasting + tabular training_job \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/v1/automl/tabular.rst b/components/google-cloud/docs/source/api/v1/automl/tabular.rst new file mode 100644 index 0000000000..dabbebf2e9 --- /dev/null +++ b/components/google-cloud/docs/source/api/v1/automl/tabular.rst @@ -0,0 +1,4 @@ +AutoML Tabular +========================== + +.. automodule:: v1.automl.tabular \ No newline at end of file From 40c759f4316c2503fd5e6ad187cb69122f7f53b1 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 17 Aug 2023 14:49:40 -0700 Subject: [PATCH 102/253] chore(release): bump version to 2.0.1 on master branch (#9899) --- CHANGELOG.md | 16 ++++++++++++++++ VERSION | 2 +- backend/api/v1beta1/python_http_client/README.md | 4 ++-- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 ++-- backend/api/v1beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- backend/api/v2beta1/python_http_client/README.md | 4 ++-- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 ++-- backend/api/v2beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../templates/application.yaml | 2 +- manifests/gcp_marketplace/schema.yaml | 4 ++-- .../base/cache-deployer/kustomization.yaml | 2 +- .../kustomize/base/cache/kustomization.yaml | 2 +- .../generic/pipeline-install-config.yaml | 2 +- .../base/metadata/base/kustomization.yaml | 2 +- .../kustomize/base/pipeline/kustomization.yaml | 12 ++++++------ .../pipeline/metadata-writer/kustomization.yaml | 2 +- .../env/gcp/inverse-proxy/kustomization.yaml | 2 +- 23 files changed, 48 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 889779c1ea..1c4a74290a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +### [2.0.1](https://github.com/kubeflow/pipelines/compare/2.0.0...2.0.1) (2023-08-17) + + +### Bug Fixes + +* **backend:** Fix performance issue within a mysql request ([\#9680](https://github.com/kubeflow/pipelines/issues/9680)) ([81618d0](https://github.com/kubeflow/pipelines/commit/81618d0fd6810560e0b78c61776d73042bd6f3bb)) +* **backend:** fix timeouts with list run api. Fixes [\#9780](https://github.com/kubeflow/pipelines/issues/9780) ([\#9806](https://github.com/kubeflow/pipelines/issues/9806)) ([c467ece](https://github.com/kubeflow/pipelines/commit/c467ece30551046fa0304a6a7067d3e185d7cf14)) +* **frontend:** Introduce ALLOWED_ARTIFACT_DOMAIN_REGEX flag to prevent accessing undesired domains. Remove user input string from server response. 
([\#9844](https://github.com/kubeflow/pipelines/issues/9844)) ([737c0cc](https://github.com/kubeflow/pipelines/commit/737c0cc12606da3994e978678ace7adb1b309944)) + + +### Other Pull Requests + +* Fix Persistence Agent SA Token time interval ([\#9892](https://github.com/kubeflow/pipelines/issues/9892)) ([681c46f](https://github.com/kubeflow/pipelines/commit/681c46f62bb1d3aa5e1e4db2a239c7c4dd64881a)) +* feat(backend) Enable auth between pesistence agent and pipelineAPI (ReportServer) ([\#9699](https://github.com/kubeflow/pipelines/issues/9699)) ([f232d0b](https://github.com/kubeflow/pipelines/commit/f232d0b3902bf666a2bfdc65ac6f93934e010083)) +* fix(backend) Replace LEFT with INNER JOIN when Archive Experiment ([\#9730](https://github.com/kubeflow/pipelines/issues/9730)) ([5593dee](https://github.com/kubeflow/pipelines/commit/5593dee729b0b9518c1a70dbc3f0052796c4f10a)) + ## [2.0.0](https://github.com/kubeflow/pipelines/compare/1.7.0...2.0.0) (2023-06-20) diff --git a/VERSION b/VERSION index 359a5b952d..10bf840ed5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0 \ No newline at end of file +2.0.1 \ No newline at end of file diff --git a/backend/api/v1beta1/python_http_client/README.md b/backend/api/v1beta1/python_http_client/README.md index 2435026978..f0e94be6d2 100644 --- a/backend/api/v1beta1/python_http_client/README.md +++ b/backend/api/v1beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.0 -- Package version: 2.0.0 +- API version: 2.0.1 +- Package version: 2.0.1 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py index f14da081f1..fc1497d659 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.0" +__version__ = "2.0.1" # import apis into sdk package from kfp_server_api.api.experiment_service_api import ExperimentServiceApi diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py index 2ca5b13e98..5b4cb571de 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'OpenAPI-Generator/2.0.0/python' + self.user_agent = 'OpenAPI-Generator/2.0.1/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py index c00241a23c..fe73377512 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.0\n"\ - "SDK Package Version: 2.0.0".\ + "Version of the API: 2.0.1\n"\ + "SDK Package Version: 2.0.1".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v1beta1/python_http_client/setup.py b/backend/api/v1beta1/python_http_client/setup.py index 70cc6616fb..d3fd643008 100644 --- a/backend/api/v1beta1/python_http_client/setup.py +++ b/backend/api/v1beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.0" +VERSION = "2.0.1" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json index 83bfd2daf3..a4ebf3ca8a 100644 --- a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.0", + "version": "2.0.1", "description": "This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/backend/api/v2beta1/python_http_client/README.md b/backend/api/v2beta1/python_http_client/README.md index 1a10103b5c..dd98e9d6bd 100644 --- a/backend/api/v2beta1/python_http_client/README.md +++ b/backend/api/v2beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. 
The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.0 -- Package version: 2.0.0 +- API version: 2.0.1 +- Package version: 2.0.1 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py index 693e0410dc..87463f0a21 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.0" +__version__ = "2.0.1" # import apis into sdk package from kfp_server_api.api.auth_service_api import AuthServiceApi diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py index 2ca5b13e98..5b4cb571de 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/2.0.0/python' + self.user_agent = 'OpenAPI-Generator/2.0.1/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py index c00241a23c..fe73377512 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.0\n"\ - "SDK Package Version: 2.0.0".\ + "Version of the API: 2.0.1\n"\ + "SDK Package Version: 2.0.1".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v2beta1/python_http_client/setup.py b/backend/api/v2beta1/python_http_client/setup.py index 70cc6616fb..d3fd643008 100644 --- a/backend/api/v2beta1/python_http_client/setup.py +++ b/backend/api/v2beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.0" +VERSION = "2.0.1" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json index ac42a27779..bf218246d0 100644 --- a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.0", + "version": "2.0.1", "description": "This file contains REST API specification for Kubeflow Pipelines. 
The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml index 42542f7292..fd778769e4 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml @@ -12,7 +12,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.0 + version: 2.0.1 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index ef985d265e..46cd2b385e 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -1,9 +1,9 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: 2.0.0 + publishedVersion: 2.0.1 publishedVersionMetadata: - releaseNote: Based on 2.0.0 version. + releaseNote: Based on 2.0.1 version. releaseTypes: - Feature recommended: false diff --git a/manifests/kustomize/base/cache-deployer/kustomization.yaml b/manifests/kustomize/base/cache-deployer/kustomization.yaml index 3640b1657c..a9640aa6cb 100644 --- a/manifests/kustomize/base/cache-deployer/kustomization.yaml +++ b/manifests/kustomize/base/cache-deployer/kustomization.yaml @@ -8,4 +8,4 @@ commonLabels: app: cache-deployer images: - name: gcr.io/ml-pipeline/cache-deployer - newTag: 2.0.0 + newTag: 2.0.1 diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml index a2c39814f5..56c40afbe4 100644 --- a/manifests/kustomize/base/cache/kustomization.yaml +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -10,4 +10,4 @@ commonLabels: app: cache-server images: - name: gcr.io/ml-pipeline/cache-server - newTag: 2.0.0 + newTag: 2.0.1 diff --git a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml index 0a2a11d0c9..47ce6b687d 100644 --- a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml +++ b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml @@ -11,7 +11,7 @@ data: until the changes take effect. A quick way to restart all deployments in a namespace: `kubectl rollout restart deployment -n `. 
appName: pipeline - appVersion: 2.0.0 + appVersion: 2.0.1 dbHost: mysql dbPort: "3306" mlmdDb: metadb diff --git a/manifests/kustomize/base/metadata/base/kustomization.yaml b/manifests/kustomize/base/metadata/base/kustomization.yaml index b23f6470d5..5ad3f07bac 100644 --- a/manifests/kustomize/base/metadata/base/kustomization.yaml +++ b/manifests/kustomize/base/metadata/base/kustomization.yaml @@ -9,4 +9,4 @@ resources: - metadata-grpc-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-envoy - newTag: 2.0.0 + newTag: 2.0.1 diff --git a/manifests/kustomize/base/pipeline/kustomization.yaml b/manifests/kustomize/base/pipeline/kustomization.yaml index e04217c1fc..f3ca5274c8 100644 --- a/manifests/kustomize/base/pipeline/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/kustomization.yaml @@ -37,14 +37,14 @@ resources: - kfp-launcher-configmap.yaml images: - name: gcr.io/ml-pipeline/api-server - newTag: 2.0.0 + newTag: 2.0.1 - name: gcr.io/ml-pipeline/persistenceagent - newTag: 2.0.0 + newTag: 2.0.1 - name: gcr.io/ml-pipeline/scheduledworkflow - newTag: 2.0.0 + newTag: 2.0.1 - name: gcr.io/ml-pipeline/frontend - newTag: 2.0.0 + newTag: 2.0.1 - name: gcr.io/ml-pipeline/viewer-crd-controller - newTag: 2.0.0 + newTag: 2.0.1 - name: gcr.io/ml-pipeline/visualization-server - newTag: 2.0.0 + newTag: 2.0.1 diff --git a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml index c78915c688..2d118e33cf 100644 --- a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml @@ -7,4 +7,4 @@ resources: - metadata-writer-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-writer - newTag: 2.0.0 + newTag: 2.0.1 diff --git a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml index cd80c7ce0a..e8798ef205 100644 --- a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml +++ b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: gcr.io/ml-pipeline/inverse-proxy-agent - newTag: 2.0.0 + newTag: 2.0.1 resources: - proxy-configmap.yaml - proxy-deployment.yaml From c405d6bf24ec79c9501ea4479b644811cfbdef08 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 17 Aug 2023 16:44:16 -0700 Subject: [PATCH 103/253] docs(components): fix automl docs PiperOrigin-RevId: 557972304 --- components/google-cloud/docs/source/api/preview/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/docs/source/api/preview/index.rst b/components/google-cloud/docs/source/api/preview/index.rst index 3f91c093b5..171ef3d672 100644 --- a/components/google-cloud/docs/source/api/preview/index.rst +++ b/components/google-cloud/docs/source/api/preview/index.rst @@ -4,6 +4,6 @@ Preview Components .. 
toctree:: :maxdepth: 1 - automl + automl/index dataflow model_evaluation \ No newline at end of file From 562cd299cc25244b9b3d900cd8c6cb86142f8326 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 17 Aug 2023 18:30:09 -0700 Subject: [PATCH 104/253] feat(components): Components for Embedding Eval pipeline PiperOrigin-RevId: 557993650 --- .../model_evaluation/__init__.py | 6 + .../llm_embedding_retrieval/__init__.py | 14 ++ .../llm_embedding_retrieval/component.py | 141 ++++++++++++++++ .../__init__.py | 14 ++ .../component.py | 152 ++++++++++++++++++ .../llm_retrieval_metrics/__init__.py | 14 ++ .../llm_retrieval_metrics/component.py | 131 +++++++++++++++ 7 files changed, 472 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 190eac1cb1..075ccdd67d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -25,7 +25,10 @@ from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from 
google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp @@ -39,8 +42,11 @@ 'EvaluatedAnnotationOp', 'FeatureAttributionGraphComponentOp', 'FeatureExtractorOp', + 'LLMEmbeddingRetrievalOp', 'LLMEvaluationClassificationPredictionsPostprocessorOp', 'LLMEvaluationTextGenerationOp', + 'LLMInformationRetrievalPreprocessorOp', + 'LLMRetrievalMetricsOp', 'LLMSafetyBiasMetricsOp', 'ModelEvaluationFeatureAttributionOp', 'ModelImportEvaluatedAnnotationOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/__init__.py new file mode 100644 index 0000000000..c6e8df70e4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Embedding Evaluation Retrieval Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py new file mode 100644 index 0000000000..0a492e4b5c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py @@ -0,0 +1,141 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Python LLM Embedding Retrieval component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp.dsl import container_component +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' +# TODO(b/290838262): Update the image URI to point to +# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. + + +@container_component +def llm_embedding_retrieval( + gcp_resources: OutputPath(str), + embedding_retrieval_results_path: OutputPath(str), + project: str, + location: str, + query_embedding_source_directory: Input[Artifact], + doc_embedding_source_directory: Input[Artifact], + embedding_retrieval_top_n: int, + display_name: str = 'llm_embedding_retrieval_component', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + runner: str = 'DirectRunner', + dataflow_service_account: str = '', + dataflow_disk_size_gb: int = 50, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_workers_num: int = 1, + dataflow_max_workers_num: int = 5, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +): + """Top N doc retrieval for queries, based on their embedding similarities. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + query_embedding_source_directory: Required. Directory where query embedding + results are saved. + doc_embedding_source_directory: Required. Directory where doc embedding results + are saved. + embedding_retrieval_top_n: Required. Top N docs will be retrieved for each + query, based on similarity. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + runner: runner for the beam pipeline. DirectRunner and DataflowRunner are + supported. + dataflow_service_account: Service account to run the dataflow job. If not + set, dataflow will use the default worker service account. For more + details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. + dataflow_machine_type: The machine type executing the evaluation run. 
+ dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the + evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when + empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + embedding_retrieval_results_path (str): + The prefix of sharded GCS output of document retrieval results based on + embedding similarity, in JSONL format. + """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--embedding_retrieval={True}', + f'--project={project}', + f'--location={location}', + f'--query_embedding_source_directory={query_embedding_source_directory.path}', + f'--doc_embedding_source_directory={doc_embedding_source_directory.path}', + f'--embedding_retrieval_top_n={embedding_retrieval_top_n}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--embedding_retrieval_results_path={embedding_retrieval_results_path}', + f'--runner={runner}', + f'--dataflow_service_account={dataflow_service_account}', + f'--dataflow_disk_size={dataflow_disk_size_gb}', + f'--dataflow_machine_type={dataflow_machine_type}', + f'--dataflow_workers_num={dataflow_workers_num}', + f'--dataflow_max_workers_num={dataflow_max_workers_num}', + f'--dataflow_subnetwork={dataflow_subnetwork}', + f'--dataflow_use_public_ips={dataflow_use_public_ips}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/__init__.py new file mode 100644 index 0000000000..ddb8ed570f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Google Cloud Pipeline Evaluation LLM Embedding Evaluation Preprocessing Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py new file mode 100644 index 0000000000..1d9c728c81 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -0,0 +1,152 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Information Retrieval preprocessor component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' +# TODO(b/290838262): Update the image URI to point to +# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. + + +@container_component +def llm_information_retrieval_preprocessor( + gcp_resources: OutputPath(str), + batch_prediction_query_gcs_source: OutputPath(list), + batch_prediction_corpus_gcs_source: OutputPath(list), + embedding_retrieval_gcs_source: OutputPath(str), + project: str, + location: str, + corpus_gcs_source: str, + query_gcs_source: str, + golden_docs_gcs_source: str, + display_name: str = 'information-retrieval-preprocessor', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + runner: str = 'DirectRunner', + dataflow_service_account: str = '', + dataflow_disk_size_gb: int = 50, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_workers_num: int = 1, + dataflow_max_workers_num: int = 5, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +): + """Preprocess inputs for information retrieval task - json files for corpus + + and queries, and csv for query to golden docs mapping. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + corpus_gcs_source: Required. The path for json file containing corpus + documents. + query_gcs_source: Required. The path for json file containing query + documents. + golden_docs_gcs_source: Required. The path for csv file containing mapping + of each query to the golden docs. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. 
The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + runner: runner for the beam pipeline. DirectRunner and DataflowRunner are + supported. + dataflow_service_account: Service account to run the dataflow job. If not + set, dataflow will use the default worker service account. For more + details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. + dataflow_machine_type: The dataflow worker machine type executing the + evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the + evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when + empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + batch_prediction_query_gcs_source (list): + The GCS directory to save preprocessed query data to run batch + prediction. + batch_prediction_corpus_gcs_source (list): + The GCS directory to save preprocessed corpus data to run batch + prediction. + embedding_retrieval_gcs_source (str): + The GCS directory to save preprocessed golden docs mapping data to run + batch prediction. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--information_retrieval_preprocessor={True}', + f'--project={project}', + f'--location={location}', + f'--corpus_gcs_source={corpus_gcs_source}', + f'--query_gcs_source={query_gcs_source}', + f'--golden_docs_gcs_source={golden_docs_gcs_source}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--batch_prediction_query_gcs_source={batch_prediction_query_gcs_source}', + f'--batch_prediction_corpus_gcs_source={batch_prediction_corpus_gcs_source}', + f'--embedding_retrieval_gcs_source={embedding_retrieval_gcs_source}', + f'--runner={runner}', + f'--dataflow_service_account={dataflow_service_account}', + f'--dataflow_disk_size={dataflow_disk_size_gb}', + f'--dataflow_machine_type={dataflow_machine_type}', + f'--dataflow_workers_num={dataflow_workers_num}', + f'--dataflow_max_workers_num={dataflow_max_workers_num}', + f'--dataflow_subnetwork={dataflow_subnetwork}', + f'--dataflow_use_public_ips={dataflow_use_public_ips}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/__init__.py new file mode 100644 index 0000000000..f6ca06c113 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Google Cloud Pipeline Evaluation LLM Embedding Evaluation Metrics Computation Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py new file mode 100644 index 0000000000..ff71d2c496 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py @@ -0,0 +1,131 @@ +"""Information Retrieval metrics computation component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp.dsl import container_component +from kfp.dsl import Metrics +from kfp.dsl import Output +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' +# TODO(b/290838262): Update the image URI to point to +# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. + + +@container_component +def llm_retrieval_metrics( + gcp_resources: OutputPath(str), + retrieval_metrics: Output[Metrics], + project: str, + location: str, + golden_docs_pattern: str, + embedding_retrieval_results_pattern: str, + retrieval_metrics_top_k_list: str, + display_name: str = 'llm_retrieval_metrics_component', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + runner: str = 'DirectRunner', + dataflow_service_account: str = '', + dataflow_disk_size_gb: int = 50, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_workers_num: int = 1, + dataflow_max_workers_num: int = 5, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +): + """Compute retrieval metrics based on the docs retrieved for each query. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + golden_docs_pattern: Required. Files where queries and corresponding + golden doc ids are saved. The path pattern can contain glob characters + (``*``, ``?``, and ``[...]`` sets). + embedding_retrieval_results_pattern: Required. Files where doc retrieval + results for each query are saved. The path pattern can contain glob + characters (``*``, ``?``, and ``[...]`` sets). + retrieval_metrics_top_k_list: Required. k values for retrieval metrics, + for example, precision@k, accuracy@k, etc. If more than one value, + separated by comma. e.g., "1,5,10". + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. 
+ Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + runner: runner for the beam pipeline. DirectRunner and DataflowRunner are + supported. + dataflow_service_account: Service account to run the dataflow job. If not + set, dataflow will use the default worker service account. For more + details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. + dataflow_machine_type: The dataflow worker machine type executing the + evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the + evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when + empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources: + Serialized gcp_resources proto tracking the custom job. + retrieval_metrics: + A Metrics artifact representing the retrieval metrics. + """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--retrieval_metrics={True}', + f'--project={project}', + f'--location={location}', + f'--golden_docs_pattern={golden_docs_pattern}', + f'--embedding_retrieval_results_pattern={embedding_retrieval_results_pattern}', + f'--retrieval_metrics_top_k_list={retrieval_metrics_top_k_list}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--retrieval_metrics_output_path={retrieval_metrics.path}', + f'--runner={runner}', + f'--dataflow_service_account={dataflow_service_account}', + f'--dataflow_disk_size={dataflow_disk_size_gb}', + f'--dataflow_machine_type={dataflow_machine_type}', + f'--dataflow_workers_num={dataflow_workers_num}', + f'--dataflow_max_workers_num={dataflow_max_workers_num}', + f'--dataflow_subnetwork={dataflow_subnetwork}', + f'--dataflow_use_public_ips={dataflow_use_public_ips}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) From db3f867cc2cb3cc135eec131d49751cf5581ecc4 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 18 Aug 2023 14:04:30 -0700 Subject: [PATCH 105/253] chore(components): Update the pipelines for the Tabular Workflow for Forecasting PiperOrigin-RevId: 558241332 --- .../forecasting/forecasting_ensemble.py | 2 +- .../forecasting/forecasting_stage_1_tuner.py | 4 +- .../forecasting/forecasting_stage_2_tuner.py | 4 +- .../learn_to_learn_forecasting_pipeline.yaml | 278 +++++++++++++----- ...ence_to_sequence_forecasting_pipeline.yaml | 278 +++++++++++++----- 
...sion_transformer_forecasting_pipeline.yaml | 278 +++++++++++++----- ...es_dense_encoder_forecasting_pipeline.yaml | 278 +++++++++++++----- .../preview/automl/forecasting/utils.py | 58 ++-- ...ml_tabular_feature_selection_pipeline.yaml | 117 ++++++-- .../tabular/automl_tabular_v2_pipeline.yaml | 189 +++++++++--- .../automl/tabular/feature_selection.py | 4 +- .../tabular/feature_transform_engine.py | 92 ++++-- .../tabnet_hyperparameter_tuning_job.py | 4 +- ...et_hyperparameter_tuning_job_pipeline.yaml | 121 ++++++-- .../preview/automl/tabular/tabnet_trainer.py | 4 +- .../tabular/tabnet_trainer_pipeline.yaml | 113 +++++-- ...wide_and_deep_hyperparameter_tuning_job.py | 4 +- ...ep_hyperparameter_tuning_job_pipeline.yaml | 119 ++++++-- .../automl/tabular/wide_and_deep_trainer.py | 4 +- .../wide_and_deep_trainer_pipeline.yaml | 113 +++++-- ...st_hyperparameter_tuning_job_pipeline.yaml | 117 ++++++-- .../tabular/xgboost_trainer_pipeline.yaml | 111 +++++-- .../bqml_arima_predict_pipeline.yaml | 44 +-- .../bqml_arima_train_pipeline.yaml | 220 ++++++++++---- .../forecasting/prophet_predict_pipeline.yaml | 46 +-- .../v1/automl/forecasting/prophet_trainer.py | 10 +- .../forecasting/prophet_trainer_pipeline.yaml | 151 ++++++++-- .../tabular/automl_tabular_pipeline.yaml | 120 ++++++-- .../v1/automl/tabular/cv_trainer.py | 4 +- .../v1/automl/tabular/ensemble.py | 4 +- .../v1/automl/tabular/finalizer.py | 2 +- .../v1/automl/tabular/infra_validator.py | 2 +- .../automl/tabular/split_materialized_data.py | 2 +- .../v1/automl/tabular/stage_1_tuner.py | 4 +- .../automl/tabular/stats_and_example_gen.py | 4 +- .../training_configurator_and_validator.py | 39 ++- .../v1/automl/tabular/transform.py | 4 +- 37 files changed, 2170 insertions(+), 778 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py index b7e0580c4e..662ec172a7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -104,7 +104,7 @@ def automl_forecasting_ensemble( ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', '", "args": ["forecasting_mp_ensemble', '", "--transform_output_path=', transform_output.uri, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py index e82e55708b..d1acbae54c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -110,14 +110,14 @@ def automl_forecasting_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', 
'", "args": ["forecasting_mp_l2l_stage_1_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', '", "--reduce_search_space_mode=', reduce_search_space_mode, f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py index 5375f61955..9c8aab1566 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -107,14 +107,14 @@ def automl_forecasting_stage_2_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', '", "args": ["forecasting_mp_l2l_stage_2_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', '", "--training_base_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml index 3d28c0a17f..3fad373d5c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -37,6 +37,7 @@ # model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] # num_selected_trials: int [Default: 10.0] # optimization_objective: str +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # project: str # quantiles: list @@ -53,7 +54,7 @@ # test_fraction: float [Default: -1.0] # time_column: str # time_series_attribute_columns: list -# time_series_identifier_column: str +# time_series_identifier_columns: list # timestamp_split_key: str [Default: ''] # train_budget_milli_node_hours: float # training_fraction: float [Default: -1.0] @@ -793,6 +794,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -826,6 +829,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: 
schemaTitle: system.Artifact @@ -1346,6 +1353,9 @@ components: componentInputParameter: pipelinechannel--num_selected_trials project: componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + runtimeValue: + constant: full root_dir: componentInputParameter: pipelinechannel--root_dir single_run_max_secs: @@ -1499,6 +1509,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble-2 + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1532,6 +1544,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -2005,6 +2021,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2117,6 +2135,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2252,8 +2272,8 @@ components: componentInputParameter: pipelinechannel--time_column forecasting_time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns forecasting_unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns forecasting_window_max_count: @@ -2280,6 +2300,9 @@ components: componentInputParameter: pipelinechannel--project root_dir: componentInputParameter: pipelinechannel--root_dir + stats_gen_execution_engine: + runtimeValue: + constant: bigquery target_column: componentInputParameter: pipelinechannel--target_column temporal_total_weight: @@ -2358,7 +2381,7 @@ components: runtimeValue: constant: l2l forecasting_transformations: - componentInputParameter: pipelinechannel--transformations + componentInputParameter: pipelinechannel--set-optional-inputs-transformations group_columns: componentInputParameter: pipelinechannel--group_columns group_temporal_total_weight: @@ -2384,8 +2407,8 @@ components: componentInputParameter: pipelinechannel--time_column time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns weight_column: @@ -2393,6 +2416,11 @@ components: taskInfo: name: training-configurator-and-validator inputDefinitions: + artifacts: + pipelinechannel--parent_model: + 
artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--available_at_forecast_columns: parameterType: LIST @@ -2474,6 +2502,8 @@ components: parameterType: STRING pipelinechannel--set-optional-inputs-data_source_csv_filenames: parameterType: STRING + pipelinechannel--set-optional-inputs-transformations: + parameterType: STRUCT pipelinechannel--stage_1_num_parallel_trials: parameterType: NUMBER_INTEGER pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -2496,8 +2526,8 @@ components: parameterType: STRING pipelinechannel--time_series_attribute_columns: parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING + pipelinechannel--time_series_identifier_columns: + parameterType: LIST pipelinechannel--timestamp_split_key: parameterType: STRING pipelinechannel--train_budget_milli_node_hours: @@ -2993,6 +3023,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -3083,12 +3119,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -3168,11 +3211,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -3207,6 +3264,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. 
@@ -5272,6 +5338,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5324,6 +5395,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5392,6 +5468,12 @@ components: project: description: The GCP project that runs the pipeline components. parameterType: STRING + stats_gen_execution_engine: + description: Execution engine used for stats gen in FTE. + parameterType: STRING + transformations: + description: forecasting transformations to append stats gen engine to. + parameterType: STRUCT outputDefinitions: parameters: data_source_bigquery_table_path: @@ -5400,6 +5482,8 @@ components: parameterType: STRING model_display_name: parameterType: STRING + transformations: + parameterType: STRUCT comp-split-materialized-data: executorLabel: exec-split-materialized-data inputDefinitions: @@ -5684,12 +5768,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -5731,7 +5823,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5768,7 +5860,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5805,11 +5897,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5848,11 +5940,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", 
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5891,7 +5983,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6136,7 +6228,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -6153,6 +6247,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -6167,8 +6262,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -6176,6 +6273,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -6205,8 +6303,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6222,7 +6320,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6324,13 +6422,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL 
should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-or-create-model-description-2: container: @@ -6362,13 +6457,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-prediction-image-uri: container: @@ -6398,10 +6490,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6434,10 +6526,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6470,7 +6562,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-predictions-column-2: container: args: @@ -6499,7 +6591,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-importer: importer: artifactUri: @@ -6930,12 +7022,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -6957,12 +7051,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -6975,8 +7071,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -6990,18 +7086,24 @@ deploymentSpec: \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ + ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ + \ data source URI.\n\n Args:\n project: The GCP project that runs the\ + \ pipeline components.\n location: The GCP region that runs the pipeline\ + \ components.\n data_source_csv_filenames: The CSV GCS path when data\ + \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ + \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ + \ source is Vertex dataset.\n model_display_name: The uploaded model's\ + \ display name.\n stats_gen_execution_engine: Execution engine used for\ + \ stats gen in FTE.\n transformations: forecasting transformations to\ + \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ + \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ \ import collections\n from google.cloud import aiplatform\n from google.cloud\ \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ + \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ @@ -7016,10 +7118,10 @@ deploymentSpec: \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ \ return collections.namedtuple(\n 'Outputs',\n [\n \ \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim 
+ \ 'model_display_name',\n 'transformations',\n ],\n\ + \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ + \ model_display_name,\n transformations,\n )\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -7065,7 +7167,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-string-not-empty: container: args: @@ -7131,7 +7233,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -7167,7 +7269,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -7187,7 +7289,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7210,7 +7314,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The AutoML Forecasting pipeline. 
name: learn-to-learn-forecasting @@ -7252,6 +7356,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--available_at_forecast_columns: componentInputParameter: available_at_forecast_columns @@ -7337,6 +7444,10 @@ root: taskOutputParameter: outputParameterKey: data_source_csv_filenames producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-transformations: + taskOutputParameter: + outputParameterKey: transformations + producerTask: set-optional-inputs pipelinechannel--stage_1_num_parallel_trials: componentInputParameter: stage_1_num_parallel_trials pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -7359,8 +7470,8 @@ root: componentInputParameter: time_column pipelinechannel--time_series_attribute_columns: componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column + pipelinechannel--time_series_identifier_columns: + componentInputParameter: time_series_identifier_columns pipelinechannel--timestamp_split_key: componentInputParameter: timestamp_split_key pipelinechannel--train_budget_milli_node_hours: @@ -7403,10 +7514,21 @@ root: componentInputParameter: model_display_name project: componentInputParameter: project + stats_gen_execution_engine: + runtimeValue: + constant: bigquery + transformations: + componentInputParameter: transformations taskInfo: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex Model to upload this model as a version to. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact @@ -7716,11 +7838,11 @@ root: same time series.' isOptional: true parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different + time_series_identifier_columns: + description: 'The columns that distinguish the different time series.' - parameterType: STRING + parameterType: LIST timestamp_split_key: defaultValue: '' description: The timestamp_split column name. 
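The caller-visible part of this regenerated spec is the root interface change: the string parameter time_series_identifier_column is replaced by the list parameter time_series_identifier_columns, and an optional parent_model artifact input is added so the trained model can be uploaded as a new version of an existing Vertex Model. A minimal, hypothetical submission sketch against the updated YAML follows; project, bucket, and column names are placeholders, and only a subset of the required parameters is spelled out (the full set is listed in the YAML's root inputDefinitions):

from google.cloud import aiplatform

aiplatform.init(project='my-project', location='us-central1')

job = aiplatform.PipelineJob(
    display_name='l2l-forecasting',
    template_path='learn_to_learn_forecasting_pipeline.yaml',
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values={
        # Previously: 'time_series_identifier_column': 'store_id'
        'time_series_identifier_columns': ['store_id'],
        'time_column': 'date',
        'target_column': 'sales',
        'project': 'my-project',
        'location': 'us-central1',
        'root_dir': 'gs://my-bucket/pipeline_root',
        'optimization_objective': 'minimize-rmse',
        # ...remaining required parameters (transformations,
        # train_budget_milli_node_hours, the forecast horizon and
        # granularity settings, ...) are unchanged by this commit.
    },
    # The new optional parent_model artifact input is omitted here; when it
    # is unset, model upload creates a new model as before.
)
job.submit()

The sequence-to-sequence, temporal fusion transformer, and time-series dense encoder pipelines below carry the same interface change.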
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml index 4f656e1b99..4eb9e7da3c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -36,6 +36,7 @@ # model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] # num_selected_trials: int [Default: 10.0] # optimization_objective: str +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # project: str # root_dir: str @@ -51,7 +52,7 @@ # test_fraction: float [Default: -1.0] # time_column: str # time_series_attribute_columns: list -# time_series_identifier_column: str +# time_series_identifier_columns: list # timestamp_split_key: str [Default: ''] # train_budget_milli_node_hours: float # training_fraction: float [Default: -1.0] @@ -789,6 +790,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -822,6 +825,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -1339,6 +1346,9 @@ components: componentInputParameter: pipelinechannel--num_selected_trials project: componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + runtimeValue: + constant: full root_dir: componentInputParameter: pipelinechannel--root_dir single_run_max_secs: @@ -1490,6 +1500,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble-2 + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1523,6 +1535,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -1993,6 +2009,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2103,6 +2121,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2236,8 +2256,8 @@ components: componentInputParameter: pipelinechannel--time_column 
forecasting_time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns forecasting_unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns forecasting_window_max_count: @@ -2264,6 +2284,9 @@ components: componentInputParameter: pipelinechannel--project root_dir: componentInputParameter: pipelinechannel--root_dir + stats_gen_execution_engine: + runtimeValue: + constant: bigquery target_column: componentInputParameter: pipelinechannel--target_column temporal_total_weight: @@ -2343,7 +2366,7 @@ components: runtimeValue: constant: seq2seq forecasting_transformations: - componentInputParameter: pipelinechannel--transformations + componentInputParameter: pipelinechannel--set-optional-inputs-transformations group_columns: componentInputParameter: pipelinechannel--group_columns group_temporal_total_weight: @@ -2370,8 +2393,8 @@ components: componentInputParameter: pipelinechannel--time_column time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns weight_column: @@ -2379,6 +2402,11 @@ components: taskInfo: name: training-configurator-and-validator inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--available_at_forecast_columns: parameterType: LIST @@ -2456,6 +2484,8 @@ components: parameterType: STRING pipelinechannel--set-optional-inputs-data_source_csv_filenames: parameterType: STRING + pipelinechannel--set-optional-inputs-transformations: + parameterType: STRUCT pipelinechannel--stage_1_num_parallel_trials: parameterType: NUMBER_INTEGER pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -2478,8 +2508,8 @@ components: parameterType: STRING pipelinechannel--time_series_attribute_columns: parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING + pipelinechannel--time_series_identifier_columns: + parameterType: LIST pipelinechannel--timestamp_split_key: parameterType: STRING pipelinechannel--train_budget_milli_node_hours: @@ -2975,6 +3005,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -3065,12 +3101,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' 
isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -3150,11 +3193,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -3189,6 +3246,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -5254,6 +5320,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5306,6 +5377,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5374,6 +5450,12 @@ components: project: description: The GCP project that runs the pipeline components. parameterType: STRING + stats_gen_execution_engine: + description: Execution engine used for stats gen in FTE. + parameterType: STRING + transformations: + description: forecasting transformations to append stats gen engine to. + parameterType: STRUCT outputDefinitions: parameters: data_source_bigquery_table_path: @@ -5382,6 +5464,8 @@ components: parameterType: STRING model_display_name: parameterType: STRING + transformations: + parameterType: STRUCT comp-split-materialized-data: executorLabel: exec-split-materialized-data inputDefinitions: @@ -5666,12 +5750,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -5713,7 +5805,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5750,7 +5842,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5787,11 +5879,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5830,11 +5922,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", 
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5873,7 +5965,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6118,7 +6210,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -6135,6 +6229,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -6149,8 +6244,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -6158,6 +6255,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -6187,8 +6285,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6204,7 +6302,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6306,13 +6404,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL 
should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-or-create-model-description-2: container: @@ -6344,13 +6439,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-prediction-image-uri: container: @@ -6380,10 +6472,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6416,10 +6508,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6452,7 +6544,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-predictions-column-2: container: args: @@ -6481,7 +6573,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-importer: importer: artifactUri: @@ -6912,12 +7004,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -6939,12 +7033,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -6957,8 +7053,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -6972,18 +7068,24 @@ deploymentSpec: \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ + ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ + \ data source URI.\n\n Args:\n project: The GCP project that runs the\ + \ pipeline components.\n location: The GCP region that runs the pipeline\ + \ components.\n data_source_csv_filenames: The CSV GCS path when data\ + \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ + \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ + \ source is Vertex dataset.\n model_display_name: The uploaded model's\ + \ display name.\n stats_gen_execution_engine: Execution engine used for\ + \ stats gen in FTE.\n transformations: forecasting transformations to\ + \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ + \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ \ import collections\n from google.cloud import aiplatform\n from google.cloud\ \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ + \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ @@ -6998,10 +7100,10 @@ deploymentSpec: \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ \ return collections.namedtuple(\n 'Outputs',\n [\n \ \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim 
+ \ 'model_display_name',\n 'transformations',\n ],\n\ + \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ + \ model_display_name,\n transformations,\n )\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -7047,7 +7149,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-string-not-empty: container: args: @@ -7113,7 +7215,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -7149,7 +7251,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -7169,7 +7271,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7192,7 +7296,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. 
name: sequence-to-sequence-forecasting @@ -7234,6 +7338,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--available_at_forecast_columns: componentInputParameter: available_at_forecast_columns @@ -7315,6 +7422,10 @@ root: taskOutputParameter: outputParameterKey: data_source_csv_filenames producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-transformations: + taskOutputParameter: + outputParameterKey: transformations + producerTask: set-optional-inputs pipelinechannel--stage_1_num_parallel_trials: componentInputParameter: stage_1_num_parallel_trials pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -7337,8 +7448,8 @@ root: componentInputParameter: time_column pipelinechannel--time_series_attribute_columns: componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column + pipelinechannel--time_series_identifier_columns: + componentInputParameter: time_series_identifier_columns pipelinechannel--timestamp_split_key: componentInputParameter: timestamp_split_key pipelinechannel--train_budget_milli_node_hours: @@ -7381,10 +7492,21 @@ root: componentInputParameter: model_display_name project: componentInputParameter: project + stats_gen_execution_engine: + runtimeValue: + constant: bigquery + transformations: + componentInputParameter: transformations taskInfo: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex model to upload this model as a version to. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact @@ -7675,11 +7797,11 @@ root: same time series.' isOptional: true parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different + time_series_identifier_columns: + description: 'The columns that distinguish the different time series.' - parameterType: STRING + parameterType: LIST timestamp_split_key: defaultValue: '' description: The timestamp_split column name. 
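
The seq2seq diff above replaces the singular `time_series_identifier_column` string parameter with the list-valued `time_series_identifier_columns` and adds an optional `parent_model` artifact so the trained model can be uploaded as a new version of an existing Vertex model. The sketch below is a minimal, illustrative example (not part of this patch) of submitting the updated compiled pipeline with the new list parameter via the Vertex AI SDK. Parameter names are taken from the pipeline's input definitions in this diff; the project, bucket, and column values are placeholders, and inputs not shown here (window/horizon settings, and artifact inputs such as `parent_model` or `vertex_dataset`) are omitted for brevity.

```python
# Minimal sketch, assuming a compiled copy of the updated pipeline YAML.
# All concrete values (project, bucket, columns) are placeholders.
from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

job = aiplatform.PipelineJob(
    display_name="seq2seq-forecasting",
    # The compiled spec updated by this patch.
    template_path="sequence_to_sequence_forecasting_pipeline.yaml",
    pipeline_root="gs://my-bucket/pipeline_root",
    parameter_values={
        "project": "my-project",
        "location": "us-central1",
        "root_dir": "gs://my-bucket/pipeline_root",
        "target_column": "sales",
        "time_column": "date",
        # New in this patch: a list of identifier columns replaces the
        # deprecated string-valued time_series_identifier_column.
        "time_series_identifier_columns": ["store_id", "product_id"],
        "optimization_objective": "minimize-rmse",
        "train_budget_milli_node_hours": 1000.0,
        "data_source_csv_filenames": "gs://my-bucket/data/train.csv",
    },
)
job.submit()
```
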
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml index 6bad578312..c7f96df41c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -35,6 +35,7 @@ # model_description: str [Default: ''] # model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] # optimization_objective: str +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # project: str # root_dir: str @@ -50,7 +51,7 @@ # test_fraction: float [Default: -1.0] # time_column: str # time_series_attribute_columns: list -# time_series_identifier_column: str +# time_series_identifier_columns: list # timestamp_split_key: str [Default: ''] # train_budget_milli_node_hours: float # training_fraction: float [Default: -1.0] @@ -790,6 +791,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -823,6 +826,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -1339,6 +1346,9 @@ components: constant: 1.0 project: componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + runtimeValue: + constant: full root_dir: componentInputParameter: pipelinechannel--root_dir single_run_max_secs: @@ -1491,6 +1501,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble-2 + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1524,6 +1536,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -1992,6 +2008,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2100,6 +2118,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2231,8 +2251,8 @@ components: componentInputParameter: pipelinechannel--time_column 
forecasting_time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns forecasting_unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns forecasting_window_max_count: @@ -2259,6 +2279,9 @@ components: componentInputParameter: pipelinechannel--project root_dir: componentInputParameter: pipelinechannel--root_dir + stats_gen_execution_engine: + runtimeValue: + constant: bigquery target_column: componentInputParameter: pipelinechannel--target_column temporal_total_weight: @@ -2338,7 +2361,7 @@ components: runtimeValue: constant: tft forecasting_transformations: - componentInputParameter: pipelinechannel--transformations + componentInputParameter: pipelinechannel--set-optional-inputs-transformations group_columns: componentInputParameter: pipelinechannel--group_columns group_temporal_total_weight: @@ -2365,8 +2388,8 @@ components: componentInputParameter: pipelinechannel--time_column time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns weight_column: @@ -2374,6 +2397,11 @@ components: taskInfo: name: training-configurator-and-validator inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--available_at_forecast_columns: parameterType: LIST @@ -2449,6 +2477,8 @@ components: parameterType: STRING pipelinechannel--set-optional-inputs-data_source_csv_filenames: parameterType: STRING + pipelinechannel--set-optional-inputs-transformations: + parameterType: STRUCT pipelinechannel--stage_1_num_parallel_trials: parameterType: NUMBER_INTEGER pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -2471,8 +2501,8 @@ components: parameterType: STRING pipelinechannel--time_series_attribute_columns: parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING + pipelinechannel--time_series_identifier_columns: + parameterType: LIST pipelinechannel--timestamp_split_key: parameterType: STRING pipelinechannel--train_budget_milli_node_hours: @@ -2968,6 +2998,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -3058,12 +3094,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' 
isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -3143,11 +3186,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -3182,6 +3239,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -5247,6 +5313,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5299,6 +5370,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5367,6 +5443,12 @@ components: project: description: The GCP project that runs the pipeline components. parameterType: STRING + stats_gen_execution_engine: + description: Execution engine used for stats gen in FTE. + parameterType: STRING + transformations: + description: forecasting transformations to append stats gen engine to. + parameterType: STRUCT outputDefinitions: parameters: data_source_bigquery_table_path: @@ -5375,6 +5457,8 @@ components: parameterType: STRING model_display_name: parameterType: STRING + transformations: + parameterType: STRUCT comp-split-materialized-data: executorLabel: exec-split-materialized-data inputDefinitions: @@ -5659,12 +5743,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -5706,7 +5798,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5743,7 +5835,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5780,11 +5872,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5823,11 +5915,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", 
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5866,7 +5958,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6111,7 +6203,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -6128,6 +6222,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -6142,8 +6237,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -6151,6 +6248,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -6180,8 +6278,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6197,7 +6295,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6299,13 +6397,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL 
should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-or-create-model-description-2: container: @@ -6337,13 +6432,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-prediction-image-uri: container: @@ -6373,10 +6465,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6409,10 +6501,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6445,7 +6537,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-predictions-column-2: container: args: @@ -6474,7 +6566,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-importer: importer: artifactUri: @@ -6905,12 +6997,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -6932,12 +7026,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -6950,8 +7046,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -6965,18 +7061,24 @@ deploymentSpec: \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ + ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ + \ data source URI.\n\n Args:\n project: The GCP project that runs the\ + \ pipeline components.\n location: The GCP region that runs the pipeline\ + \ components.\n data_source_csv_filenames: The CSV GCS path when data\ + \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ + \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ + \ source is Vertex dataset.\n model_display_name: The uploaded model's\ + \ display name.\n stats_gen_execution_engine: Execution engine used for\ + \ stats gen in FTE.\n transformations: forecasting transformations to\ + \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ + \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ \ import collections\n from google.cloud import aiplatform\n from google.cloud\ \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ + \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ @@ -6991,10 +7093,10 @@ deploymentSpec: \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ \ return collections.namedtuple(\n 'Outputs',\n [\n \ \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim 
+ \ 'model_display_name',\n 'transformations',\n ],\n\ + \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ + \ model_display_name,\n transformations,\n )\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -7040,7 +7142,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-string-not-empty: container: args: @@ -7106,7 +7208,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -7142,7 +7244,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -7162,7 +7264,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7185,7 +7289,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
name: temporal-fusion-transformer-forecasting @@ -7227,6 +7331,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--available_at_forecast_columns: componentInputParameter: available_at_forecast_columns @@ -7306,6 +7413,10 @@ root: taskOutputParameter: outputParameterKey: data_source_csv_filenames producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-transformations: + taskOutputParameter: + outputParameterKey: transformations + producerTask: set-optional-inputs pipelinechannel--stage_1_num_parallel_trials: componentInputParameter: stage_1_num_parallel_trials pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -7328,8 +7439,8 @@ root: componentInputParameter: time_column pipelinechannel--time_series_attribute_columns: componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column + pipelinechannel--time_series_identifier_columns: + componentInputParameter: time_series_identifier_columns pipelinechannel--timestamp_split_key: componentInputParameter: timestamp_split_key pipelinechannel--train_budget_milli_node_hours: @@ -7372,10 +7483,21 @@ root: componentInputParameter: model_display_name project: componentInputParameter: project + stats_gen_execution_engine: + runtimeValue: + constant: bigquery + transformations: + componentInputParameter: transformations taskInfo: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Optional Vertex Model that this model is a version of. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact @@ -7661,11 +7783,11 @@ root: same time series.' isOptional: true parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different + time_series_identifier_columns: + description: 'The columns that distinguish the different time series.' - parameterType: STRING + parameterType: LIST timestamp_split_key: defaultValue: '' description: The timestamp_split column name. 
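The model-upload and training-configurator hunks above wrap the newly optional inputs (parent_model, and the now-deprecated time_series_identifier_column) in '{"IfPresent": ...}' placeholders, so the launcher binary only receives those flags when the input is actually bound at runtime. Below is a minimal sketch of how that placeholder shape is produced with the KFP SDK; the component name, image, command and flag names are illustrative placeholders rather than the real model-upload launcher interface, and it assumes kfp 2.x container-component semantics where an artifact input defaulting to None compiles with isOptional: true.

# Illustrative only: shows how an optional artifact input compiles into the
# IfPresent placeholder pattern used by the hunks above. Names are placeholders.
from kfp import compiler, dsl


@dsl.container_component
def upload_model(
    display_name: str,
    parent_model: dsl.Input[dsl.Artifact] = None,  # optional, like parent_model above
):
    return dsl.ContainerSpec(
        image='python:3.7',
        command=['python3', '-u', '-m', 'launcher'],
        args=[
            '--display_name',
            display_name,
            # Only emitted when parent_model is supplied at runtime; the real
            # component forwards metadata['resourceName'], here we forward the URI.
            dsl.IfPresentPlaceholder(
                input_name='parent_model',
                then=['--parent_model_name', parent_model.uri],
            ),
        ],
    )


if __name__ == '__main__':
    # Compiling yields YAML containing the same '{"IfPresent": ...}' args entry.
    compiler.Compiler().compile(upload_model, package_path='upload_model.yaml')
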
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml index afbf67ec9e..e7b6bcedd8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -37,6 +37,7 @@ # model_display_name: str [Default: 'automl-forecasting-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'] # num_selected_trials: int [Default: 10.0] # optimization_objective: str +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # project: str # quantiles: list @@ -53,7 +54,7 @@ # test_fraction: float [Default: -1.0] # time_column: str # time_series_attribute_columns: list -# time_series_identifier_column: str +# time_series_identifier_columns: list # timestamp_split_key: str [Default: ''] # train_budget_milli_node_hours: float # training_fraction: float [Default: -1.0] @@ -793,6 +794,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -826,6 +829,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -1346,6 +1353,9 @@ components: componentInputParameter: pipelinechannel--num_selected_trials project: componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + runtimeValue: + constant: full root_dir: componentInputParameter: pipelinechannel--root_dir single_run_max_secs: @@ -1499,6 +1509,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-forecasting-ensemble-2 + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1532,6 +1544,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -2005,6 +2021,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2117,6 +2135,8 @@ components: taskOutputArtifact: outputArtifactKey: transform_output producerTask: feature-transform-engine + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: materialized_eval_split @@ -2252,8 +2272,8 @@ components: componentInputParameter: 
pipelinechannel--time_column forecasting_time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns forecasting_unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns forecasting_window_max_count: @@ -2280,6 +2300,9 @@ components: componentInputParameter: pipelinechannel--project root_dir: componentInputParameter: pipelinechannel--root_dir + stats_gen_execution_engine: + runtimeValue: + constant: bigquery target_column: componentInputParameter: pipelinechannel--target_column temporal_total_weight: @@ -2358,7 +2381,7 @@ components: runtimeValue: constant: tide forecasting_transformations: - componentInputParameter: pipelinechannel--transformations + componentInputParameter: pipelinechannel--set-optional-inputs-transformations group_columns: componentInputParameter: pipelinechannel--group_columns group_temporal_total_weight: @@ -2384,8 +2407,8 @@ components: componentInputParameter: pipelinechannel--time_column time_series_attribute_columns: componentInputParameter: pipelinechannel--time_series_attribute_columns - time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + time_series_identifier_columns: + componentInputParameter: pipelinechannel--time_series_identifier_columns unavailable_at_forecast_columns: componentInputParameter: pipelinechannel--unavailable_at_forecast_columns weight_column: @@ -2393,6 +2416,11 @@ components: taskInfo: name: training-configurator-and-validator inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--available_at_forecast_columns: parameterType: LIST @@ -2474,6 +2502,8 @@ components: parameterType: STRING pipelinechannel--set-optional-inputs-data_source_csv_filenames: parameterType: STRING + pipelinechannel--set-optional-inputs-transformations: + parameterType: STRUCT pipelinechannel--stage_1_num_parallel_trials: parameterType: NUMBER_INTEGER pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -2496,8 +2526,8 @@ components: parameterType: STRING pipelinechannel--time_series_attribute_columns: parameterType: LIST - pipelinechannel--time_series_identifier_column: - parameterType: STRING + pipelinechannel--time_series_identifier_columns: + parameterType: LIST pipelinechannel--timestamp_split_key: parameterType: STRING pipelinechannel--train_budget_milli_node_hours: @@ -2993,6 +3023,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -3083,12 +3119,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' 
isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -3168,11 +3211,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -3207,6 +3264,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -5272,6 +5338,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5324,6 +5395,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5392,6 +5468,12 @@ components: project: description: The GCP project that runs the pipeline components. parameterType: STRING + stats_gen_execution_engine: + description: Execution engine used for stats gen in FTE. + parameterType: STRING + transformations: + description: forecasting transformations to append stats gen engine to. + parameterType: STRUCT outputDefinitions: parameters: data_source_bigquery_table_path: @@ -5400,6 +5482,8 @@ components: parameterType: STRING model_display_name: parameterType: STRING + transformations: + parameterType: STRUCT comp-split-materialized-data: executorLabel: exec-split-materialized-data inputDefinitions: @@ -5684,12 +5768,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -5731,7 +5823,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5768,7 +5860,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", @@ -5805,11 +5897,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5848,11 +5940,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", 
- "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230619_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5891,7 +5983,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6136,7 +6228,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -6153,6 +6247,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -6167,8 +6262,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -6176,6 +6273,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -6205,8 +6303,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6222,7 +6320,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6324,13 +6422,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL 
should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-or-create-model-description-2: container: @@ -6362,13 +6457,10 @@ deploymentSpec: \ is dsl.PIPELINE_JOB_NAME_PLACEHOLDER, though\n # at compile time the\ \ actual template format doesn't get injected since\n # the Python isn't\ \ interpreted yet, so we have to hardcode the value.\n pipeline_url = 'https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{{$.pipeline_job_name}}?project={project}'.format(\n\ - \ location=location, project=project\n )\n # Note: URL should match\ - \ SDK and API:\n # http://google3/third_party/py/google/cloud/aiplatform/aiplatform/pipeline_jobs.py;l=469;rcl=485422918\n\ - \ # http://google3/google/cloud/aiplatform/master/pipeline_job.proto;l=43;rcl=521956025\n\ - \n if original_description:\n return f'{original_description} From:\ - \ {pipeline_url}'\n\n # The pipeline url contains KFP placeholders injected\ - \ at runtime.\n return f'Vertex forecasting model trained in the pipeline:\ - \ {pipeline_url}'\n\n" + \ location=location, project=project\n )\n if original_description:\n\ + \ return f'{original_description} From: {pipeline_url}'\n\n # The pipeline\ + \ url contains KFP placeholders injected at runtime.\n return f'Vertex\ + \ forecasting model trained in the pipeline: {pipeline_url}'\n\n" image: python:3.7 exec-get-prediction-image-uri: container: @@ -6398,10 +6490,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6434,10 +6526,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230619_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230619_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230619_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230619_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6470,7 +6562,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-predictions-column-2: container: args: @@ -6499,7 +6591,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-importer: importer: artifactUri: @@ -6930,12 +7022,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -6957,12 +7051,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -6975,8 +7071,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -6990,18 +7086,24 @@ deploymentSpec: \ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ \ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ \ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ - \ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ + \ str,\n stats_gen_execution_engine: str,\n transformations: dict,\n\ + ) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ \ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ - \ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ - \ The GCP project that runs the pipeline components.\n location: The\ - \ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ - \ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ - \ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ - \ dataset when data source is Vertex dataset.\n model_display_name: The\ - \ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ - \ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \ str),\n ('transformations', dict),\n ],\n):\n \"\"\"Get the\ + \ data source URI.\n\n Args:\n project: The GCP project that runs the\ + \ pipeline components.\n location: The GCP region that runs the pipeline\ + \ components.\n data_source_csv_filenames: The CSV GCS path when data\ + \ source is CSV.\n data_source_bigquery_table_path: The BigQuery table\ + \ when data source is BQ.\n vertex_dataset: The Vertex dataset when data\ + \ source is Vertex dataset.\n model_display_name: The uploaded model's\ + \ display name.\n stats_gen_execution_engine: Execution engine used for\ + \ stats gen in FTE.\n transformations: forecasting transformations to\ + \ append stats gen engine to.\n\n Returns:\n A named tuple of CSV or\ + \ BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ \ import collections\n from google.cloud import aiplatform\n from google.cloud\ \ import aiplatform_v1beta1 as aip\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ + \n # TODO(b/261504514) Remove this handling when we use the FTE transform\ + \ config.\n transformations['stats_gen_execution_engine'] = stats_gen_execution_engine\n\ \n if not model_display_name:\n model_display_name = _DEFAULT_MODEL_DISPLAY_NAME\n\ \n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ @@ -7016,10 +7118,10 @@ deploymentSpec: \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ \ return collections.namedtuple(\n 'Outputs',\n [\n \ \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ - \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ - \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ - \n" - image: python:3.7-slim 
+ \ 'model_display_name',\n 'transformations',\n ],\n\ + \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ + \ model_display_name,\n transformations,\n )\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -7065,7 +7167,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-string-not-empty: container: args: @@ -7131,7 +7233,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -7167,7 +7269,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -7187,7 +7289,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7210,7 +7314,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. 
name: time-series-dense-encoder-forecasting @@ -7252,6 +7356,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--available_at_forecast_columns: componentInputParameter: available_at_forecast_columns @@ -7337,6 +7444,10 @@ root: taskOutputParameter: outputParameterKey: data_source_csv_filenames producerTask: set-optional-inputs + pipelinechannel--set-optional-inputs-transformations: + taskOutputParameter: + outputParameterKey: transformations + producerTask: set-optional-inputs pipelinechannel--stage_1_num_parallel_trials: componentInputParameter: stage_1_num_parallel_trials pipelinechannel--stage_1_tuner_worker_pool_specs_override: @@ -7359,8 +7470,8 @@ root: componentInputParameter: time_column pipelinechannel--time_series_attribute_columns: componentInputParameter: time_series_attribute_columns - pipelinechannel--time_series_identifier_column: - componentInputParameter: time_series_identifier_column + pipelinechannel--time_series_identifier_columns: + componentInputParameter: time_series_identifier_columns pipelinechannel--timestamp_split_key: componentInputParameter: timestamp_split_key pipelinechannel--train_budget_milli_node_hours: @@ -7403,10 +7514,21 @@ root: componentInputParameter: model_display_name project: componentInputParameter: project + stats_gen_execution_engine: + runtimeValue: + constant: bigquery + transformations: + componentInputParameter: transformations taskInfo: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex model to upload the model as a version to. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact @@ -7716,11 +7838,11 @@ root: same time series.' isOptional: true parameterType: LIST - time_series_identifier_column: - description: 'The column that distinguish the different + time_series_identifier_columns: + description: 'The columns that distinguish the different time series.' - parameterType: STRING + parameterType: LIST timestamp_split_key: defaultValue: '' description: The timestamp_split column name. 
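Both forecasting pipeline specs above replace the STRING input time_series_identifier_column with the LIST input time_series_identifier_columns, and the utils.py diff that follows keeps the singular argument only as a deprecated alias that is wrapped into a one-element list with a logged warning. A hedged example of calling the updated helper with the new plural field is shown below; the project, bucket, table and column values are placeholders, and the return value is assumed to be the usual (compiled template path, parameter values) pair.

# Illustrative call site; all values are placeholders, not taken from the patch.
from google_cloud_pipeline_components.preview.automl.forecasting import utils

# Assumed to return (compiled pipeline template path, parameter_values dict).
template_path, parameter_values = (
    utils.get_time_series_dense_encoder_forecasting_pipeline_and_parameters(
        project='my-project',
        location='us-central1',
        root_dir='gs://my-bucket/pipeline_root',
        target_column='sales',
        optimization_objective='minimize-rmse',
        transformations={'auto': ['sales', 'date', 'store_id']},
        train_budget_milli_node_hours=1000,
        time_column='date',
        # New plural field; passing the deprecated singular
        # time_series_identifier_column instead still works, but the shim in
        # utils.py below logs a warning and wraps it into a one-element list.
        time_series_identifier_columns=['store_id'],
        data_source_bigquery_table_path='bq://my-project.my_dataset.sales',
    )
)
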
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py index 2cf4444e5a..22b4a96eb9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/utils.py @@ -1,5 +1,6 @@ """Util functions for Vertex Forecasting pipelines.""" +import logging import os import pathlib from typing import Any, Dict, FrozenSet, List, Optional, Tuple @@ -22,7 +23,8 @@ def _get_base_forecasting_parameters( transformations: Dict[str, List[str]], train_budget_milli_node_hours: float, time_column: str, - time_series_identifier_column: str, + time_series_identifier_columns: List[str], + time_series_identifier_column: Optional[str] = None, time_series_attribute_columns: Optional[List[str]] = None, available_at_forecast_columns: Optional[List[str]] = None, unavailable_at_forecast_columns: Optional[List[str]] = None, @@ -81,6 +83,14 @@ def _get_base_forecasting_parameters( if not stage_2_trainer_worker_pool_specs_override: stage_2_trainer_worker_pool_specs_override = [] + if time_series_identifier_column: + logging.warning( + 'Deprecation warning: `time_series_identifier_column` will soon be' + ' deprecated in favor of `time_series_identifier_columns`. Please' + ' migrate workloads to use the new field.' + ) + time_series_identifier_columns = [time_series_identifier_column] + parameter_values = {} parameters = { 'project': project, @@ -93,7 +103,7 @@ def _get_base_forecasting_parameters( 'transformations': transformations, 'train_budget_milli_node_hours': train_budget_milli_node_hours, 'time_column': time_column, - 'time_series_identifier_column': time_series_identifier_column, + 'time_series_identifier_columns': time_series_identifier_columns, 'time_series_attribute_columns': time_series_attribute_columns, 'available_at_forecast_columns': available_at_forecast_columns, 'unavailable_at_forecast_columns': unavailable_at_forecast_columns, @@ -184,7 +194,8 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( transformations: Dict[str, List[str]], train_budget_milli_node_hours: float, time_column: str, - time_series_identifier_column: str, + time_series_identifier_columns: List[str], + time_series_identifier_column: Optional[str] = None, time_series_attribute_columns: Optional[List[str]] = None, available_at_forecast_columns: Optional[List[str]] = None, unavailable_at_forecast_columns: Optional[List[str]] = None, @@ -202,7 +213,6 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( data_source_csv_filenames: Optional[str] = None, data_source_bigquery_table_path: Optional[str] = None, predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, training_fraction: Optional[float] = None, validation_fraction: Optional[float] = None, test_fraction: Optional[float] = None, @@ -251,8 +261,10 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time + time_series_identifier_columns: The columns which distinguish different time series. + time_series_identifier_column: [Deprecated] The column which distinguishes + different time series. 
time_series_attribute_columns: The columns that are invariant across the same time series. available_at_forecast_columns: The columns that are available at the @@ -279,7 +291,6 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. training_fraction: The training fraction. validation_fraction: The validation fraction. test_fraction: The test fraction. @@ -342,6 +353,7 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( train_budget_milli_node_hours=train_budget_milli_node_hours, time_column=time_column, dataflow_service_account=dataflow_service_account, + time_series_identifier_columns=time_series_identifier_columns, time_series_identifier_column=time_series_identifier_column, time_series_attribute_columns=time_series_attribute_columns, available_at_forecast_columns=available_at_forecast_columns, @@ -359,7 +371,6 @@ def get_learn_to_learn_forecasting_pipeline_and_parameters( data_source_csv_filenames=data_source_csv_filenames, data_source_bigquery_table_path=data_source_bigquery_table_path, predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, training_fraction=training_fraction, validation_fraction=validation_fraction, test_fraction=test_fraction, @@ -409,7 +420,8 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( transformations: Dict[str, List[str]], train_budget_milli_node_hours: float, time_column: str, - time_series_identifier_column: str, + time_series_identifier_columns: List[str], + time_series_identifier_column: Optional[str] = None, time_series_attribute_columns: Optional[List[str]] = None, available_at_forecast_columns: Optional[List[str]] = None, unavailable_at_forecast_columns: Optional[List[str]] = None, @@ -427,7 +439,6 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( data_source_csv_filenames: Optional[str] = None, data_source_bigquery_table_path: Optional[str] = None, predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, training_fraction: Optional[float] = None, validation_fraction: Optional[float] = None, test_fraction: Optional[float] = None, @@ -476,8 +487,10 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time + time_series_identifier_columns: The columns which distinguish different time series. + time_series_identifier_column: [Deprecated] The column which distinguishes + different time series. time_series_attribute_columns: The columns that are invariant across the same time series. available_at_forecast_columns: The columns that are available at the @@ -504,7 +517,6 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. training_fraction: The training fraction. validation_fraction: The validation fraction. test_fraction: The test fraction. 
@@ -568,6 +580,7 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( train_budget_milli_node_hours=train_budget_milli_node_hours, time_column=time_column, dataflow_service_account=dataflow_service_account, + time_series_identifier_columns=time_series_identifier_columns, time_series_identifier_column=time_series_identifier_column, time_series_attribute_columns=time_series_attribute_columns, available_at_forecast_columns=available_at_forecast_columns, @@ -585,7 +598,6 @@ def get_time_series_dense_encoder_forecasting_pipeline_and_parameters( data_source_csv_filenames=data_source_csv_filenames, data_source_bigquery_table_path=data_source_bigquery_table_path, predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, training_fraction=training_fraction, validation_fraction=validation_fraction, test_fraction=test_fraction, @@ -635,7 +647,8 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( transformations: Dict[str, List[str]], train_budget_milli_node_hours: float, time_column: str, - time_series_identifier_column: str, + time_series_identifier_columns: List[str], + time_series_identifier_column: Optional[str] = None, time_series_attribute_columns: Optional[List[str]] = None, available_at_forecast_columns: Optional[List[str]] = None, unavailable_at_forecast_columns: Optional[List[str]] = None, @@ -652,7 +665,6 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( data_source_csv_filenames: Optional[str] = None, data_source_bigquery_table_path: Optional[str] = None, predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, training_fraction: Optional[float] = None, validation_fraction: Optional[float] = None, test_fraction: Optional[float] = None, @@ -695,8 +707,10 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time + time_series_identifier_columns: The columns which distinguish different time series. + time_series_identifier_column: [Deprecated] The column which distinguishes + different time series. time_series_attribute_columns: The columns that are invariant across the same time series. available_at_forecast_columns: The columns that are available at the @@ -722,7 +736,6 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. training_fraction: The training fraction. validation_fraction: The validation fraction. test_fraction: The test fraction. 
@@ -773,6 +786,7 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( train_budget_milli_node_hours=train_budget_milli_node_hours, time_column=time_column, dataflow_service_account=dataflow_service_account, + time_series_identifier_columns=time_series_identifier_columns, time_series_identifier_column=time_series_identifier_column, time_series_attribute_columns=time_series_attribute_columns, available_at_forecast_columns=available_at_forecast_columns, @@ -789,7 +803,6 @@ def get_temporal_fusion_transformer_forecasting_pipeline_and_parameters( data_source_csv_filenames=data_source_csv_filenames, data_source_bigquery_table_path=data_source_bigquery_table_path, predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, training_fraction=training_fraction, validation_fraction=validation_fraction, test_fraction=test_fraction, @@ -834,7 +847,8 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( transformations: Dict[str, List[str]], train_budget_milli_node_hours: float, time_column: str, - time_series_identifier_column: str, + time_series_identifier_columns: List[str], + time_series_identifier_column: Optional[str] = None, time_series_attribute_columns: Optional[List[str]] = None, available_at_forecast_columns: Optional[List[str]] = None, unavailable_at_forecast_columns: Optional[List[str]] = None, @@ -852,7 +866,6 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( data_source_csv_filenames: Optional[str] = None, data_source_bigquery_table_path: Optional[str] = None, predefined_split_key: Optional[str] = None, - timestamp_split_key: Optional[str] = None, training_fraction: Optional[float] = None, validation_fraction: Optional[float] = None, test_fraction: Optional[float] = None, @@ -895,8 +908,10 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. time_column: The column that indicates the time. - time_series_identifier_column: The column which distinguishes different time + time_series_identifier_columns: The columns which distinguish different time series. + time_series_identifier_column: [Deprecated] The column which distinguishes + different time series. time_series_attribute_columns: The columns that are invariant across the same time series. available_at_forecast_columns: The columns that are available at the @@ -923,7 +938,6 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( data_source_bigquery_table_path: The BigQuery table path of format bq://bq_project.bq_dataset.bq_table predefined_split_key: The predefined_split column name. - timestamp_split_key: The timestamp_split column name. training_fraction: The training fraction. validation_fraction: The validation fraction. test_fraction: The test fraction. 
@@ -971,6 +985,7 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( train_budget_milli_node_hours=train_budget_milli_node_hours, time_column=time_column, dataflow_service_account=dataflow_service_account, + time_series_identifier_columns=time_series_identifier_columns, time_series_identifier_column=time_series_identifier_column, time_series_attribute_columns=time_series_attribute_columns, available_at_forecast_columns=available_at_forecast_columns, @@ -988,7 +1003,6 @@ def get_sequence_to_sequence_forecasting_pipeline_and_parameters( data_source_csv_filenames=data_source_csv_filenames, data_source_bigquery_table_path=data_source_bigquery_table_path, predefined_split_key=predefined_split_key, - timestamp_split_key=timestamp_split_key, training_fraction=training_fraction, validation_fraction=validation_fraction, test_fraction=test_fraction, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml index b10b4b421a..4993452158 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -35,6 +35,7 @@ # optimization_objective: str # optimization_objective_precision_value: float [Default: -1.0] # optimization_objective_recall_value: float [Default: -1.0] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -1515,6 +1516,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-tabular-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1546,6 +1549,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--purge-unused-features-output_metadata: artifactType: schemaTitle: system.Artifact @@ -2170,6 +2177,8 @@ components: taskOutputArtifact: outputArtifactKey: unmanaged_container_model producerTask: automl-tabular-ensemble-2 + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: taskOutputParameter: @@ -2370,6 +2379,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--purge-unused-features-output_metadata: artifactType: schemaTitle: system.Artifact @@ -2596,6 +2609,8 @@ components: artifacts: explanation_metadata_artifact: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model parameters: @@ -2623,6 +2638,10 @@ components: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 
parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: parameterType: STRUCT @@ -3919,6 +3938,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--purge-unused-features-output_metadata: taskOutputArtifact: outputArtifactKey: output_metadata @@ -4045,6 +4066,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--purge-unused-features-output_metadata: taskOutputArtifact: outputArtifactKey: output_metadata @@ -4340,6 +4363,11 @@ components: taskInfo: name: tabular-stats-and-example-gen inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--additional_experiments: parameterType: STRUCT @@ -8152,6 +8180,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8204,6 +8237,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8256,6 +8294,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8768,9 +8811,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8811,9 +8854,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", 
"{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8854,7 +8897,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8866,7 +8909,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8895,7 +8938,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8907,7 +8950,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", 
\"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8936,7 +8979,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8948,7 +8991,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8977,7 +9020,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -8992,7 +9035,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9001,7 +9044,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9010,7 +9053,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9030,9 +9073,9 @@ deploymentSpec: \"encryption_spec\": 
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9077,9 +9120,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9124,7 +9167,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9145,7 +9188,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_disk_size_gb=", 
"{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -9176,7 +9219,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9197,7 +9240,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -10438,12 +10481,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -10465,12 +10510,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-3: container: args: @@ -10492,12 +10539,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-purge-unused-features: container: args: @@ -10668,7 +10717,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": 
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", @@ -10681,7 +10730,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", @@ -10714,7 +10763,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": @@ -10747,7 +10796,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", @@ -10900,6 +10949,9 @@ root: componentRef: name: comp-exit-handler-1 inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--additional_experiments: componentInputParameter: additional_experiments @@ -11024,6 +11076,13 @@ root: taskInfo: name: exit-handler-1 inputDefinitions: + artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex model to upload 
this model as a version of. + isOptional: true parameters: additional_experiments: description: Use this field to config private preview features. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml index c625e042bc..8889594111 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -40,6 +40,7 @@ # optimization_objective: str # optimization_objective_precision_value: float [Default: -1.0] # optimization_objective_recall_value: float [Default: -1.0] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -1018,6 +1019,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-tabular-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1109,6 +1112,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--cv_trainer_worker_pool_specs_override: parameterType: LIST @@ -1735,6 +1742,8 @@ components: taskOutputArtifact: outputArtifactKey: unmanaged_container_model producerTask: automl-tabular-ensemble-2 + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: taskOutputParameter: @@ -1875,6 +1884,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--split-materialized-data-materialized_eval_split: artifactType: schemaTitle: system.Artifact @@ -2077,6 +2090,8 @@ components: artifacts: explanation_metadata_artifact: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model parameters: @@ -2104,6 +2119,10 @@ components: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: parameterType: STRUCT @@ -2514,6 +2533,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model parameters: pipelinechannel--cv_trainer_worker_pool_specs_override: componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override @@ -2644,6 +2665,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--split-materialized-data-materialized_eval_split: taskOutputArtifact: outputArtifactKey: 
materialized_eval_split @@ -2873,6 +2896,11 @@ components: taskInfo: name: check-if-is-stage-1-tuning-result-artifact-uri-not-empty inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--apply_feature_selection_tuning: parameterType: BOOLEAN @@ -3481,6 +3509,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -3571,12 +3605,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an + + exception if used - use the "time_series_identifier_column" field - time series identifier column.' + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -3656,11 +3697,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -3695,6 +3750,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -5634,6 +5698,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -5686,6 +5755,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -6000,12 +6074,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' 
isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -6228,12 +6310,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -6275,9 +6365,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6318,9 +6408,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6361,7 +6451,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + 
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -6373,7 +6463,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -6402,7 +6492,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -6414,7 +6504,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -6443,7 +6533,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6458,7 +6548,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -6467,7 +6557,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -6487,9 +6577,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -6909,7 +6999,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -6926,6 +7018,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": 
["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -6940,8 +7033,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -6949,6 +7044,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -6978,8 +7074,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6995,7 +7091,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + 
image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -7460,12 +7556,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -7487,12 +7585,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -7505,8 +7605,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -7549,7 +7649,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -7595,7 +7695,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-string-not-empty: container: args: @@ -7645,7 +7745,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7668,7 +7770,7 @@ deploymentSpec: ["--temporal_total_weight=", 
"{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-training-configurator-and-validator-2: container: args: @@ -7688,7 +7790,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -7711,7 +7815,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The AutoML Tabular pipeline v2. name: automl-tabular-v2 @@ -7761,6 +7865,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--apply_feature_selection_tuning: componentInputParameter: apply_feature_selection_tuning @@ -7918,6 +8025,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex Model to upload this model as a version to. 
+ isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py index c1f753bd03..3655e1dcc4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -116,7 +116,7 @@ def tabular_feature_ranking_and_selection( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["feature_selection", "--data_source=', data_source.uri, '", "--target_column=', @@ -153,7 +153,7 @@ def tabular_feature_ranking_and_selection( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py index 4f93bbf285..634e9b4184 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -17,9 +17,6 @@ from typing import Optional from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Dataset -from kfp.dsl import Output @dsl.container_component @@ -27,22 +24,23 @@ def feature_transform_engine( root_dir: str, project: str, location: str, - dataset_stats: Output[Artifact], - materialized_data: Output[Dataset], - transform_output: Output[Artifact], + dataset_stats: dsl.Output[dsl.Artifact], + materialized_data: dsl.Output[dsl.Dataset], + transform_output: dsl.Output[dsl.Artifact], split_example_counts: dsl.OutputPath(str), - instance_schema: Output[Artifact], - training_schema: Output[Artifact], + instance_schema: dsl.Output[dsl.Artifact], + training_schema: dsl.Output[dsl.Artifact], bigquery_train_split_uri: dsl.OutputPath(str), bigquery_validation_split_uri: dsl.OutputPath(str), bigquery_test_split_uri: dsl.OutputPath(str), bigquery_downsampled_test_split_uri: dsl.OutputPath(str), - feature_ranking: Output[Artifact], + feature_ranking: dsl.Output[dsl.Artifact], gcp_resources: dsl.OutputPath(str), dataset_level_custom_transformation_definitions: Optional[list] = [], dataset_level_transformations: Optional[list] = [], forecasting_time_column: Optional[str] = '', - forecasting_time_series_identifier_column: Optional[str] = '', + forecasting_time_series_identifier_column: Optional[str] = None, + forecasting_time_series_identifier_columns: Optional[list] = [], forecasting_time_series_attribute_columns: Optional[list] = [], forecasting_unavailable_at_forecast_columns: Optional[list] = [], forecasting_available_at_forecast_columns: Optional[list] = [], @@ -59,6 +57,7 @@ def feature_transform_engine( training_fraction: Optional[float] = -1, validation_fraction: 
Optional[float] = -1, test_fraction: Optional[float] = -1, + stats_gen_execution_engine: Optional[str] = 'dataflow', tf_transform_execution_engine: Optional[str] = 'dataflow', tf_auto_transform_features: Optional[dict] = {}, tf_custom_transformation_definitions: Optional[list] = [], @@ -68,11 +67,14 @@ def feature_transform_engine( weight_column: Optional[str] = '', prediction_type: Optional[str] = '', model_type: Optional[str] = None, - multimodal_image_columns: Optional[list] = [], + multimodal_tabular_columns: Optional[list] = [], + multimodal_timeseries_columns: Optional[list] = [], multimodal_text_columns: Optional[list] = [], + multimodal_image_columns: Optional[list] = [], run_distill: Optional[bool] = False, run_feature_selection: Optional[bool] = False, feature_selection_algorithm: Optional[str] = 'AMI', + feature_selection_execution_engine: Optional[str] = 'dataflow', materialized_examples_format: Optional[str] = 'tfrecords_gzip', max_selected_features: Optional[int] = 1000, data_source_csv_filenames: Optional[str] = '', @@ -189,8 +191,12 @@ def feature_transform_engine( our target_column. Must be one of * 'DAY' * 'WEEK' output_column: Name of our output feature. forecasting_time_column: Forecasting time column. - forecasting_time_series_identifier_column: Forecasting - time series identifier column. + forecasting_time_series_identifier_column: + [Deprecated] A forecasting time series identifier column. Raises an + exception if used - use the "time_series_identifier_column" field + instead. + forecasting_time_series_identifier_columns: + The list of forecasting time series identifier columns. forecasting_time_series_attribute_columns: Forecasting time series attribute columns. forecasting_unavailable_at_forecast_columns: Forecasting @@ -229,6 +235,9 @@ def feature_transform_engine( training_fraction: Fraction of input data for training. validation_fraction: Fraction of input data for validation. test_fraction: Fraction of input data for testing. + stats_gen_execution_engine: Execution engine to perform + statistics generation. Can be one of: "dataflow" (by default) or + "bigquery". Using "bigquery" as the execution engine is experimental. tf_transform_execution_engine: Execution engine to perform row-level TF transformations. Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the execution engine is experimental and @@ -541,6 +550,7 @@ def feature_transform_engine( IEEE Transactions on pattern analysis and machine intelligence 27, no. 8: 1226-1238. + feature_selection_execution_engine: Execution engine to run feature selection, value can be dataflow, bigquery. materialized_examples_format: The format to use for the materialized examples. Should be either 'tfrecords_gzip' (default) or 'parquet'. @@ -568,9 +578,13 @@ def feature_transform_engine( model_type: Model type, which we wish to engineer features for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults to the empty value, `None`. - multimodal_image_columns: List of multimodal image - columns. Defaults to an empty list. + multimodal_tabular_columns: List of multimodal tabular + columns. Defaults to an empty list + multimodal_timeseries_columns: List of multimodal timeseries + columns. Defaults to an empty list multimodal_text_columns: List of multimodal text + columns. Defaults to an empty list + multimodal_image_columns: List of multimodal image columns. Defaults to an empty list. dataflow_machine_type: The machine type used for dataflow jobs. 
If not set, default to n1-standard-16. @@ -626,7 +640,7 @@ def feature_transform_engine( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', command=[], args=[ 'feature_transform_engine', @@ -647,10 +661,20 @@ def feature_transform_engine( dsl.ConcatPlaceholder( items=['--forecasting_time_column=', forecasting_time_column] ), + dsl.IfPresentPlaceholder( + # Singular time series ID backwards support. + input_name='forecasting_time_series_identifier_column', + then=dsl.ConcatPlaceholder( + items=[ + '--forecasting_time_series_identifier_column=', + forecasting_time_series_identifier_column, + ] + ), + ), dsl.ConcatPlaceholder( items=[ - '--forecasting_time_series_identifier_column=', - forecasting_time_series_identifier_column, + '--forecasting_time_series_identifier_columns=', + forecasting_time_series_identifier_columns, ] ), dsl.ConcatPlaceholder( @@ -729,6 +753,12 @@ def feature_transform_engine( items=['--validation_fraction=', validation_fraction] ), dsl.ConcatPlaceholder(items=['--test_fraction=', test_fraction]), + dsl.ConcatPlaceholder( + items=[ + '--stats_gen_execution_engine=', + stats_gen_execution_engine, + ] + ), dsl.ConcatPlaceholder( items=[ '--tf_transform_execution_engine=', @@ -783,8 +813,14 @@ def feature_transform_engine( ), dsl.ConcatPlaceholder( items=[ - '--multimodal_image_columns=', - multimodal_image_columns, + '--multimodal_tabular_columns=', + multimodal_tabular_columns, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_timeseries_columns=', + multimodal_timeseries_columns, ] ), dsl.ConcatPlaceholder( @@ -793,6 +829,12 @@ def feature_transform_engine( multimodal_text_columns, ] ), + dsl.ConcatPlaceholder( + items=[ + '--multimodal_image_columns=', + multimodal_image_columns, + ] + ), dsl.ConcatPlaceholder(items=['--run_distill=', run_distill]), dsl.ConcatPlaceholder( items=['--run_feature_selection=', run_feature_selection] @@ -819,6 +861,12 @@ def feature_transform_engine( feature_selection_algorithm, ] ), + dsl.ConcatPlaceholder( + items=[ + '--feature_selection_execution_engine=', + feature_selection_execution_engine, + ] + ), dsl.ConcatPlaceholder( items=['--feature_ranking_path=', feature_ranking.uri] ), @@ -921,8 +969,8 @@ def feature_transform_engine( dsl.ConcatPlaceholder( items=['--dataflow_machine_type=', dataflow_machine_type] ), - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', - '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', + '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', dsl.ConcatPlaceholder( items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] ), diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py index a9b09479a8..c749dd2f61 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +++ 
b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -181,7 +181,7 @@ def tabnet_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -189,7 +189,7 @@ def tabnet_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml index e687acd6bf..59208869c2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -31,6 +31,7 @@ # model_description: str [Default: ''] # model_display_name: str [Default: ''] # parallel_trial_count: int +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -528,6 +529,8 @@ components: - get-best-hyperparameter-tuning-job-trial inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -698,6 +701,11 @@ components: taskInfo: name: training-configurator-and-validator inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--bigquery_staging_full_dataset_id: parameterType: STRING @@ -1010,6 +1018,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1100,12 +1114,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1185,11 +1206,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. 
Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1224,6 +1259,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2916,12 +2960,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier - forecasting only.' + column. Used by forecasting only. Raises exception if used - + + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -2963,7 +3015,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2978,7 +3030,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3019,7 +3071,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", 
"{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -3036,6 +3090,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -3050,8 +3105,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -3059,6 +3116,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -3088,8 +3146,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - 
--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3105,7 +3163,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3121,8 +3179,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3179,7 +3237,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-tabnet-study-spec-parameters: container: args: @@ -3695,7 +3753,7 @@ deploymentSpec: \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ \ {extra_override_str} were not found in the params and '\n 'will\ \ be ignored.'\n )\n\n return study_spec_parameters\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-model-batch-predict: container: args: @@ -3949,8 +4007,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3993,7 +4051,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -4039,7 +4097,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-tabnet-hyperparameter-tuning-job: container: args: @@ -4067,11 +4125,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", @@ -4115,7 +4173,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", 
"{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -4138,7 +4198,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. name: automl-tabular-tabnet-hyperparameter-tuning-job @@ -4176,6 +4236,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--bigquery_staging_full_dataset_id: componentInputParameter: bigquery_staging_full_dataset_id @@ -4319,6 +4382,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py index e0ceeb08f9..53956587d9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -204,7 +204,7 @@ def tabnet_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -212,7 +212,7 @@ def tabnet_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml index 32f5b41c9e..f0133e6d52 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -50,6 +50,7 @@ # num_transformer_layers: int [Default: 4.0] # num_transformer_layers_ratio: float [Default: 0.25] # optimization_metric: str [Default: ''] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -490,6 +491,8 @@ components: - tabnet-trainer inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -691,6 +694,11 @@ components: taskInfo: name: training-configurator-and-validator 
inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--alpha_focal_loss: parameterType: NUMBER_DOUBLE @@ -1037,6 +1045,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1127,12 +1141,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1212,11 +1233,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1251,6 +1286,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2983,12 +3027,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -3030,7 +3082,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3045,7 +3097,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3086,7 +3138,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -3103,6 +3157,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -3117,8 +3172,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -3126,6 +3183,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -3155,8 +3213,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3172,7 +3230,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3429,8 +3487,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3473,7 +3531,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -3519,7 +3577,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-tabnet-trainer: container: args: @@ -3537,11 +3595,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230619_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -3603,7 +3661,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -3626,7 +3686,7 @@ deploymentSpec: 
["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 pipelineInfo: description: The TabNet training pipeline. name: automl-tabular-tabnet-trainer @@ -3664,6 +3724,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--alpha_focal_loss: componentInputParameter: alpha_focal_loss @@ -3841,6 +3904,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py index 6f76075d48..caa5ed2ab1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -181,7 +181,7 @@ def wide_and_deep_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -189,7 +189,7 @@ def wide_and_deep_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml index f6c3308c7f..42683860a8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -31,6 +31,7 @@ # model_description: str [Default: ''] # model_display_name: str [Default: ''] # parallel_trial_count: int +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -480,6 +481,8 @@ components: - get-best-hyperparameter-tuning-job-trial inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: 
unmanaged_container_model @@ -650,6 +653,11 @@ components: taskInfo: name: wide-and-deep-hyperparameter-tuning-job inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--bigquery_staging_full_dataset_id: parameterType: STRING @@ -962,6 +970,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1052,12 +1066,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1137,11 +1158,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1176,6 +1211,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2552,12 +2596,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier - forecasting only.' + column. Used by forecasting only. Raises exception if used - + + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -2796,7 +2848,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2811,7 +2863,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -2852,7 +2904,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -2869,6 +2923,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -2883,8 +2938,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -2892,6 +2949,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -2921,8 +2979,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2938,7 +2996,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -2954,8 +3012,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3012,7 +3070,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-wide-and-deep-study-spec-parameters: container: args: @@ -3306,8 +3364,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3350,7 +3408,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -3396,7 +3454,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -3416,7 +3474,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -3439,7 +3499,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-wide-and-deep-hyperparameter-tuning-job: container: args: @@ -3467,11 +3527,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", @@ -3533,6 +3593,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--bigquery_staging_full_dataset_id: componentInputParameter: bigquery_staging_full_dataset_id @@ -3676,6 +3739,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. 
+ isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py index 19eaddb481..bc4b5d00fb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -192,7 +192,7 @@ def wide_and_deep_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -200,7 +200,7 @@ def wide_and_deep_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml index 748711a0dd..06e25c73d9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -50,6 +50,7 @@ # model_display_name: str [Default: ''] # optimization_metric: str [Default: ''] # optimizer_type: str [Default: 'adam'] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -451,6 +452,8 @@ components: - wide-and-deep-trainer inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -648,6 +651,11 @@ components: taskInfo: name: wide-and-deep-trainer inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--batch_size: parameterType: NUMBER_INTEGER @@ -990,6 +998,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1080,12 +1094,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. 
+ isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1165,11 +1186,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1204,6 +1239,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2520,12 +2564,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' 
+ isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -2850,7 +2902,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2865,7 +2917,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -2906,7 +2958,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -2923,6 +2977,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -2937,8 +2992,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", 
"{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -2946,6 +3003,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -2975,8 +3033,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2992,7 +3050,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3205,8 +3263,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3249,7 +3307,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -3295,7 +3353,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -3315,7 +3373,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -3338,7 +3398,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-wide-and-deep-trainer: container: args: @@ -3356,11 +3416,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230619_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", 
"\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -3436,6 +3496,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--batch_size: componentInputParameter: batch_size @@ -3609,6 +3672,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml index 8c3017aa09..13071125a4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -29,6 +29,7 @@ # model_display_name: str [Default: ''] # objective: str # parallel_trial_count: int +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # project: str # root_dir: str @@ -524,6 +525,8 @@ components: - get-best-hyperparameter-tuning-job-trial inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -636,6 +639,11 @@ components: taskInfo: name: xgboost-hyperparameter-tuning-job inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--bigquery_staging_full_dataset_id: parameterType: STRING @@ -948,6 +956,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1038,12 +1052,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1123,11 +1144,25 @@ components: columns. 
Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1162,6 +1197,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2638,12 +2682,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier - forecasting only.' + column. Used by forecasting only. Raises exception if used - + + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -2771,7 +2823,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2818,7 +2870,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", 
"{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -2835,6 +2889,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -2849,8 +2904,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -2858,6 +2915,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -2887,8 +2945,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - 
--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2904,7 +2962,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -2974,7 +3032,7 @@ deploymentSpec: \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\ \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\ \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\ - \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ + \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230817_0125',\n\ \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\ \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\ \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\ @@ -2987,7 +3045,7 @@ deploymentSpec: \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ - \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325',\n\ + \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230817_0125',\n\ \ ],\n },\n }\n\n # Add optional arguments if set\n if\ \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\ \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\ @@ -3019,8 +3077,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'tensorflow==2.8.0' 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3077,7 +3135,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-prediction-type-for-xgboost: container: args: @@ -3650,8 +3708,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3694,7 +3752,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -3740,7 +3798,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -3760,7 +3818,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -3783,7 +3843,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-xgboost-hyperparameter-tuning-job: container: args: @@ -3851,6 +3911,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--bigquery_staging_full_dataset_id: componentInputParameter: bigquery_staging_full_dataset_id @@ -3994,6 +4057,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. 
+ isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml index 0fc86f8c67..185af0e76a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -49,6 +49,7 @@ # num_parallel_tree: int [Default: 1.0] # objective: str # one_drop: int [Default: 0.0] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # process_type: str [Default: 'default'] # project: str @@ -585,6 +586,8 @@ components: - xgboost-trainer inputs: artifacts: + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -678,6 +681,11 @@ components: taskInfo: name: xgboost-trainer inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--base_score: parameterType: NUMBER_DOUBLE @@ -1048,6 +1056,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1138,12 +1152,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1223,11 +1244,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1262,6 +1297,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. 
@@ -2902,12 +2946,20 @@ components: isOptional: true parameterType: LIST time_series_identifier_column: - defaultValue: '' - description: 'Time series idenfier column. Used by + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - - forecasting only.' + use the "time_series_identifier_column" field instead.' isOptional: true parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST unavailable_at_forecast_columns: defaultValue: [] description: 'The names of the columns that are @@ -2974,7 +3026,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3021,7 +3073,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -3038,6 +3092,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -3052,8 +3107,10 @@ deploymentSpec: - '{"Concat": 
["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -3061,6 +3118,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -3090,8 +3148,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3107,7 +3165,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3233,10 +3291,10 @@ deploymentSpec: \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ \ 'gs://', 
path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ - \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230619_1325'\n\ + \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230817_0125'\n\ \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ - \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230619_1325',\n\ + \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230817_0125',\n\ \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ @@ -3484,8 +3542,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3528,7 +3586,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-split-materialized-data: container: args: @@ -3574,7 +3632,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 exec-training-configurator-and-validator: container: args: @@ -3594,7 +3652,9 @@ deploymentSpec: - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' - - '{"Concat": ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' - '{"Concat": ["--available_at_forecast_columns=", "{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' @@ -3617,7 +3677,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-xgboost-trainer: container: args: @@ -3677,6 +3737,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--base_score: componentInputParameter: base_score @@ -3878,6 +3941,12 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Parent model if this model is uploaded as a version. + isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml index 14c7dd13b2..2a9cb6156a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml @@ -633,8 +633,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -655,7 +655,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-create-dataset-2: container: args: @@ -668,8 +668,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -690,7 +690,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -703,8 +703,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -724,7 +724,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-query-job: container: args: @@ -785,7 +785,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-first-valid: container: args: @@ -815,7 +815,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-model-metadata: container: args: @@ -828,8 +828,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -854,7 +854,7 @@ deploymentSpec: \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ \ options.time_series_id_column,\n options.time_series_data_column,\n\ \ options.horizon,\n )\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-table-location: container: args: @@ -867,8 +867,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -890,7 +890,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-load-table-from-uri: container: args: @@ -903,8 +903,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -931,7 +931,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-maybe-replace-with-default: container: args: @@ -959,7 +959,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-validate-inputs: container: args: @@ -1061,7 +1061,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 pipelineInfo: description: Forecasts using a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-prediction diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml index 5ccd0fc5be..6c1832bafe 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -1766,6 +1766,7 @@ components: name: comp-feature-transform-engine dependentTasks: - bigquery-create-dataset-2 + - wrapped-in-list inputs: parameters: autodetect_csv_schema: @@ -1790,8 +1791,10 @@ components: componentInputParameter: pipelinechannel--window_column forecasting_time_column: componentInputParameter: pipelinechannel--time_column - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + taskOutputParameter: + outputParameterKey: Output + producerTask: wrapped-in-list forecasting_window_max_count: componentInputParameter: pipelinechannel--window_max_count forecasting_window_stride_length: @@ -1926,6 +1929,17 @@ components: componentInputParameter: pipelinechannel--window_stride_length taskInfo: name: validate-inputs + wrapped-in-list: + cachingOptions: + enableCache: true + componentRef: + name: comp-wrapped-in-list + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--time_series_identifier_column + taskInfo: + name: wrapped-in-list inputDefinitions: parameters: pipelinechannel--bigquery_destination_uri: @@ -2175,6 +2189,12 @@ components: \ 27, no.\n 8: 1226-1238." 
isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -2265,12 +2285,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an + + exception if used - use the "time_series_identifier_column" field - time series identifier column.' + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -2350,11 +2377,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -2389,6 +2430,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -3425,6 +3475,16 @@ components: window_stride_length: isOptional: true parameterType: NUMBER_INTEGER + comp-wrapped-in-list: + executorLabel: exec-wrapped-in-list + inputDefinitions: + parameters: + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST deploymentSpec: executors: exec-bigquery-create-dataset: @@ -3439,8 +3499,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3461,7 +3521,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-create-dataset-2: container: args: @@ -3474,8 +3534,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3496,7 +3556,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-create-model-job: container: args: @@ -3535,8 +3595,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3556,7 +3616,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-list-rows: container: args: @@ -3569,8 +3629,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3594,7 +3654,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-list-rows-2: container: args: @@ -3607,8 +3667,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -3632,7 +3692,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-query-job: container: args: @@ -3801,7 +3861,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-2: container: args: @@ -3835,7 +3895,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-3: container: args: @@ -3869,7 +3929,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-4: container: args: @@ -3903,7 +3963,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-5: container: args: @@ -3937,7 +3997,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-6: container: args: @@ -3971,7 +4031,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-serialized-query-parameters: container: args: @@ -4048,7 +4108,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-serialized-query-parameters-2: container: args: @@ -4125,7 +4185,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim + image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-serialized-query-parameters-3: container: args: @@ -4202,7 +4262,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-cond: container: args: @@ -4230,7 +4290,7 @@ deploymentSpec: \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ \ return true_str if predicate else false_str\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-create-metrics-artifact: container: args: @@ -4257,13 +4317,12 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef create_metrics_artifact(\n metrics_rows: List[Dict[str, str]],\n\ \ evaluation_metrics: dsl.Output[dsl.Metrics],\n) -> None:\n \"\"\"\ - Converts the rows of a metrics table into an Artifact.\"\"\"\n # Use the\ - \ Vertex Eval component's Metrics metadata naming from\n\ - \ metric_name_map = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE':\ - \ 'rootMeanSquaredError',\n 'MAPE': 'meanAbsolutePercentageError',\n\ - \ }\n metrics = {metric_name_map[k]: v for k, v in dict(metrics_rows[0]).items()}\n\ - \ evaluation_metrics.metadata = metrics\n\n" - image: python:3.7-slim + Converts the rows of a metrics table into an Artifact.\"\"\"\n metric_name_map\ + \ = {\n 'MAE': 'meanAbsoluteError',\n 'RMSE': 'rootMeanSquaredError',\n\ + \ 'MAPE': 'meanAbsolutePercentageError',\n }\n metrics = {metric_name_map[k]:\ + \ v for k, v in dict(metrics_rows[0]).items()}\n evaluation_metrics.metadata\ + \ = metrics\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-feature-transform-engine: container: args: @@ -4273,7 +4332,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -4290,6 +4351,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", 
"{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -4304,8 +4366,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -4313,6 +4377,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -4342,8 +4407,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -4359,7 +4424,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - 
'{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-get-fte-suffix: container: args: @@ -4372,8 +4437,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -4393,7 +4458,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-table-location: container: args: @@ -4406,8 +4471,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -4429,7 +4494,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-value: container: args: @@ -4456,7 +4521,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-window-query-priority: container: args: @@ -4486,7 +4551,7 @@ deploymentSpec: \ depending on the window number.\"\"\"\n if int(window['window_number'])\ \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-maybe-replace-with-default: container: args: @@ -4514,7 +4579,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-query-with-retry: container: args: @@ -4527,8 +4592,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -4568,7 +4633,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-query-with-retry-2: container: args: @@ -4581,8 +4646,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -4622,7 +4687,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-query-with-retry-3: container: args: @@ -4635,8 +4700,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -4676,7 +4741,7 @@ deploymentSpec: \ 'Query failed with %s. 
Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri: container: args: @@ -4712,7 +4777,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -4748,7 +4813,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-validate-inputs: container: args: @@ -4850,7 +4915,34 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + exec-wrapped-in-list: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - wrapped_in_list + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ + \ in a list.\"\"\"\n return [value]\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 pipelineInfo: description: Trains a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-train diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml index 6cdb273900..843c8412d1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -1420,8 +1420,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -1442,7 +1442,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -1455,8 +1455,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -1476,7 +1476,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-query-job: container: args: @@ -1564,7 +1564,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-build-job-configuration-query-2: container: args: @@ -1598,7 +1598,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-first-valid: container: args: @@ -1628,7 +1628,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-table-location: container: args: @@ -1641,8 +1641,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -1664,7 +1664,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-table-location-2: container: args: @@ -1677,8 +1677,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -1700,7 +1700,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-load-table-from-uri: container: args: @@ -1713,8 +1713,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -1741,7 +1741,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-make-vertex-model-artifact: container: args: @@ -1771,7 +1771,7 @@ deploymentSpec: Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ \ f'/v1/{model_resource_name}')\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-maybe-replace-with-default: container: args: @@ -1799,7 +1799,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-model-batch-predict: container: args: @@ -1884,7 +1884,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return 
collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-table-to-uri-2: container: args: @@ -1920,7 +1920,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-validate-inputs: container: args: @@ -2022,7 +2022,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 pipelineInfo: description: Creates a batch prediction using a Prophet model. name: prophet-predict diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py index 7c3bb6111b..5961bce1cc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -131,15 +131,17 @@ def prophet_trainer( '"machine_spec": {"machine_type": "n1-standard-4"}, ', ( '"container_spec":' - ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", ' + ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", ' ), '"args": ["prophet_trainer", "', - f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "', ( - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", "' + f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "' ), ( - '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325", "' + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "' + ), + ( + '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230817_0125", "' ), '--artifacts_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml index 2fadb6830e..a89f24fe42 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -437,6 +437,7 @@ components: name: comp-feature-transform-engine dependentTasks: - bigquery-create-dataset + - wrapped-in-list inputs: parameters: autodetect_csv_schema: @@ -461,8 +462,10 @@ components: componentInputParameter: pipelinechannel--window_column forecasting_time_column: componentInputParameter: pipelinechannel--time_column - forecasting_time_series_identifier_column: - componentInputParameter: pipelinechannel--time_series_identifier_column + forecasting_time_series_identifier_columns: + taskOutputParameter: + outputParameterKey: Output + producerTask: 
wrapped-in-list forecasting_window_max_count: componentInputParameter: pipelinechannel--window_max_count forecasting_window_stride_length: @@ -689,6 +692,17 @@ components: componentInputParameter: pipelinechannel--window_stride_length taskInfo: name: validate-inputs + wrapped-in-list: + cachingOptions: + enableCache: true + componentRef: + name: comp-wrapped-in-list + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--time_series_identifier_column + taskInfo: + name: wrapped-in-list inputDefinitions: parameters: pipelinechannel--data_granularity_unit: @@ -948,6 +962,12 @@ components: \ 27, no.\n 8: 1226-1238." isOptional: true parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING forecasting_apply_windowing: defaultValue: true description: Whether to apply window strategy. @@ -1038,12 +1058,19 @@ components: isOptional: true parameterType: LIST forecasting_time_series_identifier_column: - defaultValue: '' - description: 'Forecasting + description: '[Deprecated] A forecasting time series identifier column. + Raises an - time series identifier column.' + exception if used - use the "time_series_identifier_column" field + + instead.' isOptional: true parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST forecasting_unavailable_at_forecast_columns: defaultValue: [] description: 'Forecasting @@ -1123,11 +1150,25 @@ components: columns. Defaults to an empty list.' isOptional: true parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST multimodal_text_columns: defaultValue: [] description: 'List of multimodal text - columns. Defaults to an empty list.' + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' isOptional: true parameterType: LIST predefined_split_key: @@ -1162,6 +1203,15 @@ components: should be applied to the dataset.' isOptional: true parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING stratified_split_key: defaultValue: '' description: Stratified split key. @@ -2102,6 +2152,16 @@ components: window_stride_length: isOptional: true parameterType: NUMBER_INTEGER + comp-wrapped-in-list: + executorLabel: exec-wrapped-in-list + inputDefinitions: + parameters: + value: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST deploymentSpec: executors: exec-bigquery-create-dataset: @@ -2116,8 +2176,8 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -2138,7 +2198,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -2151,8 +2211,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -2172,7 +2232,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-bigquery-query-job: container: args: @@ -2233,7 +2293,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-feature-transform-engine: container: args: @@ -2243,7 +2303,9 @@ deploymentSpec: - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' - - '{"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' @@ -2260,6 +2322,7 @@ deploymentSpec: - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' - '{"Concat": ["--validation_fraction=", 
"{{$.inputs.parameters[''validation_fraction'']}}"]}' - '{"Concat": ["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' @@ -2274,8 +2337,10 @@ deploymentSpec: - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", "{{$.inputs.parameters[''model_type'']}}"]}}}' - - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' @@ -2283,6 +2348,7 @@ deploymentSpec: - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' @@ -2312,8 +2378,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2329,7 +2395,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - 
'{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 exec-get-fte-suffix: container: args: @@ -2342,8 +2408,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -2363,7 +2429,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-get-table-location: container: args: @@ -2376,8 +2442,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.34.4'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -2399,7 +2465,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-model-evaluation-regression: container: args: @@ -2508,10 +2574,10 @@ deploymentSpec: ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": - {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325\", + {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125\", ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", - \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325\", - \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230619_1325\", + \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125\", + \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230817_0125\", \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", @@ -2575,7 +2641,7 @@ deploymentSpec: \ if use_bq_prefix:\n 
bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-validate-inputs: container: args: @@ -2677,7 +2743,34 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + exec-wrapped-in-list: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - wrapped_in_list + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ + \ in a list.\"\"\"\n return [value]\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 pipelineInfo: description: Trains one Prophet model per time series. name: prophet-train diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml index 3c4fbb6d46..b251e1779e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -33,6 +33,7 @@ # optimization_objective: str # optimization_objective_precision_value: float [Default: -1.0] # optimization_objective_recall_value: float [Default: -1.0] +# parent_model: system.Artifact # predefined_split_key: str [Default: ''] # prediction_type: str # project: str @@ -1501,6 +1502,8 @@ components: taskOutputArtifact: outputArtifactKey: explanation_metadata_artifact producerTask: automl-tabular-ensemble + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: taskOutputArtifact: outputArtifactKey: unmanaged_container_model @@ -1532,6 +1535,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 pipelinechannel--tabular-stats-and-example-gen-dataset_schema: artifactType: schemaTitle: system.Artifact @@ -2150,6 +2157,8 @@ components: taskOutputArtifact: outputArtifactKey: unmanaged_container_model producerTask: automl-tabular-ensemble-2 + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: taskOutputParameter: @@ -2350,6 +2359,10 @@ components: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: artifactType: schemaTitle: system.Artifact @@ -2570,6 +2583,8 @@ components: artifacts: explanation_metadata_artifact: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact + parent_model: + componentInputArtifact: pipelinechannel--parent_model unmanaged_container_model: componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model parameters: @@ -2597,6 +2612,10 @@ components: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: parameterType: STRUCT @@ -3876,6 +3895,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--tabular-stats-and-example-gen-dataset_schema: taskOutputArtifact: outputArtifactKey: dataset_schema @@ -3998,6 +4019,8 @@ components: taskOutputArtifact: outputArtifactKey: splits producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model pipelinechannel--tabular-stats-and-example-gen-dataset_schema: taskOutputArtifact: outputArtifactKey: dataset_schema @@ -4219,6 +4242,11 @@ components: taskInfo: name: tabular-stats-and-example-gen inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 parameters: pipelinechannel--additional_experiments: parameterType: STRUCT @@ -8029,6 +8057,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8081,6 +8114,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8133,6 +8171,11 @@ components: schemaTitle: system.Artifact schemaVersion: 0.0.1 isOptional: true + parent_model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel @@ -8538,9 +8581,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8581,9 +8624,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8624,7 +8667,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8636,7 +8679,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8665,7 +8708,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8677,7 +8720,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8706,7 +8749,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8718,7 +8761,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8747,7 +8790,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -8762,7 +8805,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8771,7 +8814,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8780,7 +8823,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8800,9 +8843,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8847,9 +8890,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8894,7 +8937,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", 
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -8915,7 +8958,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -8946,7 +8989,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -8967,7 +9010,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -10174,12 +10217,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-2: container: args: @@ -10201,12 +10246,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-model-upload-3: container: 
args: @@ -10228,12 +10275,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-read-input-uri: container: args: @@ -10314,8 +10363,8 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'google-cloud-aiplatform==1.24.1'\ - \ 'kfp==2.0.0-beta.17' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -10358,7 +10407,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: python:3.7-slim + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 exec-string-not-empty: container: args: @@ -10405,7 +10454,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": @@ -10438,7 +10487,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", @@ -10593,6 +10642,9 @@ root: dependentTasks: - set-optional-inputs inputs: + artifacts: + pipelinechannel--parent_model: + componentInputArtifact: parent_model parameters: pipelinechannel--additional_experiments: componentInputParameter: additional_experiments @@ -10744,11 +10796,17 @@ root: name: set-optional-inputs inputDefinitions: artifacts: + parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Vertex Model to upload this model as a version of. 
+ isOptional: true vertex_dataset: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: The Vertex dataset artifact, + description: The Vertex dataset artifact. parameters: additional_experiments: description: Use this field to config private preview features. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py index 716d6f1ba4..448f1187a9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -105,11 +105,11 @@ def automl_tabular_cv_trainer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["l2l_cv_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', ( f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' ' "--training_base_dir=' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py index 1afdbfa157..6e7f0eaeeb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -113,7 +113,7 @@ def automl_tabular_ensemble( ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["ensemble", "--transform_output_path=', transform_output.uri, '", "--model_output_path=', @@ -144,7 +144,7 @@ def automl_tabular_ensemble( '", "--warmup_data=', warmup_data.uri, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', '", "--model_path=', model.uri, '", "--custom_model_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py index ea36d7d297..350da2b07b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -73,7 +73,7 @@ def automl_tabular_finalizer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["cancel_l2l_tuner", "--error_file_path=', root_dir, ( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py index 
8fc6b00ec9..ca147dd5fa 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -33,7 +33,7 @@ def automl_tabular_infra_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230619_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', command=[], args=['--executor_input', '{{$}}'], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py index 29091ded20..b0175154b6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py @@ -52,7 +52,7 @@ def split_materialized_data( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', command=[ 'sh', '-ec', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py index 095837620d..8e0c9a7c60 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -122,11 +122,11 @@ def automl_tabular_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "--feature_selection_result_path=', feature_ranking.uri, '", "--disable_early_stopping=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py index 6c7e915dbe..6013e0d8d1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -174,7 +174,7 @@ def tabular_stats_and_example_gen( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', '", "args": ["stats_generator",', '"--train_spec={\\"prediction_type\\": \\"', prediction_type, @@ -253,7 +253,7 @@ def tabular_stats_and_example_gen( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + 
'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py index d4ff9c5473..7c42727ac1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -17,19 +17,16 @@ from typing import Optional from kfp import dsl -from kfp.dsl import Artifact -from kfp.dsl import Input -from kfp.dsl import Output @dsl.container_component def training_configurator_and_validator( - dataset_stats: Input[Artifact], + dataset_stats: dsl.Input[dsl.Artifact], split_example_counts: str, - training_schema: Input[Artifact], - instance_schema: Input[Artifact], - metadata: Output[Artifact], - instance_baseline: Output[Artifact], + training_schema: dsl.Input[dsl.Artifact], + instance_schema: dsl.Input[dsl.Artifact], + metadata: dsl.Output[dsl.Artifact], + instance_baseline: dsl.Output[dsl.Artifact], target_column: Optional[str] = '', weight_column: Optional[str] = '', prediction_type: Optional[str] = '', @@ -39,7 +36,8 @@ def training_configurator_and_validator( run_evaluation: Optional[bool] = False, run_distill: Optional[bool] = False, enable_probabilistic_inference: Optional[bool] = False, - time_series_identifier_column: Optional[str] = '', + time_series_identifier_column: Optional[str] = None, + time_series_identifier_columns: Optional[list] = [], time_column: Optional[str] = '', time_series_attribute_columns: Optional[list] = [], available_at_forecast_columns: Optional[list] = [], @@ -106,8 +104,11 @@ def training_configurator_and_validator( For example, the mean of a predictive distribution is the point prediction that minimizes RMSE loss. If quantiles are specified, then the quantiles of the distribution are also returned. - time_series_identifier_column: Time series idenfier column. Used by - forecasting only. + time_series_identifier_column: [Deprecated] The time series identifier + column. Used by forecasting only. Raises exception if used - + use the "time_series_identifier_column" field instead. + time_series_identifier_columns: The list of time series identifier columns. + Used by forecasting only. time_column: The column that indicates the time. Used by forecasting only. time_series_attribute_columns: The column names of the time series @@ -143,7 +144,7 @@ def training_configurator_and_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230619_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', command=[], args=[ 'training_configurator_and_validator', @@ -189,10 +190,20 @@ def training_configurator_and_validator( enable_probabilistic_inference, ] ), + dsl.IfPresentPlaceholder( + # Singular time series ID backwards support. 
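+          # When the deprecated singular column is provided it is forwarded via
+          # --time_series_identifier_column; the plural
+          # --time_series_identifier_columns flag below is always emitted.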
+ input_name='time_series_identifier_column', + then=dsl.ConcatPlaceholder( + items=[ + '--time_series_identifier_column=', + time_series_identifier_column, + ] + ), + ), dsl.ConcatPlaceholder( items=[ - '--time_series_identifier_column=', - time_series_identifier_column, + '--time_series_identifier_columns=', + time_series_identifier_columns, ] ), dsl.ConcatPlaceholder(items=['--time_column=', time_column]), diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py index c9ab7ef401..af5542192d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py @@ -116,7 +116,7 @@ def automl_tabular_transform( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230619_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', ( '", "args": ["transform", "--is_mp=true",' ' "--transform_output_artifact_path=' @@ -175,7 +175,7 @@ def automl_tabular_transform( '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230619_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', '", "--dataflow_disk_size_gb=', dataflow_disk_size_gb, '", "--dataflow_subnetwork_fully_qualified=', From f323acf4eba80d9909fa23dfafff0ef8adcf05a9 Mon Sep 17 00:00:00 2001 From: Kevin Naughton Date: Fri, 18 Aug 2023 15:01:51 -0700 Subject: [PATCH 106/253] fix(components): Fix proto reference from range to _range in model evaluation preview utils function PiperOrigin-RevId: 558256414 --- .../preview/model_evaluation/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py index 5dbb96cd86..af413ef3ec 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py @@ -61,7 +61,7 @@ def create_slice_specs_list( ) elif isinstance(value, list): configs[feature] = ModelEvaluationSlice.Slice.SliceSpec.SliceConfig( - range=ModelEvaluationSlice.Slice.SliceSpec.Range( + range_=ModelEvaluationSlice.Slice.SliceSpec.Range( low=value[0], high=value[1] ) ) From c3b9550bf5ab608d1a71cf341fe3d68fbdd2d8b0 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 18 Aug 2023 18:10:29 -0700 Subject: [PATCH 107/253] chore(components): INTERNAL PiperOrigin-RevId: 558295863 --- .../preview/automl/tabular/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py index 764539056a..95f7aff748 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -14,6 +14,8 @@ """Preview AutoML tabular components.""" +import os + from 
google_cloud_pipeline_components.preview.automl.tabular.feature_selection import tabular_feature_ranking_and_selection as FeatureSelectionOp from google_cloud_pipeline_components.preview.automl.tabular.feature_transform_engine import feature_transform_engine as FeatureTransformEngineOp from google_cloud_pipeline_components.preview.automl.tabular.tabnet_hyperparameter_tuning_job import tabnet_hyperparameter_tuning_job as TabNetHyperparameterTuningJobOp @@ -22,6 +24,7 @@ from google_cloud_pipeline_components.preview.automl.tabular.wide_and_deep_trainer import wide_and_deep_trainer as WideAndDeepTrainerOp from google_cloud_pipeline_components.preview.automl.tabular.xgboost_hyperparameter_tuning_job import xgboost_hyperparameter_tuning_job as XGBoostHyperparameterTuningJobOp from google_cloud_pipeline_components.preview.automl.tabular.xgboost_trainer import xgboost_trainer as XGBoostTrainerOp +from kfp import components __all__ = [ 'FeatureSelectionOp', @@ -33,3 +36,9 @@ 'XGBoostHyperparameterTuningJobOp', 'XGBoostTrainerOp', ] + +tabnet_trainer_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join(os.path.dirname(__file__), 'tabnet_trainer_pipeline.yaml') +) From beb66abcf634890f4ce9013d696789e392df0521 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 18 Aug 2023 19:57:34 -0700 Subject: [PATCH 108/253] chore(components): INTERNAL PiperOrigin-RevId: 558309338 --- .../preview/automl/tabular/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py index 95f7aff748..02da0b3e29 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -42,3 +42,11 @@ # the generated file. os.path.join(os.path.dirname(__file__), 'tabnet_trainer_pipeline.yaml') ) + +wide_and_deep_trainer_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), 'wide_and_deep_trainer_pipeline.yaml' + ) +) From 611298a8ee68f406e09009debb909a44de0ae99e Mon Sep 17 00:00:00 2001 From: Googler Date: Sat, 19 Aug 2023 11:37:31 -0700 Subject: [PATCH 109/253] feat(components): Add rlhf and infer pipelines to preview/llm. 
Add llm related components to _implementation/llm PiperOrigin-RevId: 558430903 --- .../_implementation/llm/bulk_inferrer.py | 297 ++++++++++++++ .../_implementation/llm/deploy_llm_model.py | 145 +++++++ .../_implementation/llm/env.py | 40 ++ .../_implementation/llm/function_based.py | 373 ++++++++++++++++++ .../llm/private_text_comparison_importer.py | 86 ++++ .../llm/private_text_importer.py | 93 +++++ .../_implementation/llm/reinforcer.py | 116 ++++++ .../llm/reward_model_trainer.py | 104 +++++ .../llm/supervised_fine_tuner.py | 105 +++++ .../_implementation/llm/upload_llm_model.py | 128 ++++++ .../_implementation/llm/utils.py | 113 ++++++ .../preview/llm/__init__.py | 22 ++ .../preview/llm/infer_pipeline.py | 129 ++++++ .../preview/llm/rlhf_pipeline.py | 301 ++++++++++++++ 14 files changed, 2052 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/deploy_llm_model.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_llm_model.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py new file mode 100644 index 0000000000..4d605ff87b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py @@ -0,0 +1,297 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""KFP Container component that performs bulk inference.""" + +from typing import NamedTuple, Optional + +from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +import kfp + + +@kfp.dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def get_default_bulk_inference_machine_specs( + large_model_reference: str, + use_gpu_defaults: bool = False, + accelerator_type_override: Optional[str] = None, + accelerator_count_override: Optional[int] = None, +) -> NamedTuple( + 'MachineSpec', accelerator_type=str, accelerator_count=int, machine_type=str +): + """Gets default machine specs for bulk inference and overrides params if provided. + + Args: + large_model_reference: Foundational model to use for default specs. + use_gpu_defaults: Whether to get default gpu specs (otherwise will get TPU + specs). + accelerator_type_override: Accelerator type to override the default. + accelerator_count_override: Accelerator count to override the default. + + Returns: + MachineSpec, including accelerator_type, accelerator_count, machine_type. + + Raises: + ValueError: If large_model_reference is invalid or overridden values are + invalid. + """ + # pylint: disable=g-import-not-at-top,redefined-outer-name,reimported + import collections + # pylint: enable=g-import-not-at-top,redefined-outer-name,reimported + + machine_spec = collections.namedtuple( + 'MachineSpec', ['accelerator_type', 'accelerator_count', 'machine_type'] + ) + + # machine types + cloud_tpu = 'cloud-tpu' + ultra_gpu_1g = 'a2-ultragpu-1g' + ultra_gpu_2g = 'a2-ultragpu-2g' + ultra_gpu_4g = 'a2-ultragpu-4g' + ultra_gpu_8g = 'a2-ultragpu-8g' + high_gpu_1g = 'a2-highgpu-1g' + high_gpu_2g = 'a2-highgpu-2g' + high_gpu_4g = 'a2-highgpu-4g' + high_gpu_8g = 'a2-highgpu-8g' + mega_gpu_16g = 'a2-megagpu-16g' + + # accelerator types + tpu_v2 = 'TPU_V2' + tpu_v3 = 'TPU_V3' + nvidia_a100_40g = 'NVIDIA_TESLA_A100' + nvidia_a100_80g = 'NVIDIA_A100_80GB' + tpu_accelerator_types = frozenset([tpu_v2, tpu_v3]) + gpu_accelerator_types = frozenset([nvidia_a100_40g, nvidia_a100_80g]) + valid_accelerator_types = frozenset( + list(gpu_accelerator_types) + list(tpu_accelerator_types) + ) + + # base models + palm_tiny = 'PALM_TINY' + gecko = 'GECKO' + otter = 'OTTER' + bison = 'BISON' + elephant = 'ELEPHANT' + t5_small = 'T5_SMALL' + t5_large = 'T5_LARGE' + t5_xl = 'T5_XL' + t5_xxl = 'T5_XXL' + + def _get_machine_type(accelerator_type: str, accelerator_count: int) -> str: + if accelerator_count < 1: + raise ValueError('accelerator_count must be at least 1.') + + if accelerator_type in tpu_accelerator_types: + return cloud_tpu + + elif accelerator_type == nvidia_a100_40g: + if accelerator_count == 1: + return high_gpu_1g + + elif accelerator_count == 2: + return high_gpu_2g + + elif accelerator_count <= 4: + return high_gpu_4g + + elif accelerator_count <= 8: + return high_gpu_8g + + elif accelerator_count <= 16: + return mega_gpu_16g + + else: + raise ValueError( + f'Too many {accelerator_type} requested. Must be <= 16.' + ) + + elif accelerator_type == nvidia_a100_80g: + if accelerator_count == 1: + return ultra_gpu_1g + + elif accelerator_count == 2: + return ultra_gpu_2g + + elif accelerator_count <= 4: + return ultra_gpu_4g + + elif accelerator_count <= 8: + return ultra_gpu_8g + + else: + raise ValueError( + f'Too many {accelerator_type} requested. Must be <= 8.' 
+ ) + + else: + raise ValueError( + 'accelerator_type_override must be one of' + f' {sorted(valid_accelerator_types)}.' + ) + + accepted_reference_models = frozenset( + [palm_tiny, gecko, otter, bison, elephant, t5_small, t5_xxl] + ) + + # Default GPU specs are based on study here: + # https://docs.google.com/spreadsheets/d/1_ZKqfyLQ5vYrOQH5kfdMb_OoNT48r6vNbqv3dKDxDTw/edit?resourcekey=0-3kgDrn4XDdvlJAc8Kils-Q#gid=255356424 + reference_model_to_model_specs_gpu = { + palm_tiny: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=1, + machine_type=high_gpu_1g, + ), + gecko: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=1, + machine_type=high_gpu_1g, + ), + otter: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=2, + machine_type=high_gpu_2g, + ), + bison: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=8, + machine_type=high_gpu_8g, + ), + elephant: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=8, + machine_type=high_gpu_8g, + ), + t5_small: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=1, + machine_type=high_gpu_1g, + ), + t5_large: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=1, + machine_type=high_gpu_1g, + ), + t5_xl: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=1, + machine_type=high_gpu_1g, + ), + t5_xxl: machine_spec( + accelerator_type=nvidia_a100_40g, + accelerator_count=2, + machine_type=high_gpu_2g, + ), + } + + # Get defaults + if large_model_reference not in accepted_reference_models: + raise ValueError( + 'large_model_reference must be one of' + f' {sorted(accepted_reference_models)}.' + ) + + if use_gpu_defaults: + default_machine_spec = reference_model_to_model_specs_gpu[ + large_model_reference + ] + + else: + # This is the only config available for TPUs in our shared reservation pool. + default_machine_spec = machine_spec( + accelerator_type=tpu_v3, + accelerator_count=32, + machine_type=cloud_tpu, + ) + + # Override default behavior we defer validations of these to the resource + # provisioner. + if any([accelerator_type_override, accelerator_count_override]): + if not all([accelerator_type_override, accelerator_count_override]): + raise ValueError('Accelerator type and count must both be set.') + accelerator_type = accelerator_type_override + accelerator_count = accelerator_count_override + else: + accelerator_type = default_machine_spec.accelerator_type + accelerator_count = default_machine_spec.accelerator_count + + return machine_spec( + accelerator_type, + accelerator_count, + _get_machine_type(accelerator_type, accelerator_count), + ) + + +@kfp.dsl.container_component +def BulkInferrer( # pylint: disable=invalid-name + project: str, + location: str, + inputs_sequence_length: int, + targets_sequence_length: int, + accelerator_type: str, + accelerator_count: int, + machine_type: str, + image_uri: str, + dataset_split: str, + large_model_reference: str, + input_model: str, + input_dataset_path: str, + output_prediction: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + output_prediction_gcs_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation +) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args + """Performs bulk inference. + + Args: + project: Project used to run the job. + location: Location used to run the job. + inputs_sequence_length: Maximum encoder/prefix length. 
Inputs will be padded + or truncated to this length. + targets_sequence_length: Maximum decoder steps. Outputs will be at most this + length. + accelerator_type: Type of accelerator. + accelerator_count: Number of accelerators. + machine_type: Type of machine. + image: Location of reward model Docker image. + input_model: Model to use for inference. + large_model_reference: Predefined model used to create the ``input_model``. + input_dataset_path: Path to dataset to use for inference. + dataset_split: Perform inference on this split of the input dataset. + + Returns: + output_prediction: Where to save the output prediction. + gcp_resources: GCP resources that can be used to track the custom finetuning + job. + """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='BulkInferrer', + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + machine_type=machine_type, + image_uri=image_uri, + args=[ + f'--input_model={input_model}', + f'--input_dataset={input_dataset_path}', + f'--dataset_split={dataset_split}', + f'--large_model_reference={large_model_reference}', + f'--inputs_sequence_length={inputs_sequence_length}', + f'--targets_sequence_length={targets_sequence_length}', + f'--output_prediction={output_prediction}', + f'--output_prediction_gcs_path={output_prediction_gcs_path}', + ], + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/deploy_llm_model.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/deploy_llm_model.py new file mode 100644 index 0000000000..7fbad47ee3 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/deploy_llm_model.py @@ -0,0 +1,145 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Component for deploy_llm_model.""" + +from google_cloud_pipeline_components import _image +from kfp import dsl + + +# pylint: disable=g-import-not-at-top, invalid-name +# pylint: disable=g-doc-args +# pytype: disable=invalid-annotation +# pytype: disable=import-error +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def create_endpoint_and_deploy_model( + project: str, + location: str, + model_resource_name: str, + display_name: str, + regional_endpoint: str, + endpoint_resource_name: dsl.OutputPath(str), + create_endpoint_gcp_resources: dsl.OutputPath(str), + deploy_model_gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: str = '', + service_account: str = '', + deploy_model: bool = True, +): + """Creates a vertex endpoint and deploy the specified model. + + Args: + project: Name of the GCP project. + location: Location for model upload and deployment. + model_resource_name: Path to the created Model on Model Registry. + display_name: Name of the model (shown in Model Registry). 
+ regional_endpoint: Regional API endpoint. + encryption_spec_key_name: Customer-managed encryption key. + service_account: If set, then a custom service account will be used. + deploy_model: Whether to deploy the model to an endpoint. Default is + ``True``. If ``False``, the model will not be deployed and output + artifacts will contain empty strings. + + Returns: + endpoint_resource_name: Path to the created endpoint on Online Prediction. + create_endpoint_gcp_resources: Serialized JSON of GCP resources for + creating an endpoint. + deploy_model_gcp_resources: Serialized JSON of GCP resources for deploying + the model. + """ + import json + import logging + import os + import sys + from typing import Any, Dict + + try: + from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner + except ImportError: + from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner + + def run_lro_remote_runner( + url: str, payload: Dict[str, Any], gcp_resources: str + ) -> Any: + remote_runner = lro_remote_runner.LroRemoteRunner(location) + lro = remote_runner.create_lro(url, json.dumps(payload), gcp_resources) + return remote_runner.poll_lro(lro=lro) + + try: + os.makedirs(os.path.dirname(endpoint_resource_name), exist_ok=True) + + if not deploy_model: + with open(endpoint_resource_name, 'w') as fout: + fout.write('') + return + + regional_endpoint = regional_endpoint.rstrip('/') + + create_endpoint_payload = { + 'displayName': display_name, + } + + pipeline_labels_str = os.getenv('VERTEX_AI_PIPELINES_RUN_LABELS') + if pipeline_labels_str: + create_endpoint_payload['labels'] = json.loads(pipeline_labels_str) + + if encryption_spec_key_name: + create_endpoint_payload['encryption_spec'] = { + 'kms_key_name': encryption_spec_key_name + } + + create_endpoint_lro = run_lro_remote_runner( + url=( + f'{regional_endpoint}/projects/{project}/locations/{location}' + '/endpoints' + ), + payload=create_endpoint_payload, + gcp_resources=create_endpoint_gcp_resources, + ) + + response_endpoint = create_endpoint_lro['response']['name'] + with open(endpoint_resource_name, 'w') as fout: + fout.write(response_endpoint) + + logging.info( + 'Endpoint created successfully. Deploying model %s to endpoint', + model_resource_name, + ) + + deploy_model_payload = { + 'deployedModel': { + 'model': model_resource_name, + 'displayName': display_name, + 'automaticResources': {'minReplicaCount': 1, 'maxReplicaCount': 1}, + } + } + if service_account: + deploy_model_payload['deployedModel']['service_account'] = service_account + + _ = run_lro_remote_runner( + url=f'{regional_endpoint}/{response_endpoint}:deployModel', + payload=deploy_model_payload, + gcp_resources=deploy_model_gcp_resources, + ) + + logging.info('Model deployed successfully!') + except Exception as e: # pylint: disable=broad-exception-caught + if isinstance(e, ValueError): + raise + logging.exception(str(e)) + sys.exit(13) + + +# pytype: enable=import-error +# pytype: enable=invalid-annotation +# pylint: enable=g-doc-args +# pylint: enable=g-import-not-at-top, invalid-name diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py new file mode 100644 index 0000000000..967788f6eb --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py @@ -0,0 +1,40 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A collection of constants shared across components and pipelines.""" +import os + + +def get_private_image_tag() -> str: + return os.getenv('PRIVATE_IMAGE_TAG', 'live') + + +def get_use_test_machine_spec() -> bool: + str_value = os.getenv('USE_TEST_MACHINE_SPEC', 'False') + return str_value.lower() in {'true', '1'} + + +# Variables associated with private images: +CLOUD_ML_REGION = os.getenv('CLOUD_ML_REGION', 'europe-west4') +PRIVATE_ARTIFACT_REGISTRY_PROJECT: str = os.getenv( + 'PRIVATE_ARTIFACT_REGISTRY_PROJECT', 'vertex-ai-restricted' +) +PRIVATE_ARTIFACT_REGISTRY_LOCATION: str = os.getenv( + 'PRIVATE_ARTIFACT_REGISTRY_LOCATION', 'us' +) +PRIVATE_ARTIFACT_REGISTRY: str = os.getenv('PRIVATE_ARTIFACT_REGISTRY', 'rlhf') +PRIVATE_IMAGE_NAME_PREFIX: str = os.getenv('PRIVATE_IMAGE_NAME_PREFIX', 'rlhf_') +PRIVATE_IMAGE_TAG: str = get_private_image_tag() + +# Dataset variables: +TRAIN_SPLIT: str = 'train' diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py new file mode 100644 index 0000000000..9dfb057c1c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -0,0 +1,373 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Python function-based components used in KFP pipelies.""" +import functools +from typing import List, NamedTuple, Optional + +from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components._implementation.llm import env +from kfp import dsl + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_machine_spec( + location: str, + use_test_spec: bool = False, +) -> NamedTuple( + 'MachineSpec', machine_type=str, accelerator_type=str, accelerator_count=int +): + """Returns machine spec to use for a given location. + + Args: + location: Where the machine will run. + use_test_spec: Whether to use a lower resource machine for testing. + + Returns: + Machine spec. + + Raises: + ValueError: If accelerators are requested in an unsupported location. 
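+
+  Example (illustrative, based on the defaults encoded below): calling
+  ``resolve_machine_spec(location='europe-west4')`` yields
+  ``machine_type='cloud-tpu'``, ``accelerator_type='TPU_V3'`` and
+  ``accelerator_count=32``.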
+ """ + outputs = NamedTuple( + 'MachineSpec', + machine_type=str, + accelerator_type=str, + accelerator_count=int, + ) + tpu_regions = {'europe-west4'} + gpu_regions = {'us-central1'} + if use_test_spec: + return outputs( + machine_type='a2-highgpu-1g', + accelerator_type='NVIDIA_TESLA_A100', + accelerator_count=1, + ) + elif location in tpu_regions: + return outputs( + machine_type='cloud-tpu', + accelerator_type='TPU_V3', + accelerator_count=32, + ) + elif location in gpu_regions: + return outputs( + machine_type='a2-ultragpu-8g', + accelerator_type='NVIDIA_A100_80GB', + accelerator_count=8, + ) + raise ValueError( + f'Unsupported accelerator location {location}. Must be one of' + f' {tpu_regions | gpu_regions}.' + ) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_image_uri( + image_name: str, + project: str, + location: str, + artifact_registry: str, + image_name_prefix: str, + tag: str, + accelerator_type: str = '', + accelerator_count: int = 0, +) -> str: + """Generates image uri based on base image name and accelerator type. + + Args: + image_name: Base image name, e.g. ``'sft'`` or ``'reward_model'``. + project: Project that contains the artifact registry. + location: Region that contains the artifact registry. + artifact_registry: Registry that contains Docker images. + image_name_prefix: Text to prepend to the base image name. + tag: Image tag. + accelerator_type: One of the supported accelerator types, e.g. ``'TPU_V3'``. + accelerator_count: Number of accelerators. + + Returns: + Docker image uri + + Raises: + ValueError: if an unsupported accelerator type is provided. + """ + cpu_only_images = { + 'text_importer', + 'text_comparison_importer', + } + + if image_name in cpu_only_images: + accelerator_postfix = '' + elif accelerator_type == 'TPU_V3': + accelerator_postfix = '_tpu' + elif accelerator_type == 'NVIDIA_A100_80GB' and accelerator_count == 8: + accelerator_postfix = '_gpu_test' + else: + accelerator_postfix = '_gpu' + + backup_images = { + 'sft', + 'reward_model', + 'reinforcer', + 'infer', + 'text_importer', + 'text_comparison_importer', + } + if image_name in backup_images and accelerator_postfix != '_gpu_test': + accelerator_postfix += '_backup' + return f'{location}-docker.pkg.dev/{project}/{artifact_registry}/{image_name_prefix}{image_name}{accelerator_postfix}:{tag}' + + +# Resolves image uri from the environment's private artifact registry. +# By default this resolves an image in the vertex private registry. 
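+# With the default env settings above this resolves URIs of the form
+# (illustrative):
+#   us-docker.pkg.dev/vertex-ai-restricted/rlhf/rlhf_reinforcer_tpu_backup:live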
+resolve_private_image_uri = functools.partial( + resolve_image_uri, + project=env.PRIVATE_ARTIFACT_REGISTRY_PROJECT, + location=env.PRIVATE_ARTIFACT_REGISTRY_LOCATION, + artifact_registry=env.PRIVATE_ARTIFACT_REGISTRY, + image_name_prefix=env.PRIVATE_IMAGE_NAME_PREFIX, + tag=env.get_private_image_tag(), +) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_data_paths( + input_dataset: str, +) -> NamedTuple('DataPaths', tfds_data_dir=str, tfds_name=str): + """Resolves dataset paths needed by downstream components.""" + # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported + import os + # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported + outputs = NamedTuple('DataPaths', tfds_data_dir=str, tfds_name=str) + tfds_data_dir, tfds_name = os.path.split(input_dataset) + return outputs( + tfds_data_dir=tfds_data_dir, + tfds_name=tfds_name, + ) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_reference_model_metadata( + large_model_reference: str, + reference_model_path: Optional[str] = None, +) -> NamedTuple( + 'BaseModelMetadata', + large_model_reference=str, + reference_model_path=str, + reward_model_reference=str, + reward_model_path=str, +): + """Resolves reference model metadata needed by downstream components. + + Args: + large_model_reference: User-provided reference model name. + reference_model_path: Optional path to a tuned based model to use in place + of the default base model. If specified, the model at this path must be a + tuned version of the base model associated with ``large_model_reference``. + + Returns: + Base model name (used by downstream components to find gin configs and load + vocabularies) and the path to the base model checkpoint. + + Raises: + ValueError: if no metadata exists for the given base model. + """ + + # TODO(latture): Move this logic to a container component and use + # PredefinedModels enum to resolve model paths. 
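+  # Note: the user-provided name is normalized below, e.g. 't5-xxl' resolves
+  # to the key 'T5_XXL'.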
+ outputs = NamedTuple( + 'BaseModelMetadata', + large_model_reference=str, + reference_model_path=str, + reward_model_reference=str, + reward_model_path=str, + ) + reference_model_key = large_model_reference.upper().replace('-', '_') + predefined_model_paths = { + 'PALM_TINY': ( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny/' + ), + 'GECKO': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko/', + 'OTTER': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter/', + 'BISON': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_bison/', + 'ELEPHANT': ( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_elephant/' + ), + 'T5_SMALL': 'gs://t5-data/pretrained_models/t5x/flan_t5_small/', + 'T5_LARGE': 'gs://t5-data/pretrained_models/t5x/flan_t5_large/', + 'T5_XL': 'gs://t5-data/pretrained_models/t5x/flan_t5_xl/', + 'T5_XXL': 'gs://t5-data/pretrained_models/t5x/flan_t5_xxl/', + } + predefined_reward_model_paths = { + 'PALM_TINY': ( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny' + ), + 'GECKO': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko_pretrain', + 'OTTER': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain', + 'ELEPHANT': ( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_elephant/' + ), + 'T5_SMALL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_small', + 'T5_LARGE': 'gs://t5-data/pretrained_models/t5x/t5_1_1_large', + 'T5_XL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_xl', + 'T5_XXL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_xxl', + } + + if reference_model_key not in predefined_model_paths: + raise ValueError( + f'No metadata found for `{reference_model_key}`. ' + f'Base model must be one of {list(predefined_model_paths.keys())}.' + ) + + # Mapping from base model to its corresponding reward model. + reference_model_to_reward_model = { + 'PALM_TINY': 'PALM_TINY', + 'GECKO': 'GECKO', + 'OTTER': 'OTTER', + 'BISON': 'OTTER', + 'ELEPHANT': 'ELEPHANT', + 'T5_SMALL': 'T5_SMALL', + 'T5_LARGE': 'T5_LARGE', + 'T5_XL': 'T5_XL', + 'T5_XXL': 'T5_XXL', + } + + reward_model_key = reference_model_to_reward_model[reference_model_key] + + return outputs( + large_model_reference=reference_model_key, + reference_model_path=( + reference_model_path or predefined_model_paths[reference_model_key] + ), + reward_model_reference=reward_model_key, + reward_model_path=predefined_reward_model_paths[reward_model_key], + ) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def convert_to_delimited_string(items: List[str], delimiter: str = ',') -> str: + """Converts a list of strings to single string delimited by the specified character.""" + return delimiter.join(items) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def generate_default_instruction( + task: str, + target_sequence_length: int, + instruction_override: str = '', +) -> str: + """Generates a default instruction if no override is provided.""" + if instruction_override: + return instruction_override + task = task.lower() + if task == 'summarization': + return f'Summarize in less than {target_sequence_length} words.' + else: + raise ValueError( + f'Task not recognized: {task}. Supported tasks are: summarization.' + ) + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_upload_location(upload_location: Optional[str] = None) -> str: + """Gets the region to upload the model. 
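+
+  # Note: BISON has no dedicated entry in the reward model paths above; per
+  # the mapping below it reuses OTTER's reward model checkpoint.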
+ + Args: + upload_location: User-specified region to upload the model to. + + Returns: + Where to upload the model. If no location is specified, the model will be + uploaded to the region where the pipeline is running. + """ + # pylint: disable=g-import-not-at-top + import os + # pylint: enable=g-import-not-at-top + return upload_location or os.environ['CLOUD_ML_REGION'] + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_regional_endpoint(upload_location: str) -> str: + """Gets the regional endpoint used to upload a model to the registry. + + Args: + upload_location: Region where the model will be uploaded. + + Returns: + Regional endpoint. + """ + return f'https://{upload_location}-aiplatform.googleapis.com/ui' + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_model_display_name( + large_model_reference: str, + model_display_name: Optional[str] = None, +) -> str: + """Gets the model display name shown in the registry and used for endpoints. + + Args: + large_model_reference: Base model tuned by the pipeline. + model_display_name: User-provided display name. If not provided, a default + display name will be created. + + Returns: + Either the user-provided name or a default display name with the form + ``{large_model_reference}-{timestamp}`` + """ + # pylint: disable=g-import-not-at-top + import datetime + # pylint: enable=g-import-not-at-top + now = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + return model_display_name or f'{large_model_reference.lower()}-{now}' + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_deploy_model( + deploy_model: bool, large_model_reference: str +) -> bool: + """Resolves runtime parameter that determines whether the tuned model should be deployed.""" + supported_models = {'BISON'} + if deploy_model and large_model_reference in supported_models: + return True + return False + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def value_exists(value: Optional[str] = None) -> bool: + """Returns whether a runtime parameter was provided. + + Args: + value: That might have been provided. + + Returns: + Whether the string is not None and non-empty. + """ + if not value: + return False + return True + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_candidate_columns( + candidate_columns: Optional[List[str]] = None, +) -> List[str]: + """Returns candidate columns provided by the user or the default: ['candidate_0', 'candidate_1'].""" + return candidate_columns or ['candidate_0', 'candidate_1'] + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_upload_model(large_model_reference: str) -> bool: + """Returns whether the model should be uploaded.""" + supported_models = {'BISON'} + if large_model_reference in supported_models: + return True + return False diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py new file mode 100644 index 0000000000..3c81443af9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_comparison_importer.py @@ -0,0 +1,86 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Container component that imports Tensorflow Datasets.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +import kfp + + +@kfp.dsl.container_component +def PrivateTextComparisonImporter( # pylint: disable=invalid-name + project: str, + location: str, + input_text: str, + inputs_field_name: str, + comma_separated_candidates_field_names: str, + choice_field_name: str, + split: str, + large_model_reference: str, + image_uri: str, + output_dataset_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + machine_type: str = 'e2-highmem-8', + instruction: str = '', +) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args + """Import a text dataset. + + Args: + project: Project used to run the job. + location: Location used to run the job. + input_text: Path to text data. Supports glob patterns. + inputs_field_name: Name of field that contains input text. + comma_separated_candidates_field_names: Comma separated list of fields that + contain candidate text, e.g. ``'field_1,field_2,field_3'``. + choice_field_name: Name of field that specifies the index of the best + candidate. + split: The created seqio task has 1 split, its name is specified by this + argument. + large_model_reference: Predefined model used to create the model to be + trained. This paramerter is used for obtaining model vocabulary because + this component tokenizes and then caches the tokenized tasks. + machine_type: The type of the machine to provision for the custom job. + instruction: Optional instruction to prepend to inputs field. + image_uri: Location of the text comparison importer image. + dataflow_worker_image_uri: Location of the Dataflow worker image. + + Returns: + output_dataset_path: Path to cached SeqIO task created from input dataset. + gcp_resources: GCP resources that can be used to track the custom job. 
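+
+  Example (illustrative): with ``inputs_field_name='prompt'``,
+  ``comma_separated_candidates_field_names='candidate_0,candidate_1'`` and
+  ``choice_field_name='choice'``, each input record is expected to carry a
+  prompt, two candidate completions and the index of the preferred candidate.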
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='TfdsComparisonImporter', + machine_type=machine_type, + image_uri=image_uri, + args=[ + f'--input_text={input_text}', + f'--inputs_field_name={inputs_field_name}', + f'--comma_separated_candidates_field_names={comma_separated_candidates_field_names}', + f'--choice_field_name={choice_field_name}', + f'--split={split}', + f'--output_cache_dir={output_dataset_path}', + f'--instruction={instruction}', + f'--large_model_reference={large_model_reference}', + ( + '--private_bucket_subdir=' + f'{kfp.dsl.PIPELINE_TASK_NAME_PLACEHOLDER}_' + f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + ], + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py new file mode 100644 index 0000000000..ecfd40c0fb --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py @@ -0,0 +1,93 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Container component that imports Tensorflow Datasets.""" +import os +from typing import Optional + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +from kfp import dsl + + +def _resolve_image(default: str = '') -> str: + return os.environ.get('TEXT_IMPORTER_IMAGE_OVERRIDE', default) + +# pytype: disable=unsupported-operands +@dsl.container_component +def PrivateTextImporter( # pylint: disable=invalid-name + project: str, + location: str, + input_text: str, + inputs_field_name: str, + targets_field_name: str, + large_model_reference: str, + imported_data: dsl.Output[dsl.Dataset], # pylint: disable=unused-argument + imported_data_path: dsl.OutputPath(str), # pytype: disable=invalid-annotation + gcp_resources: dsl.OutputPath(str), # pytype: disable=invalid-annotation + instruction: str = '', + image_uri: str = utils.get_default_image_uri('text_importer_backup'), + machine_type: str = 'e2-highmem-8', + output_split_name: str = 'all', + max_num_input_examples: Optional[int] = None, +) -> dsl.ContainerSpec: # pylint: disable=g-doc-args + """Import a text dataset. + + Args: + project: Project used to run the job. + location: Location used to run the job. + input_text: Path to text data. Supports glob patterns. + inputs_field_name: Name of field that contains input text. + targets_field_name: Name of field that contains target text. + large_model_reference: Predefined model used to create the model to be + trained. This paramerter is used for obtaining model vocabulary because + this component tokenizes and then caches the tokenized tasks. + instruction: Optional instruction to prepend to inputs field. 
+ image_uri: Optional location of the text importer image. + machine_type: The type of the machine to provision for the custom job. + output_split_name: The created seqio task has 1 split, its name is specified + by this argument. + max_num_input_examples: Maximum number of examples to import. + + Returns: + imported_data: Artifact representing the imported data and cached Tasks. + imported_data_path: Path to cached SeqIO task created from input dataset. + gcp_resources: Tracker for GCP resources created by this component. + """ + subdir = ( + f'{dsl.PIPELINE_TASK_NAME_PLACEHOLDER}_{dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ) + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='TextImporter', + machine_type=machine_type, + image_uri=_resolve_image(image_uri), + args=[ + f'--input_text={input_text}', + f'--inputs_field_name={inputs_field_name}', + f'--targets_field_name={targets_field_name}', + f'--output_split_name={output_split_name}', + f'--instruction={instruction}', + f'--large_model_reference={large_model_reference}', + f'--private_bucket_subdir={subdir}', + f'--output_dataset_path={dsl.PIPELINE_ROOT_PLACEHOLDER}{subdir}', + f'--imported_data_path={imported_data_path}', + f'--max_num_input_examples={max_num_input_examples}', + '--executor_input={{$.json_escape[1]}}', + ], + ), + gcp_resources=gcp_resources, + ) +# pytype: enable=unsupported-operands diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcer.py new file mode 100644 index 0000000000..82d0ab2543 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcer.py @@ -0,0 +1,116 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
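The importer components above consume jsonl files whose field names are supplied as arguments. As a rough illustration (not a schema definition), records shaped like the following would satisfy the prompt and preference importers, using the field names that the RLHF pipeline later in this series passes in:

    # Illustrative records only; the real field names are whatever is passed to
    # inputs_field_name / comma_separated_candidates_field_names / choice_field_name.
    import json

    prompt_record = {'input_text': 'Summarize: The quick brown fox jumped over the lazy dog.'}

    preference_record = {
        'input_text': 'Summarize: The quick brown fox jumped over the lazy dog.',
        'candidate_0': 'A fox jumped over a dog.',
        'candidate_1': 'Foxes are brown.',
        'choice': 0,  # index of the preferred candidate
    }

    with open('preference_dataset.jsonl', 'w') as f:
        f.write(json.dumps(preference_record) + '\n')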
+"""KFP container component that performs reinforcement learning.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +import kfp + + +@kfp.dsl.container_component +def Reinforcer( # pylint: disable=invalid-name + project: str, + location: str, + train_steps: int, + accelerator_type: str, + accelerator_count: int, + large_model_reference: str, + reward_model_reference: str, + machine_type: str, + image_uri: str, + inputs_sequence_length: int, + targets_sequence_length: int, + input_reference_model_path: str, + input_reward_model_path: str, + input_dataset_path: str, + output_model_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + output_adapter_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + tensorboard_metrics: kfp.dsl.Output[kfp.dsl.Artifact], # pytype: disable=unsupported-operands + gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + train_split: str = 'train', + batch_size: int = 64, + learning_rate_multiplier: float = 1.0, + kl_coeff: float = 0.1, + lora_dim: int = 0, +) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args + """Trains a model using reinforcement learning. + + Args: + project: Project used to run the job. + location: Location used to run the job. + input_reference_model_path: Path to the base model to fine tune. + input_reward_model_path: Path to the reward model to use during + reinforcement learning. + input_dataset_path: Path to training dataset. + train_steps: Number of training steps. These are the number of steps + on top of any steps used to train the base model. + targets_length: Maximum decoder steps. Outputs will be at most this length. + accelerator_type: Type of TPU accelerator. Can be either TPU_V2 or TPU_V3. + accelerator_count: Number of TPU accelerators. + large_model_reference: Predefined model used to create the + ``input_reference_model``. + machine_type: The type of the machine to provision for the custom job. Must + be a valid GCE instance type and compatible with the accelerator type. + image_uri: Location of reinforcement learning Docker image. + inputs_sequence_length: Maximum number of input tokens per row. + targets_sequence_length: Maximum number of target tokens per row. + train_split: Name of the split in the input dataset that contains training + data. Default is ``'train'``. + batch_size: Number of examples in each finetuning step. Default is 64. + kl_coeff: Coefficient for KL penalty. This regularizes the policy model and + penalizes if it diverges from its initial distribution. If set to 0, then + the reference LM is not loaded into memory. + lora_dim: The rank of the LoRA adapter. If >0, then use LoRA-tuning. If =0, + then use full-tuning. + learning_rate_multiplier: Constant multiplied by the base learning rate used + to adjust the learning rate during reinforcement learning. + + Returns: + output_model_path: Path to the trained model checkpoint. + output_adapter_path: Path to the trained model adapter if LoRA tuning was + used. + tensorboard_metrics: Training stats (tensorboard) path. + gcp_resources: GCP resources that can be used to track the custom finetuning + job. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='Reinforcer', + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + machine_type=machine_type, + image_uri=image_uri, + args=[ + f'--input_reference_model_path={input_reference_model_path}', + f'--input_reward_model_path={input_reward_model_path}', + f'--input_dataset_path={input_dataset_path}', + f'--train_steps={train_steps}', + f'--output_model_path={output_model_path}', + f'--output_adapter_path={output_adapter_path}', + f'--tensorboard_metrics_path={tensorboard_metrics.path}', + f'--large_model_reference={large_model_reference}', + f'--reward_model_reference={reward_model_reference}', + f'--inputs_sequence_length={inputs_sequence_length}', + f'--targets_sequence_length={targets_sequence_length}', + f'--train_split={train_split}', + f'--batch_size={batch_size}', + f'--learning_rate_multiplier={learning_rate_multiplier}', + f'--kl_coeff={kl_coeff}', + f'--lora_dim={lora_dim}', + ], + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py new file mode 100644 index 0000000000..cdf0bba4db --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_trainer.py @@ -0,0 +1,104 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP container component that trains a reward model.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +import kfp + + +@kfp.dsl.container_component +def RewardModelTrainer( # pylint: disable=invalid-name + project: str, + location: str, + train_steps: int, + accelerator_type: str, + accelerator_count: int, + large_model_reference: str, + machine_type: str, + image_uri: str, + inputs_sequence_length: int, + targets_sequence_length: int, + input_model_path: str, + input_dataset_path: str, + output_model_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + tensorboard_metrics: kfp.dsl.Output[kfp.dsl.Artifact], # pytype: disable=unsupported-operands + gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + train_split: str = 'train', + batch_size: int = 64, + learning_rate_multiplier: float = 1.0, + lora_dim: int = 0, +) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args + """Trains a reward model. + + Args: + project: Project used to run the job. + location: Location used to run the job. + input_model_path: Path to the base model to fine tune. + input_dataset_path: Path to dataset to use to train a reward model. + train_steps: Number of training steps. These are the number of steps + on top of any steps used to train the base model. 
+ accelerator_type: Type of TPU accelerator. Can be either TPU_V2 or TPU_V3. + accelerator_count: Number of TPU accelerators. + large_model_reference: Predefined model used to create the ``input_model``. + machine_type: The type of the machine to provision for the custom job. Must + be a valid GCE instance type and compatible with the accelerator type. + image_uri: Location of reward model Docker image. + inputs_sequence_length: Maximum number of input tokens per row. + targets_sequence_length: Maximum number of target tokens per row. + train_split: Name of the split in the input dataset that contains training + data. Default is ``'train'``. + batch_size: Number of examples in each finetuning step. Default is 64. + lora_dim: The rank of the LoRA adapter. If >0, then use LoRA-tuning. If =0, + then use full-tuning. + learning_rate_multiplier: Constant multiplied by the base learning rate used + to adjust the learning rate when training a reward model. + + Returns: + output_model: Trained reward model. + tensorboard_metrics: Training stats (tensorboard) path. + gcp_resources: GCP resources that can be used to track the custom finetuning + job. + """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='RewardModelTrainer', + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + machine_type=machine_type, + image_uri=image_uri, + args=[ + f'--train_steps={train_steps}', + f'--input_model_path={input_model_path}', + f'--input_dataset_path={input_dataset_path}', + f'--output_model_path={output_model_path}', + f'--tensorboard_metrics_path={tensorboard_metrics.path}', + f'--large_model_reference={large_model_reference}', + f'--inputs_sequence_length={inputs_sequence_length}', + f'--targets_sequence_length={targets_sequence_length}', + f'--train_split={train_split}', + f'--batch_size={batch_size}', + f'--learning_rate_multiplier={learning_rate_multiplier}', + ( + '--private_bucket_subdir=' + f'{kfp.dsl.PIPELINE_TASK_NAME_PLACEHOLDER}_' + f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + f'--lora_dim={lora_dim}', + ], + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py new file mode 100644 index 0000000000..7254ce2407 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/supervised_fine_tuner.py @@ -0,0 +1,105 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""KFP Container component that performs supervised fine tuning.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.llm import utils +import kfp + + +@kfp.dsl.container_component +def SupervisedFineTuner( # pylint: disable=invalid-name + project: str, + location: str, + train_steps: int, + inputs_sequence_length: int, + targets_sequence_length: int, + accelerator_type: str, + accelerator_count: int, + large_model_reference: str, + machine_type: str, + image_uri: str, + input_model_path: str, + input_dataset_path: str, + output_model_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + tensorboard_metrics: kfp.dsl.Output[kfp.dsl.Artifact], # pytype: disable=unsupported-operands + gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + train_split: str = 'train', + batch_size: int = 64, + learning_rate_multiplier: float = 1.0, + lora_dim: int = 0, +) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args + """Performs supervised fine tuning. + + Args: + project: Project used to run the job. + location: Location used to run the job. + input_model_path: Path to the base model to fine tune. + train_steps: Number of training steps. These are the number of steps on top + of any steps used to train the base model. + accelerator_type: Type of TPU accelerator. Can be either TPU_V2 or TPU_V3. + accelerator_count: Number of TPU accelerators. + input_dataset_path: Path to training dataset + large_model_reference: Predefined model used to create the ``input_model``. + machine_type: The type of the machine to provision for the custom job. Must + be a valid GCE instance type and compatible with the accelerator type. + image_uri: Location of reinforcement learning Docker image. + inputs_sequence_length: Maximum number of input tokens per row. + targets_sequence_length: Maximum number of target tokens per row. + train_split: Name of the split in the input dataset that contains training + data. Default is ``'train'``. + batch_size: The batch size is the number of training examples used to train + a single forward and backward pass. Default is 64. + lora_dim: The rank of the LoRA adapter. If >0, then use LoRA-tuning. If =0, + then use full-tuning. + learning_rate_multiplier: Constant multiplied by the base learning rate used + to adjust the learning rate during supervised fine tuning. + + Returns: + output_model_path: Fine-tuned model path. + tensorboard_metrics: Training stats (tensorboard) path. + gcp_resources: GCP resources that can be used to track the custom finetuning + job. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_payload( + display_name='SupervisedFineTuner', + accelerator_type=accelerator_type, + accelerator_count=accelerator_count, + machine_type=machine_type, + image_uri=image_uri, + args=[ + f'--input_model_path={input_model_path}', + f'--train_steps={train_steps}', + f'--inputs_sequence_length={inputs_sequence_length}', + f'--targets_sequence_length={targets_sequence_length}', + f'--input_dataset_path={input_dataset_path}', + f'--output_model_path={output_model_path}', + f'--tensorboard_metrics_path={tensorboard_metrics.path}', + f'--large_model_reference={large_model_reference}', + f'--train_split={train_split}', + f'--batch_size={batch_size}', + f'--learning_rate_multiplier={learning_rate_multiplier}', + ( + '--private_bucket_subdir=' + f'{kfp.dsl.PIPELINE_TASK_NAME_PLACEHOLDER}_' + f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + f'--lora_dim={lora_dim}', + ], + ), + gcp_resources=gcp_resources, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_llm_model.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_llm_model.py new file mode 100644 index 0000000000..da566b7d45 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_llm_model.py @@ -0,0 +1,128 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Component for upload_llm_model.""" + +from google_cloud_pipeline_components import _image +from kfp import dsl + + +# pylint: disable=g-import-not-at-top, invalid-name, +# pylint: disable=g-doc-args +# pytype: disable=invalid-annotation +# pytype: disable=unsupported-operands +# pytype: disable=import-error +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def upload_llm_model( + project: str, + location: str, + artifact_uri: dsl.Input[dsl.Artifact], + model_reference_name: str, + model_display_name: str, + regional_endpoint: str, + model_resource_name: dsl.OutputPath(str), + gcp_resources: dsl.OutputPath(str), + encryption_spec_key_name: str = '', + upload_model: bool = True, +): + """Uploads LLM model. + + Args: + project: Name of the GCP project. + location: Location for model upload and deployment. + artifact_uri: KFP Artifact for adapter. + model_reference_name: Large model reference name. + model_display_name: Name of the model (shown in Model Registry). + regional_endpoint: Regional API endpoint. + encryption_spec_key_name: Customer-managed encryption key. + upload_model: Whether to upload the model to the Model Registry. Default + is ``True``. If ``False``, the model will not be uploaded and output + artifacts will contain empty strings. + + Returns: + model_resource_name: Path to the created Model on Model Registry. + gcp_resources: Serialized JSON of `gcp_resources`. 
+ """ + import json + import logging + import os + import sys + + try: + from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner + except ImportError: + from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner + + try: + os.makedirs(os.path.dirname(model_resource_name), exist_ok=True) + + if not upload_model: + with open(model_resource_name, 'w') as fout: + fout.write('') + return + + pipeline_labels_str = os.getenv('VERTEX_AI_PIPELINES_RUN_LABELS') + labels = json.loads(pipeline_labels_str) if pipeline_labels_str else {} + labels['google-vertex-llm-tuning-base-model-id'] = ( + model_reference_name.replace('@', '-') + ) + + model_upload_payload = { + 'model': { + 'displayName': model_display_name, + 'largeModelReference': {'name': model_reference_name}, + 'labels': labels, + 'generatedModelSource': {'genie_source': {'base_model_uri': ''}}, + 'artifactUri': artifact_uri.uri, + } + } + if encryption_spec_key_name: + model_upload_payload['model']['encryption_spec'] = { + 'kms_key_name': encryption_spec_key_name + } + + regional_endpoint = regional_endpoint.rstrip('/') + upload_model_uri = ( + f'{regional_endpoint}/projects/{project}/locations/{location}/models:' + 'upload' + ) + + remote_runner = lro_remote_runner.LroRemoteRunner(location) + upload_model_lro = remote_runner.create_lro( + upload_model_uri, + json.dumps(model_upload_payload), + gcp_resources, + ) + upload_model_lro = remote_runner.poll_lro(lro=upload_model_lro) + model_resource = upload_model_lro['response']['model'] + model_version_id = upload_model_lro['response'].get( + 'model_version_id' + ) or upload_model_lro['response'].get('modelVersionId') + if model_version_id: + model_resource += f'@{model_version_id}' + + with open(model_resource_name, 'w') as fout: + fout.write(model_resource) + + except Exception as e: # pylint: disable=broad-exception-caught + if isinstance(e, ValueError): + raise + logging.exception(str(e)) + sys.exit(13) + + +# pytype: enable=import-error +# pytype: enable=unsupported-operands +# pytype: enable=invalid-annotation +# pylint: enable=g-doc-args +# pylint: enable=g-import-not-at-top, invalid-name, broad-exception-caught diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py new file mode 100644 index 0000000000..b22c4051f1 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py @@ -0,0 +1,113 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
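The upload component above assembles a ``models:upload`` request body and then polls the resulting long-running operation. A sketch of the payload it builds, with placeholder values substituted for the runtime inputs:

    # Placeholder values only; at runtime these come from the component inputs.
    model_upload_payload = {
        'model': {
            'displayName': 'bison-2023-08-20-10-07-53',
            'largeModelReference': {'name': 'text-bison@001'},
            'labels': {'google-vertex-llm-tuning-base-model-id': 'text-bison-001'},
            'generatedModelSource': {'genie_source': {'base_model_uri': ''}},
            'artifactUri': 'gs://example-bucket/path/to/adapter',  # hypothetical URI
        }
    }
    # POSTed to f'{regional_endpoint}/projects/{project}/locations/{location}/models:upload';
    # the LRO response's 'model' (plus '@{modelVersionId}' when present) is written to
    # the model_resource_name output.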
+"""Utility functions used to create custom Kubeflow components.""" +import os +from typing import Any + +from google_cloud_pipeline_components._implementation.llm import env +import kfp + + +def build_payload( + *, + display_name: str, + machine_type: str, + image_uri: str, + args: list[str], + accelerator_type: str = '', + accelerator_count: int = 0, +) -> dict[str, Any]: + """Generates payload for a custom training job. + + Args: + display_name: Component display name. Can contain up to 128 UTF-8 + characters. + machine_type: The type of the machine to provision for the custom job. Must + be a valid GCE instance type and compatible with the accelerator type. + image_uri: Docker image URI to use for the custom job. + args: Arguments to pass to the Docker image. + accelerator_type: Type of accelerator. By default no accelerator is + requested. + accelerator_count: Number of accelerators. By default no accelerators are + requested. + + Returns: + Custom job payload. + + Raises: + ValueError: if one of ``accelerator_count`` or ``accelerator_type`` is + specified, but the corresponding field is not valid. + """ + payload = { + 'display_name': display_name, + 'job_spec': { + 'worker_pool_specs': [{ + 'replica_count': '1', + 'machine_spec': {'machine_type': str(machine_type)}, + 'container_spec': {'image_uri': str(image_uri), 'args': args}, + }] + }, + } + + if accelerator_type and accelerator_count: + payload['job_spec']['worker_pool_specs'][0]['machine_spec'][ + 'accelerator_type' + ] = str(accelerator_type) + payload['job_spec']['worker_pool_specs'][0]['machine_spec'][ + 'accelerator_count' + ] = accelerator_count + elif accelerator_type and accelerator_count < 1: + raise ValueError( + 'Accelerator count must be at least 1 if accelerator type ' + f'is specified. Received accelerator_count == {accelerator_count}' + ) + elif accelerator_count and not accelerator_type: + raise ValueError( + 'Accelerator type must be specified if accelerator count is not 0.' + f'Received accelerator_type == {accelerator_type}.' + ) + + return payload + + +def get_temp_location() -> str: + """Gets a task-specific location to store temporary files.""" + return os.path.join( + kfp.dsl.PIPELINE_ROOT_PLACEHOLDER, + kfp.dsl.PIPELINE_JOB_ID_PLACEHOLDER, + kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER, + 'temp', + ) + + +def get_default_image_uri(image_name: str) -> str: + """Gets the default image URI for a given image. + + The URI is resolved using environment variables that define the artifact + registry, image name modifications and tag. This method only works for images + that are not selected dynamically based on accelerator type. This is typically + true for CPU-only images. + + Args: + image_name: Name of the image to resolve. + + Returns: + URI of the image. + """ + return '/'.join([ + f'{env.PRIVATE_ARTIFACT_REGISTRY_LOCATION}-docker.pkg.dev', + env.PRIVATE_ARTIFACT_REGISTRY_PROJECT, + env.PRIVATE_ARTIFACT_REGISTRY, + f'{env.PRIVATE_IMAGE_NAME_PREFIX}{image_name}:{env.get_private_image_tag()}', + ]) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py new file mode 100644 index 0000000000..47592e6fad --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Large-language model preview components."""
+
+from google_cloud_pipeline_components.preview.llm.infer_pipeline import infer_pipeline
+from google_cloud_pipeline_components.preview.llm.rlhf_pipeline import rlhf_pipeline
+
+__all__ = [
+    'infer_pipeline',
+    'rlhf_pipeline',
+]
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py
new file mode 100644
index 0000000000..ae7b50aaef
--- /dev/null
+++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py
@@ -0,0 +1,129 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Pipeline that performs bulk inference using a large-language model."""
+
+from typing import NamedTuple, Optional
+
+from google_cloud_pipeline_components import _placeholders
+from google_cloud_pipeline_components._implementation.llm import bulk_inferrer
+from google_cloud_pipeline_components._implementation.llm import env
+from google_cloud_pipeline_components._implementation.llm import function_based
+from google_cloud_pipeline_components._implementation.llm import private_text_importer
+import kfp
+
+
+PipelineOutput = NamedTuple('Outputs', output_prediction_gcs_path=str)
+
+
+@kfp.dsl.pipeline(
+    name='infer-eval-template',
+    description=(
+        'Performs bulk inference on a dataset using a model checkpoint.'
+    ),
+)
+def infer_pipeline(
+    large_model_reference: str,
+    model_checkpoint: str,
+    prompt_dataset: str,
+    prompt_sequence_length: int = 512,
+    target_sequence_length: int = 64,
+    instruction: Optional[str] = None,
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    location: str = _placeholders.LOCATION_PLACEHOLDER,
+) -> PipelineOutput:
+  """Uses a large-language model to perform bulk inference on a prompt dataset.
+
+  Args:
+    large_model_reference: Name of the base model. Supported values are
+      ``BISON``, ``T5_SMALL``, ``T5_LARGE``, ``T5_XL``, and ``T5_XXL``.
+      ``BISON`` and ``T5_SMALL`` are supported in ``us-central1`` and
+      ``europe-west4``. ``T5_LARGE``, ``T5_XL`` and ``T5_XXL`` are only
+      supported in ``europe-west4``.
+    model_checkpoint: Cloud storage path to the model checkpoint.
+    prompt_dataset: Cloud storage path to an unlabeled prompt dataset used for
+      reinforcement learning. The dataset format is jsonl.
Each example in the + dataset must have an ``input_text`` field that contains the prompt. + prompt_sequence_length: Maximum tokenized sequence length for input text. + Higher values increase memory overhead. This value should be at most 8192. + Default value is 512. + target_sequence_length: Maximum tokenized sequence length for target text. + Higher values increase memory overhead. This value should be at most 1024. + Default value is 64. + instruction: This field lets the model know what task it needs to perform. + Base models have been trained over a large set of varied instructions. You + can give a simple and intuitive description of the task and the model will + follow it, e.g. "Classify this movie review as positive or negative" or + "Translate this sentence to Danish". Do not specify this if your dataset + already prepends the instruction to the inputs field. + project: Project used to run custom jobs. If not specified the project used + to run the pipeline will be used. + location: Location used to run custom jobs. If not specified the location + used to run the pipeline will be used. + + Returns: + Cloud storage path to output predictions. + """ + prompt_column = 'input_text' + machine_spec = function_based.resolve_machine_spec( + location=location, + use_test_spec=env.get_use_test_machine_spec(), + ) + reference_model_metadata = function_based.resolve_reference_model_metadata( + large_model_reference=large_model_reference + ).set_display_name('BaseModelMetadataResolver') + + prompt_dataset_image_uri = function_based.resolve_private_image_uri( + image_name='text_importer', + ).set_display_name('PromptDatasetImageUriResolver') + prompt_dataset_importer = private_text_importer.PrivateTextImporter( + project=project, + location=location, + input_text=prompt_dataset, + inputs_field_name=prompt_column, + targets_field_name='', # ignore targets_field_name + output_split_name=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + image_uri=prompt_dataset_image_uri.output, + instruction=instruction, + ).set_display_name('PromptDatasetImporter') + + bulk_inferrer_image_uri = function_based.resolve_private_image_uri( + image_name='infer', + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + ).set_display_name('BulkInferrerImageUriResolver') + bulk_inference = bulk_inferrer.BulkInferrer( + project=project, + location=location, + input_model=model_checkpoint, + input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'], + dataset_split=env.TRAIN_SPLIT, + inputs_sequence_length=prompt_sequence_length, + targets_sequence_length=target_sequence_length, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + machine_type=machine_spec.outputs['machine_type'], + image_uri=bulk_inferrer_image_uri.output, + ).set_display_name('Bulk Inferrer') + + return PipelineOutput( + output_prediction_gcs_path=bulk_inference.outputs[ + 'output_prediction_gcs_path' + ] + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py new file mode 100644 index 0000000000..1dc94103c5 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py @@ -0,0 
+1,301 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Defines a pipeline that performs reinforcement learning from human feedback."""
+
+import json
+from typing import NamedTuple, Optional
+
+from google_cloud_pipeline_components import _placeholders
+from google_cloud_pipeline_components._implementation.llm import deploy_llm_model
+from google_cloud_pipeline_components._implementation.llm import env
+from google_cloud_pipeline_components._implementation.llm import function_based
+from google_cloud_pipeline_components._implementation.llm import private_text_comparison_importer
+from google_cloud_pipeline_components._implementation.llm import private_text_importer
+from google_cloud_pipeline_components._implementation.llm import reinforcer
+from google_cloud_pipeline_components._implementation.llm import reward_model_trainer
+from google_cloud_pipeline_components._implementation.llm import upload_llm_model
+from google_cloud_pipeline_components.preview.llm import infer_pipeline
+import kfp
+
+
+PipelineOutput = NamedTuple(
+    'Outputs', model_resource_name=str, endpoint_resource_name=str
+)
+
+
+@kfp.dsl.pipeline(
+    name='rlhf-train-template',
+    description='Performs reinforcement learning from human feedback.',
+)
+def rlhf_pipeline(
+    prompt_dataset: str,
+    preference_dataset: str,
+    large_model_reference: str,
+    model_display_name: Optional[str] = None,
+    prompt_sequence_length: int = 512,
+    target_sequence_length: int = 64,
+    reward_model_learning_rate_multiplier: float = 1.0,
+    reinforcement_learning_rate_multiplier: float = 1.0,
+    reward_model_train_steps: int = 1000,
+    reinforcement_learning_train_steps: int = 1000,
+    kl_coeff: float = 0.1,
+    instruction: Optional[str] = None,
+    deploy_model: bool = True,
+    eval_dataset: Optional[str] = None,
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+    location: str = _placeholders.LOCATION_PLACEHOLDER,
+) -> PipelineOutput:
+  """Performs reinforcement learning from human feedback.
+
+  Args:
+    prompt_dataset: Cloud storage path to an unlabeled prompt dataset used for
+      reinforcement learning. The dataset format is jsonl. Each example in the
+      dataset must have an ``input_text`` field that contains the prompt.
+    preference_dataset: Cloud storage path to a human preference dataset used to
+      train a reward model. The dataset format is jsonl. Each example in the
+      dataset must contain the following fields: ``input_text`` that contains
+      the prompt, ``candidate_0`` and ``candidate_1`` that contain candidate
+      responses, ``choice`` that specifies the preferred candidate.
+    large_model_reference: Name of the base model. Supported values are
+      ``BISON``, ``T5_SMALL``, ``T5_LARGE``, ``T5_XL``, and ``T5_XXL``.
+      ``BISON`` and ``T5_SMALL`` are supported in ``us-central1`` and
+      ``europe-west4``. ``T5_LARGE``, ``T5_XL`` and ``T5_XXL`` are only
+      supported in ``europe-west4``.
+    model_display_name: Name of the fine-tuned model shown in the Model
+      Registry.
If not provided, a default name will be created.
+    prompt_sequence_length: Maximum tokenized sequence length for input text.
+      Higher values increase memory overhead. This value should be at most 8192.
+      Default value is 512.
+    target_sequence_length: Maximum tokenized sequence length for target text.
+      Higher values increase memory overhead. This value should be at most 1024.
+      Default value is 64.
+    reward_model_learning_rate_multiplier: Constant used to adjust the base
+      learning rate used when training a reward model. Multiply by a number > 1
+      to increase the magnitude of updates applied at each training step or
+      multiply by a number < 1 to decrease the magnitude of updates. Default
+      value is 1.0.
+    reinforcement_learning_rate_multiplier: Constant used to adjust the base
+      learning rate used during reinforcement learning. Multiply by a number > 1
+      to increase the magnitude of updates applied at each training step or
+      multiply by a number < 1 to decrease the magnitude of updates. Default
+      value is 1.0.
+    reward_model_train_steps: Number of steps to use when training a reward
+      model. Default value is 1000.
+    reinforcement_learning_train_steps: Number of reinforcement learning steps
+      to perform when tuning a base model. Default value is 1000.
+    kl_coeff: Coefficient for KL penalty. This regularizes the policy model and
+      penalizes if it diverges from its initial distribution. If set to 0, the
+      reference language model is not loaded into memory. Default value is 0.1.
+    instruction: This field lets the model know what task it needs to perform.
+      Base models have been trained over a large set of varied instructions. You
+      can give a simple and intuitive description of the task and the model will
+      follow it, e.g. "Classify this movie review as positive or negative" or
+      "Translate this sentence to Danish". Do not specify this if your dataset
+      already prepends the instruction to the inputs field.
+    deploy_model: Whether to deploy the model to an endpoint in ``us-central1``.
+      Default is True.
+    eval_dataset: Optional Cloud storage path to an evaluation dataset. If
+      provided, inference will be performed on this dataset after training. The
+      dataset format is jsonl. Each example in the dataset must contain a field
+      ``input_text`` that contains the prompt.
+    project: Project used to run custom jobs. If not specified the project used
+      to run the pipeline will be used.
+    location: Location used to run custom jobs. If not specified the location
+      used to run the pipeline will be used.
+
+  Returns:
+    model_resource_name: Path to the model uploaded to the Model Registry. This
+      will be an empty string if the model was not deployed.
+    endpoint_resource_name: Path to the Online Prediction Endpoint. This will be
+      an empty string if the model was not deployed.
+ """ + policy_model_lora_dim = 1 + reward_model_lora_dim = 0 + batch_size = 64 + prompt_column = 'input_text' + candidate_columns = ['candidate_0', 'candidate_1'] + choice_column = 'choice' + upload_location = 'us-central1' + machine_spec = function_based.resolve_machine_spec( + location=location, use_test_spec=env.get_use_test_machine_spec() + ) + + reference_model_metadata = function_based.resolve_reference_model_metadata( + large_model_reference=large_model_reference, + ).set_display_name('BaseModelMetadataResolver') + + prompt_dataset_image_uri = function_based.resolve_private_image_uri( + image_name='text_importer' + ).set_display_name('PromptDatasetImageUriResolver') + prompt_dataset_importer = private_text_importer.PrivateTextImporter( + project=project, + location=location, + input_text=prompt_dataset, + inputs_field_name=prompt_column, + # Target field name does not matter because this field is not used. + targets_field_name='non_existent_targets_field_name', + output_split_name=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + image_uri=prompt_dataset_image_uri.output, + instruction=instruction, + ).set_display_name('PromptDatasetImporter') + + preference_dataset_image_uri = function_based.resolve_private_image_uri( + image_name='text_comparison_importer' + ).set_display_name('PreferenceDatasetImageUriResolver') + comma_separated_candidates_field_names = ( + function_based.convert_to_delimited_string(items=candidate_columns) + ) + preference_dataset_importer = private_text_comparison_importer.PrivateTextComparisonImporter( + project=project, + location=location, + input_text=preference_dataset, + inputs_field_name=prompt_column, + comma_separated_candidates_field_names=comma_separated_candidates_field_names.output, + choice_field_name=choice_column, + split=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + image_uri=preference_dataset_image_uri.output, + instruction=instruction, + ).set_display_name( + 'PreferenceDatasetImporter' + ) + + reward_model_image_uri = function_based.resolve_private_image_uri( + image_name='reward_model', + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + ).set_display_name('RewardModelImageUriResolver') + reward_model = reward_model_trainer.RewardModelTrainer( + project=project, + location=location, + input_model_path=reference_model_metadata.outputs['reward_model_path'], + input_dataset_path=preference_dataset_importer.outputs[ + 'output_dataset_path' + ], + train_steps=reward_model_train_steps, + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + large_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + machine_type=machine_spec.outputs['machine_type'], + image_uri=reward_model_image_uri.output, + inputs_sequence_length=prompt_sequence_length, + targets_sequence_length=target_sequence_length, + batch_size=batch_size, + learning_rate_multiplier=reward_model_learning_rate_multiplier, + lora_dim=reward_model_lora_dim, + ).set_display_name('RewardModelTrainer') + + rl_image_uri = function_based.resolve_private_image_uri( + image_name='reinforcer', + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + ).set_display_name('ReinforcerImageUriResolver') + rl_model = reinforcer.Reinforcer( + 
project=project, + location=location, + input_reference_model_path=reference_model_metadata.outputs[ + 'reference_model_path' + ], + input_reward_model_path=reward_model.outputs['output_model_path'], + input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'], + train_steps=reinforcement_learning_train_steps, + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + reward_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + machine_type=machine_spec.outputs['machine_type'], + image_uri=rl_image_uri.output, + inputs_sequence_length=prompt_sequence_length, + targets_sequence_length=target_sequence_length, + batch_size=batch_size, + learning_rate_multiplier=reinforcement_learning_rate_multiplier, + kl_coeff=kl_coeff, + lora_dim=policy_model_lora_dim, + ).set_display_name('Reinforcer') + + should_perform_inference = function_based.value_exists(value=eval_dataset) + with kfp.dsl.Condition( + should_perform_inference.output == True, name='Perform Inference' # pylint: disable=singleton-comparison + ): + infer_pipeline.infer_pipeline( + project=project, + location=location, + large_model_reference=large_model_reference, + model_checkpoint=rl_model.outputs['output_model_path'], + prompt_dataset=eval_dataset, + prompt_sequence_length=prompt_sequence_length, + target_sequence_length=target_sequence_length, + instruction=instruction, + ) + + adapter_artifact = kfp.dsl.importer( + artifact_uri=rl_model.outputs['output_adapter_path'], + artifact_class=kfp.dsl.Artifact, + ) + regional_endpoint = function_based.resolve_regional_endpoint( + upload_location=upload_location + ) + display_name = function_based.resolve_model_display_name( + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + model_display_name=model_display_name, + ) + upload_model = function_based.resolve_upload_model( + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ] + ) + upload_task = upload_llm_model.upload_llm_model( + project=_placeholders.PROJECT_ID_PLACEHOLDER, + location=upload_location, + regional_endpoint=regional_endpoint.output, + artifact_uri=adapter_artifact.output, + model_display_name=display_name.output, + model_reference_name='text-bison@001', + upload_model=upload_model.output, + ).set_env_variable( + name='VERTEX_AI_PIPELINES_RUN_LABELS', + value=json.dumps({'tune-type': 'rlhf'}), + ) + deploy_model = function_based.resolve_deploy_model( + deploy_model=deploy_model, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + ) + deploy_task = deploy_llm_model.create_endpoint_and_deploy_model( + project=_placeholders.PROJECT_ID_PLACEHOLDER, + location=upload_location, + model_resource_name=upload_task.outputs['model_resource_name'], + display_name=display_name.output, + regional_endpoint=regional_endpoint.output, + deploy_model=deploy_model.output, + ) + + return PipelineOutput( + model_resource_name=upload_task.outputs['model_resource_name'], + endpoint_resource_name=deploy_task.outputs['endpoint_resource_name'], + ) From 9184062f110d49dbfb5d4862bd3af9bcf7fded76 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 07:38:14 -0700 Subject: [PATCH 110/253] docs(components): add LLM docs to GCPC readthedocs PiperOrigin-RevId: 558570554 --- components/google-cloud/docs/source/api/preview/index.rst | 1 + 
components/google-cloud/docs/source/api/preview/llm.rst | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 components/google-cloud/docs/source/api/preview/llm.rst diff --git a/components/google-cloud/docs/source/api/preview/index.rst b/components/google-cloud/docs/source/api/preview/index.rst index 171ef3d672..1b5072b589 100644 --- a/components/google-cloud/docs/source/api/preview/index.rst +++ b/components/google-cloud/docs/source/api/preview/index.rst @@ -6,4 +6,5 @@ Preview Components automl/index dataflow + llm model_evaluation \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/preview/llm.rst b/components/google-cloud/docs/source/api/preview/llm.rst new file mode 100644 index 0000000000..e68c374216 --- /dev/null +++ b/components/google-cloud/docs/source/api/preview/llm.rst @@ -0,0 +1,4 @@ +LLM +========================== + +.. automodule:: preview.llm \ No newline at end of file From 2451b510703f17e18456e48cbdbc0e8bc18d05db Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 08:09:01 -0700 Subject: [PATCH 111/253] chore(components): use type generics from typing module in GCPC PiperOrigin-RevId: 558574225 --- .../_implementation/llm/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py index b22c4051f1..ab6474d26d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py @@ -13,7 +13,7 @@ # limitations under the License. """Utility functions used to create custom Kubeflow components.""" import os -from typing import Any +from typing import Any, Dict, List from google_cloud_pipeline_components._implementation.llm import env import kfp @@ -24,10 +24,10 @@ def build_payload( display_name: str, machine_type: str, image_uri: str, - args: list[str], + args: List[str], accelerator_type: str = '', accelerator_count: int = 0, -) -> dict[str, Any]: +) -> Dict[str, Any]: """Generates payload for a custom training job. 
Args: From 7d282b91f48106a68b6b980e6a2c94c26d3ac722 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 09:13:19 -0700 Subject: [PATCH 112/253] chore(components): reorganize gcpc llm components PiperOrigin-RevId: 558581292 --- .../preview/llm/__init__.py | 4 ++-- .../preview/llm/infer/__init__.py | 0 .../llm/{infer_pipeline.py => infer/component.py} | 0 .../preview/llm/rlhf/__init__.py | 13 +++++++++++++ .../llm/{rlhf_pipeline.py => rlhf/component.py} | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/__init__.py rename components/google-cloud/google_cloud_pipeline_components/preview/llm/{infer_pipeline.py => infer/component.py} (100%) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/__init__.py rename components/google-cloud/google_cloud_pipeline_components/preview/llm/{rlhf_pipeline.py => rlhf/component.py} (99%) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py index 47592e6fad..e35f70ef04 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/__init__.py @@ -13,8 +13,8 @@ # limitations under the License. """Large-language model preview components.""" -from google_cloud_pipeline_components.preview.llm.infer_pipeline import infer_pipeline -from google_cloud_pipeline_components.preview.llm.rlhf_pipeline import rlhf_pipeline +from google_cloud_pipeline_components.preview.llm.infer.component import infer_pipeline +from google_cloud_pipeline_components.preview.llm.rlhf.component import rlhf_pipeline __all__ = [ 'infer_pipeline', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py similarity index 100% rename from components/google-cloud/google_cloud_pipeline_components/preview/llm/infer_pipeline.py rename to components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/__init__.py new file mode 100644 index 0000000000..aa8704bef8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
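The reorganization above only moves files; the public surface is unchanged because preview/llm/__init__.py keeps re-exporting both pipelines. A quick check, assuming the package is installed:

    # Import paths used before the reorganization continue to resolve, since
    # __init__.py re-exports the pipelines from their new component modules.
    from google_cloud_pipeline_components.preview.llm import infer_pipeline, rlhf_pipeline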
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py similarity index 99% rename from components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py rename to components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index 1dc94103c5..79bff1ce97 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -25,7 +25,7 @@ from google_cloud_pipeline_components._implementation.llm import reinforcer from google_cloud_pipeline_components._implementation.llm import reward_model_trainer from google_cloud_pipeline_components._implementation.llm import upload_llm_model -from google_cloud_pipeline_components.preview.llm import infer_pipeline +from google_cloud_pipeline_components.preview.llm.infer.component import infer_pipeline import kfp From 59aa41d78ad32d39c301bdaa593800224552d0ba Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 10:03:19 -0700 Subject: [PATCH 113/253] chore(components): fix infer_pipeline invocation PiperOrigin-RevId: 558586379 --- .../preview/llm/rlhf/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index 79bff1ce97..2c104f6fd2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -25,7 +25,7 @@ from google_cloud_pipeline_components._implementation.llm import reinforcer from google_cloud_pipeline_components._implementation.llm import reward_model_trainer from google_cloud_pipeline_components._implementation.llm import upload_llm_model -from google_cloud_pipeline_components.preview.llm.infer.component import infer_pipeline +from google_cloud_pipeline_components.preview.llm.infer import component import kfp @@ -239,7 +239,7 @@ def rlhf_pipeline( with kfp.dsl.Condition( should_perform_inference.output == True, name='Perform Inference' # pylint: disable=singleton-comparison ): - infer_pipeline.infer_pipeline( + component.infer_pipeline( project=project, location=location, large_model_reference=large_model_reference, From 4e10c5838e108bf22ebdce367ca057732c89b989 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 10:07:53 -0700 Subject: [PATCH 114/253] chore(components): GCPC 2.3.0 release PiperOrigin-RevId: 558586924 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 6 ++++++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 01a4d3e13f..89cc16dc10 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install 
"git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.2.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.3.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index d8fb87a929..0a360f2dc9 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,11 @@ ## Upcoming release +## Release 2.3.0 +* Add `preview.llm.infer_pipeline` and `preview.llm.rlhf_pipeline` +* Add `automl_tabular_tabnet_trainer` and `automl_tabular_wide_and_deep_trainer` to `preview.automl.tabular` and `v1.automl.tabular` +* Minor feature additions to AutoML components +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.2.0 * Add `preview.model_evaluation.evaluation_llm_classification_pipeline.evaluation_llm_classification_pipeline` * Change AutoML Vision Error Analysis pipeline names (`v1.model_evaluation.vision_model_error_analysis_pipeline' and 'v1.model_evaluation.evaluated_annotation_pipeline') diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index 38a3204abb..5615ee72d8 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.3.0", + "title": "2.3.0", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.2.0", "title": "2.2.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 14b0d50a81..4cfe937969 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. """Google Cloud Pipeline Components version.""" -__version__ = "2.2.0" +__version__ = "2.3.0" From 5b59e4a76fa4a086ebb4e6b590557bdd72e86cb5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Sun, 20 Aug 2023 10:35:35 -0700 Subject: [PATCH 115/253] chore(components): add `__init__.py` file to gcpc `_implementation/llm` PiperOrigin-RevId: 558589929 --- .../_implementation/llm/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/__init__.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/__init__.py new file mode 100644 index 0000000000..aa8704bef8 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From b086020a249bd7c99ceaf54b6c8d4535f9f73df0 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 21 Aug 2023 16:08:35 -0400 Subject: [PATCH 116/253] feat(deployment): add option to deploy mysql in KFP standalone (#9855) * add option to deploy mysql * fix deployment errors * remove temp code in development * keep mysql deployment path same as before * change the generic folder * manifest error * revert default cache path * address comments --- .../kustomize/base/cache/kustomization.yaml | 2 +- .../cache/postgresql/cache-deployment.yaml | 97 +++++++++++ .../base/cache/postgresql/kustomization.yaml | 13 ++ .../base/installs/generic/kustomization.yaml | 2 +- .../generic/pipeline-install-config.yaml | 7 +- .../generic/postgres/kustomization.yaml | 48 ++++++ .../installs/generic/postgres/params.yaml | 10 ++ .../postgres/pipeline-install-config.yaml | 95 +++++++++++ .../generic/postgres/postgres-secret.yaml | 7 + .../ml-pipeline-apiserver-deployment.yaml | 34 ++++ .../base/pipeline/postgres/kustomization.yaml | 50 ++++++ .../ml-pipeline-apiserver-deployment.yaml | 161 ++++++++++++++++++ .../env/dev/postgresql/kustomization.yaml | 19 +++ .../kustomization.yaml | 19 +++ 14 files changed, 560 insertions(+), 4 deletions(-) create mode 100644 manifests/kustomize/base/cache/postgresql/cache-deployment.yaml create mode 100644 manifests/kustomize/base/cache/postgresql/kustomization.yaml create mode 100644 manifests/kustomize/base/installs/generic/postgres/kustomization.yaml create mode 100644 manifests/kustomize/base/installs/generic/postgres/params.yaml create mode 100644 manifests/kustomize/base/installs/generic/postgres/pipeline-install-config.yaml create mode 100644 manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml create mode 100644 manifests/kustomize/base/pipeline/postgres/kustomization.yaml create mode 100644 manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml create mode 100644 manifests/kustomize/env/dev/postgresql/kustomization.yaml create mode 100644 manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml index 56c40afbe4..2c2001ed0b 100644 --- a/manifests/kustomize/base/cache/kustomization.yaml +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -2,10 +2,10 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - cache-deployment.yaml - - cache-service.yaml - cache-role.yaml - cache-rolebinding.yaml - cache-sa.yaml + - cache-service.yaml commonLabels: app: cache-server images: diff --git a/manifests/kustomize/base/cache/postgresql/cache-deployment.yaml b/manifests/kustomize/base/cache/postgresql/cache-deployment.yaml new file mode 100644 index 0000000000..8e6cc316ce --- /dev/null +++ b/manifests/kustomize/base/cache/postgresql/cache-deployment.yaml @@ -0,0 +1,97 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cache-server + labels: + app: cache-server +spec: + replicas: 1 + selector: + matchLabels: + app: cache-server + template: + metadata: + labels: + app: 
cache-server + spec: + containers: + - name: server + image: gcr.io/ml-pipeline/cache-server:dummy + env: + - name: DEFAULT_CACHE_STALENESS + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: DEFAULT_CACHE_STALENESS + - name: MAXIMUM_CACHE_STALENESS + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: MAXIMUM_CACHE_STALENESS + - name: CACHE_IMAGE + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: cacheImage + - name: CACHE_NODE_RESTRICTIONS + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: cacheNodeRestrictions + - name: DBCONFIG_DRIVER + value: pgx + - name: DBCONFIG_DB_NAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: cacheDb + - name: DBCONFIG_HOST_NAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresDbHost + - name: DBCONFIG_PORT + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresDbPort + - name: DBCONFIG_USER + valueFrom: + secretKeyRef: + name: postgres-secret + key: username + - name: DBCONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret + key: password + - name: NAMESPACE_TO_WATCH + valueFrom: + fieldRef: + fieldPath: metadata.namespace + # If you update WEBHOOK_PORT, also change the value of the + # containerPort "webhook-api" to match. + - name: WEBHOOK_PORT + value: "8443" + args: ["--db_driver=$(DBCONFIG_DRIVER)", + "--db_host=$(DBCONFIG_HOST_NAME)", + "--db_port=$(DBCONFIG_PORT)", + "--db_name=$(DBCONFIG_DB_NAME)", + "--db_user=$(DBCONFIG_USER)", + "--db_password=$(DBCONFIG_PASSWORD)", + "--namespace_to_watch=$(NAMESPACE_TO_WATCH)", + "--listen_port=$(WEBHOOK_PORT)", + ] + imagePullPolicy: Always + ports: + - containerPort: 8443 + name: webhook-api + volumeMounts: + - name: webhook-tls-certs + mountPath: /etc/webhook/certs + readOnly: true + volumes: + - name: webhook-tls-certs + secret: + secretName: webhook-server-tls + serviceAccountName: kubeflow-pipelines-cache diff --git a/manifests/kustomize/base/cache/postgresql/kustomization.yaml b/manifests/kustomize/base/cache/postgresql/kustomization.yaml new file mode 100644 index 0000000000..89adcab9d9 --- /dev/null +++ b/manifests/kustomize/base/cache/postgresql/kustomization.yaml @@ -0,0 +1,13 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - cache-deployment.yaml + - ../cache-role.yaml + - ../cache-rolebinding.yaml + - ../cache-sa.yaml + - ../cache-service.yaml +commonLabels: + app: cache-server +images: + - name: gcr.io/ml-pipeline/cache-server + newTag: 2.0.0 diff --git a/manifests/kustomize/base/installs/generic/kustomization.yaml b/manifests/kustomize/base/installs/generic/kustomization.yaml index a3786b2077..4ef5fd2471 100644 --- a/manifests/kustomize/base/installs/generic/kustomization.yaml +++ b/manifests/kustomize/base/installs/generic/kustomization.yaml @@ -45,4 +45,4 @@ vars: fieldref: fieldpath: data.defaultPipelineRoot configurations: -- params.yaml + - params.yaml diff --git a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml index 47ce6b687d..cd50fe5dce 100644 --- a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml +++ b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml @@ -12,8 +12,11 @@ data: namespace: `kubectl rollout restart deployment -n `. 
appName: pipeline appVersion: 2.0.1 - dbHost: mysql - dbPort: "3306" + dbHost: mysql # relic to be removed after release + dbPort: "3306" # relic to be removed after release + dbType: mysql + mysqlHost: mysql + mysqlPort: "3306" mlmdDb: metadb cacheDb: cachedb pipelineDb: mlpipeline diff --git a/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml b/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml new file mode 100644 index 0000000000..bd435cc712 --- /dev/null +++ b/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml @@ -0,0 +1,48 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow +bases: +- ../../../pipeline/postgres +- ../../../cache/postgres +- ../../../cache-deployer +resources: +- pipeline-install-config.yaml +- postgres-secret.yaml +vars: +- name: kfp-namespace + objref: + kind: Deployment + apiVersion: apps/v1 + name: ml-pipeline + fieldref: + fieldpath: metadata.namespace +- name: kfp-app-name + objref: + kind: ConfigMap + name: pipeline-install-config + apiVersion: v1 + fieldref: + fieldpath: data.appName +- name: kfp-app-version + objref: + kind: ConfigMap + name: pipeline-install-config + apiVersion: v1 + fieldref: + fieldpath: data.appVersion +- name: kfp-artifact-bucket-name + objref: + kind: ConfigMap + name: pipeline-install-config + apiVersion: v1 + fieldref: + fieldpath: data.bucketName +- name: kfp-default-pipeline-root + objref: + kind: ConfigMap + name: pipeline-install-config + apiVersion: v1 + fieldref: + fieldpath: data.defaultPipelineRoot +configurations: + - params.yaml diff --git a/manifests/kustomize/base/installs/generic/postgres/params.yaml b/manifests/kustomize/base/installs/generic/postgres/params.yaml new file mode 100644 index 0000000000..9b8a7efd52 --- /dev/null +++ b/manifests/kustomize/base/installs/generic/postgres/params.yaml @@ -0,0 +1,10 @@ +# Allow Kustomize var to replace following fields. +varReference: +- path: data/config + kind: ConfigMap +- path: data/defaultPipelineRoot + kind: ConfigMap +- path: metadata/name + kind: Application +- path: spec/descriptor/version + kind: Application diff --git a/manifests/kustomize/base/installs/generic/postgres/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/postgres/pipeline-install-config.yaml new file mode 100644 index 0000000000..f5d87c6797 --- /dev/null +++ b/manifests/kustomize/base/installs/generic/postgres/pipeline-install-config.yaml @@ -0,0 +1,95 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: pipeline-install-config +data: + warning: | + 1. Do not use kubectl to edit this configmap, because some values are used + during kustomize build. Instead, change the configmap and apply the entire + kustomize manifests again. + 2. After updating the configmap, some deployments may need to be restarted + until the changes take effect. A quick way to restart all deployments in a + namespace: `kubectl rollout restart deployment -n `. + appName: pipeline + appVersion: 2.0.0 + dbHost: postgres # relic to be removed after release + dbPort: "5432" # relic to be removed after release + dbType: postgres + postgresHost: postgres + postgresPort: "5432" + mlmdDb: metadb + cacheDb: cachedb + pipelineDb: mlpipeline + bucketName: mlpipeline + ## defaultPipelineRoot: Optional. Default pipeline root in v2 compatible mode. 
+ ## https://www.kubeflow.org/docs/components/pipelines/sdk/v2/v2-compatibility/ + ## + ## If the field is not set, kfp-launcher configmaps won't be created and + ## v2 compatible mode defaults to minio://mlpipeline/v2/artifacts as pipeline + ## root. + ## + ## When not in Kubeflow Pipelines multi-user mode, the config works as you + ## would normally expect. + ## + ## In Kubeflow Pipelines multi-user mode, the config creates default + ## kfp-launcher configmaps in each user's namespace. Users can edit the + ## kfp-launcher configmap's defaultPipelineRoot field afterwards to configure + ## namespace-specific default pipeline root. The namespace specific changes in + ## kfp-launcher configmap won't be overridden by pipeline-install-config. + ## + ## Caveat: when you update the config from a non-empty value, only new + ## namespaces get the updated config by default. Owners of existing namespaces + ## must delete the kfp-launcher configmap to get the new default config value. + ## + ## Examples: + ## defaultPipelineRoot: minio://mlpipeline/v2/artifacts + ## defaultPipelineRoot: gs://your-bucket/path/to/artifacts + ## defaultPipelineRoot: s3://your-bucket/path/to/artifacts + ## + ## V2 Compatible Mode Feature stage: + ## [Beta](https://github.com/kubeflow/pipelines/blob/master/docs/release/feature-stages.md#beta) + defaultPipelineRoot: "" + ## autoUpdatePipelineDefaultVersion: States if the pipeline version + ## should be updated by defult for a versioned pipeline or not when a new + ## version is uploaded. This sets the deployment wide definition. + autoUpdatePipelineDefaultVersion: "true" + ## cronScheduleTimezone: States the timezone which should be used for + ## the cron scheduler. If not specified the local timezone of the + ## cluster will be used. Valid values are UTC, Local or values according to + ## the IANA Time Zone database, such as "America/New_York" and "Asia/Shanghai". + ## Feature stage: + ## [Alpha](https://github.com/kubeflow/pipelines/blob/master/docs/release/feature-stages.md#alpha) + cronScheduleTimezone: "UTC" + ## cacheImage is the image that the mutating webhook will use to patch + ## cached steps with. Will be used to echo a message announcing that + ## the cached step result will be used. If not set it will default to + ## 'gcr.io/google-containers/busybox' + cacheImage: "gcr.io/google-containers/busybox" + ## cacheNodeRestrictions the dummy container runing if output is cached + ## will run with the same affinity and node selector as the default pipeline + ## step. This is defaulted to 'false' to allow the pod to be scheduled on + ## any node and avoid defaulting to specific nodes. Allowed values are: + ## 'false' and 'true'. + cacheNodeRestrictions: "false" + ## MAXIMUM_CACHE_STALENESS configures caching according to + ## https://www.kubeflow.org/docs/components/pipelines/overview/caching/ and + ## https://www.kubeflow.org/docs/components/pipelines/overview/caching-v2/. + ## Larger than MAXIMUM_CACHE_STALENESS per pipeline user set values are + ## reduced to MAXIMUM_CACHE_STALENESS. + ## The administrator of the storage backend can rely on it to delete old cache + ## artifacts. + MAXIMUM_CACHE_STALENESS: "" + ## MAXIMUM_CACHE_STALENESS: "P30D" + ## DEFAULT_CACHE_STALENESS configures caching according to + ## https://www.kubeflow.org/docs/components/pipelines/overview/caching/ and + ## https://www.kubeflow.org/docs/components/pipelines/overview/caching-v2/. + ## This value is used if the user did not set a value in the pipeline. 
+ DEFAULT_CACHE_STALENESS: "" + ## DEFAULT_CACHE_STALENESS: "P7D" + ## ConMaxLifeTime will set the connection max lifetime for MySQL + ## this is very important to setup when using external databases. + ## See this issue for more details: https://github.com/kubeflow/pipelines/issues/5329 + ## Note: this value should be a string that can be parsed by `time.ParseDuration`. + ## If this value doesn't include a unit abbreviation, the units will be assumed + ## to be nanoseconds. + ConMaxLifeTime: "120s" diff --git a/manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml b/manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml new file mode 100644 index 0000000000..734ce0b5f5 --- /dev/null +++ b/manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml @@ -0,0 +1,7 @@ +kind: Secret +apiVersion: v1 +metadata: + name: postgres-secret +stringData: + username: user + password: "password" diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml index fea2381110..d296a72b83 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-apiserver-deployment.yaml @@ -33,6 +33,7 @@ spec: configMapKeyRef: name: pipeline-install-config key: bucketName + # relic variables - name: DBCONFIG_USER valueFrom: secretKeyRef: @@ -58,11 +59,44 @@ spec: configMapKeyRef: name: pipeline-install-config key: dbPort + # end of relic variables - name: DBCONFIG_CONMAXLIFETIME valueFrom: configMapKeyRef: name: pipeline-install-config key: ConMaxLifeTime + - name: DB_DRIVER_NAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: dbType + # MySQL Config + - name: DBCONFIG_MYSQLCONFIG_USER + valueFrom: + secretKeyRef: + name: mysql-secret + key: username + - name: DBCONFIG_MYSQLCONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: mysql-secret + key: password + - name: DBCONFIG_MYSQLCONFIG_DBNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: pipelineDb + - name: DBCONFIG_MYSQLCONFIG_HOST + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: mysqlHost + - name: DBCONFIG_MYSQLCONFIG_PORT + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: mysqlPort + # end of MySQL Config - name: OBJECTSTORECONFIG_ACCESSKEY valueFrom: secretKeyRef: diff --git a/manifests/kustomize/base/pipeline/postgres/kustomization.yaml b/manifests/kustomize/base/pipeline/postgres/kustomization.yaml new file mode 100644 index 0000000000..08e991d972 --- /dev/null +++ b/manifests/kustomize/base/pipeline/postgres/kustomization.yaml @@ -0,0 +1,50 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +bases: + - ../metadata-writer +resources: + - ml-pipeline-apiserver-deployment.yaml + - ../ml-pipeline-apiserver-role.yaml + - ../ml-pipeline-apiserver-rolebinding.yaml + - ../ml-pipeline-apiserver-sa.yaml + - ../ml-pipeline-apiserver-service.yaml + - ../ml-pipeline-persistenceagent-deployment.yaml + - ../ml-pipeline-persistenceagent-role.yaml + - ../ml-pipeline-persistenceagent-rolebinding.yaml + - ../ml-pipeline-persistenceagent-sa.yaml + - ../ml-pipeline-scheduledworkflow-deployment.yaml + - ../ml-pipeline-scheduledworkflow-role.yaml + - ../ml-pipeline-scheduledworkflow-rolebinding.yaml + - ../ml-pipeline-scheduledworkflow-sa.yaml + - ../ml-pipeline-ui-deployment.yaml + - ../ml-pipeline-ui-configmap.yaml + - ../ml-pipeline-ui-role.yaml + - 
../ml-pipeline-ui-rolebinding.yaml + - ../ml-pipeline-ui-sa.yaml + - ../ml-pipeline-ui-service.yaml + - ../ml-pipeline-viewer-crd-role.yaml + - ../ml-pipeline-viewer-crd-rolebinding.yaml + - ../ml-pipeline-viewer-crd-deployment.yaml + - ../ml-pipeline-viewer-crd-sa.yaml + - ../ml-pipeline-visualization-deployment.yaml + - ../ml-pipeline-visualization-sa.yaml + - ../ml-pipeline-visualization-service.yaml + - ../pipeline-runner-role.yaml + - ../pipeline-runner-rolebinding.yaml + - ../pipeline-runner-sa.yaml + - ../container-builder-sa.yaml + - ../viewer-sa.yaml + - ../kfp-launcher-configmap.yaml +images: + - name: gcr.io/ml-pipeline/api-server + newTag: 2.0.0 + - name: gcr.io/ml-pipeline/persistenceagent + newTag: 2.0.0 + - name: gcr.io/ml-pipeline/scheduledworkflow + newTag: 2.0.0 + - name: gcr.io/ml-pipeline/frontend + newTag: 2.0.0 + - name: gcr.io/ml-pipeline/viewer-crd-controller + newTag: 2.0.0 + - name: gcr.io/ml-pipeline/visualization-server + newTag: 2.0.0 diff --git a/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml b/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml new file mode 100644 index 0000000000..0cbd4bd729 --- /dev/null +++ b/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml @@ -0,0 +1,161 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: ml-pipeline + name: ml-pipeline +spec: + selector: + matchLabels: + app: ml-pipeline + template: + metadata: + labels: + app: ml-pipeline + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + spec: + containers: + - env: + - name: AUTO_UPDATE_PIPELINE_DEFAULT_VERSION + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: autoUpdatePipelineDefaultVersion + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: OBJECTSTORECONFIG_SECURE + value: "false" + - name: OBJECTSTORECONFIG_BUCKETNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: bucketName + # relic variables + - name: DBCONFIG_USER + valueFrom: + secretKeyRef: + name: mysql-secret + key: username + - name: DBCONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: mysql-secret + key: password + - name: DBCONFIG_DBNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: pipelineDb + - name: DBCONFIG_HOST + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: dbHost + - name: DBCONFIG_PORT + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: dbPort + # end of relic variables + - name: DBCONFIG_CONMAXLIFETIME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: ConMaxLifeTime + - name: DB_DRIVER_NAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: dbType + # PostgreSQL Config + - name: DBCONFIG_POSTGRESQLCONFIG_USER + valueFrom: + secretKeyRef: + name: postgres-secret + key: username + - name: DBCONFIG_POSTGRESQLCONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret + key: password + - name: DBCONFIG_POSTGRESQLCONFIG_DBNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: pipelineDb + - name: DBCONFIG_POSTGRESQLCONFIG_HOST + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresHost + - name: DBCONFIG_POSTGRESQLCONFIG_PORT + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresPort + # end of PostgreSQL variables + - name: OBJECTSTORECONFIG_ACCESSKEY + valueFrom: + secretKeyRef: + name: 
mlpipeline-minio-artifact + key: accesskey + - name: OBJECTSTORECONFIG_SECRETACCESSKEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: secretkey + image: gcr.io/ml-pipeline/api-server:dummy + imagePullPolicy: IfNotPresent + name: ml-pipeline-api-server + ports: + - name: http + containerPort: 8888 + - name: grpc + containerPort: 8887 + readinessProbe: + exec: + command: + - wget + - -q # quiet + - -S # show server response + - -O + - "-" # Redirect output to stdout + - http://localhost:8888/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + livenessProbe: + exec: + command: + - wget + - -q # quiet + - -S # show server response + - -O + - "-" # Redirect output to stdout + - http://localhost:8888/apis/v1beta1/healthz + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 2 + # This startup probe provides up to a 60 second grace window before the + # liveness probe takes over to accomodate the occasional database + # migration. + startupProbe: + exec: + command: + - wget + - -q # quiet + - -S # show server response + - -O + - "-" # Redirect output to stdout + - http://localhost:8888/apis/v1beta1/healthz + failureThreshold: 12 + periodSeconds: 5 + timeoutSeconds: 2 + resources: + requests: + cpu: 250m + memory: 500Mi + serviceAccountName: ml-pipeline diff --git a/manifests/kustomize/env/dev/postgresql/kustomization.yaml b/manifests/kustomize/env/dev/postgresql/kustomization.yaml new file mode 100644 index 0000000000..25aec96ef0 --- /dev/null +++ b/manifests/kustomize/env/dev/postgresql/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + # Application controller is used to provide Google Cloud Console integration. + - ../../../third-party/application + - ../../../base/application + - ../../platform-agnostic-postgresql + - ../../gcp/inverse-proxy + +# Identifier for application manager to apply ownerReference. +# The ownerReference ensures the resources get garbage collected +# when application is deleted. +commonLabels: + application-crd-id: kubeflow-pipelines + +# !!! If you want to customize the namespace, +# please refer sample/cluster-scoped-resources to update the namespace for cluster-scoped-resources +namespace: kubeflow diff --git a/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml new file mode 100644 index 0000000000..99e03a3e6c --- /dev/null +++ b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - ../../base/installs/generic/postgres + - ../../base/metadata/base + - ../../third-party/argo/installs/namespace + - ../../third-party/minio/base + - ../../third-party/postgresql/base + +# Identifier for application manager to apply ownerReference. +# The ownerReference ensures the resources get garbage collected +# when application is deleted. +commonLabels: + application-crd-id: kubeflow-pipelines + +# !!! 
If you want to customize the namespace, +# please also update base/cache-deployer/cluster-scoped/cache-deployer-clusterrolebinding.yaml +namespace: kubeflow From 0fed207e40535825a74c8b228ad2e9fce87b0a71 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Mon, 21 Aug 2023 13:08:41 -0700 Subject: [PATCH 117/253] fix(manifests): Update persistence agent manifests in marketplace helm chart (#9908) * fix(manifests): Update persistence agent manifests in marketplace helm chart. * snapshot * indent * schema.yaml --- .../templates/pipeline.yaml | 24 +++++++++++++++++ manifests/gcp_marketplace/schema.yaml | 3 +++ .../gcp_marketplace/test/snapshot-base.yaml | 26 ++++++++++++++++++- .../test/snapshot-emissary.yaml | 26 ++++++++++++++++++- ...apshot-managed-storage-with-db-prefix.yaml | 26 ++++++++++++++++++- .../test/snapshot-managed-storage.yaml | 26 ++++++++++++++++++- 6 files changed, 127 insertions(+), 4 deletions(-) diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml index 92e961b525..3807b02fb8 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/pipeline.yaml @@ -149,6 +149,19 @@ rules: - get - list - watch + - apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report + - apiGroups: + - '' + resources: + - namespaces + verbs: + - get - apiGroups: - '' resources: @@ -560,7 +573,18 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org --- apiVersion: apps/v1 kind: Deployment diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index 46cd2b385e..21305e19de 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -121,6 +121,9 @@ x-google-marketplace: - apiGroups: ['kubeflow.org'] resources: ['*'] verbs: ['*'] + - apiGroups: ['pipelines.kubeflow.org'] + resources: ['*'] + verbs: ['*'] - apiGroups: [''] resources: ['configmaps', 'events', 'pods/log', 'persistentvolumes', 'persistentvolumeclaims', 'persistentvolumeclaims/finalizers', 'pods', 'pods/exec'] verbs: ['*'] diff --git a/manifests/gcp_marketplace/test/snapshot-base.yaml b/manifests/gcp_marketplace/test/snapshot-base.yaml index 7769633694..426ac6b01f 100644 --- a/manifests/gcp_marketplace/test/snapshot-base.yaml +++ b/manifests/gcp_marketplace/test/snapshot-base.yaml @@ -1353,6 +1353,19 @@ rules: - get - list - watch + - apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report + - apiGroups: + - '' + resources: + - namespaces + verbs: + - get - apiGroups: - '' resources: @@ -2410,7 +2423,18 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org --- # Source: 
kubeflow-pipelines/templates/pipeline.yaml apiVersion: apps/v1 @@ -2714,7 +2738,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.0-beta.1 + version: 2.0.1 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/test/snapshot-emissary.yaml b/manifests/gcp_marketplace/test/snapshot-emissary.yaml index 7769633694..426ac6b01f 100644 --- a/manifests/gcp_marketplace/test/snapshot-emissary.yaml +++ b/manifests/gcp_marketplace/test/snapshot-emissary.yaml @@ -1353,6 +1353,19 @@ rules: - get - list - watch + - apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report + - apiGroups: + - '' + resources: + - namespaces + verbs: + - get - apiGroups: - '' resources: @@ -2410,7 +2423,18 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org --- # Source: kubeflow-pipelines/templates/pipeline.yaml apiVersion: apps/v1 @@ -2714,7 +2738,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.0-beta.1 + version: 2.0.1 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/test/snapshot-managed-storage-with-db-prefix.yaml b/manifests/gcp_marketplace/test/snapshot-managed-storage-with-db-prefix.yaml index 8ade0429dd..488067d4a0 100644 --- a/manifests/gcp_marketplace/test/snapshot-managed-storage-with-db-prefix.yaml +++ b/manifests/gcp_marketplace/test/snapshot-managed-storage-with-db-prefix.yaml @@ -1352,6 +1352,19 @@ rules: - get - list - watch + - apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report + - apiGroups: + - '' + resources: + - namespaces + verbs: + - get - apiGroups: - '' resources: @@ -2451,7 +2464,18 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org --- # Source: kubeflow-pipelines/templates/pipeline.yaml apiVersion: apps/v1 @@ -2764,7 +2788,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.0-beta.1 + version: 2.0.1 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/test/snapshot-managed-storage.yaml b/manifests/gcp_marketplace/test/snapshot-managed-storage.yaml index a36ef81590..f6abd1a557 100644 --- a/manifests/gcp_marketplace/test/snapshot-managed-storage.yaml +++ b/manifests/gcp_marketplace/test/snapshot-managed-storage.yaml @@ -1352,6 +1352,19 @@ rules: - get - list - watch + - apiGroups: + - pipelines.kubeflow.org + resources: + - scheduledworkflows + - workflows + verbs: + - report + - apiGroups: + - '' + resources: + - namespaces + verbs: + - get - apiGroups: - '' resources: @@ -2451,7 +2464,18 @@ spec: requests: cpu: 120m memory: 500Mi + volumeMounts: + - mountPath: /var/run/secrets/kubeflow/tokens + name: persistenceagent-sa-token serviceAccountName: ml-pipeline-persistenceagent + volumes: + - name: persistenceagent-sa-token + projected: + 
sources: + - serviceAccountToken: + path: persistenceagent-sa-token + expirationSeconds: 3600 + audience: pipelines.kubeflow.org --- # Source: kubeflow-pipelines/templates/pipeline.yaml apiVersion: apps/v1 @@ -2764,7 +2788,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.0-beta.1 + version: 2.0.1 description: |- Reusable end-to-end ML workflow maintainers: From e8fefc90b180bffb2c527ee0ce977e1306cf141f Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:45:08 -0700 Subject: [PATCH 118/253] chore(frontend): Update caniuse-lite in package-lock.json (#9857) --- frontend/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index d259ae6ec0..681d067591 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -14887,9 +14887,9 @@ } }, "caniuse-lite": { - "version": "1.0.30001373", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001373.tgz", - "integrity": "sha512-pJYArGHrPp3TUqQzFYRmP/lwJlj8RCbVe3Gd3eJQkAV8SAC6b19XS9BjMvRdvaS8RMkaTN8ZhoHP6S1y8zzwEQ==", + "version": "1.0.30001519", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001519.tgz", + "integrity": "sha512-0QHgqR+Jv4bxHMp8kZ1Kn8CH55OikjKJ6JmKkZYP1F3D7w+lnFXF70nG5eNfsZS89jadi5Ywy5UCSKLAglIRkg==", "dev": true }, "capture-exit": { From eaec515812f9902cf67a648f88d8451540001b59 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Tue, 22 Aug 2023 10:18:09 -0700 Subject: [PATCH 119/253] chore(mlmd): Upgrade ML Metadata to 1.14.0. (#9856) * chore(mlmd): Upgrade ML Metadata to 1.14.0. * fix requirements, skip visualization. 
* go mod tidy * mlmd new enum in frontend * mlmd license * mlmd license again, using the failure test diff * again --- .cloudbuild.yaml | 2 +- .release.cloudbuild.yaml | 10 +- backend/metadata_writer/requirements.in | 2 +- backend/src/v2/test/requirements.txt | 2 +- backend/third_party_licenses/apiserver.csv | 2 +- frontend/README.md | 14 + frontend/src/mlmd/Utils.tsx | 1 + frontend/src/pages/ExecutionDetails.tsx | 1 + .../ml_metadata/proto/metadata_store_pb.d.ts | 236 + .../ml_metadata/proto/metadata_store_pb.js | 5338 +++-- .../metadata_store_service_grpc_web_pb.d.ts | 120 + .../metadata_store_service_grpc_web_pb.js | 612 + .../proto/metadata_store_service_pb.d.ts | 691 + .../proto/metadata_store_service_pb.js | 19221 ++++++++++------ go.mod | 3 +- go.sum | 7 +- hack/update-all-requirements.sh | 1 - .../base/metadata-grpc-deployment.yaml | 2 +- test/tag_for_hosted.sh | 4 +- third_party/ml-metadata/README.md | 8 + third_party/ml-metadata/VERSION | 2 +- third_party/ml-metadata/go.mod | 1 - third_party/ml-metadata/go.sum | 6 - .../go/ml_metadata/metadata_store.pb.go | 2695 ++- .../ml_metadata/metadata_store_service.pb.go | 7162 ++++-- .../metadata_store_service_grpc.pb.go | 506 +- .../ml_metadata/proto/metadata_store.proto | 411 +- .../proto/metadata_store_service.proto | 537 +- third_party/ml-metadata/update_version.sh | 4 +- 29 files changed, 26167 insertions(+), 11434 deletions(-) diff --git a/.cloudbuild.yaml b/.cloudbuild.yaml index d94f837d59..a2f8a0524a 100644 --- a/.cloudbuild.yaml +++ b/.cloudbuild.yaml @@ -158,7 +158,7 @@ steps: # * manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml # * test/tag_for_hosted.sh - name: 'gcr.io/cloud-builders/docker' - args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0'] + args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0'] id: 'pullMetadataServer' - name: 'gcr.io/cloud-builders/docker' args: ['pull', 'gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance'] diff --git a/.release.cloudbuild.yaml b/.release.cloudbuild.yaml index 0c0eee6707..591c3ed198 100644 --- a/.release.cloudbuild.yaml +++ b/.release.cloudbuild.yaml @@ -259,14 +259,14 @@ steps: # * manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml # * test/tag_for_hosted.sh - name: 'gcr.io/cloud-builders/docker' - args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0'] + args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0'] id: 'pullMetadataServer' - name: 'gcr.io/cloud-builders/docker' - args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0', 'gcr.io/ml-pipeline/google/pipelines/metadataserver:$TAG_NAME'] + args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0', 'gcr.io/ml-pipeline/google/pipelines/metadataserver:$TAG_NAME'] id: 'tagMetadataServerForMarketplace' waitFor: ['pullMetadataServer'] - name: 'gcr.io/cloud-builders/docker' - args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0', 'gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$TAG_NAME'] + args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0', 'gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$TAG_NAME'] id: 'tagMetadataServerForMarketplaceTest' waitFor: ['pullMetadataServer'] - id: 'tagMetadataServerForMarketplaceMajorMinor' @@ -276,8 +276,8 @@ steps: args: - -ceux - | - docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0 gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver) - docker tag 
gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0 gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver) + docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0 gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver) + docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0 gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver) docker push gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver) docker push gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver) diff --git a/backend/metadata_writer/requirements.in b/backend/metadata_writer/requirements.in index 27130c336f..e45d3d7baf 100644 --- a/backend/metadata_writer/requirements.in +++ b/backend/metadata_writer/requirements.in @@ -1,3 +1,3 @@ kubernetes>=8.0.0,<11.0.0 -ml-metadata==1.5.0 +ml-metadata==1.14.0 lru-dict>=1.1.7,<2.0.0 diff --git a/backend/src/v2/test/requirements.txt b/backend/src/v2/test/requirements.txt index 7c9f5ac2b6..6eece4be3a 100644 --- a/backend/src/v2/test/requirements.txt +++ b/backend/src/v2/test/requirements.txt @@ -2,7 +2,7 @@ -e ../../../../sdk/python # TODO(chensun): remove the deprecated dependencies once migrated tests. -r ../../../../sdk/python/requirements-deprecated.txt -ml-metadata==1.5.0 +ml-metadata==1.14.0 minio==7.0.4 google-cloud-storage fire diff --git a/backend/third_party_licenses/apiserver.csv b/backend/third_party_licenses/apiserver.csv index ef4893cbb1..643f932b04 100644 --- a/backend/third_party_licenses/apiserver.csv +++ b/backend/third_party_licenses/apiserver.csv @@ -61,7 +61,7 @@ github.com/klauspost/cpuid,https://github.com/klauspost/cpuid/blob/v1.3.1/LICENS github.com/klauspost/pgzip,https://github.com/klauspost/pgzip/blob/v1.2.5/LICENSE,MIT github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/758c91f76784/api/LICENSE,Apache-2.0 github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0 -github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e78ed557ddcb/third_party/ml-metadata/LICENSE,Apache-2.0 +github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e1f0c010f800/third_party/ml-metadata/LICENSE,Apache-2.0 github.com/lann/builder,https://github.com/lann/builder/blob/47ae307949d0/LICENSE,MIT github.com/lann/ps,https://github.com/lann/ps/blob/62de8c46ede0/LICENSE,MIT github.com/lestrrat-go/strftime,https://github.com/lestrrat-go/strftime/blob/v1.0.4/LICENSE,MIT diff --git a/frontend/README.md b/frontend/README.md index 6880f3e72e..7a3766f01d 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -176,8 +176,22 @@ If a file in [pipelines/third_party/ml-metadata/ml_metadata/proto](third_party/m * Add `protoc` ([download](https://github.com/protocolbuffers/protobuf/releases)) to your system PATH + + ```bash + # Example: + apt install -y protobuf-compiler=3.15.8 + ``` + * Add `protoc-gen-grpc-web` ([download](https://github.com/grpc/grpc-web/releases)) to your system PATH + + ```bash + # Example: + curl -LO https://github.com/grpc/grpc-web/releases/download/1.4.2/protoc-gen-grpc-web-1.4.2-linux-x86_64 + mv protoc-gen-grpc-web-1.4.2-linux-x86_64 /usr/local/bin/protoc-gen-grpc-web + chmod +x /usr/local/bin/protoc-gen-grpc-web + ``` + * Replace `metadata_store.proto` and `metadata_store_service.proto` proto files with target mlmd version by running ```bash 
diff --git a/frontend/src/mlmd/Utils.tsx b/frontend/src/mlmd/Utils.tsx index 683f162c14..334d7e1f77 100644 --- a/frontend/src/mlmd/Utils.tsx +++ b/frontend/src/mlmd/Utils.tsx @@ -116,4 +116,5 @@ export function getMetadataValue(value?: Value): string | number | Struct | unde case Value.ValueCase.VALUE_NOT_SET: return ''; } + return ''; } diff --git a/frontend/src/pages/ExecutionDetails.tsx b/frontend/src/pages/ExecutionDetails.tsx index de896b69ba..15bec58dd6 100644 --- a/frontend/src/pages/ExecutionDetails.tsx +++ b/frontend/src/pages/ExecutionDetails.tsx @@ -262,6 +262,7 @@ function parseEventsByType( [Event.Type.OUTPUT]: [], [Event.Type.INTERNAL_INPUT]: [], [Event.Type.INTERNAL_OUTPUT]: [], + [Event.Type.PENDING_OUTPUT]: [], }; if (!response) { diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.d.ts b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.d.ts index f109b9df43..e4ec769838 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.d.ts +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.d.ts @@ -1,5 +1,6 @@ import * as jspb from 'google-protobuf' +import * as google_protobuf_any_pb from 'google-protobuf/google/protobuf/any_pb'; import * as google_protobuf_struct_pb from 'google-protobuf/google/protobuf/struct_pb'; import * as google_protobuf_descriptor_pb from 'google-protobuf/google/protobuf/descriptor_pb'; @@ -37,6 +38,14 @@ export class Value extends jspb.Message { hasStructValue(): boolean; clearStructValue(): Value; + getProtoValue(): google_protobuf_any_pb.Any | undefined; + setProtoValue(value?: google_protobuf_any_pb.Any): Value; + hasProtoValue(): boolean; + clearProtoValue(): Value; + + getBoolValue(): boolean; + setBoolValue(value: boolean): Value; + getValueCase(): Value.ValueCase; serializeBinary(): Uint8Array; @@ -53,6 +62,8 @@ export namespace Value { doubleValue: number, stringValue: string, structValue?: google_protobuf_struct_pb.Struct.AsObject, + protoValue?: google_protobuf_any_pb.Any.AsObject, + boolValue: boolean, } export enum ValueCase { @@ -61,6 +72,8 @@ export namespace Value { DOUBLE_VALUE = 2, STRING_VALUE = 3, STRUCT_VALUE = 4, + PROTO_VALUE = 5, + BOOL_VALUE = 6, } } @@ -80,6 +93,9 @@ export class Artifact extends jspb.Message { getUri(): string; setUri(value: string): Artifact; + getExternalId(): string; + setExternalId(value: string): Artifact; + getPropertiesMap(): jspb.Map; clearPropertiesMap(): Artifact; @@ -95,6 +111,11 @@ export class Artifact extends jspb.Message { getLastUpdateTimeSinceEpoch(): number; setLastUpdateTimeSinceEpoch(value: number): Artifact; + getSystemMetadata(): google_protobuf_any_pb.Any | undefined; + setSystemMetadata(value?: google_protobuf_any_pb.Any): Artifact; + hasSystemMetadata(): boolean; + clearSystemMetadata(): Artifact; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): Artifact.AsObject; static toObject(includeInstance: boolean, msg: Artifact): Artifact.AsObject; @@ -110,11 +131,13 @@ export namespace Artifact { typeId: number, type: string, uri: string, + externalId: string, propertiesMap: Array<[string, Value.AsObject]>, customPropertiesMap: Array<[string, Value.AsObject]>, state: Artifact.State, createTimeSinceEpoch: number, lastUpdateTimeSinceEpoch: number, + systemMetadata?: google_protobuf_any_pb.Any.AsObject, } export enum State { @@ -123,6 +146,8 @@ export namespace Artifact { LIVE = 2, MARKED_FOR_DELETION = 3, DELETED = 4, + ABANDONED = 5, + REFERENCE = 6, } 
} @@ -139,6 +164,9 @@ export class ArtifactType extends jspb.Message { getDescription(): string; setDescription(value: string): ArtifactType; + getExternalId(): string; + setExternalId(value: string): ArtifactType; + getPropertiesMap(): jspb.Map; clearPropertiesMap(): ArtifactType; @@ -159,6 +187,7 @@ export namespace ArtifactType { name: string, version: string, description: string, + externalId: string, propertiesMap: Array<[string, PropertyType]>, baseType: ArtifactType.SystemDefinedBaseType, } @@ -190,6 +219,11 @@ export class Event extends jspb.Message { getMillisecondsSinceEpoch(): number; setMillisecondsSinceEpoch(value: number): Event; + getSystemMetadata(): google_protobuf_any_pb.Any | undefined; + setSystemMetadata(value?: google_protobuf_any_pb.Any): Event; + hasSystemMetadata(): boolean; + clearSystemMetadata(): Event; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): Event.AsObject; static toObject(includeInstance: boolean, msg: Event): Event.AsObject; @@ -205,6 +239,7 @@ export namespace Event { path?: Event.Path.AsObject, type: Event.Type, millisecondsSinceEpoch: number, + systemMetadata?: google_protobuf_any_pb.Any.AsObject, } export class Path extends jspb.Message { @@ -267,6 +302,7 @@ export namespace Event { OUTPUT = 4, INTERNAL_INPUT = 5, INTERNAL_OUTPUT = 6, + PENDING_OUTPUT = 7, } } @@ -283,6 +319,9 @@ export class Execution extends jspb.Message { getType(): string; setType(value: string): Execution; + getExternalId(): string; + setExternalId(value: string): Execution; + getLastKnownState(): Execution.State; setLastKnownState(value: Execution.State): Execution; @@ -298,6 +337,11 @@ export class Execution extends jspb.Message { getLastUpdateTimeSinceEpoch(): number; setLastUpdateTimeSinceEpoch(value: number): Execution; + getSystemMetadata(): google_protobuf_any_pb.Any | undefined; + setSystemMetadata(value?: google_protobuf_any_pb.Any): Execution; + hasSystemMetadata(): boolean; + clearSystemMetadata(): Execution; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): Execution.AsObject; static toObject(includeInstance: boolean, msg: Execution): Execution.AsObject; @@ -312,11 +356,13 @@ export namespace Execution { name: string, typeId: number, type: string, + externalId: string, lastKnownState: Execution.State, propertiesMap: Array<[string, Value.AsObject]>, customPropertiesMap: Array<[string, Value.AsObject]>, createTimeSinceEpoch: number, lastUpdateTimeSinceEpoch: number, + systemMetadata?: google_protobuf_any_pb.Any.AsObject, } export enum State { @@ -343,6 +389,9 @@ export class ExecutionType extends jspb.Message { getDescription(): string; setDescription(value: string): ExecutionType; + getExternalId(): string; + setExternalId(value: string): ExecutionType; + getPropertiesMap(): jspb.Map; clearPropertiesMap(): ExecutionType; @@ -373,6 +422,7 @@ export namespace ExecutionType { name: string, version: string, description: string, + externalId: string, propertiesMap: Array<[string, PropertyType]>, inputType?: ArtifactStructType.AsObject, outputType?: ArtifactStructType.AsObject, @@ -402,6 +452,9 @@ export class ContextType extends jspb.Message { getDescription(): string; setDescription(value: string): ContextType; + getExternalId(): string; + setExternalId(value: string): ContextType; + getPropertiesMap(): jspb.Map; clearPropertiesMap(): ContextType; @@ -422,6 +475,7 @@ export namespace ContextType { name: string, version: string, description: string, + externalId: string, propertiesMap: Array<[string, PropertyType]>, baseType: 
ContextType.SystemDefinedBaseType, } @@ -444,6 +498,9 @@ export class Context extends jspb.Message { getType(): string; setType(value: string): Context; + getExternalId(): string; + setExternalId(value: string): Context; + getPropertiesMap(): jspb.Map; clearPropertiesMap(): Context; @@ -456,6 +513,11 @@ export class Context extends jspb.Message { getLastUpdateTimeSinceEpoch(): number; setLastUpdateTimeSinceEpoch(value: number): Context; + getSystemMetadata(): google_protobuf_any_pb.Any | undefined; + setSystemMetadata(value?: google_protobuf_any_pb.Any): Context; + hasSystemMetadata(): boolean; + clearSystemMetadata(): Context; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): Context.AsObject; static toObject(includeInstance: boolean, msg: Context): Context.AsObject; @@ -470,10 +532,12 @@ export namespace Context { name: string, typeId: number, type: string, + externalId: string, propertiesMap: Array<[string, Value.AsObject]>, customPropertiesMap: Array<[string, Value.AsObject]>, createTimeSinceEpoch: number, lastUpdateTimeSinceEpoch: number, + systemMetadata?: google_protobuf_any_pb.Any.AsObject, } } @@ -953,6 +1017,93 @@ export namespace SqliteMetadataSourceConfig { } } +export class PostgreSQLDatabaseConfig extends jspb.Message { + getHost(): string; + setHost(value: string): PostgreSQLDatabaseConfig; + + getHostaddr(): string; + setHostaddr(value: string): PostgreSQLDatabaseConfig; + + getPort(): string; + setPort(value: string): PostgreSQLDatabaseConfig; + + getUser(): string; + setUser(value: string): PostgreSQLDatabaseConfig; + + getPassword(): string; + setPassword(value: string): PostgreSQLDatabaseConfig; + + getPassfile(): string; + setPassfile(value: string): PostgreSQLDatabaseConfig; + + getDbname(): string; + setDbname(value: string): PostgreSQLDatabaseConfig; + + getSkipDbCreation(): boolean; + setSkipDbCreation(value: boolean): PostgreSQLDatabaseConfig; + + getSsloption(): PostgreSQLDatabaseConfig.SSLOptions | undefined; + setSsloption(value?: PostgreSQLDatabaseConfig.SSLOptions): PostgreSQLDatabaseConfig; + hasSsloption(): boolean; + clearSsloption(): PostgreSQLDatabaseConfig; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): PostgreSQLDatabaseConfig.AsObject; + static toObject(includeInstance: boolean, msg: PostgreSQLDatabaseConfig): PostgreSQLDatabaseConfig.AsObject; + static serializeBinaryToWriter(message: PostgreSQLDatabaseConfig, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): PostgreSQLDatabaseConfig; + static deserializeBinaryFromReader(message: PostgreSQLDatabaseConfig, reader: jspb.BinaryReader): PostgreSQLDatabaseConfig; +} + +export namespace PostgreSQLDatabaseConfig { + export type AsObject = { + host: string, + hostaddr: string, + port: string, + user: string, + password: string, + passfile: string, + dbname: string, + skipDbCreation: boolean, + ssloption?: PostgreSQLDatabaseConfig.SSLOptions.AsObject, + } + + export class SSLOptions extends jspb.Message { + getSslmode(): string; + setSslmode(value: string): SSLOptions; + + getSslcert(): string; + setSslcert(value: string): SSLOptions; + + getSslkey(): string; + setSslkey(value: string): SSLOptions; + + getSslpassword(): string; + setSslpassword(value: string): SSLOptions; + + getSslrootcert(): string; + setSslrootcert(value: string): SSLOptions; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): SSLOptions.AsObject; + static toObject(includeInstance: boolean, msg: SSLOptions): SSLOptions.AsObject; + static 
serializeBinaryToWriter(message: SSLOptions, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): SSLOptions; + static deserializeBinaryFromReader(message: SSLOptions, reader: jspb.BinaryReader): SSLOptions; + } + + export namespace SSLOptions { + export type AsObject = { + sslmode: string, + sslcert: string, + sslkey: string, + sslpassword: string, + sslrootcert: string, + } + } + +} + export class MigrationOptions extends jspb.Message { getEnableUpgradeMigration(): boolean; setEnableUpgradeMigration(value: boolean): MigrationOptions; @@ -1009,6 +1160,11 @@ export class ConnectionConfig extends jspb.Message { hasSqlite(): boolean; clearSqlite(): ConnectionConfig; + getPostgresql(): PostgreSQLDatabaseConfig | undefined; + setPostgresql(value?: PostgreSQLDatabaseConfig): ConnectionConfig; + hasPostgresql(): boolean; + clearPostgresql(): ConnectionConfig; + getRetryOptions(): RetryOptions | undefined; setRetryOptions(value?: RetryOptions): ConnectionConfig; hasRetryOptions(): boolean; @@ -1029,6 +1185,7 @@ export namespace ConnectionConfig { fakeDatabase?: FakeDatabaseConfig.AsObject, mysql?: MySQLDatabaseConfig.AsObject, sqlite?: SqliteMetadataSourceConfig.AsObject, + postgresql?: PostgreSQLDatabaseConfig.AsObject, retryOptions?: RetryOptions.AsObject, } @@ -1037,6 +1194,7 @@ export namespace ConnectionConfig { FAKE_DATABASE = 1, MYSQL = 2, SQLITE = 3, + POSTGRESQL = 5, } } @@ -1287,6 +1445,9 @@ export namespace ListOperationNextPageToken { } export class TransactionOptions extends jspb.Message { + getTag(): string; + setTag(value: string): TransactionOptions; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): TransactionOptions.AsObject; static toObject(includeInstance: boolean, msg: TransactionOptions): TransactionOptions.AsObject; @@ -1297,6 +1458,7 @@ export class TransactionOptions extends jspb.Message { export namespace TransactionOptions { export type AsObject = { + tag: string, } } @@ -1311,6 +1473,9 @@ export class LineageGraphQueryOptions extends jspb.Message { hasStopConditions(): boolean; clearStopConditions(): LineageGraphQueryOptions; + getMaxNodeSize(): number; + setMaxNodeSize(value: number): LineageGraphQueryOptions; + getQueryNodesCase(): LineageGraphQueryOptions.QueryNodesCase; serializeBinary(): Uint8Array; @@ -1325,6 +1490,7 @@ export namespace LineageGraphQueryOptions { export type AsObject = { artifactsOptions?: ListOperationOptions.AsObject, stopConditions?: LineageGraphQueryOptions.BoundaryConstraint.AsObject, + maxNodeSize: number, } export class BoundaryConstraint extends jspb.Message { @@ -1360,10 +1526,80 @@ export namespace LineageGraphQueryOptions { } } +export class LineageSubgraphQueryOptions extends jspb.Message { + getStartingArtifacts(): LineageSubgraphQueryOptions.StartingNodes | undefined; + setStartingArtifacts(value?: LineageSubgraphQueryOptions.StartingNodes): LineageSubgraphQueryOptions; + hasStartingArtifacts(): boolean; + clearStartingArtifacts(): LineageSubgraphQueryOptions; + + getStartingExecutions(): LineageSubgraphQueryOptions.StartingNodes | undefined; + setStartingExecutions(value?: LineageSubgraphQueryOptions.StartingNodes): LineageSubgraphQueryOptions; + hasStartingExecutions(): boolean; + clearStartingExecutions(): LineageSubgraphQueryOptions; + + getMaxNumHops(): number; + setMaxNumHops(value: number): LineageSubgraphQueryOptions; + + getDirection(): LineageSubgraphQueryOptions.Direction; + setDirection(value: LineageSubgraphQueryOptions.Direction): LineageSubgraphQueryOptions; + + 
getStartingNodesCase(): LineageSubgraphQueryOptions.StartingNodesCase; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): LineageSubgraphQueryOptions.AsObject; + static toObject(includeInstance: boolean, msg: LineageSubgraphQueryOptions): LineageSubgraphQueryOptions.AsObject; + static serializeBinaryToWriter(message: LineageSubgraphQueryOptions, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): LineageSubgraphQueryOptions; + static deserializeBinaryFromReader(message: LineageSubgraphQueryOptions, reader: jspb.BinaryReader): LineageSubgraphQueryOptions; +} + +export namespace LineageSubgraphQueryOptions { + export type AsObject = { + startingArtifacts?: LineageSubgraphQueryOptions.StartingNodes.AsObject, + startingExecutions?: LineageSubgraphQueryOptions.StartingNodes.AsObject, + maxNumHops: number, + direction: LineageSubgraphQueryOptions.Direction, + } + + export class StartingNodes extends jspb.Message { + getFilterQuery(): string; + setFilterQuery(value: string): StartingNodes; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): StartingNodes.AsObject; + static toObject(includeInstance: boolean, msg: StartingNodes): StartingNodes.AsObject; + static serializeBinaryToWriter(message: StartingNodes, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): StartingNodes; + static deserializeBinaryFromReader(message: StartingNodes, reader: jspb.BinaryReader): StartingNodes; + } + + export namespace StartingNodes { + export type AsObject = { + filterQuery: string, + } + } + + + export enum Direction { + DIRECTION_UNSPECIFIED = 0, + UPSTREAM = 1, + DOWNSTREAM = 2, + BIDIRECTIONAL = 3, + } + + export enum StartingNodesCase { + STARTING_NODES_NOT_SET = 0, + STARTING_ARTIFACTS = 1, + STARTING_EXECUTIONS = 2, + } +} + export enum PropertyType { UNKNOWN = 0, INT = 1, DOUBLE = 2, STRING = 3, STRUCT = 4, + PROTO = 5, + BOOLEAN = 6, } diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.js b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.js index 996c817cd1..c3801981dc 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.js +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_pb.js @@ -15,6 +15,8 @@ var jspb = require('google-protobuf'); var goog = jspb; var global = Function('return this')(); +var google_protobuf_any_pb = require('google-protobuf/google/protobuf/any_pb.js'); +goog.object.extend(proto, google_protobuf_any_pb); var google_protobuf_struct_pb = require('google-protobuf/google/protobuf/struct_pb.js'); goog.object.extend(proto, google_protobuf_struct_pb); var google_protobuf_descriptor_pb = require('google-protobuf/google/protobuf/descriptor_pb.js'); @@ -50,6 +52,10 @@ goog.exportSymbol('proto.ml_metadata.LineageGraph', null, global); goog.exportSymbol('proto.ml_metadata.LineageGraphQueryOptions', null, global); goog.exportSymbol('proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint', null, global); goog.exportSymbol('proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase', null, global); +goog.exportSymbol('proto.ml_metadata.LineageSubgraphQueryOptions', null, global); +goog.exportSymbol('proto.ml_metadata.LineageSubgraphQueryOptions.Direction', null, global); +goog.exportSymbol('proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes', null, global); +goog.exportSymbol('proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodesCase', null, global); 
goog.exportSymbol('proto.ml_metadata.ListArtifactStructType', null, global); goog.exportSymbol('proto.ml_metadata.ListOperationNextPageToken', null, global); goog.exportSymbol('proto.ml_metadata.ListOperationOptions', null, global); @@ -64,6 +70,8 @@ goog.exportSymbol('proto.ml_metadata.MySQLDatabaseConfig', null, global); goog.exportSymbol('proto.ml_metadata.MySQLDatabaseConfig.SSLOptions', null, global); goog.exportSymbol('proto.ml_metadata.NoneArtifactStructType', null, global); goog.exportSymbol('proto.ml_metadata.ParentContext', null, global); +goog.exportSymbol('proto.ml_metadata.PostgreSQLDatabaseConfig', null, global); +goog.exportSymbol('proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions', null, global); goog.exportSymbol('proto.ml_metadata.PropertyType', null, global); goog.exportSymbol('proto.ml_metadata.RetryOptions', null, global); goog.exportSymbol('proto.ml_metadata.SqliteMetadataSourceConfig', null, global); @@ -642,6 +650,48 @@ if (goog.DEBUG && !COMPILED) { */ proto.ml_metadata.SqliteMetadataSourceConfig.displayName = 'proto.ml_metadata.SqliteMetadataSourceConfig'; } +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PostgreSQLDatabaseConfig = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.PostgreSQLDatabaseConfig, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PostgreSQLDatabaseConfig.displayName = 'proto.ml_metadata.PostgreSQLDatabaseConfig'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.displayName = 'proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions'; +} /** * Generated by JsPbCodeGenerator. * @param {Array=} opt_data Optional initial data array, typically from a @@ -884,7 +934,7 @@ if (goog.DEBUG && !COMPILED) { * @constructor */ proto.ml_metadata.TransactionOptions = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, 1, null, null); + jspb.Message.initialize(this, opt_data, 0, 2, null, null); }; goog.inherits(proto.ml_metadata.TransactionOptions, jspb.Message); if (goog.DEBUG && !COMPILED) { @@ -966,6 +1016,48 @@ if (goog.DEBUG && !COMPILED) { */ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.displayName = 'proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint'; } +/** + * Generated by JsPbCodeGenerator. 
+ * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.LineageSubgraphQueryOptions = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, proto.ml_metadata.LineageSubgraphQueryOptions.oneofGroups_); +}; +goog.inherits(proto.ml_metadata.LineageSubgraphQueryOptions, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.LineageSubgraphQueryOptions.displayName = 'proto.ml_metadata.LineageSubgraphQueryOptions'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.displayName = 'proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes'; +} @@ -1123,7 +1215,7 @@ proto.ml_metadata.SystemTypeExtension.prototype.hasTypeName = function() { * @private {!Array>} * @const */ -proto.ml_metadata.Value.oneofGroups_ = [[1,2,3,4]]; +proto.ml_metadata.Value.oneofGroups_ = [[1,2,3,4,5,6]]; /** * @enum {number} @@ -1133,7 +1225,9 @@ proto.ml_metadata.Value.ValueCase = { INT_VALUE: 1, DOUBLE_VALUE: 2, STRING_VALUE: 3, - STRUCT_VALUE: 4 + STRUCT_VALUE: 4, + PROTO_VALUE: 5, + BOOL_VALUE: 6 }; /** @@ -1177,7 +1271,9 @@ proto.ml_metadata.Value.toObject = function(includeInstance, msg) { intValue: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, doubleValue: (f = jspb.Message.getOptionalFloatingPointField(msg, 2)) == null ? undefined : f, stringValue: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - structValue: (f = msg.getStructValue()) && google_protobuf_struct_pb.Struct.toObject(includeInstance, f) + structValue: (f = msg.getStructValue()) && google_protobuf_struct_pb.Struct.toObject(includeInstance, f), + protoValue: (f = msg.getProtoValue()) && google_protobuf_any_pb.Any.toObject(includeInstance, f), + boolValue: (f = jspb.Message.getBooleanField(msg, 6)) == null ? 
undefined : f }; if (includeInstance) { @@ -1231,6 +1327,15 @@ proto.ml_metadata.Value.deserializeBinaryFromReader = function(msg, reader) { reader.readMessage(value,google_protobuf_struct_pb.Struct.deserializeBinaryFromReader); msg.setStructValue(value); break; + case 5: + var value = new google_protobuf_any_pb.Any; + reader.readMessage(value,google_protobuf_any_pb.Any.deserializeBinaryFromReader); + msg.setProtoValue(value); + break; + case 6: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setBoolValue(value); + break; default: reader.skipField(); break; @@ -1289,6 +1394,21 @@ proto.ml_metadata.Value.serializeBinaryToWriter = function(message, writer) { google_protobuf_struct_pb.Struct.serializeBinaryToWriter ); } + f = message.getProtoValue(); + if (f != null) { + writer.writeMessage( + 5, + f, + google_protobuf_any_pb.Any.serializeBinaryToWriter + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 6)); + if (f != null) { + writer.writeBool( + 6, + f + ); + } }; @@ -1437,6 +1557,79 @@ proto.ml_metadata.Value.prototype.hasStructValue = function() { }; +/** + * optional google.protobuf.Any proto_value = 5; + * @return {?proto.google.protobuf.Any} + */ +proto.ml_metadata.Value.prototype.getProtoValue = function() { + return /** @type{?proto.google.protobuf.Any} */ ( + jspb.Message.getWrapperField(this, google_protobuf_any_pb.Any, 5)); +}; + + +/** + * @param {?proto.google.protobuf.Any|undefined} value + * @return {!proto.ml_metadata.Value} returns this +*/ +proto.ml_metadata.Value.prototype.setProtoValue = function(value) { + return jspb.Message.setOneofWrapperField(this, 5, proto.ml_metadata.Value.oneofGroups_[0], value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.Value} returns this + */ +proto.ml_metadata.Value.prototype.clearProtoValue = function() { + return this.setProtoValue(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Value.prototype.hasProtoValue = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional bool bool_value = 6; + * @return {boolean} + */ +proto.ml_metadata.Value.prototype.getBoolValue = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 6, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.Value} returns this + */ +proto.ml_metadata.Value.prototype.setBoolValue = function(value) { + return jspb.Message.setOneofField(this, 6, proto.ml_metadata.Value.oneofGroups_[0], value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.Value} returns this + */ +proto.ml_metadata.Value.prototype.clearBoolValue = function() { + return jspb.Message.setOneofField(this, 6, proto.ml_metadata.Value.oneofGroups_[0], undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Value.prototype.hasBoolValue = function() { + return jspb.Message.getField(this, 6) != null; +}; + + @@ -1474,11 +1667,13 @@ proto.ml_metadata.Artifact.toObject = function(includeInstance, msg) { typeId: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, type: (f = jspb.Message.getField(msg, 8)) == null ? undefined : f, uri: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 11)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? 
f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], customPropertiesMap: (f = msg.getCustomPropertiesMap()) ? f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], state: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f, createTimeSinceEpoch: (f = jspb.Message.getField(msg, 9)) == null ? undefined : f, - lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 10)) == null ? undefined : f + lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 10)) == null ? undefined : f, + systemMetadata: (f = msg.getSystemMetadata()) && google_protobuf_any_pb.Any.toObject(includeInstance, f) }; if (includeInstance) { @@ -1535,6 +1730,10 @@ proto.ml_metadata.Artifact.deserializeBinaryFromReader = function(msg, reader) { var value = /** @type {string} */ (reader.readString()); msg.setUri(value); break; + case 11: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 4: var value = msg.getPropertiesMap(); reader.readMessage(value, function(message, reader) { @@ -1559,6 +1758,11 @@ proto.ml_metadata.Artifact.deserializeBinaryFromReader = function(msg, reader) { var value = /** @type {number} */ (reader.readInt64()); msg.setLastUpdateTimeSinceEpoch(value); break; + case 12: + var value = new google_protobuf_any_pb.Any; + reader.readMessage(value,google_protobuf_any_pb.Any.deserializeBinaryFromReader); + msg.setSystemMetadata(value); + break; default: reader.skipField(); break; @@ -1623,6 +1827,13 @@ proto.ml_metadata.Artifact.serializeBinaryToWriter = function(message, writer) { f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 11)); + if (f != null) { + writer.writeString( + 11, + f + ); + } f = message.getPropertiesMap(true); if (f && f.getLength() > 0) { f.serializeBinary(4, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.Value.serializeBinaryToWriter); @@ -1652,6 +1863,14 @@ proto.ml_metadata.Artifact.serializeBinaryToWriter = function(message, writer) { f ); } + f = message.getSystemMetadata(); + if (f != null) { + writer.writeMessage( + 12, + f, + google_protobuf_any_pb.Any.serializeBinaryToWriter + ); + } }; @@ -1663,7 +1882,9 @@ proto.ml_metadata.Artifact.State = { PENDING: 1, LIVE: 2, MARKED_FOR_DELETION: 3, - DELETED: 4 + DELETED: 4, + ABANDONED: 5, + REFERENCE: 6 }; /** @@ -1846,6 +2067,42 @@ proto.ml_metadata.Artifact.prototype.hasUri = function() { }; +/** + * optional string external_id = 11; + * @return {string} + */ +proto.ml_metadata.Artifact.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 11, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.Artifact} returns this + */ +proto.ml_metadata.Artifact.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 11, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.Artifact} returns this + */ +proto.ml_metadata.Artifact.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 11, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.Artifact.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 11) != null; +}; + + /** * map properties = 4; * @param {boolean=} opt_noLazyCreate Do not create the map if @@ -1998,6 +2255,43 @@ proto.ml_metadata.Artifact.prototype.hasLastUpdateTimeSinceEpoch = function() { }; +/** + * optional google.protobuf.Any system_metadata = 12; + * @return {?proto.google.protobuf.Any} + */ +proto.ml_metadata.Artifact.prototype.getSystemMetadata = function() { + return /** @type{?proto.google.protobuf.Any} */ ( + jspb.Message.getWrapperField(this, google_protobuf_any_pb.Any, 12)); +}; + + +/** + * @param {?proto.google.protobuf.Any|undefined} value + * @return {!proto.ml_metadata.Artifact} returns this +*/ +proto.ml_metadata.Artifact.prototype.setSystemMetadata = function(value) { + return jspb.Message.setWrapperField(this, 12, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.Artifact} returns this + */ +proto.ml_metadata.Artifact.prototype.clearSystemMetadata = function() { + return this.setSystemMetadata(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Artifact.prototype.hasSystemMetadata = function() { + return jspb.Message.getField(this, 12) != null; +}; + + @@ -2034,6 +2328,7 @@ proto.ml_metadata.ArtifactType.toObject = function(includeInstance, msg) { name: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, version: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f, description: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, undefined) : [], baseType: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f }; @@ -2088,6 +2383,10 @@ proto.ml_metadata.ArtifactType.deserializeBinaryFromReader = function(msg, reade var value = /** @type {string} */ (reader.readString()); msg.setDescription(value); break; + case 7: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 3: var value = msg.getPropertiesMap(); reader.readMessage(value, function(message, reader) { @@ -2155,6 +2454,13 @@ proto.ml_metadata.ArtifactType.serializeBinaryToWriter = function(message, write f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 7)); + if (f != null) { + writer.writeString( + 7, + f + ); + } f = message.getPropertiesMap(true); if (f && f.getLength() > 0) { f.serializeBinary(3, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeEnum); @@ -2324,6 +2630,42 @@ proto.ml_metadata.ArtifactType.prototype.hasDescription = function() { }; +/** + * optional string external_id = 7; + * @return {string} + */ +proto.ml_metadata.ArtifactType.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 7, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.ArtifactType} returns this + */ +proto.ml_metadata.ArtifactType.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 7, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ArtifactType} returns this + */ +proto.ml_metadata.ArtifactType.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 7, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.ArtifactType.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 7) != null; +}; + + /** * map properties = 3; * @param {boolean=} opt_noLazyCreate Do not create the map if @@ -2418,7 +2760,8 @@ proto.ml_metadata.Event.toObject = function(includeInstance, msg) { executionId: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, path: (f = msg.getPath()) && proto.ml_metadata.Event.Path.toObject(includeInstance, f), type: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f, - millisecondsSinceEpoch: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f + millisecondsSinceEpoch: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f, + systemMetadata: (f = msg.getSystemMetadata()) && google_protobuf_any_pb.Any.toObject(includeInstance, f) }; if (includeInstance) { @@ -2476,6 +2819,11 @@ proto.ml_metadata.Event.deserializeBinaryFromReader = function(msg, reader) { var value = /** @type {number} */ (reader.readInt64()); msg.setMillisecondsSinceEpoch(value); break; + case 6: + var value = new google_protobuf_any_pb.Any; + reader.readMessage(value,google_protobuf_any_pb.Any.deserializeBinaryFromReader); + msg.setSystemMetadata(value); + break; default: reader.skipField(); break; @@ -2541,10 +2889,18 @@ proto.ml_metadata.Event.serializeBinaryToWriter = function(message, writer) { f ); } -}; - - -/** + f = message.getSystemMetadata(); + if (f != null) { + writer.writeMessage( + 6, + f, + google_protobuf_any_pb.Any.serializeBinaryToWriter + ); + } +}; + + +/** * @enum {number} */ proto.ml_metadata.Event.Type = { @@ -2554,7 +2910,8 @@ proto.ml_metadata.Event.Type = { INPUT: 3, OUTPUT: 4, INTERNAL_INPUT: 5, - INTERNAL_OUTPUT: 6 + INTERNAL_OUTPUT: 6, + PENDING_OUTPUT: 7 }; @@ -3120,6 +3477,43 @@ proto.ml_metadata.Event.prototype.hasMillisecondsSinceEpoch = function() { }; +/** + * optional google.protobuf.Any system_metadata = 6; + * @return {?proto.google.protobuf.Any} + */ +proto.ml_metadata.Event.prototype.getSystemMetadata = function() { + return /** @type{?proto.google.protobuf.Any} */ ( + jspb.Message.getWrapperField(this, google_protobuf_any_pb.Any, 6)); +}; + + +/** + * @param {?proto.google.protobuf.Any|undefined} value + * @return {!proto.ml_metadata.Event} returns this +*/ +proto.ml_metadata.Event.prototype.setSystemMetadata = function(value) { + return jspb.Message.setWrapperField(this, 6, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.Event} returns this + */ +proto.ml_metadata.Event.prototype.clearSystemMetadata = function() { + return this.setSystemMetadata(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Event.prototype.hasSystemMetadata = function() { + return jspb.Message.getField(this, 6) != null; +}; + + @@ -3156,11 +3550,13 @@ proto.ml_metadata.Execution.toObject = function(includeInstance, msg) { name: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f, typeId: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, type: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 10)) == null ? undefined : f, lastKnownState: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], customPropertiesMap: (f = msg.getCustomPropertiesMap()) ? 
f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], createTimeSinceEpoch: (f = jspb.Message.getField(msg, 8)) == null ? undefined : f, - lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 9)) == null ? undefined : f + lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 9)) == null ? undefined : f, + systemMetadata: (f = msg.getSystemMetadata()) && google_protobuf_any_pb.Any.toObject(includeInstance, f) }; if (includeInstance) { @@ -3213,6 +3609,10 @@ proto.ml_metadata.Execution.deserializeBinaryFromReader = function(msg, reader) var value = /** @type {string} */ (reader.readString()); msg.setType(value); break; + case 10: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 3: var value = /** @type {!proto.ml_metadata.Execution.State} */ (reader.readEnum()); msg.setLastKnownState(value); @@ -3237,6 +3637,11 @@ proto.ml_metadata.Execution.deserializeBinaryFromReader = function(msg, reader) var value = /** @type {number} */ (reader.readInt64()); msg.setLastUpdateTimeSinceEpoch(value); break; + case 11: + var value = new google_protobuf_any_pb.Any; + reader.readMessage(value,google_protobuf_any_pb.Any.deserializeBinaryFromReader); + msg.setSystemMetadata(value); + break; default: reader.skipField(); break; @@ -3294,6 +3699,13 @@ proto.ml_metadata.Execution.serializeBinaryToWriter = function(message, writer) f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 10)); + if (f != null) { + writer.writeString( + 10, + f + ); + } f = /** @type {!proto.ml_metadata.Execution.State} */ (jspb.Message.getField(message, 3)); if (f != null) { writer.writeEnum( @@ -3323,6 +3735,14 @@ proto.ml_metadata.Execution.serializeBinaryToWriter = function(message, writer) f ); } + f = message.getSystemMetadata(); + if (f != null) { + writer.writeMessage( + 11, + f, + google_protobuf_any_pb.Any.serializeBinaryToWriter + ); + } }; @@ -3483,6 +3903,42 @@ proto.ml_metadata.Execution.prototype.hasType = function() { }; +/** + * optional string external_id = 10; + * @return {string} + */ +proto.ml_metadata.Execution.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 10, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.Execution} returns this + */ +proto.ml_metadata.Execution.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 10, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.Execution} returns this + */ +proto.ml_metadata.Execution.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 10, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.Execution.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 10) != null; +}; + + /** * optional State last_known_state = 3; * @return {!proto.ml_metadata.Execution.State} @@ -3635,6 +4091,43 @@ proto.ml_metadata.Execution.prototype.hasLastUpdateTimeSinceEpoch = function() { }; +/** + * optional google.protobuf.Any system_metadata = 11; + * @return {?proto.google.protobuf.Any} + */ +proto.ml_metadata.Execution.prototype.getSystemMetadata = function() { + return /** @type{?proto.google.protobuf.Any} */ ( + jspb.Message.getWrapperField(this, google_protobuf_any_pb.Any, 11)); +}; + + +/** + * @param {?proto.google.protobuf.Any|undefined} value + * @return {!proto.ml_metadata.Execution} returns this +*/ +proto.ml_metadata.Execution.prototype.setSystemMetadata = function(value) { + return jspb.Message.setWrapperField(this, 11, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.Execution} returns this + */ +proto.ml_metadata.Execution.prototype.clearSystemMetadata = function() { + return this.setSystemMetadata(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Execution.prototype.hasSystemMetadata = function() { + return jspb.Message.getField(this, 11) != null; +}; + + @@ -3671,6 +4164,7 @@ proto.ml_metadata.ExecutionType.toObject = function(includeInstance, msg) { name: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, version: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f, description: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 9)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, undefined) : [], inputType: (f = msg.getInputType()) && proto.ml_metadata.ArtifactStructType.toObject(includeInstance, f), outputType: (f = msg.getOutputType()) && proto.ml_metadata.ArtifactStructType.toObject(includeInstance, f), @@ -3727,6 +4221,10 @@ proto.ml_metadata.ExecutionType.deserializeBinaryFromReader = function(msg, read var value = /** @type {string} */ (reader.readString()); msg.setDescription(value); break; + case 9: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 3: var value = msg.getPropertiesMap(); reader.readMessage(value, function(message, reader) { @@ -3804,6 +4302,13 @@ proto.ml_metadata.ExecutionType.serializeBinaryToWriter = function(message, writ f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 9)); + if (f != null) { + writer.writeString( + 9, + f + ); + } f = message.getPropertiesMap(true); if (f && f.getLength() > 0) { f.serializeBinary(3, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeEnum); @@ -3990,6 +4495,42 @@ proto.ml_metadata.ExecutionType.prototype.hasDescription = function() { }; +/** + * optional string external_id = 9; + * @return {string} + */ +proto.ml_metadata.ExecutionType.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 9, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.ExecutionType} returns this + */ +proto.ml_metadata.ExecutionType.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 9, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.ExecutionType} returns this + */ +proto.ml_metadata.ExecutionType.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 9, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ExecutionType.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 9) != null; +}; + + /** * map properties = 3; * @param {boolean=} opt_noLazyCreate Do not create the map if @@ -4158,6 +4699,7 @@ proto.ml_metadata.ContextType.toObject = function(includeInstance, msg) { name: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, version: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f, description: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, undefined) : [], baseType: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f }; @@ -4212,6 +4754,10 @@ proto.ml_metadata.ContextType.deserializeBinaryFromReader = function(msg, reader var value = /** @type {string} */ (reader.readString()); msg.setDescription(value); break; + case 7: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 3: var value = msg.getPropertiesMap(); reader.readMessage(value, function(message, reader) { @@ -4279,6 +4825,13 @@ proto.ml_metadata.ContextType.serializeBinaryToWriter = function(message, writer f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 7)); + if (f != null) { + writer.writeString( + 7, + f + ); + } f = message.getPropertiesMap(true); if (f && f.getLength() > 0) { f.serializeBinary(3, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeEnum); @@ -4444,6 +4997,42 @@ proto.ml_metadata.ContextType.prototype.hasDescription = function() { }; +/** + * optional string external_id = 7; + * @return {string} + */ +proto.ml_metadata.ContextType.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 7, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.ContextType} returns this + */ +proto.ml_metadata.ContextType.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 7, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ContextType} returns this + */ +proto.ml_metadata.ContextType.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 7, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ContextType.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 7) != null; +}; + + /** * map properties = 3; * @param {boolean=} opt_noLazyCreate Do not create the map if @@ -4538,10 +5127,12 @@ proto.ml_metadata.Context.toObject = function(includeInstance, msg) { name: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, typeId: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, type: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f, + externalId: (f = jspb.Message.getField(msg, 9)) == null ? undefined : f, propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], customPropertiesMap: (f = msg.getCustomPropertiesMap()) ? 
f.toObject(includeInstance, proto.ml_metadata.Value.toObject) : [], createTimeSinceEpoch: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, - lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 8)) == null ? undefined : f + lastUpdateTimeSinceEpoch: (f = jspb.Message.getField(msg, 8)) == null ? undefined : f, + systemMetadata: (f = msg.getSystemMetadata()) && google_protobuf_any_pb.Any.toObject(includeInstance, f) }; if (includeInstance) { @@ -4594,6 +5185,10 @@ proto.ml_metadata.Context.deserializeBinaryFromReader = function(msg, reader) { var value = /** @type {string} */ (reader.readString()); msg.setType(value); break; + case 9: + var value = /** @type {string} */ (reader.readString()); + msg.setExternalId(value); + break; case 4: var value = msg.getPropertiesMap(); reader.readMessage(value, function(message, reader) { @@ -4614,6 +5209,11 @@ proto.ml_metadata.Context.deserializeBinaryFromReader = function(msg, reader) { var value = /** @type {number} */ (reader.readInt64()); msg.setLastUpdateTimeSinceEpoch(value); break; + case 10: + var value = new google_protobuf_any_pb.Any; + reader.readMessage(value,google_protobuf_any_pb.Any.deserializeBinaryFromReader); + msg.setSystemMetadata(value); + break; default: reader.skipField(); break; @@ -4671,6 +5271,13 @@ proto.ml_metadata.Context.serializeBinaryToWriter = function(message, writer) { f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 9)); + if (f != null) { + writer.writeString( + 9, + f + ); + } f = message.getPropertiesMap(true); if (f && f.getLength() > 0) { f.serializeBinary(4, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.Value.serializeBinaryToWriter); @@ -4693,6 +5300,14 @@ proto.ml_metadata.Context.serializeBinaryToWriter = function(message, writer) { f ); } + f = message.getSystemMetadata(); + if (f != null) { + writer.writeMessage( + 10, + f, + google_protobuf_any_pb.Any.serializeBinaryToWriter + ); + } }; @@ -4840,6 +5455,42 @@ proto.ml_metadata.Context.prototype.hasType = function() { }; +/** + * optional string external_id = 9; + * @return {string} + */ +proto.ml_metadata.Context.prototype.getExternalId = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 9, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.Context} returns this + */ +proto.ml_metadata.Context.prototype.setExternalId = function(value) { + return jspb.Message.setField(this, 9, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.Context} returns this + */ +proto.ml_metadata.Context.prototype.clearExternalId = function() { + return jspb.Message.setField(this, 9, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.Context.prototype.hasExternalId = function() { + return jspb.Message.getField(this, 9) != null; +}; + + /** * map properties = 4; * @param {boolean=} opt_noLazyCreate Do not create the map if @@ -4956,24 +5607,61 @@ proto.ml_metadata.Context.prototype.hasLastUpdateTimeSinceEpoch = function() { }; +/** + * optional google.protobuf.Any system_metadata = 10; + * @return {?proto.google.protobuf.Any} + */ +proto.ml_metadata.Context.prototype.getSystemMetadata = function() { + return /** @type{?proto.google.protobuf.Any} */ ( + jspb.Message.getWrapperField(this, google_protobuf_any_pb.Any, 10)); +}; + +/** + * @param {?proto.google.protobuf.Any|undefined} value + * @return {!proto.ml_metadata.Context} returns this +*/ +proto.ml_metadata.Context.prototype.setSystemMetadata = function(value) { + return jspb.Message.setWrapperField(this, 10, value); +}; -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.Context} returns this */ -proto.ml_metadata.Attribution.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.Attribution.toObject(opt_includeInstance, this); +proto.ml_metadata.Context.prototype.clearSystemMetadata = function() { + return this.setSystemMetadata(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.Context.prototype.hasSystemMetadata = function() { + return jspb.Message.getField(this, 10) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.Attribution.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.Attribution.toObject(opt_includeInstance, this); }; @@ -7915,7 +8603,7 @@ proto.ml_metadata.MySQLDatabaseConfig.deserializeBinaryFromReader = function(msg msg.setHost(value); break; case 2: - var value = /** @type {number} */ (reader.readUint32()); + var value = /** @type {number} */ (reader.readInt64()); msg.setPort(value); break; case 3: @@ -7981,7 +8669,7 @@ proto.ml_metadata.MySQLDatabaseConfig.serializeBinaryToWriter = function(message } f = /** @type {number} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeUint32( + writer.writeInt64( 2, f ); @@ -8457,7 +9145,7 @@ proto.ml_metadata.MySQLDatabaseConfig.prototype.hasHost = function() { /** - * optional uint32 port = 2; + * optional int64 port = 2; * @return {number} */ proto.ml_metadata.MySQLDatabaseConfig.prototype.getPort = function() { @@ -8931,8 +9619,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.MigrationOptions.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.MigrationOptions.toObject(opt_includeInstance, this); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PostgreSQLDatabaseConfig.toObject(opt_includeInstance, this); }; @@ -8941,14 +9629,21 @@ proto.ml_metadata.MigrationOptions.prototype.toObject = function(opt_includeInst * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.MigrationOptions} msg The msg instance to transform. + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MigrationOptions.toObject = function(includeInstance, msg) { +proto.ml_metadata.PostgreSQLDatabaseConfig.toObject = function(includeInstance, msg) { var f, obj = { - enableUpgradeMigration: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, - downgradeToSchemaVersion: jspb.Message.getFieldWithDefault(msg, 2, -1) + host: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + hostaddr: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + port: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + user: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f, + password: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f, + passfile: (f = jspb.Message.getField(msg, 6)) == null ? undefined : f, + dbname: (f = jspb.Message.getField(msg, 7)) == null ? undefined : f, + skipDbCreation: (f = jspb.Message.getBooleanField(msg, 8)) == null ? undefined : f, + ssloption: (f = msg.getSsloption()) && proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -8962,36 +9657,65 @@ proto.ml_metadata.MigrationOptions.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.MigrationOptions} + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} */ -proto.ml_metadata.MigrationOptions.deserializeBinary = function(bytes) { +proto.ml_metadata.PostgreSQLDatabaseConfig.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.MigrationOptions; - return proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PostgreSQLDatabaseConfig; + return proto.ml_metadata.PostgreSQLDatabaseConfig.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.MigrationOptions} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.MigrationOptions} + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} */ -proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PostgreSQLDatabaseConfig.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { + case 1: + var value = /** @type {string} */ (reader.readString()); + msg.setHost(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setHostaddr(value); + break; case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setPort(value); + break; + case 4: + var value = /** @type {string} */ (reader.readString()); + msg.setUser(value); + break; + case 5: + var value = /** @type {string} */ (reader.readString()); + msg.setPassword(value); + break; + case 6: + var value = /** @type {string} */ (reader.readString()); + msg.setPassfile(value); + break; + case 7: + var value = /** @type {string} */ (reader.readString()); + msg.setDbname(value); + break; + case 8: var value = /** @type {boolean} */ (reader.readBool()); - msg.setEnableUpgradeMigration(value); + msg.setSkipDbCreation(value); break; - case 2: - var value = /** @type {number} */ (reader.readInt64()); - msg.setDowngradeToSchemaVersion(value); + case 9: + var value = new proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions; + reader.readMessage(value,proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.deserializeBinaryFromReader); + msg.setSsloption(value); break; default: reader.skipField(); @@ -9006,9 +9730,9 @@ proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader = function(msg, r * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.MigrationOptions.prototype.serializeBinary = function() { +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.MigrationOptions.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PostgreSQLDatabaseConfig.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9016,98 +9740,76 @@ proto.ml_metadata.MigrationOptions.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.MigrationOptions} message + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MigrationOptions.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PostgreSQLDatabaseConfig.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeBool( - 3, + writer.writeString( + 1, f ); } - f = /** @type {number} */ (jspb.Message.getField(message, 2)); + f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeInt64( + writer.writeString( 2, f ); } -}; - - -/** - * optional bool enable_upgrade_migration = 3; - * @return {boolean} - */ -proto.ml_metadata.MigrationOptions.prototype.getEnableUpgradeMigration = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); -}; - - -/** - * @param {boolean} value - * @return {!proto.ml_metadata.MigrationOptions} returns this - */ -proto.ml_metadata.MigrationOptions.prototype.setEnableUpgradeMigration = function(value) { - return jspb.Message.setField(this, 3, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MigrationOptions} returns this - */ -proto.ml_metadata.MigrationOptions.prototype.clearEnableUpgradeMigration = function() { - return jspb.Message.setField(this, 3, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.MigrationOptions.prototype.hasEnableUpgradeMigration = function() { - return jspb.Message.getField(this, 3) != null; -}; - - -/** - * optional int64 downgrade_to_schema_version = 2; - * @return {number} - */ -proto.ml_metadata.MigrationOptions.prototype.getDowngradeToSchemaVersion = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, -1)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.MigrationOptions} returns this - */ -proto.ml_metadata.MigrationOptions.prototype.setDowngradeToSchemaVersion = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MigrationOptions} returns this - */ -proto.ml_metadata.MigrationOptions.prototype.clearDowngradeToSchemaVersion = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.MigrationOptions.prototype.hasDowngradeToSchemaVersion = function() { - return jspb.Message.getField(this, 2) != null; + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeString( + 4, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeString( + 5, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 6)); + if (f != null) { + writer.writeString( + 6, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 7)); + if (f != null) { + writer.writeString( + 7, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 8)); + if (f != null) { + writer.writeBool( + 8, + f + ); + } + f = message.getSsloption(); + if (f != null) { + writer.writeMessage( + 9, + f, + proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.serializeBinaryToWriter + ); + } }; @@ -9127,8 +9829,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.RetryOptions.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.RetryOptions.toObject(opt_includeInstance, this); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.toObject(opt_includeInstance, this); }; @@ -9137,13 +9839,17 @@ proto.ml_metadata.RetryOptions.prototype.toObject = function(opt_includeInstance * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.RetryOptions} msg The msg instance to transform. + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.RetryOptions.toObject = function(includeInstance, msg) { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.toObject = function(includeInstance, msg) { var f, obj = { - maxNumRetries: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + sslmode: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + sslcert: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + sslkey: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + sslpassword: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f, + sslrootcert: (f = jspb.Message.getField(msg, 5)) == null ? undefined : f }; if (includeInstance) { @@ -9157,23 +9863,23 @@ proto.ml_metadata.RetryOptions.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.RetryOptions} + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} */ -proto.ml_metadata.RetryOptions.deserializeBinary = function(bytes) { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.RetryOptions; - return proto.ml_metadata.RetryOptions.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions; + return proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.RetryOptions} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.RetryOptions} + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} */ -proto.ml_metadata.RetryOptions.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -9181,8 +9887,24 @@ proto.ml_metadata.RetryOptions.deserializeBinaryFromReader = function(msg, reade var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setMaxNumRetries(value); + var value = /** @type {string} */ (reader.readString()); + msg.setSslmode(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setSslcert(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setSslkey(value); + break; + case 4: + var value = /** @type {string} */ (reader.readString()); + msg.setSslpassword(value); + break; + case 5: + var value = /** @type {string} */ (reader.readString()); + msg.setSslrootcert(value); break; default: reader.skipField(); @@ -9197,9 +9919,9 @@ proto.ml_metadata.RetryOptions.deserializeBinaryFromReader = function(msg, reade * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.RetryOptions.prototype.serializeBinary = function() { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.RetryOptions.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9207,45 +9929,73 @@ proto.ml_metadata.RetryOptions.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.RetryOptions} message + * @param {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.RetryOptions.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeInt64( + writer.writeString( 1, f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeString( + 4, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeString( + 5, + f + ); + } }; /** - * optional int64 max_num_retries = 1; - * @return {number} - */ -proto.ml_metadata.RetryOptions.prototype.getMaxNumRetries = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.RetryOptions} returns this + * optional string sslmode = 1; + * @return {string} */ -proto.ml_metadata.RetryOptions.prototype.setMaxNumRetries = function(value) { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.getSslmode = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.setSslmode = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.RetryOptions} returns this + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.RetryOptions.prototype.clearMaxNumRetries = function() { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.clearSslmode = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -9254,219 +10004,107 @@ proto.ml_metadata.RetryOptions.prototype.clearMaxNumRetries = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.RetryOptions.prototype.hasMaxNumRetries = function() { +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.hasSslmode = function() { return jspb.Message.getField(this, 1) != null; }; - -/** - * Oneof group definitions for this message. Each group defines the field - * numbers belonging to that group. When of these fields' value is set, all - * other fields in the group are cleared. During deserialization, if multiple - * fields are encountered for a group, only the last value seen will be kept. 
- * @private {!Array>} - * @const - */ -proto.ml_metadata.ConnectionConfig.oneofGroups_ = [[1,2,3]]; - /** - * @enum {number} + * optional string sslcert = 2; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.ConfigCase = { - CONFIG_NOT_SET: 0, - FAKE_DATABASE: 1, - MYSQL: 2, - SQLITE: 3 +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.getSslcert = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; + /** - * @return {proto.ml_metadata.ConnectionConfig.ConfigCase} + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.getConfigCase = function() { - return /** @type {proto.ml_metadata.ConnectionConfig.ConfigCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.ConnectionConfig.oneofGroups_[0])); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.setSslcert = function(value) { + return jspb.Message.setField(this, 2, value); }; - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ConnectionConfig.toObject(opt_includeInstance, this); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.clearSslcert = function() { + return jspb.Message.setField(this, 2, undefined); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ConnectionConfig} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.ConnectionConfig.toObject = function(includeInstance, msg) { - var f, obj = { - fakeDatabase: (f = msg.getFakeDatabase()) && proto.ml_metadata.FakeDatabaseConfig.toObject(includeInstance, f), - mysql: (f = msg.getMysql()) && proto.ml_metadata.MySQLDatabaseConfig.toObject(includeInstance, f), - sqlite: (f = msg.getSqlite()) && proto.ml_metadata.SqliteMetadataSourceConfig.toObject(includeInstance, f), - retryOptions: (f = msg.getRetryOptions()) && proto.ml_metadata.RetryOptions.toObject(includeInstance, f) - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.hasSslcert = function() { + return jspb.Message.getField(this, 2) != null; }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.ConnectionConfig} + * optional string sslkey = 3; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ConnectionConfig; - return proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.getSslkey = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.ConnectionConfig} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.ConnectionConfig} + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new proto.ml_metadata.FakeDatabaseConfig; - reader.readMessage(value,proto.ml_metadata.FakeDatabaseConfig.deserializeBinaryFromReader); - msg.setFakeDatabase(value); - break; - case 2: - var value = new proto.ml_metadata.MySQLDatabaseConfig; - reader.readMessage(value,proto.ml_metadata.MySQLDatabaseConfig.deserializeBinaryFromReader); - msg.setMysql(value); - break; - case 3: - var value = new proto.ml_metadata.SqliteMetadataSourceConfig; - reader.readMessage(value,proto.ml_metadata.SqliteMetadataSourceConfig.deserializeBinaryFromReader); - msg.setSqlite(value); - break; - case 4: - var value = new proto.ml_metadata.RetryOptions; - reader.readMessage(value,proto.ml_metadata.RetryOptions.deserializeBinaryFromReader); - msg.setRetryOptions(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.setSslkey = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.clearSslkey = function() { + return jspb.Message.setField(this, 3, undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.ConnectionConfig} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getFakeDatabase(); - if (f != null) { - writer.writeMessage( - 1, - f, - proto.ml_metadata.FakeDatabaseConfig.serializeBinaryToWriter - ); - } - f = message.getMysql(); - if (f != null) { - writer.writeMessage( - 2, - f, - proto.ml_metadata.MySQLDatabaseConfig.serializeBinaryToWriter - ); - } - f = message.getSqlite(); - if (f != null) { - writer.writeMessage( - 3, - f, - proto.ml_metadata.SqliteMetadataSourceConfig.serializeBinaryToWriter - ); - } - f = message.getRetryOptions(); - if (f != null) { - writer.writeMessage( - 4, - f, - proto.ml_metadata.RetryOptions.serializeBinaryToWriter - ); - } +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.hasSslkey = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional FakeDatabaseConfig fake_database = 1; - * @return {?proto.ml_metadata.FakeDatabaseConfig} + * optional string sslpassword = 4; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.prototype.getFakeDatabase = function() { - return /** @type{?proto.ml_metadata.FakeDatabaseConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.FakeDatabaseConfig, 1)); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.getSslpassword = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 4, "")); }; /** - * @param {?proto.ml_metadata.FakeDatabaseConfig|undefined} value - * @return {!proto.ml_metadata.ConnectionConfig} returns this -*/ -proto.ml_metadata.ConnectionConfig.prototype.setFakeDatabase = function(value) { - return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.setSslpassword = function(value) { + return jspb.Message.setField(this, 4, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ConnectionConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.clearFakeDatabase = function() { - return this.setFakeDatabase(undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.clearSslpassword = function() { + return jspb.Message.setField(this, 4, undefined); }; @@ -9474,36 +10112,35 @@ proto.ml_metadata.ConnectionConfig.prototype.clearFakeDatabase = function() { * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ConnectionConfig.prototype.hasFakeDatabase = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.hasSslpassword = function() { + return jspb.Message.getField(this, 4) != null; }; /** - * optional MySQLDatabaseConfig mysql = 2; - * @return {?proto.ml_metadata.MySQLDatabaseConfig} + * optional string sslrootcert = 5; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.prototype.getMysql = function() { - return /** @type{?proto.ml_metadata.MySQLDatabaseConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.MySQLDatabaseConfig, 2)); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.getSslrootcert = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 5, "")); }; /** - * @param {?proto.ml_metadata.MySQLDatabaseConfig|undefined} value - * @return {!proto.ml_metadata.ConnectionConfig} returns this -*/ -proto.ml_metadata.ConnectionConfig.prototype.setMysql = function(value) { - return jspb.Message.setOneofWrapperField(this, 2, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.setSslrootcert = function(value) { + return jspb.Message.setField(this, 5, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ConnectionConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.clearMysql = function() { - return this.setMysql(undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.clearSslrootcert = function() { + return jspb.Message.setField(this, 5, undefined); }; @@ -9511,36 +10148,35 @@ proto.ml_metadata.ConnectionConfig.prototype.clearMysql = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ConnectionConfig.prototype.hasMysql = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions.prototype.hasSslrootcert = function() { + return jspb.Message.getField(this, 5) != null; }; /** - * optional SqliteMetadataSourceConfig sqlite = 3; - * @return {?proto.ml_metadata.SqliteMetadataSourceConfig} + * optional string host = 1; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.prototype.getSqlite = function() { - return /** @type{?proto.ml_metadata.SqliteMetadataSourceConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.SqliteMetadataSourceConfig, 3)); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getHost = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {?proto.ml_metadata.SqliteMetadataSourceConfig|undefined} value - * @return {!proto.ml_metadata.ConnectionConfig} returns this -*/ -proto.ml_metadata.ConnectionConfig.prototype.setSqlite = function(value) { - return jspb.Message.setOneofWrapperField(this, 3, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setHost = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.ConnectionConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.clearSqlite = function() { - return this.setSqlite(undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearHost = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -9548,36 +10184,35 @@ proto.ml_metadata.ConnectionConfig.prototype.clearSqlite = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ConnectionConfig.prototype.hasSqlite = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasHost = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional RetryOptions retry_options = 4; - * @return {?proto.ml_metadata.RetryOptions} + * optional string hostaddr = 2; + * @return {string} */ -proto.ml_metadata.ConnectionConfig.prototype.getRetryOptions = function() { - return /** @type{?proto.ml_metadata.RetryOptions} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.RetryOptions, 4)); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getHostaddr = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {?proto.ml_metadata.RetryOptions|undefined} value - * @return {!proto.ml_metadata.ConnectionConfig} returns this -*/ -proto.ml_metadata.ConnectionConfig.prototype.setRetryOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setHostaddr = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ConnectionConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.ConnectionConfig.prototype.clearRetryOptions = function() { - return this.setRetryOptions(undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearHostaddr = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -9585,159 +10220,107 @@ proto.ml_metadata.ConnectionConfig.prototype.clearRetryOptions = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ConnectionConfig.prototype.hasRetryOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasHostaddr = function() { + return jspb.Message.getField(this, 2) != null; }; +/** + * optional string port = 3; + * @return {string} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getPort = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +}; - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GrpcChannelArguments.toObject(opt_includeInstance, this); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setPort = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GrpcChannelArguments} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.toObject = function(includeInstance, msg) { - var f, obj = { - maxReceiveMessageLength: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - http2MaxPingStrikes: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f - }; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearPort = function() { + return jspb.Message.setField(this, 3, undefined); +}; - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasPort = function() { + return jspb.Message.getField(this, 3) != null; }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GrpcChannelArguments} + * optional string user = 4; + * @return {string} */ -proto.ml_metadata.GrpcChannelArguments.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GrpcChannelArguments; - return proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getUser = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 4, "")); }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.GrpcChannelArguments} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GrpcChannelArguments} + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setMaxReceiveMessageLength(value); - break; - case 2: - var value = /** @type {number} */ (reader.readInt64()); - msg.setHttp2MaxPingStrikes(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setUser = function(value) { + return jspb.Message.setField(this, 4, value); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearUser = function() { + return jspb.Message.setField(this, 4, undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GrpcChannelArguments} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } - f = /** @type {number} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeInt64( - 2, - f - ); - } +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasUser = function() { + return jspb.Message.getField(this, 4) != null; }; /** - * optional int64 max_receive_message_length = 1; - * @return {number} + * optional string password = 5; + * @return {string} */ -proto.ml_metadata.GrpcChannelArguments.prototype.getMaxReceiveMessageLength = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getPassword = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 5, "")); }; /** - * @param {number} value - * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.setMaxReceiveMessageLength = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setPassword = function(value) { + return jspb.Message.setField(this, 5, value); }; /** * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.clearMaxReceiveMessageLength = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearPassword = function() { + return jspb.Message.setField(this, 5, undefined); }; @@ -9745,35 +10328,35 @@ proto.ml_metadata.GrpcChannelArguments.prototype.clearMaxReceiveMessageLength = * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GrpcChannelArguments.prototype.hasMaxReceiveMessageLength = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasPassword = function() { + return jspb.Message.getField(this, 5) != null; }; /** - * optional int64 http2_max_ping_strikes = 2; - * @return {number} + * optional string passfile = 6; + * @return {string} */ -proto.ml_metadata.GrpcChannelArguments.prototype.getHttp2MaxPingStrikes = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getPassfile = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 6, "")); }; /** - * @param {number} value - * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.setHttp2MaxPingStrikes = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setPassfile = function(value) { + return jspb.Message.setField(this, 6, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.GrpcChannelArguments.prototype.clearHttp2MaxPingStrikes = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearPassfile = function() { + return jspb.Message.setField(this, 6, undefined); }; @@ -9781,172 +10364,117 @@ proto.ml_metadata.GrpcChannelArguments.prototype.clearHttp2MaxPingStrikes = func * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GrpcChannelArguments.prototype.hasHttp2MaxPingStrikes = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasPassfile = function() { + return jspb.Message.getField(this, 6) != null; }; +/** + * optional string dbname = 7; + * @return {string} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getDbname = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 7, "")); +}; - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * @param {string} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.MetadataStoreClientConfig.toObject(opt_includeInstance, this); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setDbname = function(value) { + return jspb.Message.setField(this, 7, value); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.MetadataStoreClientConfig} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.toObject = function(includeInstance, msg) { - var f, obj = { - host: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - port: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - sslConfig: (f = msg.getSslConfig()) && proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject(includeInstance, f), - channelArguments: (f = msg.getChannelArguments()) && proto.ml_metadata.GrpcChannelArguments.toObject(includeInstance, f), - clientTimeoutSec: (f = jspb.Message.getOptionalFloatingPointField(msg, 5)) == null ? undefined : f - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearDbname = function() { + return jspb.Message.setField(this, 7, undefined); }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.MetadataStoreClientConfig; - return proto.ml_metadata.MetadataStoreClientConfig.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasDbname = function() { + return jspb.Message.getField(this, 7) != null; }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.MetadataStoreClientConfig} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.MetadataStoreClientConfig} + * optional bool skip_db_creation = 8; + * @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setHost(value); - break; - case 2: - var value = /** @type {number} */ (reader.readUint32()); - msg.setPort(value); - break; - case 3: - var value = new proto.ml_metadata.MetadataStoreClientConfig.SSLConfig; - reader.readMessage(value,proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader); - msg.setSslConfig(value); - break; - case 4: - var value = new proto.ml_metadata.GrpcChannelArguments; - reader.readMessage(value,proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader); - msg.setChannelArguments(value); - break; - case 5: - var value = /** @type {number} */ (reader.readDouble()); - msg.setClientTimeoutSec(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getSkipDbCreation = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 8, false)); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * @param {boolean} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.MetadataStoreClientConfig.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setSkipDbCreation = function(value) { + return jspb.Message.setField(this, 8, value); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.MetadataStoreClientConfig} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( - 1, - f - ); - } - f = /** @type {number} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeUint32( - 2, - f - ); - } - f = message.getSslConfig(); - if (f != null) { - writer.writeMessage( - 3, - f, - proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter - ); - } - f = message.getChannelArguments(); - if (f != null) { - writer.writeMessage( - 4, - f, - proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter - ); - } - f = /** @type {number} */ (jspb.Message.getField(message, 5)); - if (f != null) { - writer.writeDouble( - 5, - f - ); - } +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearSkipDbCreation = function() { + return jspb.Message.setField(this, 8, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasSkipDbCreation = function() { + return jspb.Message.getField(this, 8) != null; +}; + + +/** + * optional SSLOptions ssloption = 9; + * @return {?proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.getSsloption = function() { + return /** @type{?proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions, 9)); +}; + + +/** + * @param {?proto.ml_metadata.PostgreSQLDatabaseConfig.SSLOptions|undefined} value + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this +*/ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.setSsloption = function(value) { + return jspb.Message.setWrapperField(this, 9, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PostgreSQLDatabaseConfig} returns this + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.clearSsloption = function() { + return this.setSsloption(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PostgreSQLDatabaseConfig.prototype.hasSsloption = function() { + return jspb.Message.getField(this, 9) != null; }; @@ -9966,8 +10494,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject(opt_includeInstance, this); +proto.ml_metadata.MigrationOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.MigrationOptions.toObject(opt_includeInstance, this); }; @@ -9976,15 +10504,14 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.toObject = funct * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} msg The msg instance to transform. + * @param {!proto.ml_metadata.MigrationOptions} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject = function(includeInstance, msg) { +proto.ml_metadata.MigrationOptions.toObject = function(includeInstance, msg) { var f, obj = { - clientKey: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - serverCert: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - customCa: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f + enableUpgradeMigration: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, + downgradeToSchemaVersion: jspb.Message.getFieldWithDefault(msg, 2, -1) }; if (includeInstance) { @@ -9998,40 +10525,36 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject = function(includ /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} + * @return {!proto.ml_metadata.MigrationOptions} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinary = function(bytes) { +proto.ml_metadata.MigrationOptions.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.MetadataStoreClientConfig.SSLConfig; - return proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.MigrationOptions; + return proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} msg The message object to deserialize into. + * @param {!proto.ml_metadata.MigrationOptions} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} + * @return {!proto.ml_metadata.MigrationOptions} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setClientKey(value); + case 3: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setEnableUpgradeMigration(value); break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setServerCert(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setCustomCa(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setDowngradeToSchemaVersion(value); break; default: reader.skipField(); @@ -10046,9 +10569,9 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReade * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.serializeBinary = function() { +proto.ml_metadata.MigrationOptions.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter(this, writer); + proto.ml_metadata.MigrationOptions.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -10056,60 +10579,53 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.serializeBinary /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} message + * @param {!proto.ml_metadata.MigrationOptions} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.MigrationOptions.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); + f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); if (f != null) { - writer.writeString( - 1, + writer.writeBool( + 3, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); + f = /** @type {number} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeString( + writer.writeInt64( 2, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } }; /** - * optional string client_key = 1; - * @return {string} + * optional bool enable_upgrade_migration = 3; + * @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getClientKey = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.MigrationOptions.prototype.getEnableUpgradeMigration = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this + * @param {boolean} value + * @return {!proto.ml_metadata.MigrationOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setClientKey = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.MigrationOptions.prototype.setEnableUpgradeMigration = function(value) { + return jspb.Message.setField(this, 3, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this + * @return {!proto.ml_metadata.MigrationOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearClientKey = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.MigrationOptions.prototype.clearEnableUpgradeMigration = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -10117,34 +10633,34 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearClientKey = * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasClientKey = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.MigrationOptions.prototype.hasEnableUpgradeMigration = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional string server_cert = 2; - * @return {string} + * optional int64 downgrade_to_schema_version = 2; + * @return {number} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getServerCert = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.MigrationOptions.prototype.getDowngradeToSchemaVersion = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, -1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this + * @param {number} value + * @return {!proto.ml_metadata.MigrationOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setServerCert = function(value) { +proto.ml_metadata.MigrationOptions.prototype.setDowngradeToSchemaVersion = function(value) { return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this + * @return {!proto.ml_metadata.MigrationOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearServerCert = function() { +proto.ml_metadata.MigrationOptions.prototype.clearDowngradeToSchemaVersion = function() { return jspb.Message.setField(this, 2, undefined); }; @@ -10153,181 +10669,147 @@ proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearServerCert * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasServerCert = function() { +proto.ml_metadata.MigrationOptions.prototype.hasDowngradeToSchemaVersion = function() { return jspb.Message.getField(this, 2) != null; }; -/** - * optional string custom_ca = 3; - * @return {string} - */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getCustomCa = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); -}; - -/** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setCustomCa = function(value) { - return jspb.Message.setField(this, 3, value); -}; +if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearCustomCa = function() { - return jspb.Message.setField(this, 3, undefined); + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.RetryOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.RetryOptions.toObject(opt_includeInstance, this); }; /** - * Returns whether this field is set. - * @return {boolean} + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.RetryOptions} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasCustomCa = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.RetryOptions.toObject = function(includeInstance, msg) { + var f, obj = { + maxNumRetries: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * optional string host = 1; - * @return {string} + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.RetryOptions} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.getHost = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.RetryOptions.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.RetryOptions; + return proto.ml_metadata.RetryOptions.deserializeBinaryFromReader(msg, reader); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.RetryOptions} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.RetryOptions} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.setHost = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.RetryOptions.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setMaxNumRetries(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.clearHost = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.RetryOptions.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.RetryOptions.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.RetryOptions} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.hasHost = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.RetryOptions.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } }; /** - * optional uint32 port = 2; + * optional int64 max_num_retries = 1; * @return {number} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.getPort = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); +proto.ml_metadata.RetryOptions.prototype.getMaxNumRetries = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** * @param {number} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * @return {!proto.ml_metadata.RetryOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.setPort = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.RetryOptions.prototype.setMaxNumRetries = function(value) { + return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.clearPort = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.hasPort = function() { - return jspb.Message.getField(this, 2) != null; -}; - - -/** - * optional SSLConfig ssl_config = 3; - * @return {?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.getSslConfig = function() { - return /** @type{?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.MetadataStoreClientConfig.SSLConfig, 3)); -}; - - -/** - * @param {?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig|undefined} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this -*/ -proto.ml_metadata.MetadataStoreClientConfig.prototype.setSslConfig = function(value) { - return jspb.Message.setWrapperField(this, 3, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.clearSslConfig = function() { - return this.setSslConfig(undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.hasSslConfig = function() { - return jspb.Message.getField(this, 3) != null; -}; - - -/** - * optional GrpcChannelArguments channel_arguments = 4; - * @return {?proto.ml_metadata.GrpcChannelArguments} - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.getChannelArguments = function() { - return /** @type{?proto.ml_metadata.GrpcChannelArguments} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.GrpcChannelArguments, 4)); -}; - - -/** - * @param {?proto.ml_metadata.GrpcChannelArguments|undefined} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this -*/ -proto.ml_metadata.MetadataStoreClientConfig.prototype.setChannelArguments = function(value) { - return jspb.Message.setWrapperField(this, 4, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * @return {!proto.ml_metadata.RetryOptions} returns this */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.clearChannelArguments = function() { - return this.setChannelArguments(undefined); +proto.ml_metadata.RetryOptions.prototype.clearMaxNumRetries = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -10335,50 +10817,42 @@ proto.ml_metadata.MetadataStoreClientConfig.prototype.clearChannelArguments = fu * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.hasChannelArguments = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.RetryOptions.prototype.hasMaxNumRetries = function() { + return jspb.Message.getField(this, 1) != null; }; -/** - * optional double client_timeout_sec = 5; - * @return {number} - */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.getClientTimeoutSec = function() { - return /** @type {number} */ (jspb.Message.getFloatingPointFieldWithDefault(this, 5, 0.0)); -}; - /** - * @param {number} value - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * Oneof group definitions for this message. Each group defines the field + * numbers belonging to that group. When of these fields' value is set, all + * other fields in the group are cleared. During deserialization, if multiple + * fields are encountered for a group, only the last value seen will be kept. + * @private {!Array>} + * @const */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.setClientTimeoutSec = function(value) { - return jspb.Message.setField(this, 5, value); -}; - +proto.ml_metadata.ConnectionConfig.oneofGroups_ = [[1,2,3,5]]; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + * @enum {number} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.clearClientTimeoutSec = function() { - return jspb.Message.setField(this, 5, undefined); +proto.ml_metadata.ConnectionConfig.ConfigCase = { + CONFIG_NOT_SET: 0, + FAKE_DATABASE: 1, + MYSQL: 2, + SQLITE: 3, + POSTGRESQL: 5 }; - /** - * Returns whether this field is set. 
- * @return {boolean} + * @return {proto.ml_metadata.ConnectionConfig.ConfigCase} */ -proto.ml_metadata.MetadataStoreClientConfig.prototype.hasClientTimeoutSec = function() { - return jspb.Message.getField(this, 5) != null; +proto.ml_metadata.ConnectionConfig.prototype.getConfigCase = function() { + return /** @type {proto.ml_metadata.ConnectionConfig.ConfigCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.ConnectionConfig.oneofGroups_[0])); }; - - if (jspb.Message.GENERATE_TO_OBJECT) { /** * Creates an object representation of this proto. @@ -10392,8 +10866,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.MetadataStoreServerConfig.toObject(opt_includeInstance, this); +proto.ml_metadata.ConnectionConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ConnectionConfig.toObject(opt_includeInstance, this); }; @@ -10402,15 +10876,17 @@ proto.ml_metadata.MetadataStoreServerConfig.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.MetadataStoreServerConfig} msg The msg instance to transform. + * @param {!proto.ml_metadata.ConnectionConfig} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreServerConfig.toObject = function(includeInstance, msg) { +proto.ml_metadata.ConnectionConfig.toObject = function(includeInstance, msg) { var f, obj = { - connectionConfig: (f = msg.getConnectionConfig()) && proto.ml_metadata.ConnectionConfig.toObject(includeInstance, f), - migrationOptions: (f = msg.getMigrationOptions()) && proto.ml_metadata.MigrationOptions.toObject(includeInstance, f), - sslConfig: (f = msg.getSslConfig()) && proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject(includeInstance, f) + fakeDatabase: (f = msg.getFakeDatabase()) && proto.ml_metadata.FakeDatabaseConfig.toObject(includeInstance, f), + mysql: (f = msg.getMysql()) && proto.ml_metadata.MySQLDatabaseConfig.toObject(includeInstance, f), + sqlite: (f = msg.getSqlite()) && proto.ml_metadata.SqliteMetadataSourceConfig.toObject(includeInstance, f), + postgresql: (f = msg.getPostgresql()) && proto.ml_metadata.PostgreSQLDatabaseConfig.toObject(includeInstance, f), + retryOptions: (f = msg.getRetryOptions()) && proto.ml_metadata.RetryOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -10424,23 +10900,23 @@ proto.ml_metadata.MetadataStoreServerConfig.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.MetadataStoreServerConfig} + * @return {!proto.ml_metadata.ConnectionConfig} */ -proto.ml_metadata.MetadataStoreServerConfig.deserializeBinary = function(bytes) { +proto.ml_metadata.ConnectionConfig.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.MetadataStoreServerConfig; - return proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.ConnectionConfig; + return proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.MetadataStoreServerConfig} msg The message object to deserialize into. + * @param {!proto.ml_metadata.ConnectionConfig} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.MetadataStoreServerConfig} + * @return {!proto.ml_metadata.ConnectionConfig} */ -proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -10448,19 +10924,29 @@ proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader = functi var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new proto.ml_metadata.ConnectionConfig; - reader.readMessage(value,proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader); - msg.setConnectionConfig(value); + var value = new proto.ml_metadata.FakeDatabaseConfig; + reader.readMessage(value,proto.ml_metadata.FakeDatabaseConfig.deserializeBinaryFromReader); + msg.setFakeDatabase(value); + break; + case 2: + var value = new proto.ml_metadata.MySQLDatabaseConfig; + reader.readMessage(value,proto.ml_metadata.MySQLDatabaseConfig.deserializeBinaryFromReader); + msg.setMysql(value); break; case 3: - var value = new proto.ml_metadata.MigrationOptions; - reader.readMessage(value,proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader); - msg.setMigrationOptions(value); + var value = new proto.ml_metadata.SqliteMetadataSourceConfig; + reader.readMessage(value,proto.ml_metadata.SqliteMetadataSourceConfig.deserializeBinaryFromReader); + msg.setSqlite(value); break; - case 2: - var value = new proto.ml_metadata.MetadataStoreServerConfig.SSLConfig; - reader.readMessage(value,proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader); - msg.setSslConfig(value); + case 5: + var value = new proto.ml_metadata.PostgreSQLDatabaseConfig; + reader.readMessage(value,proto.ml_metadata.PostgreSQLDatabaseConfig.deserializeBinaryFromReader); + msg.setPostgresql(value); + break; + case 4: + var value = new proto.ml_metadata.RetryOptions; + reader.readMessage(value,proto.ml_metadata.RetryOptions.deserializeBinaryFromReader); + msg.setRetryOptions(value); break; default: reader.skipField(); @@ -10475,9 +10961,9 @@ proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.serializeBinary = function() { +proto.ml_metadata.ConnectionConfig.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.MetadataStoreServerConfig.serializeBinaryToWriter(this, writer); + proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -10485,211 +10971,154 @@ proto.ml_metadata.MetadataStoreServerConfig.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.MetadataStoreServerConfig} message + * @param {!proto.ml_metadata.ConnectionConfig} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreServerConfig.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getConnectionConfig(); + f = message.getFakeDatabase(); if (f != null) { writer.writeMessage( 1, f, - proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter + proto.ml_metadata.FakeDatabaseConfig.serializeBinaryToWriter ); } - f = message.getMigrationOptions(); + f = message.getMysql(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.MySQLDatabaseConfig.serializeBinaryToWriter + ); + } + f = message.getSqlite(); if (f != null) { writer.writeMessage( 3, f, - proto.ml_metadata.MigrationOptions.serializeBinaryToWriter + proto.ml_metadata.SqliteMetadataSourceConfig.serializeBinaryToWriter ); } - f = message.getSslConfig(); + f = message.getPostgresql(); if (f != null) { writer.writeMessage( - 2, + 5, f, - proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter + proto.ml_metadata.PostgreSQLDatabaseConfig.serializeBinaryToWriter + ); + } + f = message.getRetryOptions(); + if (f != null) { + writer.writeMessage( + 4, + f, + proto.ml_metadata.RetryOptions.serializeBinaryToWriter ); } }; +/** + * optional FakeDatabaseConfig fake_database = 1; + * @return {?proto.ml_metadata.FakeDatabaseConfig} + */ +proto.ml_metadata.ConnectionConfig.prototype.getFakeDatabase = function() { + return /** @type{?proto.ml_metadata.FakeDatabaseConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.FakeDatabaseConfig, 1)); +}; + +/** + * @param {?proto.ml_metadata.FakeDatabaseConfig|undefined} value + * @return {!proto.ml_metadata.ConnectionConfig} returns this +*/ +proto.ml_metadata.ConnectionConfig.prototype.setFakeDatabase = function(value) { + return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); +}; -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.ConnectionConfig} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject(opt_includeInstance, this); +proto.ml_metadata.ConnectionConfig.prototype.clearFakeDatabase = function() { + return this.setFakeDatabase(undefined); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject = function(includeInstance, msg) { - var f, obj = { - serverKey: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - serverCert: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - customCa: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - clientVerify: (f = jspb.Message.getBooleanField(msg, 4)) == null ? undefined : f - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.ConnectionConfig.prototype.hasFakeDatabase = function() { + return jspb.Message.getField(this, 1) != null; }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} + * optional MySQLDatabaseConfig mysql = 2; + * @return {?proto.ml_metadata.MySQLDatabaseConfig} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.MetadataStoreServerConfig.SSLConfig; - return proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.ConnectionConfig.prototype.getMysql = function() { + return /** @type{?proto.ml_metadata.MySQLDatabaseConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.MySQLDatabaseConfig, 2)); }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setServerKey(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setServerCert(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setCustomCa(value); - break; - case 4: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setClientVerify(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; + * @param {?proto.ml_metadata.MySQLDatabaseConfig|undefined} value + * @return {!proto.ml_metadata.ConnectionConfig} returns this +*/ +proto.ml_metadata.ConnectionConfig.prototype.setMysql = function(value) { + return jspb.Message.setOneofWrapperField(this, 2, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ConnectionConfig} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.ConnectionConfig.prototype.clearMysql = function() { + return this.setMysql(undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( - 1, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); - if (f != null) { - writer.writeBool( - 4, - f - ); - } +proto.ml_metadata.ConnectionConfig.prototype.hasMysql = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * optional string server_key = 1; - * @return {string} + * optional SqliteMetadataSourceConfig sqlite = 3; + * @return {?proto.ml_metadata.SqliteMetadataSourceConfig} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getServerKey = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.ConnectionConfig.prototype.getSqlite = function() { + return /** @type{?proto.ml_metadata.SqliteMetadataSourceConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.SqliteMetadataSourceConfig, 3)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setServerKey = function(value) { - return jspb.Message.setField(this, 1, value); + * @param {?proto.ml_metadata.SqliteMetadataSourceConfig|undefined} value + * @return {!proto.ml_metadata.ConnectionConfig} returns this +*/ +proto.ml_metadata.ConnectionConfig.prototype.setSqlite = function(value) { + return jspb.Message.setOneofWrapperField(this, 3, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ConnectionConfig} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerKey = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.ConnectionConfig.prototype.clearSqlite = function() { + return this.setSqlite(undefined); }; @@ -10697,35 +11126,36 @@ proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerKey = * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasServerKey = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.ConnectionConfig.prototype.hasSqlite = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional string server_cert = 2; - * @return {string} + * optional PostgreSQLDatabaseConfig postgresql = 5; + * @return {?proto.ml_metadata.PostgreSQLDatabaseConfig} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getServerCert = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.ConnectionConfig.prototype.getPostgresql = function() { + return /** @type{?proto.ml_metadata.PostgreSQLDatabaseConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.PostgreSQLDatabaseConfig, 5)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setServerCert = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.PostgreSQLDatabaseConfig|undefined} value + * @return {!proto.ml_metadata.ConnectionConfig} returns this +*/ +proto.ml_metadata.ConnectionConfig.prototype.setPostgresql = function(value) { + return jspb.Message.setOneofWrapperField(this, 5, proto.ml_metadata.ConnectionConfig.oneofGroups_[0], value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ConnectionConfig} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerCert = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.ConnectionConfig.prototype.clearPostgresql = function() { + return this.setPostgresql(undefined); }; @@ -10733,35 +11163,36 @@ proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerCert * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasServerCert = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.ConnectionConfig.prototype.hasPostgresql = function() { + return jspb.Message.getField(this, 5) != null; }; /** - * optional string custom_ca = 3; - * @return {string} + * optional RetryOptions retry_options = 4; + * @return {?proto.ml_metadata.RetryOptions} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getCustomCa = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +proto.ml_metadata.ConnectionConfig.prototype.getRetryOptions = function() { + return /** @type{?proto.ml_metadata.RetryOptions} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.RetryOptions, 4)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setCustomCa = function(value) { - return jspb.Message.setField(this, 3, value); + * @param {?proto.ml_metadata.RetryOptions|undefined} value + * @return {!proto.ml_metadata.ConnectionConfig} returns this +*/ +proto.ml_metadata.ConnectionConfig.prototype.setRetryOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** - * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ConnectionConfig} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearCustomCa = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.ConnectionConfig.prototype.clearRetryOptions = function() { + return this.setRetryOptions(undefined); }; @@ -10769,109 +11200,159 @@ proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearCustomCa = * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasCustomCa = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.ConnectionConfig.prototype.hasRetryOptions = function() { + return jspb.Message.getField(this, 4) != null; }; -/** - * optional bool client_verify = 4; - * @return {boolean} - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getClientVerify = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, false)); -}; - -/** - * @param {boolean} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this - */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setClientVerify = function(value) { - return jspb.Message.setField(this, 4, value); -}; +if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearClientVerify = function() { - return jspb.Message.setField(this, 4, undefined); +proto.ml_metadata.GrpcChannelArguments.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GrpcChannelArguments.toObject(opt_includeInstance, this); }; /** - * Returns whether this field is set. - * @return {boolean} + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.GrpcChannelArguments} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasClientVerify = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GrpcChannelArguments.toObject = function(includeInstance, msg) { + var f, obj = { + maxReceiveMessageLength: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + http2MaxPingStrikes: (f = jspb.Message.getField(msg, 2)) == null ? 
undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * optional ConnectionConfig connection_config = 1; - * @return {?proto.ml_metadata.ConnectionConfig} + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.GrpcChannelArguments} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.getConnectionConfig = function() { - return /** @type{?proto.ml_metadata.ConnectionConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ConnectionConfig, 1)); +proto.ml_metadata.GrpcChannelArguments.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.GrpcChannelArguments; + return proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader(msg, reader); }; /** - * @param {?proto.ml_metadata.ConnectionConfig|undefined} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this -*/ -proto.ml_metadata.MetadataStoreServerConfig.prototype.setConnectionConfig = function(value) { - return jspb.Message.setWrapperField(this, 1, value); + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.GrpcChannelArguments} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.GrpcChannelArguments} + */ +proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setMaxReceiveMessageLength(value); + break; + case 2: + var value = /** @type {number} */ (reader.readInt64()); + msg.setHttp2MaxPingStrikes(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.clearConnectionConfig = function() { - return this.setConnectionConfig(undefined); +proto.ml_metadata.GrpcChannelArguments.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.GrpcChannelArguments} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.hasConnectionConfig = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeInt64( + 2, + f + ); + } }; /** - * optional MigrationOptions migration_options = 3; - * @return {?proto.ml_metadata.MigrationOptions} + * optional int64 max_receive_message_length = 1; + * @return {number} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.getMigrationOptions = function() { - return /** @type{?proto.ml_metadata.MigrationOptions} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.MigrationOptions, 3)); +proto.ml_metadata.GrpcChannelArguments.prototype.getMaxReceiveMessageLength = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * @param {?proto.ml_metadata.MigrationOptions|undefined} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this -*/ -proto.ml_metadata.MetadataStoreServerConfig.prototype.setMigrationOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); + * @param {number} value + * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + */ +proto.ml_metadata.GrpcChannelArguments.prototype.setMaxReceiveMessageLength = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GrpcChannelArguments} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.clearMigrationOptions = function() { - return this.setMigrationOptions(undefined); +proto.ml_metadata.GrpcChannelArguments.prototype.clearMaxReceiveMessageLength = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -10879,36 +11360,35 @@ proto.ml_metadata.MetadataStoreServerConfig.prototype.clearMigrationOptions = fu * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.hasMigrationOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GrpcChannelArguments.prototype.hasMaxReceiveMessageLength = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional SSLConfig ssl_config = 2; - * @return {?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} + * optional int64 http2_max_ping_strikes = 2; + * @return {number} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.getSslConfig = function() { - return /** @type{?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.MetadataStoreServerConfig.SSLConfig, 2)); +proto.ml_metadata.GrpcChannelArguments.prototype.getHttp2MaxPingStrikes = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); }; /** - * @param {?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig|undefined} value - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this -*/ -proto.ml_metadata.MetadataStoreServerConfig.prototype.setSslConfig = function(value) { - return jspb.Message.setWrapperField(this, 2, value); + * @param {number} value + * @return {!proto.ml_metadata.GrpcChannelArguments} returns this + */ +proto.ml_metadata.GrpcChannelArguments.prototype.setHttp2MaxPingStrikes = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GrpcChannelArguments} returns this */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.clearSslConfig = function() { - return this.setSslConfig(undefined); +proto.ml_metadata.GrpcChannelArguments.prototype.clearHttp2MaxPingStrikes = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -10916,7 +11396,7 @@ proto.ml_metadata.MetadataStoreServerConfig.prototype.clearSslConfig = function( * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.MetadataStoreServerConfig.prototype.hasSslConfig = function() { +proto.ml_metadata.GrpcChannelArguments.prototype.hasHttp2MaxPingStrikes = function() { return jspb.Message.getField(this, 2) != null; }; @@ -10937,8 +11417,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.ListOperationOptions.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ListOperationOptions.toObject(opt_includeInstance, this); +proto.ml_metadata.MetadataStoreClientConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.MetadataStoreClientConfig.toObject(opt_includeInstance, this); }; @@ -10947,16 +11427,17 @@ proto.ml_metadata.ListOperationOptions.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ListOperationOptions} msg The msg instance to transform. + * @param {!proto.ml_metadata.MetadataStoreClientConfig} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationOptions.toObject = function(includeInstance, msg) { +proto.ml_metadata.MetadataStoreClientConfig.toObject = function(includeInstance, msg) { var f, obj = { - maxResultSize: jspb.Message.getFieldWithDefault(msg, 1, 20), - orderByField: (f = msg.getOrderByField()) && proto.ml_metadata.ListOperationOptions.OrderByField.toObject(includeInstance, f), - nextPageToken: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - filterQuery: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f + host: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + port: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + sslConfig: (f = msg.getSslConfig()) && proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject(includeInstance, f), + channelArguments: (f = msg.getChannelArguments()) && proto.ml_metadata.GrpcChannelArguments.toObject(includeInstance, f), + clientTimeoutSec: (f = jspb.Message.getOptionalFloatingPointField(msg, 5)) == null ? undefined : f }; if (includeInstance) { @@ -10970,23 +11451,23 @@ proto.ml_metadata.ListOperationOptions.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.ListOperationOptions} + * @return {!proto.ml_metadata.MetadataStoreClientConfig} */ -proto.ml_metadata.ListOperationOptions.deserializeBinary = function(bytes) { +proto.ml_metadata.MetadataStoreClientConfig.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ListOperationOptions; - return proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.MetadataStoreClientConfig; + return proto.ml_metadata.MetadataStoreClientConfig.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.ListOperationOptions} msg The message object to deserialize into. + * @param {!proto.ml_metadata.MetadataStoreClientConfig} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.ListOperationOptions} + * @return {!proto.ml_metadata.MetadataStoreClientConfig} */ -proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.MetadataStoreClientConfig.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -10994,21 +11475,26 @@ proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader = function(ms var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt32()); - msg.setMaxResultSize(value); + var value = /** @type {string} */ (reader.readString()); + msg.setHost(value); break; case 2: - var value = new proto.ml_metadata.ListOperationOptions.OrderByField; - reader.readMessage(value,proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader); - msg.setOrderByField(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setPort(value); break; case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); + var value = new proto.ml_metadata.MetadataStoreClientConfig.SSLConfig; + reader.readMessage(value,proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader); + msg.setSslConfig(value); break; case 4: - var value = /** @type {string} */ (reader.readString()); - msg.setFilterQuery(value); + var value = new proto.ml_metadata.GrpcChannelArguments; + reader.readMessage(value,proto.ml_metadata.GrpcChannelArguments.deserializeBinaryFromReader); + msg.setChannelArguments(value); + break; + case 5: + var value = /** @type {number} */ (reader.readDouble()); + msg.setClientTimeoutSec(value); break; default: reader.skipField(); @@ -11023,9 +11509,9 @@ proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader = function(ms * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.ListOperationOptions.prototype.serializeBinary = function() { +proto.ml_metadata.MetadataStoreClientConfig.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter(this, writer); + proto.ml_metadata.MetadataStoreClientConfig.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11033,38 +11519,46 @@ proto.ml_metadata.ListOperationOptions.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.ListOperationOptions} message + * @param {!proto.ml_metadata.MetadataStoreClientConfig} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.MetadataStoreClientConfig.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeInt32( + writer.writeString( 1, f ); } - f = message.getOrderByField(); + f = /** @type {number} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeMessage( + writer.writeInt64( 2, - f, - proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter + f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); + f = message.getSslConfig(); if (f != null) { - writer.writeString( + writer.writeMessage( 3, - f + f, + proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter ); } - f = /** @type {string} */ (jspb.Message.getField(message, 4)); + f = message.getChannelArguments(); if (f != null) { - writer.writeString( + writer.writeMessage( 4, + f, + proto.ml_metadata.GrpcChannelArguments.serializeBinaryToWriter + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeDouble( + 5, f ); } @@ -11087,8 +11581,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ListOperationOptions.OrderByField.toObject(opt_includeInstance, this); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject(opt_includeInstance, this); }; @@ -11097,14 +11591,15 @@ proto.ml_metadata.ListOperationOptions.OrderByField.prototype.toObject = functio * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} msg The msg instance to transform. + * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationOptions.OrderByField.toObject = function(includeInstance, msg) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.toObject = function(includeInstance, msg) { var f, obj = { - field: jspb.Message.getFieldWithDefault(msg, 1, 3), - isAsc: jspb.Message.getBooleanFieldWithDefault(msg, 2, true) + clientKey: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + serverCert: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + customCa: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f }; if (includeInstance) { @@ -11118,23 +11613,23 @@ proto.ml_metadata.ListOperationOptions.OrderByField.toObject = function(includeI /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} */ -proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinary = function(bytes) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ListOperationOptions.OrderByField; - return proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.MetadataStoreClientConfig.SSLConfig; + return proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} msg The message object to deserialize into. + * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} */ -proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11142,12 +11637,16 @@ proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (reader.readEnum()); - msg.setField(value); + var value = /** @type {string} */ (reader.readString()); + msg.setClientKey(value); break; case 2: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setIsAsc(value); + var value = /** @type {string} */ (reader.readString()); + msg.setServerCert(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setCustomCa(value); break; default: reader.skipField(); @@ -11162,9 +11661,9 @@ proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.serializeBinary = function() { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter(this, writer); + proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11172,62 +11671,59 @@ proto.ml_metadata.ListOperationOptions.OrderByField.prototype.serializeBinary = /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} message + * @param {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (jspb.Message.getField(message, 1)); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeEnum( + writer.writeString( 1, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeBool( + writer.writeString( 2, f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } }; /** - * @enum {number} - */ -proto.ml_metadata.ListOperationOptions.OrderByField.Field = { - FIELD_UNSPECIFIED: 0, - CREATE_TIME: 1, - LAST_UPDATE_TIME: 2, - ID: 3 -}; - -/** - * optional Field field = 1; - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} + * optional string client_key = 1; + * @return {string} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.getField = function() { - return /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (jspb.Message.getFieldWithDefault(this, 1, 3)); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getClientKey = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} value - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.setField = function(value) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setClientKey = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearField = function() { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearClientKey = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -11236,34 +11732,34 @@ proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearField = funct * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.hasField = function() { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasClientKey = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional bool is_asc = 2; - * @return {boolean} + * optional string server_cert = 2; + * @return {string} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.getIsAsc = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, true)); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getServerCert = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.setIsAsc = function(value) { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setServerCert = function(value) { return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearIsAsc = function() { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearServerCert = function() { return jspb.Message.setField(this, 2, undefined); }; @@ -11272,35 +11768,35 @@ proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearIsAsc = funct * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationOptions.OrderByField.prototype.hasIsAsc = function() { +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasServerCert = function() { return jspb.Message.getField(this, 2) != null; }; /** - * optional int32 max_result_size = 1; - * @return {number} + * optional string custom_ca = 3; + * @return {string} */ -proto.ml_metadata.ListOperationOptions.prototype.getMaxResultSize = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 20)); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.getCustomCa = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); }; /** - * @param {number} value - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.setMaxResultSize = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.setCustomCa = function(value) { + return jspb.Message.setField(this, 3, value); }; /** * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @return {!proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.clearMaxResultSize = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.clearCustomCa = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -11308,36 +11804,35 @@ proto.ml_metadata.ListOperationOptions.prototype.clearMaxResultSize = function() * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationOptions.prototype.hasMaxResultSize = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.MetadataStoreClientConfig.SSLConfig.prototype.hasCustomCa = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional OrderByField order_by_field = 2; - * @return {?proto.ml_metadata.ListOperationOptions.OrderByField} + * optional string host = 1; + * @return {string} */ -proto.ml_metadata.ListOperationOptions.prototype.getOrderByField = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions.OrderByField} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions.OrderByField, 2)); +proto.ml_metadata.MetadataStoreClientConfig.prototype.getHost = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {?proto.ml_metadata.ListOperationOptions.OrderByField|undefined} value - * @return {!proto.ml_metadata.ListOperationOptions} returns this -*/ -proto.ml_metadata.ListOperationOptions.prototype.setOrderByField = function(value) { - return jspb.Message.setWrapperField(this, 2, value); + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.setHost = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.clearOrderByField = function() { - return this.setOrderByField(undefined); +proto.ml_metadata.MetadataStoreClientConfig.prototype.clearHost = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -11345,35 +11840,35 @@ proto.ml_metadata.ListOperationOptions.prototype.clearOrderByField = function() * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ListOperationOptions.prototype.hasOrderByField = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.MetadataStoreClientConfig.prototype.hasHost = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional string next_page_token = 3; - * @return {string} + * optional int64 port = 2; + * @return {number} */ -proto.ml_metadata.ListOperationOptions.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +proto.ml_metadata.MetadataStoreClientConfig.prototype.getPort = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @param {number} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 3, value); +proto.ml_metadata.MetadataStoreClientConfig.prototype.setPort = function(value) { + return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.MetadataStoreClientConfig.prototype.clearPort = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -11381,35 +11876,109 @@ proto.ml_metadata.ListOperationOptions.prototype.clearNextPageToken = function() * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationOptions.prototype.hasNextPageToken = function() { +proto.ml_metadata.MetadataStoreClientConfig.prototype.hasPort = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional SSLConfig ssl_config = 3; + * @return {?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.getSslConfig = function() { + return /** @type{?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.MetadataStoreClientConfig.SSLConfig, 3)); +}; + + +/** + * @param {?proto.ml_metadata.MetadataStoreClientConfig.SSLConfig|undefined} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this +*/ +proto.ml_metadata.MetadataStoreClientConfig.prototype.setSslConfig = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.clearSslConfig = function() { + return this.setSslConfig(undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.hasSslConfig = function() { return jspb.Message.getField(this, 3) != null; }; /** - * optional string filter_query = 4; - * @return {string} + * optional GrpcChannelArguments channel_arguments = 4; + * @return {?proto.ml_metadata.GrpcChannelArguments} */ -proto.ml_metadata.ListOperationOptions.prototype.getFilterQuery = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 4, "")); +proto.ml_metadata.MetadataStoreClientConfig.prototype.getChannelArguments = function() { + return /** @type{?proto.ml_metadata.GrpcChannelArguments} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.GrpcChannelArguments, 4)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @param {?proto.ml_metadata.GrpcChannelArguments|undefined} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this +*/ +proto.ml_metadata.MetadataStoreClientConfig.prototype.setChannelArguments = function(value) { + return jspb.Message.setWrapperField(this, 4, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.setFilterQuery = function(value) { - return jspb.Message.setField(this, 4, value); +proto.ml_metadata.MetadataStoreClientConfig.prototype.clearChannelArguments = function() { + return this.setChannelArguments(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.hasChannelArguments = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional double client_timeout_sec = 5; + * @return {number} + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.getClientTimeoutSec = function() { + return /** @type {number} */ (jspb.Message.getFloatingPointFieldWithDefault(this, 5, 0.0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this + */ +proto.ml_metadata.MetadataStoreClientConfig.prototype.setClientTimeoutSec = function(value) { + return jspb.Message.setField(this, 5, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationOptions} returns this + * @return {!proto.ml_metadata.MetadataStoreClientConfig} returns this */ -proto.ml_metadata.ListOperationOptions.prototype.clearFilterQuery = function() { - return jspb.Message.setField(this, 4, undefined); +proto.ml_metadata.MetadataStoreClientConfig.prototype.clearClientTimeoutSec = function() { + return jspb.Message.setField(this, 5, undefined); }; @@ -11417,18 +11986,1683 @@ proto.ml_metadata.ListOperationOptions.prototype.clearFilterQuery = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationOptions.prototype.hasFilterQuery = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.MetadataStoreClientConfig.prototype.hasClientTimeoutSec = function() { + return jspb.Message.getField(this, 5) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. 
+ * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.MetadataStoreServerConfig.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.MetadataStoreServerConfig} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.MetadataStoreServerConfig.toObject = function(includeInstance, msg) { + var f, obj = { + connectionConfig: (f = msg.getConnectionConfig()) && proto.ml_metadata.ConnectionConfig.toObject(includeInstance, f), + migrationOptions: (f = msg.getMigrationOptions()) && proto.ml_metadata.MigrationOptions.toObject(includeInstance, f), + sslConfig: (f = msg.getSslConfig()) && proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.MetadataStoreServerConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.MetadataStoreServerConfig; + return proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.MetadataStoreServerConfig} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.MetadataStoreServerConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new proto.ml_metadata.ConnectionConfig; + reader.readMessage(value,proto.ml_metadata.ConnectionConfig.deserializeBinaryFromReader); + msg.setConnectionConfig(value); + break; + case 3: + var value = new proto.ml_metadata.MigrationOptions; + reader.readMessage(value,proto.ml_metadata.MigrationOptions.deserializeBinaryFromReader); + msg.setMigrationOptions(value); + break; + case 2: + var value = new proto.ml_metadata.MetadataStoreServerConfig.SSLConfig; + reader.readMessage(value,proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader); + msg.setSslConfig(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). 
+ * @return {!Uint8Array} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.MetadataStoreServerConfig.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.MetadataStoreServerConfig} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.MetadataStoreServerConfig.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getConnectionConfig(); + if (f != null) { + writer.writeMessage( + 1, + f, + proto.ml_metadata.ConnectionConfig.serializeBinaryToWriter + ); + } + f = message.getMigrationOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + proto.ml_metadata.MigrationOptions.serializeBinaryToWriter + ); + } + f = message.getSslConfig(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter + ); + } +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.toObject = function(includeInstance, msg) { + var f, obj = { + serverKey: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + serverCert: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + customCa: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + clientVerify: (f = jspb.Message.getBooleanField(msg, 4)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. 
+ * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.MetadataStoreServerConfig.SSLConfig; + return proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {string} */ (reader.readString()); + msg.setServerKey(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setServerCert(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setCustomCa(value); + break; + case 4: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setClientVerify(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeBool( + 4, + f + ); + } +}; + + +/** + * optional string server_key = 1; + * @return {string} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getServerKey = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setServerKey = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerKey = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasServerKey = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional string server_cert = 2; + * @return {string} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getServerCert = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setServerCert = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearServerCert = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasServerCert = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional string custom_ca = 3; + * @return {string} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getCustomCa = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setCustomCa = function(value) { + return jspb.Message.setField(this, 3, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearCustomCa = function() { + return jspb.Message.setField(this, 3, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasCustomCa = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional bool client_verify = 4; + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.getClientVerify = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.setClientVerify = function(value) { + return jspb.Message.setField(this, 4, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.clearClientVerify = function() { + return jspb.Message.setField(this, 4, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.SSLConfig.prototype.hasClientVerify = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional ConnectionConfig connection_config = 1; + * @return {?proto.ml_metadata.ConnectionConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.getConnectionConfig = function() { + return /** @type{?proto.ml_metadata.ConnectionConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ConnectionConfig, 1)); +}; + + +/** + * @param {?proto.ml_metadata.ConnectionConfig|undefined} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this +*/ +proto.ml_metadata.MetadataStoreServerConfig.prototype.setConnectionConfig = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.clearConnectionConfig = function() { + return this.setConnectionConfig(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.hasConnectionConfig = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional MigrationOptions migration_options = 3; + * @return {?proto.ml_metadata.MigrationOptions} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.getMigrationOptions = function() { + return /** @type{?proto.ml_metadata.MigrationOptions} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.MigrationOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.MigrationOptions|undefined} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this +*/ +proto.ml_metadata.MetadataStoreServerConfig.prototype.setMigrationOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.clearMigrationOptions = function() { + return this.setMigrationOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.hasMigrationOptions = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional SSLConfig ssl_config = 2; + * @return {?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.getSslConfig = function() { + return /** @type{?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.MetadataStoreServerConfig.SSLConfig, 2)); +}; + + +/** + * @param {?proto.ml_metadata.MetadataStoreServerConfig.SSLConfig|undefined} value + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this +*/ +proto.ml_metadata.MetadataStoreServerConfig.prototype.setSslConfig = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.MetadataStoreServerConfig} returns this + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.clearSslConfig = function() { + return this.setSslConfig(undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.MetadataStoreServerConfig.prototype.hasSslConfig = function() { + return jspb.Message.getField(this, 2) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ListOperationOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ListOperationOptions.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ListOperationOptions} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationOptions.toObject = function(includeInstance, msg) { + var f, obj = { + maxResultSize: jspb.Message.getFieldWithDefault(msg, 1, 20), + orderByField: (f = msg.getOrderByField()) && proto.ml_metadata.ListOperationOptions.OrderByField.toObject(includeInstance, f), + nextPageToken: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + filterQuery: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ListOperationOptions} + */ +proto.ml_metadata.ListOperationOptions.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ListOperationOptions; + return proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ListOperationOptions} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.ListOperationOptions} + */ +proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt32()); + msg.setMaxResultSize(value); + break; + case 2: + var value = new proto.ml_metadata.ListOperationOptions.OrderByField; + reader.readMessage(value,proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader); + msg.setOrderByField(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); + break; + case 4: + var value = /** @type {string} */ (reader.readString()); + msg.setFilterQuery(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.ListOperationOptions.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.ListOperationOptions} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt32( + 1, + f + ); + } + f = message.getOrderByField(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeString( + 4, + f + ); + } +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ListOperationOptions.OrderByField.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} msg The msg instance to transform. 
+ * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationOptions.OrderByField.toObject = function(includeInstance, msg) { + var f, obj = { + field: jspb.Message.getFieldWithDefault(msg, 1, 3), + isAsc: jspb.Message.getBooleanFieldWithDefault(msg, 2, true) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ListOperationOptions.OrderByField; + return proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (reader.readEnum()); + msg.setField(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setIsAsc(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.ListOperationOptions.OrderByField} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationOptions.OrderByField.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeEnum( + 1, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeBool( + 2, + f + ); + } +}; + + +/** + * @enum {number} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.Field = { + FIELD_UNSPECIFIED: 0, + CREATE_TIME: 1, + LAST_UPDATE_TIME: 2, + ID: 3 +}; + +/** + * optional Field field = 1; + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.getField = function() { + return /** @type {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} */ (jspb.Message.getFieldWithDefault(this, 1, 3)); +}; + + +/** + * @param {!proto.ml_metadata.ListOperationOptions.OrderByField.Field} value + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.setField = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearField = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.hasField = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional bool is_asc = 2; + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.getIsAsc = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, true)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.setIsAsc = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationOptions.OrderByField} returns this + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.clearIsAsc = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.OrderByField.prototype.hasIsAsc = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional int32 max_result_size = 1; + * @return {number} + */ +proto.ml_metadata.ListOperationOptions.prototype.getMaxResultSize = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 20)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.setMaxResultSize = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.clearMaxResultSize = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.prototype.hasMaxResultSize = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional OrderByField order_by_field = 2; + * @return {?proto.ml_metadata.ListOperationOptions.OrderByField} + */ +proto.ml_metadata.ListOperationOptions.prototype.getOrderByField = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions.OrderByField} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions.OrderByField, 2)); +}; + + +/** + * @param {?proto.ml_metadata.ListOperationOptions.OrderByField|undefined} value + * @return {!proto.ml_metadata.ListOperationOptions} returns this +*/ +proto.ml_metadata.ListOperationOptions.prototype.setOrderByField = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.clearOrderByField = function() { + return this.setOrderByField(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.prototype.hasOrderByField = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional string next_page_token = 3; + * @return {string} + */ +proto.ml_metadata.ListOperationOptions.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 3, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 3, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional string filter_query = 4; + * @return {string} + */ +proto.ml_metadata.ListOperationOptions.prototype.getFilterQuery = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 4, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.setFilterQuery = function(value) { + return jspb.Message.setField(this, 4, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationOptions} returns this + */ +proto.ml_metadata.ListOperationOptions.prototype.clearFilterQuery = function() { + return jspb.Message.setField(this, 4, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.ListOperationOptions.prototype.hasFilterQuery = function() { + return jspb.Message.getField(this, 4) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.ListOperationNextPageToken.repeatedFields_ = [4]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ListOperationNextPageToken.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ListOperationNextPageToken} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationNextPageToken.toObject = function(includeInstance, msg) { + var f, obj = { + idOffset: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + fieldOffset: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + setOptions: (f = msg.getSetOptions()) && proto.ml_metadata.ListOperationOptions.toObject(includeInstance, f), + listedIdsList: (f = jspb.Message.getRepeatedField(msg, 4)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ListOperationNextPageToken} + */ +proto.ml_metadata.ListOperationNextPageToken.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ListOperationNextPageToken; + return proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ListOperationNextPageToken} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.ListOperationNextPageToken} + */ +proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setIdOffset(value); + break; + case 2: + var value = /** @type {number} */ (reader.readInt64()); + msg.setFieldOffset(value); + break; + case 3: + var value = new proto.ml_metadata.ListOperationOptions; + reader.readMessage(value,proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader); + msg.setSetOptions(value); + break; + case 4: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addListedIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.ListOperationNextPageToken} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeInt64( + 2, + f + ); + } + f = message.getSetOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter + ); + } + f = message.getListedIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 4, + f + ); + } +}; + + +/** + * optional int64 id_offset = 1; + * @return {number} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.getIdOffset = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.setIdOffset = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.clearIdOffset = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.hasIdOffset = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional int64 field_offset = 2; + * @return {number} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.getFieldOffset = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.setFieldOffset = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.clearFieldOffset = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.hasFieldOffset = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional ListOperationOptions set_options = 3; + * @return {?proto.ml_metadata.ListOperationOptions} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.getSetOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this +*/ +proto.ml_metadata.ListOperationNextPageToken.prototype.setSetOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.clearSetOptions = function() { + return this.setSetOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.hasSetOptions = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * repeated int64 listed_ids = 4; + * @return {!Array} + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.getListedIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 4)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.setListedIdsList = function(value) { + return jspb.Message.setField(this, 4, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.addListedIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 4, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + */ +proto.ml_metadata.ListOperationNextPageToken.prototype.clearListedIdsList = function() { + return this.setListedIdsList([]); +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. 
+ * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.TransactionOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.TransactionOptions.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.TransactionOptions} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.TransactionOptions.toObject = function(includeInstance, msg) { + var f, obj = { + tag: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + }; + + jspb.Message.toObjectExtension(/** @type {!jspb.Message} */ (msg), obj, + proto.ml_metadata.TransactionOptions.extensions, proto.ml_metadata.TransactionOptions.prototype.getExtension, + includeInstance); + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.TransactionOptions.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.TransactionOptions; + return proto.ml_metadata.TransactionOptions.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.TransactionOptions} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.TransactionOptions.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {string} */ (reader.readString()); + msg.setTag(value); + break; + default: + jspb.Message.readBinaryExtension(msg, reader, + proto.ml_metadata.TransactionOptions.extensionsBinary, + proto.ml_metadata.TransactionOptions.prototype.getExtension, + proto.ml_metadata.TransactionOptions.prototype.setExtension); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.TransactionOptions.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.TransactionOptions.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.TransactionOptions} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.TransactionOptions.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + jspb.Message.serializeBinaryExtensions(message, writer, + proto.ml_metadata.TransactionOptions.extensionsBinary, proto.ml_metadata.TransactionOptions.prototype.getExtension); +}; + + +/** + * optional string tag = 1; + * @return {string} + */ +proto.ml_metadata.TransactionOptions.prototype.getTag = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.TransactionOptions} returns this + */ +proto.ml_metadata.TransactionOptions.prototype.setTag = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.TransactionOptions} returns this + */ +proto.ml_metadata.TransactionOptions.prototype.clearTag = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.TransactionOptions.prototype.hasTag = function() { + return jspb.Message.getField(this, 1) != null; +}; + + + +/** + * Oneof group definitions for this message. Each group defines the field + * numbers belonging to that group. When of these fields' value is set, all + * other fields in the group are cleared. During deserialization, if multiple + * fields are encountered for a group, only the last value seen will be kept. + * @private {!Array>} + * @const + */ +proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_ = [[1]]; + +/** + * @enum {number} + */ +proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase = { + QUERY_NODES_NOT_SET: 0, + ARTIFACTS_OPTIONS: 1 +}; + +/** + * @return {proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase} + */ +proto.ml_metadata.LineageGraphQueryOptions.prototype.getQueryNodesCase = function() { + return /** @type {proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_[0])); +}; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.LineageGraphQueryOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.LineageGraphQueryOptions.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.LineageGraphQueryOptions} msg The msg instance to transform. 
+ * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.LineageGraphQueryOptions.toObject = function(includeInstance, msg) { + var f, obj = { + artifactsOptions: (f = msg.getArtifactsOptions()) && proto.ml_metadata.ListOperationOptions.toObject(includeInstance, f), + stopConditions: (f = msg.getStopConditions()) && proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject(includeInstance, f), + maxNodeSize: jspb.Message.getFieldWithDefault(msg, 3, 20) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.LineageGraphQueryOptions} + */ +proto.ml_metadata.LineageGraphQueryOptions.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.LineageGraphQueryOptions; + return proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.LineageGraphQueryOptions} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.LineageGraphQueryOptions} + */ +proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new proto.ml_metadata.ListOperationOptions; + reader.readMessage(value,proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader); + msg.setArtifactsOptions(value); + break; + case 2: + var value = new proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint; + reader.readMessage(value,proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader); + msg.setStopConditions(value); + break; + case 3: + var value = /** @type {number} */ (reader.readInt64()); + msg.setMaxNodeSize(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.LineageGraphQueryOptions.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.LineageGraphQueryOptions.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + /** - * List of repeated fields within this message type. - * @private {!Array} - * @const + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.LineageGraphQueryOptions} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationNextPageToken.repeatedFields_ = [4]; +proto.ml_metadata.LineageGraphQueryOptions.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactsOptions(); + if (f != null) { + writer.writeMessage( + 1, + f, + proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter + ); + } + f = message.getStopConditions(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeInt64( + 3, + f + ); + } +}; + + @@ -11445,8 +13679,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ListOperationNextPageToken.toObject(opt_includeInstance, this); +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject(opt_includeInstance, this); }; @@ -11455,16 +13689,15 @@ proto.ml_metadata.ListOperationNextPageToken.prototype.toObject = function(opt_i * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ListOperationNextPageToken} msg The msg instance to transform. + * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationNextPageToken.toObject = function(includeInstance, msg) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject = function(includeInstance, msg) { var f, obj = { - idOffset: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - fieldOffset: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - setOptions: (f = msg.getSetOptions()) && proto.ml_metadata.ListOperationOptions.toObject(includeInstance, f), - listedIdsList: (f = jspb.Message.getRepeatedField(msg, 4)) == null ? undefined : f + maxNumHops: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + boundaryArtifacts: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + boundaryExecutions: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f }; if (includeInstance) { @@ -11478,23 +13711,23 @@ proto.ml_metadata.ListOperationNextPageToken.toObject = function(includeInstance /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.ListOperationNextPageToken} + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} */ -proto.ml_metadata.ListOperationNextPageToken.deserializeBinary = function(bytes) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ListOperationNextPageToken; - return proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint; + return proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.ListOperationNextPageToken} msg The message object to deserialize into. + * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.ListOperationNextPageToken} + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} */ -proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11503,22 +13736,15 @@ proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader = funct switch (field) { case 1: var value = /** @type {number} */ (reader.readInt64()); - msg.setIdOffset(value); + msg.setMaxNumHops(value); break; case 2: - var value = /** @type {number} */ (reader.readInt64()); - msg.setFieldOffset(value); + var value = /** @type {string} */ (reader.readString()); + msg.setBoundaryArtifacts(value); break; case 3: - var value = new proto.ml_metadata.ListOperationOptions; - reader.readMessage(value,proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader); - msg.setSetOptions(value); - break; - case 4: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addListedIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.setBoundaryExecutions(value); break; default: reader.skipField(); @@ -11533,9 +13759,9 @@ proto.ml_metadata.ListOperationNextPageToken.deserializeBinaryFromReader = funct * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.serializeBinary = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter(this, writer); + proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11543,11 +13769,11 @@ proto.ml_metadata.ListOperationNextPageToken.prototype.serializeBinary = functio /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.ListOperationNextPageToken} message + * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { @@ -11556,25 +13782,17 @@ proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter = function( f ); } - f = /** @type {number} */ (jspb.Message.getField(message, 2)); + f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeInt64( + writer.writeString( 2, f ); } - f = message.getSetOptions(); + f = /** @type {string} */ (jspb.Message.getField(message, 3)); if (f != null) { - writer.writeMessage( + writer.writeString( 3, - f, - proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter - ); - } - f = message.getListedIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 4, f ); } @@ -11582,28 +13800,28 @@ proto.ml_metadata.ListOperationNextPageToken.serializeBinaryToWriter = function( /** - * optional int64 id_offset = 1; + * optional int64 max_num_hops = 1; * @return {number} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.getIdOffset = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getMaxNumHops = function() { return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** * @param {number} value - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.setIdOffset = function(value) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setMaxNumHops = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.clearIdOffset = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearMaxNumHops = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -11612,34 +13830,34 @@ proto.ml_metadata.ListOperationNextPageToken.prototype.clearIdOffset = function( * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.hasIdOffset = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasMaxNumHops = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional int64 field_offset = 2; - * @return {number} + * optional string boundary_artifacts = 2; + * @return {string} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.getFieldOffset = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getBoundaryArtifacts = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {number} value - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * @param {string} value + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.setFieldOffset = function(value) { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setBoundaryArtifacts = function(value) { return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.clearFieldOffset = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBoundaryArtifacts = function() { return jspb.Message.setField(this, 2, undefined); }; @@ -11648,36 +13866,35 @@ proto.ml_metadata.ListOperationNextPageToken.prototype.clearFieldOffset = functi * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.hasFieldOffset = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasBoundaryArtifacts = function() { return jspb.Message.getField(this, 2) != null; }; /** - * optional ListOperationOptions set_options = 3; - * @return {?proto.ml_metadata.ListOperationOptions} + * optional string boundary_executions = 3; + * @return {string} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.getSetOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions, 3)); +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getBoundaryExecutions = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this -*/ -proto.ml_metadata.ListOperationNextPageToken.prototype.setSetOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); + * @param {string} value + * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this + */ +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setBoundaryExecutions = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.clearSetOptions = function() { - return this.setSetOptions(undefined); +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBoundaryExecutions = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -11685,154 +13902,118 @@ proto.ml_metadata.ListOperationNextPageToken.prototype.clearSetOptions = functio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.hasSetOptions = function() { +proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasBoundaryExecutions = function() { return jspb.Message.getField(this, 3) != null; }; /** - * repeated int64 listed_ids = 4; - * @return {!Array} + * optional ListOperationOptions artifacts_options = 1; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.getListedIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 4)); +proto.ml_metadata.LineageGraphQueryOptions.prototype.getArtifactsOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this - */ -proto.ml_metadata.ListOperationNextPageToken.prototype.setListedIdsList = function(value) { - return jspb.Message.setField(this, 4, value || []); + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this +*/ +proto.ml_metadata.LineageGraphQueryOptions.prototype.setArtifactsOptions = function(value) { + return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_[0], value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this */ -proto.ml_metadata.ListOperationNextPageToken.prototype.addListedIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 4, value, opt_index); +proto.ml_metadata.LineageGraphQueryOptions.prototype.clearArtifactsOptions = function() { + return this.setArtifactsOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.ListOperationNextPageToken} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.ListOperationNextPageToken.prototype.clearListedIdsList = function() { - return this.setListedIdsList([]); +proto.ml_metadata.LineageGraphQueryOptions.prototype.hasArtifactsOptions = function() { + return jspb.Message.getField(this, 1) != null; }; +/** + * optional BoundaryConstraint stop_conditions = 2; + * @return {?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} + */ +proto.ml_metadata.LineageGraphQueryOptions.prototype.getStopConditions = function() { + return /** @type{?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint, 2)); +}; - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. 
- * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} - */ -proto.ml_metadata.TransactionOptions.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.TransactionOptions.toObject(opt_includeInstance, this); + * @param {?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint|undefined} value + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this +*/ +proto.ml_metadata.LineageGraphQueryOptions.prototype.setStopConditions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.TransactionOptions} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this */ -proto.ml_metadata.TransactionOptions.toObject = function(includeInstance, msg) { - var f, obj = { - - }; +proto.ml_metadata.LineageGraphQueryOptions.prototype.clearStopConditions = function() { + return this.setStopConditions(undefined); +}; - jspb.Message.toObjectExtension(/** @type {!jspb.Message} */ (msg), obj, - proto.ml_metadata.TransactionOptions.extensions, proto.ml_metadata.TransactionOptions.prototype.getExtension, - includeInstance); - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.LineageGraphQueryOptions.prototype.hasStopConditions = function() { + return jspb.Message.getField(this, 2) != null; }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.TransactionOptions} + * optional int64 max_node_size = 3; + * @return {number} */ -proto.ml_metadata.TransactionOptions.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.TransactionOptions; - return proto.ml_metadata.TransactionOptions.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.LineageGraphQueryOptions.prototype.getMaxNodeSize = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 3, 20)); }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.TransactionOptions} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.TransactionOptions} + * @param {number} value + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this */ -proto.ml_metadata.TransactionOptions.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - default: - jspb.Message.readBinaryExtension(msg, reader, - proto.ml_metadata.TransactionOptions.extensionsBinary, - proto.ml_metadata.TransactionOptions.prototype.getExtension, - proto.ml_metadata.TransactionOptions.prototype.setExtension); - break; - } - } - return msg; +proto.ml_metadata.LineageGraphQueryOptions.prototype.setMaxNodeSize = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this */ -proto.ml_metadata.TransactionOptions.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.TransactionOptions.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.LineageGraphQueryOptions.prototype.clearMaxNodeSize = function() { + return jspb.Message.setField(this, 3, undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.TransactionOptions} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.TransactionOptions.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - jspb.Message.serializeBinaryExtensions(message, writer, - proto.ml_metadata.TransactionOptions.extensionsBinary, proto.ml_metadata.TransactionOptions.prototype.getExtension); +proto.ml_metadata.LineageGraphQueryOptions.prototype.hasMaxNodeSize = function() { + return jspb.Message.getField(this, 3) != null; }; @@ -11845,21 +14026,22 @@ proto.ml_metadata.TransactionOptions.serializeBinaryToWriter = function(message, * @private {!Array>} * @const */ -proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_ = [[1]]; +proto.ml_metadata.LineageSubgraphQueryOptions.oneofGroups_ = [[1,2]]; /** * @enum {number} */ -proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase = { - QUERY_NODES_NOT_SET: 0, - ARTIFACTS_OPTIONS: 1 +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodesCase = { + STARTING_NODES_NOT_SET: 0, + STARTING_ARTIFACTS: 1, + STARTING_EXECUTIONS: 2 }; /** - * @return {proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase} + * @return {proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodesCase} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.getQueryNodesCase = function() { - return /** @type {proto.ml_metadata.LineageGraphQueryOptions.QueryNodesCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_[0])); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.getStartingNodesCase = function() { + return /** @type {proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodesCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.LineageSubgraphQueryOptions.oneofGroups_[0])); }; @@ -11877,8 +14059,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ 
-proto.ml_metadata.LineageGraphQueryOptions.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.LineageGraphQueryOptions.toObject(opt_includeInstance, this); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.LineageSubgraphQueryOptions.toObject(opt_includeInstance, this); }; @@ -11887,14 +14069,16 @@ proto.ml_metadata.LineageGraphQueryOptions.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.LineageGraphQueryOptions} msg The msg instance to transform. + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.LineageGraphQueryOptions.toObject = function(includeInstance, msg) { +proto.ml_metadata.LineageSubgraphQueryOptions.toObject = function(includeInstance, msg) { var f, obj = { - artifactsOptions: (f = msg.getArtifactsOptions()) && proto.ml_metadata.ListOperationOptions.toObject(includeInstance, f), - stopConditions: (f = msg.getStopConditions()) && proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject(includeInstance, f) + startingArtifacts: (f = msg.getStartingArtifacts()) && proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.toObject(includeInstance, f), + startingExecutions: (f = msg.getStartingExecutions()) && proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.toObject(includeInstance, f), + maxNumHops: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + direction: (f = jspb.Message.getField(msg, 4)) == null ? undefined : f }; if (includeInstance) { @@ -11908,23 +14092,23 @@ proto.ml_metadata.LineageGraphQueryOptions.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.LineageGraphQueryOptions} + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} */ -proto.ml_metadata.LineageGraphQueryOptions.deserializeBinary = function(bytes) { +proto.ml_metadata.LineageSubgraphQueryOptions.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.LineageGraphQueryOptions; - return proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.LineageSubgraphQueryOptions; + return proto.ml_metadata.LineageSubgraphQueryOptions.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.LineageGraphQueryOptions} msg The message object to deserialize into. + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.LineageGraphQueryOptions} + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} */ -proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.LineageSubgraphQueryOptions.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11932,14 +14116,22 @@ proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new proto.ml_metadata.ListOperationOptions; - reader.readMessage(value,proto.ml_metadata.ListOperationOptions.deserializeBinaryFromReader); - msg.setArtifactsOptions(value); + var value = new proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes; + reader.readMessage(value,proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.deserializeBinaryFromReader); + msg.setStartingArtifacts(value); break; case 2: - var value = new proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint; - reader.readMessage(value,proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader); - msg.setStopConditions(value); + var value = new proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes; + reader.readMessage(value,proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.deserializeBinaryFromReader); + msg.setStartingExecutions(value); + break; + case 3: + var value = /** @type {number} */ (reader.readInt64()); + msg.setMaxNumHops(value); + break; + case 4: + var value = /** @type {!proto.ml_metadata.LineageSubgraphQueryOptions.Direction} */ (reader.readEnum()); + msg.setDirection(value); break; default: reader.skipField(); @@ -11954,9 +14146,9 @@ proto.ml_metadata.LineageGraphQueryOptions.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.serializeBinary = function() { +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.LineageGraphQueryOptions.serializeBinaryToWriter(this, writer); + proto.ml_metadata.LineageSubgraphQueryOptions.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11964,31 +14156,55 @@ proto.ml_metadata.LineageGraphQueryOptions.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.LineageGraphQueryOptions} message + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.LineageGraphQueryOptions.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.LineageSubgraphQueryOptions.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsOptions(); + f = message.getStartingArtifacts(); if (f != null) { writer.writeMessage( 1, f, - proto.ml_metadata.ListOperationOptions.serializeBinaryToWriter + proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.serializeBinaryToWriter ); } - f = message.getStopConditions(); + f = message.getStartingExecutions(); if (f != null) { writer.writeMessage( 2, f, - proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter + proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.serializeBinaryToWriter + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeInt64( + 3, + f + ); + } + f = /** @type {!proto.ml_metadata.LineageSubgraphQueryOptions.Direction} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeEnum( + 4, + f ); } }; +/** + * @enum {number} + */ +proto.ml_metadata.LineageSubgraphQueryOptions.Direction = { + DIRECTION_UNSPECIFIED: 0, + UPSTREAM: 1, + DOWNSTREAM: 2, + BIDIRECTIONAL: 3 +}; + @@ -12005,8 +14221,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject(opt_includeInstance, this); +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.toObject(opt_includeInstance, this); }; @@ -12015,15 +14231,13 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.toObject * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} msg The msg instance to transform. + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject = function(includeInstance, msg) { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.toObject = function(includeInstance, msg) { var f, obj = { - maxNumHops: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - boundaryArtifacts: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - boundaryExecutions: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f + filterQuery: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f }; if (includeInstance) { @@ -12037,23 +14251,23 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.toObject = functio /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinary = function(bytes) { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint; - return proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes; + return proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} msg The message object to deserialize into. + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12061,16 +14275,8 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryF var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setMaxNumHops(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setBoundaryArtifacts(value); - break; - case 3: var value = /** @type {string} */ (reader.readString()); - msg.setBoundaryExecutions(value); + msg.setFilterQuery(value); break; default: reader.skipField(); @@ -12085,9 +14291,9 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.deserializeBinaryF * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.serializeBinary = function() { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter(this, writer); + proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -12095,30 +14301,16 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.serializ /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} message + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { writer.writeString( - 3, + 1, f ); } @@ -12126,28 +14318,28 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.serializeBinaryToW /** - * optional int64 max_num_hops = 1; - * @return {number} + * optional string filter_query = 1; + * @return {string} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getMaxNumHops = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.getFilterQuery = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {number} value - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this + * @param {string} value + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setMaxNumHops = function(value) { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.setFilterQuery = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearMaxNumHops = function() { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.clearFilterQuery = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -12156,35 +14348,36 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearMax * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasMaxNumHops = function() { +proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes.prototype.hasFilterQuery = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional string boundary_artifacts = 2; - * @return {string} + * optional StartingNodes starting_artifacts = 1; + * @return {?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getBoundaryArtifacts = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.getStartingArtifacts = function() { + return /** @type{?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this - */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setBoundaryArtifacts = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes|undefined} value + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this +*/ +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.setStartingArtifacts = function(value) { + return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.LineageSubgraphQueryOptions.oneofGroups_[0], value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBoundaryArtifacts = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.clearStartingArtifacts = function() { + return this.setStartingArtifacts(undefined); }; @@ -12192,35 +14385,36 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBou * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasBoundaryArtifacts = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.hasStartingArtifacts = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional string boundary_executions = 3; - * @return {string} + * optional StartingNodes starting_executions = 2; + * @return {?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.getBoundaryExecutions = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.getStartingExecutions = function() { + return /** @type{?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes, 2)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this - */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.setBoundaryExecutions = function(value) { - return jspb.Message.setField(this, 3, value); + * @param {?proto.ml_metadata.LineageSubgraphQueryOptions.StartingNodes|undefined} value + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this +*/ +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.setStartingExecutions = function(value) { + return jspb.Message.setOneofWrapperField(this, 2, proto.ml_metadata.LineageSubgraphQueryOptions.oneofGroups_[0], value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBoundaryExecutions = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.clearStartingExecutions = function() { + return this.setStartingExecutions(undefined); }; @@ -12228,36 +14422,35 @@ proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.clearBou * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint.prototype.hasBoundaryExecutions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.hasStartingExecutions = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * optional ListOperationOptions artifacts_options = 1; - * @return {?proto.ml_metadata.ListOperationOptions} + * optional int64 max_num_hops = 3; + * @return {number} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.getArtifactsOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ListOperationOptions, 1)); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.getMaxNumHops = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 3, 0)); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this -*/ -proto.ml_metadata.LineageGraphQueryOptions.prototype.setArtifactsOptions = function(value) { - return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.LineageGraphQueryOptions.oneofGroups_[0], value); + * @param {number} value + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this + */ +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.setMaxNumHops = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.clearArtifactsOptions = function() { - return this.setArtifactsOptions(undefined); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.clearMaxNumHops = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -12265,36 +14458,35 @@ proto.ml_metadata.LineageGraphQueryOptions.prototype.clearArtifactsOptions = fun * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.hasArtifactsOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.hasMaxNumHops = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional BoundaryConstraint stop_conditions = 2; - * @return {?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} + * optional Direction direction = 4; + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions.Direction} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.getStopConditions = function() { - return /** @type{?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint, 2)); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.getDirection = function() { + return /** @type {!proto.ml_metadata.LineageSubgraphQueryOptions.Direction} */ (jspb.Message.getFieldWithDefault(this, 4, 0)); }; /** - * @param {?proto.ml_metadata.LineageGraphQueryOptions.BoundaryConstraint|undefined} value - * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this -*/ -proto.ml_metadata.LineageGraphQueryOptions.prototype.setStopConditions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); + * @param {!proto.ml_metadata.LineageSubgraphQueryOptions.Direction} value + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this + */ +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.setDirection = function(value) { + return jspb.Message.setField(this, 4, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.LineageGraphQueryOptions} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.LineageSubgraphQueryOptions} returns this */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.clearStopConditions = function() { - return this.setStopConditions(undefined); +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.clearDirection = function() { + return jspb.Message.setField(this, 4, undefined); }; @@ -12302,8 +14494,8 @@ proto.ml_metadata.LineageGraphQueryOptions.prototype.clearStopConditions = funct * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.LineageGraphQueryOptions.prototype.hasStopConditions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.LineageSubgraphQueryOptions.prototype.hasDirection = function() { + return jspb.Message.getField(this, 4) != null; }; @@ -12315,7 +14507,9 @@ proto.ml_metadata.PropertyType = { INT: 1, DOUBLE: 2, STRING: 3, - STRUCT: 4 + STRUCT: 4, + PROTO: 5, + BOOLEAN: 6 }; diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.d.ts b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.d.ts index 506a36e70e..1489129c23 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.d.ts +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.d.ts @@ -64,6 +64,13 @@ export class MetadataStoreServiceClient { response: ml_metadata_proto_metadata_store_service_pb.PutExecutionResponse) => void ): grpcWeb.ClientReadableStream; + putLineageSubgraph( + request: ml_metadata_proto_metadata_store_service_pb.PutLineageSubgraphRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.PutLineageSubgraphResponse) => void + ): grpcWeb.ClientReadableStream; + putContexts( request: ml_metadata_proto_metadata_store_service_pb.PutContextsRequest, metadata: grpcWeb.Metadata | undefined, @@ -253,6 +260,48 @@ export class MetadataStoreServiceClient { response: ml_metadata_proto_metadata_store_service_pb.GetEventsByArtifactIDsResponse) => void ): grpcWeb.ClientReadableStream; + getArtifactsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetArtifactsByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetArtifactsByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + + getExecutionsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetExecutionsByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetExecutionsByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + + getContextsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetContextsByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetContextsByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + + getArtifactTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetArtifactTypesByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetArtifactTypesByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + + getExecutionTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetExecutionTypesByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetExecutionTypesByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + + getContextTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetContextTypesByExternalIdsRequest, + metadata: grpcWeb.Metadata | undefined, + 
callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetContextTypesByExternalIdsResponse) => void + ): grpcWeb.ClientReadableStream; + getContextsByArtifact( request: ml_metadata_proto_metadata_store_service_pb.GetContextsByArtifactRequest, metadata: grpcWeb.Metadata | undefined, @@ -281,6 +330,20 @@ export class MetadataStoreServiceClient { response: ml_metadata_proto_metadata_store_service_pb.GetChildrenContextsByContextResponse) => void ): grpcWeb.ClientReadableStream; + getParentContextsByContexts( + request: ml_metadata_proto_metadata_store_service_pb.GetParentContextsByContextsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetParentContextsByContextsResponse) => void + ): grpcWeb.ClientReadableStream; + + getChildrenContextsByContexts( + request: ml_metadata_proto_metadata_store_service_pb.GetChildrenContextsByContextsRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetChildrenContextsByContextsResponse) => void + ): grpcWeb.ClientReadableStream; + getArtifactsByContext( request: ml_metadata_proto_metadata_store_service_pb.GetArtifactsByContextRequest, metadata: grpcWeb.Metadata | undefined, @@ -302,6 +365,13 @@ export class MetadataStoreServiceClient { response: ml_metadata_proto_metadata_store_service_pb.GetLineageGraphResponse) => void ): grpcWeb.ClientReadableStream; + getLineageSubgraph( + request: ml_metadata_proto_metadata_store_service_pb.GetLineageSubgraphRequest, + metadata: grpcWeb.Metadata | undefined, + callback: (err: grpcWeb.RpcError, + response: ml_metadata_proto_metadata_store_service_pb.GetLineageSubgraphResponse) => void + ): grpcWeb.ClientReadableStream; + } export class MetadataStoreServicePromiseClient { @@ -349,6 +419,11 @@ export class MetadataStoreServicePromiseClient { metadata?: grpcWeb.Metadata ): Promise; + putLineageSubgraph( + request: ml_metadata_proto_metadata_store_service_pb.PutLineageSubgraphRequest, + metadata?: grpcWeb.Metadata + ): Promise; + putContexts( request: ml_metadata_proto_metadata_store_service_pb.PutContextsRequest, metadata?: grpcWeb.Metadata @@ -484,6 +559,36 @@ export class MetadataStoreServicePromiseClient { metadata?: grpcWeb.Metadata ): Promise; + getArtifactsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetArtifactsByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getExecutionsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetExecutionsByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getContextsByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetContextsByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getArtifactTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetArtifactTypesByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getExecutionTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetExecutionTypesByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getContextTypesByExternalIds( + request: ml_metadata_proto_metadata_store_service_pb.GetContextTypesByExternalIdsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + getContextsByArtifact( request: ml_metadata_proto_metadata_store_service_pb.GetContextsByArtifactRequest, metadata?: grpcWeb.Metadata @@ -504,6 +609,16 @@ export class 
MetadataStoreServicePromiseClient { metadata?: grpcWeb.Metadata ): Promise; + getParentContextsByContexts( + request: ml_metadata_proto_metadata_store_service_pb.GetParentContextsByContextsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + + getChildrenContextsByContexts( + request: ml_metadata_proto_metadata_store_service_pb.GetChildrenContextsByContextsRequest, + metadata?: grpcWeb.Metadata + ): Promise; + getArtifactsByContext( request: ml_metadata_proto_metadata_store_service_pb.GetArtifactsByContextRequest, metadata?: grpcWeb.Metadata @@ -519,5 +634,10 @@ export class MetadataStoreServicePromiseClient { metadata?: grpcWeb.Metadata ): Promise; + getLineageSubgraph( + request: ml_metadata_proto_metadata_store_service_pb.GetLineageSubgraphRequest, + metadata?: grpcWeb.Metadata + ): Promise; + } diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.js b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.js index 9e4d1a33d2..db5c216b49 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.js +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_grpc_web_pb.js @@ -16,6 +16,8 @@ const grpc = {}; grpc.web = require('grpc-web'); +var google_protobuf_field_mask_pb = require('google-protobuf/google/protobuf/field_mask_pb.js') + var ml_metadata_proto_metadata_store_pb = require('../../ml_metadata/proto/metadata_store_pb.js') const proto = {}; proto.ml_metadata = require('./metadata_store_service_pb.js'); @@ -560,6 +562,67 @@ proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.putExecution = }; +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.PutLineageSubgraphRequest, + * !proto.ml_metadata.PutLineageSubgraphResponse>} + */ +const methodDescriptor_MetadataStoreService_PutLineageSubgraph = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/PutLineageSubgraph', + grpc.web.MethodType.UNARY, + proto.ml_metadata.PutLineageSubgraphRequest, + proto.ml_metadata.PutLineageSubgraphResponse, + /** + * @param {!proto.ml_metadata.PutLineageSubgraphRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.PutLineageSubgraphResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.PutLineageSubgraphRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.PutLineageSubgraphResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.putLineageSubgraph = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/PutLineageSubgraph', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_PutLineageSubgraph, + callback); +}; + + +/** + * @param {!proto.ml_metadata.PutLineageSubgraphRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.putLineageSubgraph = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/PutLineageSubgraph', 
+ request, + metadata || {}, + methodDescriptor_MetadataStoreService_PutLineageSubgraph); +}; + + /** * @const * @type {!grpc.web.MethodDescriptor< @@ -2207,6 +2270,372 @@ proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getEventsByArtifac }; +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetArtifactsByExternalIdsRequest, + * !proto.ml_metadata.GetArtifactsByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetArtifactsByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetArtifactsByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetArtifactsByExternalIdsRequest, + proto.ml_metadata.GetArtifactsByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetArtifactsByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetArtifactsByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getArtifactsByExternalIds = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetArtifactsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetArtifactsByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getArtifactsByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetArtifactsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetArtifactsByExternalIds); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetExecutionsByExternalIdsRequest, + * !proto.ml_metadata.GetExecutionsByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetExecutionsByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetExecutionsByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetExecutionsByExternalIdsRequest, + proto.ml_metadata.GetExecutionsByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetExecutionsByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetExecutionsByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getExecutionsByExternalIds 
= + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetExecutionsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetExecutionsByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getExecutionsByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetExecutionsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetExecutionsByExternalIds); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetContextsByExternalIdsRequest, + * !proto.ml_metadata.GetContextsByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetContextsByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetContextsByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetContextsByExternalIdsRequest, + proto.ml_metadata.GetContextsByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetContextsByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetContextsByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getContextsByExternalIds = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetContextsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetContextsByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getContextsByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetContextsByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetContextsByExternalIds); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetArtifactTypesByExternalIdsRequest, + * !proto.ml_metadata.GetArtifactTypesByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetArtifactTypesByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetArtifactTypesByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetArtifactTypesByExternalIdsRequest, + proto.ml_metadata.GetArtifactTypesByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return 
request.serializeBinary(); + }, + proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetArtifactTypesByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getArtifactTypesByExternalIds = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetArtifactTypesByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetArtifactTypesByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getArtifactTypesByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetArtifactTypesByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetArtifactTypesByExternalIds); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetExecutionTypesByExternalIdsRequest, + * !proto.ml_metadata.GetExecutionTypesByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetExecutionTypesByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetExecutionTypesByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetExecutionTypesByExternalIdsRequest, + proto.ml_metadata.GetExecutionTypesByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetExecutionTypesByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getExecutionTypesByExternalIds = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetExecutionTypesByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetExecutionTypesByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getExecutionTypesByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetExecutionTypesByExternalIds', + request, + metadata || {}, 
+ methodDescriptor_MetadataStoreService_GetExecutionTypesByExternalIds); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetContextTypesByExternalIdsRequest, + * !proto.ml_metadata.GetContextTypesByExternalIdsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetContextTypesByExternalIds = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetContextTypesByExternalIds', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetContextTypesByExternalIdsRequest, + proto.ml_metadata.GetContextTypesByExternalIdsResponse, + /** + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetContextTypesByExternalIdsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetContextTypesByExternalIdsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getContextTypesByExternalIds = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetContextTypesByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetContextTypesByExternalIds, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getContextTypesByExternalIds = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetContextTypesByExternalIds', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetContextTypesByExternalIds); +}; + + /** * @const * @type {!grpc.web.MethodDescriptor< @@ -2451,6 +2880,128 @@ proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getChildrenContext }; +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetParentContextsByContextsRequest, + * !proto.ml_metadata.GetParentContextsByContextsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetParentContextsByContexts = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetParentContextsByContexts', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetParentContextsByContextsRequest, + proto.ml_metadata.GetParentContextsByContextsResponse, + /** + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetParentContextsByContextsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetParentContextsByContextsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ 
+proto.ml_metadata.MetadataStoreServiceClient.prototype.getParentContextsByContexts = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetParentContextsByContexts', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetParentContextsByContexts, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getParentContextsByContexts = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetParentContextsByContexts', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetParentContextsByContexts); +}; + + +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetChildrenContextsByContextsRequest, + * !proto.ml_metadata.GetChildrenContextsByContextsResponse>} + */ +const methodDescriptor_MetadataStoreService_GetChildrenContextsByContexts = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetChildrenContextsByContexts', + grpc.web.MethodType.UNARY, + proto.ml_metadata.GetChildrenContextsByContextsRequest, + proto.ml_metadata.GetChildrenContextsByContextsResponse, + /** + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetChildrenContextsByContextsResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetChildrenContextsByContextsResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getChildrenContextsByContexts = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetChildrenContextsByContexts', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetChildrenContextsByContexts, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getChildrenContextsByContexts = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetChildrenContextsByContexts', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetChildrenContextsByContexts); +}; + + /** * @const * @type {!grpc.web.MethodDescriptor< @@ -2634,5 +3185,66 @@ proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getLineageGraph = }; +/** + * @const + * @type {!grpc.web.MethodDescriptor< + * !proto.ml_metadata.GetLineageSubgraphRequest, + * !proto.ml_metadata.GetLineageSubgraphResponse>} + */ +const methodDescriptor_MetadataStoreService_GetLineageSubgraph = new grpc.web.MethodDescriptor( + '/ml_metadata.MetadataStoreService/GetLineageSubgraph', + 
grpc.web.MethodType.UNARY, + proto.ml_metadata.GetLineageSubgraphRequest, + proto.ml_metadata.GetLineageSubgraphResponse, + /** + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} request + * @return {!Uint8Array} + */ + function(request) { + return request.serializeBinary(); + }, + proto.ml_metadata.GetLineageSubgraphResponse.deserializeBinary +); + + +/** + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} request The + * request proto + * @param {?Object} metadata User defined + * call metadata + * @param {function(?grpc.web.RpcError, ?proto.ml_metadata.GetLineageSubgraphResponse)} + * callback The callback function(error, response) + * @return {!grpc.web.ClientReadableStream|undefined} + * The XHR Node Readable Stream + */ +proto.ml_metadata.MetadataStoreServiceClient.prototype.getLineageSubgraph = + function(request, metadata, callback) { + return this.client_.rpcCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetLineageSubgraph', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetLineageSubgraph, + callback); +}; + + +/** + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} request The + * request proto + * @param {?Object=} metadata User defined + * call metadata + * @return {!Promise} + * Promise that resolves to the response + */ +proto.ml_metadata.MetadataStoreServicePromiseClient.prototype.getLineageSubgraph = + function(request, metadata) { + return this.client_.unaryCall(this.hostname_ + + '/ml_metadata.MetadataStoreService/GetLineageSubgraph', + request, + metadata || {}, + methodDescriptor_MetadataStoreService_GetLineageSubgraph); +}; + + module.exports = proto.ml_metadata; diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.d.ts b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.d.ts index d59fdebc00..83fb1f015c 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.d.ts +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.d.ts @@ -1,5 +1,6 @@ import * as jspb from 'google-protobuf' +import * as google_protobuf_field_mask_pb from 'google-protobuf/google/protobuf/field_mask_pb'; import * as ml_metadata_proto_metadata_store_pb from '../../ml_metadata/proto/metadata_store_pb'; @@ -119,6 +120,16 @@ export class PutArtifactsRequest extends jspb.Message { hasOptions(): boolean; clearOptions(): PutArtifactsRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutArtifactsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutArtifactsRequest; + + getUpdateMask(): google_protobuf_field_mask_pb.FieldMask | undefined; + setUpdateMask(value?: google_protobuf_field_mask_pb.FieldMask): PutArtifactsRequest; + hasUpdateMask(): boolean; + clearUpdateMask(): PutArtifactsRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutArtifactsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutArtifactsRequest): PutArtifactsRequest.AsObject; @@ -131,6 +142,8 @@ export namespace PutArtifactsRequest { export type AsObject = { artifactsList: Array, options?: PutArtifactsRequest.Options.AsObject, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + updateMask?: google_protobuf_field_mask_pb.FieldMask.AsObject, } export class Options extends jspb.Message { @@ 
-191,6 +204,11 @@ export class PutArtifactTypeRequest extends jspb.Message { getAllFieldsMatch(): boolean; setAllFieldsMatch(value: boolean): PutArtifactTypeRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutArtifactTypeRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutArtifactTypeRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutArtifactTypeRequest.AsObject; static toObject(includeInstance: boolean, msg: PutArtifactTypeRequest): PutArtifactTypeRequest.AsObject; @@ -206,6 +224,7 @@ export namespace PutArtifactTypeRequest { canOmitFields: boolean, canDeleteFields: boolean, allFieldsMatch: boolean, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -233,6 +252,16 @@ export class PutExecutionsRequest extends jspb.Message { clearExecutionsList(): PutExecutionsRequest; addExecutions(value?: ml_metadata_proto_metadata_store_pb.Execution, index?: number): ml_metadata_proto_metadata_store_pb.Execution; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutExecutionsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutExecutionsRequest; + + getUpdateMask(): google_protobuf_field_mask_pb.FieldMask | undefined; + setUpdateMask(value?: google_protobuf_field_mask_pb.FieldMask): PutExecutionsRequest; + hasUpdateMask(): boolean; + clearUpdateMask(): PutExecutionsRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutExecutionsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutExecutionsRequest): PutExecutionsRequest.AsObject; @@ -244,6 +273,8 @@ export class PutExecutionsRequest extends jspb.Message { export namespace PutExecutionsRequest { export type AsObject = { executionsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + updateMask?: google_protobuf_field_mask_pb.FieldMask.AsObject, } } @@ -285,6 +316,11 @@ export class PutExecutionTypeRequest extends jspb.Message { getAllFieldsMatch(): boolean; setAllFieldsMatch(value: boolean): PutExecutionTypeRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutExecutionTypeRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutExecutionTypeRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutExecutionTypeRequest.AsObject; static toObject(includeInstance: boolean, msg: PutExecutionTypeRequest): PutExecutionTypeRequest.AsObject; @@ -300,6 +336,7 @@ export namespace PutExecutionTypeRequest { canOmitFields: boolean, canDeleteFields: boolean, allFieldsMatch: boolean, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -327,6 +364,11 @@ export class PutEventsRequest extends jspb.Message { clearEventsList(): PutEventsRequest; addEvents(value?: ml_metadata_proto_metadata_store_pb.Event, index?: number): ml_metadata_proto_metadata_store_pb.Event; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutEventsRequest; + 
hasTransactionOptions(): boolean; + clearTransactionOptions(): PutEventsRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutEventsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutEventsRequest): PutEventsRequest.AsObject; @@ -338,6 +380,7 @@ export class PutEventsRequest extends jspb.Message { export namespace PutEventsRequest { export type AsObject = { eventsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -376,6 +419,11 @@ export class PutExecutionRequest extends jspb.Message { hasOptions(): boolean; clearOptions(): PutExecutionRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutExecutionRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutExecutionRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutExecutionRequest.AsObject; static toObject(includeInstance: boolean, msg: PutExecutionRequest): PutExecutionRequest.AsObject; @@ -390,6 +438,7 @@ export namespace PutExecutionRequest { artifactEventPairsList: Array, contextsList: Array, options?: PutExecutionRequest.Options.AsObject, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } export class ArtifactAndEvent extends jspb.Message { @@ -423,6 +472,9 @@ export namespace PutExecutionRequest { getReuseContextIfAlreadyExist(): boolean; setReuseContextIfAlreadyExist(value: boolean): Options; + getReuseArtifactIfAlreadyExistByExternalId(): boolean; + setReuseArtifactIfAlreadyExistByExternalId(value: boolean): Options; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): Options.AsObject; static toObject(includeInstance: boolean, msg: Options): Options.AsObject; @@ -434,6 +486,7 @@ export namespace PutExecutionRequest { export namespace Options { export type AsObject = { reuseContextIfAlreadyExist: boolean, + reuseArtifactIfAlreadyExistByExternalId: boolean, } } @@ -469,6 +522,140 @@ export namespace PutExecutionResponse { } } +export class PutLineageSubgraphRequest extends jspb.Message { + getExecutionsList(): Array; + setExecutionsList(value: Array): PutLineageSubgraphRequest; + clearExecutionsList(): PutLineageSubgraphRequest; + addExecutions(value?: ml_metadata_proto_metadata_store_pb.Execution, index?: number): ml_metadata_proto_metadata_store_pb.Execution; + + getArtifactsList(): Array; + setArtifactsList(value: Array): PutLineageSubgraphRequest; + clearArtifactsList(): PutLineageSubgraphRequest; + addArtifacts(value?: ml_metadata_proto_metadata_store_pb.Artifact, index?: number): ml_metadata_proto_metadata_store_pb.Artifact; + + getContextsList(): Array; + setContextsList(value: Array): PutLineageSubgraphRequest; + clearContextsList(): PutLineageSubgraphRequest; + addContexts(value?: ml_metadata_proto_metadata_store_pb.Context, index?: number): ml_metadata_proto_metadata_store_pb.Context; + + getEventEdgesList(): Array; + setEventEdgesList(value: Array): PutLineageSubgraphRequest; + clearEventEdgesList(): PutLineageSubgraphRequest; + addEventEdges(value?: PutLineageSubgraphRequest.EventEdge, index?: number): PutLineageSubgraphRequest.EventEdge; + + getOptions(): PutLineageSubgraphRequest.Options | undefined; + setOptions(value?: PutLineageSubgraphRequest.Options): PutLineageSubgraphRequest; + hasOptions(): boolean; + clearOptions(): PutLineageSubgraphRequest; + + 
getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutLineageSubgraphRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutLineageSubgraphRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): PutLineageSubgraphRequest.AsObject; + static toObject(includeInstance: boolean, msg: PutLineageSubgraphRequest): PutLineageSubgraphRequest.AsObject; + static serializeBinaryToWriter(message: PutLineageSubgraphRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): PutLineageSubgraphRequest; + static deserializeBinaryFromReader(message: PutLineageSubgraphRequest, reader: jspb.BinaryReader): PutLineageSubgraphRequest; +} + +export namespace PutLineageSubgraphRequest { + export type AsObject = { + executionsList: Array, + artifactsList: Array, + contextsList: Array, + eventEdgesList: Array, + options?: PutLineageSubgraphRequest.Options.AsObject, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } + + export class EventEdge extends jspb.Message { + getExecutionIndex(): number; + setExecutionIndex(value: number): EventEdge; + + getArtifactIndex(): number; + setArtifactIndex(value: number): EventEdge; + + getEvent(): ml_metadata_proto_metadata_store_pb.Event | undefined; + setEvent(value?: ml_metadata_proto_metadata_store_pb.Event): EventEdge; + hasEvent(): boolean; + clearEvent(): EventEdge; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): EventEdge.AsObject; + static toObject(includeInstance: boolean, msg: EventEdge): EventEdge.AsObject; + static serializeBinaryToWriter(message: EventEdge, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): EventEdge; + static deserializeBinaryFromReader(message: EventEdge, reader: jspb.BinaryReader): EventEdge; + } + + export namespace EventEdge { + export type AsObject = { + executionIndex: number, + artifactIndex: number, + event?: ml_metadata_proto_metadata_store_pb.Event.AsObject, + } + } + + + export class Options extends jspb.Message { + getReuseContextIfAlreadyExist(): boolean; + setReuseContextIfAlreadyExist(value: boolean): Options; + + getReuseArtifactIfAlreadyExistByExternalId(): boolean; + setReuseArtifactIfAlreadyExistByExternalId(value: boolean): Options; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): Options.AsObject; + static toObject(includeInstance: boolean, msg: Options): Options.AsObject; + static serializeBinaryToWriter(message: Options, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): Options; + static deserializeBinaryFromReader(message: Options, reader: jspb.BinaryReader): Options; + } + + export namespace Options { + export type AsObject = { + reuseContextIfAlreadyExist: boolean, + reuseArtifactIfAlreadyExistByExternalId: boolean, + } + } + +} + +export class PutLineageSubgraphResponse extends jspb.Message { + getExecutionIdsList(): Array; + setExecutionIdsList(value: Array): PutLineageSubgraphResponse; + clearExecutionIdsList(): PutLineageSubgraphResponse; + addExecutionIds(value: number, index?: number): PutLineageSubgraphResponse; + + getArtifactIdsList(): Array; + setArtifactIdsList(value: Array): PutLineageSubgraphResponse; + clearArtifactIdsList(): PutLineageSubgraphResponse; + addArtifactIds(value: number, index?: number): PutLineageSubgraphResponse; + + getContextIdsList(): 
Array; + setContextIdsList(value: Array): PutLineageSubgraphResponse; + clearContextIdsList(): PutLineageSubgraphResponse; + addContextIds(value: number, index?: number): PutLineageSubgraphResponse; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): PutLineageSubgraphResponse.AsObject; + static toObject(includeInstance: boolean, msg: PutLineageSubgraphResponse): PutLineageSubgraphResponse.AsObject; + static serializeBinaryToWriter(message: PutLineageSubgraphResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): PutLineageSubgraphResponse; + static deserializeBinaryFromReader(message: PutLineageSubgraphResponse, reader: jspb.BinaryReader): PutLineageSubgraphResponse; +} + +export namespace PutLineageSubgraphResponse { + export type AsObject = { + executionIdsList: Array, + artifactIdsList: Array, + contextIdsList: Array, + } +} + export class PutTypesRequest extends jspb.Message { getArtifactTypesList(): Array; setArtifactTypesList(value: Array): PutTypesRequest; @@ -497,6 +684,11 @@ export class PutTypesRequest extends jspb.Message { getAllFieldsMatch(): boolean; setAllFieldsMatch(value: boolean): PutTypesRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutTypesRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutTypesRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutTypesRequest.AsObject; static toObject(includeInstance: boolean, msg: PutTypesRequest): PutTypesRequest.AsObject; @@ -514,6 +706,7 @@ export namespace PutTypesRequest { canOmitFields: boolean, canDeleteFields: boolean, allFieldsMatch: boolean, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -567,6 +760,11 @@ export class PutContextTypeRequest extends jspb.Message { getAllFieldsMatch(): boolean; setAllFieldsMatch(value: boolean): PutContextTypeRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutContextTypeRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutContextTypeRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutContextTypeRequest.AsObject; static toObject(includeInstance: boolean, msg: PutContextTypeRequest): PutContextTypeRequest.AsObject; @@ -582,6 +780,7 @@ export namespace PutContextTypeRequest { canOmitFields: boolean, canDeleteFields: boolean, allFieldsMatch: boolean, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -609,6 +808,16 @@ export class PutContextsRequest extends jspb.Message { clearContextsList(): PutContextsRequest; addContexts(value?: ml_metadata_proto_metadata_store_pb.Context, index?: number): ml_metadata_proto_metadata_store_pb.Context; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutContextsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutContextsRequest; + + getUpdateMask(): google_protobuf_field_mask_pb.FieldMask | undefined; + setUpdateMask(value?: google_protobuf_field_mask_pb.FieldMask): PutContextsRequest; + hasUpdateMask(): boolean; + clearUpdateMask(): PutContextsRequest; + 
serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutContextsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutContextsRequest): PutContextsRequest.AsObject; @@ -620,6 +829,8 @@ export class PutContextsRequest extends jspb.Message { export namespace PutContextsRequest { export type AsObject = { contextsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + updateMask?: google_protobuf_field_mask_pb.FieldMask.AsObject, } } @@ -654,6 +865,11 @@ export class PutAttributionsAndAssociationsRequest extends jspb.Message { clearAssociationsList(): PutAttributionsAndAssociationsRequest; addAssociations(value?: ml_metadata_proto_metadata_store_pb.Association, index?: number): ml_metadata_proto_metadata_store_pb.Association; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutAttributionsAndAssociationsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutAttributionsAndAssociationsRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutAttributionsAndAssociationsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutAttributionsAndAssociationsRequest): PutAttributionsAndAssociationsRequest.AsObject; @@ -666,6 +882,7 @@ export namespace PutAttributionsAndAssociationsRequest { export type AsObject = { attributionsList: Array, associationsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -689,6 +906,11 @@ export class PutParentContextsRequest extends jspb.Message { clearParentContextsList(): PutParentContextsRequest; addParentContexts(value?: ml_metadata_proto_metadata_store_pb.ParentContext, index?: number): ml_metadata_proto_metadata_store_pb.ParentContext; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): PutParentContextsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): PutParentContextsRequest; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): PutParentContextsRequest.AsObject; static toObject(includeInstance: boolean, msg: PutParentContextsRequest): PutParentContextsRequest.AsObject; @@ -700,6 +922,7 @@ export class PutParentContextsRequest extends jspb.Message { export namespace PutParentContextsRequest { export type AsObject = { parentContextsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -833,6 +1056,9 @@ export class GetArtifactsByIDRequest extends jspb.Message { clearArtifactIdsList(): GetArtifactsByIDRequest; addArtifactIds(value: number, index?: number): GetArtifactsByIDRequest; + getPopulateArtifactTypes(): boolean; + setPopulateArtifactTypes(value: boolean): GetArtifactsByIDRequest; + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetArtifactsByIDRequest; hasTransactionOptions(): boolean; @@ -849,6 +1075,7 @@ export class GetArtifactsByIDRequest extends jspb.Message { export namespace GetArtifactsByIDRequest { export type AsObject = { artifactIdsList: Array, + populateArtifactTypes: boolean, transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, } } @@ -859,6 
+1086,11 @@ export class GetArtifactsByIDResponse extends jspb.Message { clearArtifactsList(): GetArtifactsByIDResponse; addArtifacts(value?: ml_metadata_proto_metadata_store_pb.Artifact, index?: number): ml_metadata_proto_metadata_store_pb.Artifact; + getArtifactTypesList(): Array; + setArtifactTypesList(value: Array): GetArtifactsByIDResponse; + clearArtifactTypesList(): GetArtifactsByIDResponse; + addArtifactTypes(value?: ml_metadata_proto_metadata_store_pb.ArtifactType, index?: number): ml_metadata_proto_metadata_store_pb.ArtifactType; + serializeBinary(): Uint8Array; toObject(includeInstance?: boolean): GetArtifactsByIDResponse.AsObject; static toObject(includeInstance: boolean, msg: GetArtifactsByIDResponse): GetArtifactsByIDResponse.AsObject; @@ -870,6 +1102,7 @@ export class GetArtifactsByIDResponse extends jspb.Message { export namespace GetArtifactsByIDResponse { export type AsObject = { artifactsList: Array, + artifactTypesList: Array, } } @@ -1187,6 +1420,282 @@ export namespace GetContextTypesResponse { } } +export class GetArtifactsByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetArtifactsByExternalIdsRequest; + clearExternalIdsList(): GetArtifactsByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetArtifactsByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetArtifactsByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetArtifactsByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetArtifactsByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetArtifactsByExternalIdsRequest): GetArtifactsByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetArtifactsByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetArtifactsByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetArtifactsByExternalIdsRequest, reader: jspb.BinaryReader): GetArtifactsByExternalIdsRequest; +} + +export namespace GetArtifactsByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetArtifactsByExternalIdsResponse extends jspb.Message { + getArtifactsList(): Array; + setArtifactsList(value: Array): GetArtifactsByExternalIdsResponse; + clearArtifactsList(): GetArtifactsByExternalIdsResponse; + addArtifacts(value?: ml_metadata_proto_metadata_store_pb.Artifact, index?: number): ml_metadata_proto_metadata_store_pb.Artifact; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetArtifactsByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetArtifactsByExternalIdsResponse): GetArtifactsByExternalIdsResponse.AsObject; + static serializeBinaryToWriter(message: GetArtifactsByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetArtifactsByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetArtifactsByExternalIdsResponse, reader: jspb.BinaryReader): GetArtifactsByExternalIdsResponse; +} + +export namespace GetArtifactsByExternalIdsResponse { + export type AsObject = { + artifactsList: Array, + } +} + +export class 
GetExecutionsByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetExecutionsByExternalIdsRequest; + clearExternalIdsList(): GetExecutionsByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetExecutionsByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetExecutionsByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetExecutionsByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetExecutionsByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetExecutionsByExternalIdsRequest): GetExecutionsByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetExecutionsByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetExecutionsByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetExecutionsByExternalIdsRequest, reader: jspb.BinaryReader): GetExecutionsByExternalIdsRequest; +} + +export namespace GetExecutionsByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetExecutionsByExternalIdsResponse extends jspb.Message { + getExecutionsList(): Array; + setExecutionsList(value: Array): GetExecutionsByExternalIdsResponse; + clearExecutionsList(): GetExecutionsByExternalIdsResponse; + addExecutions(value?: ml_metadata_proto_metadata_store_pb.Execution, index?: number): ml_metadata_proto_metadata_store_pb.Execution; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetExecutionsByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetExecutionsByExternalIdsResponse): GetExecutionsByExternalIdsResponse.AsObject; + static serializeBinaryToWriter(message: GetExecutionsByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetExecutionsByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetExecutionsByExternalIdsResponse, reader: jspb.BinaryReader): GetExecutionsByExternalIdsResponse; +} + +export namespace GetExecutionsByExternalIdsResponse { + export type AsObject = { + executionsList: Array, + } +} + +export class GetContextsByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetContextsByExternalIdsRequest; + clearExternalIdsList(): GetContextsByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetContextsByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetContextsByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetContextsByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetContextsByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetContextsByExternalIdsRequest): GetContextsByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetContextsByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): 
GetContextsByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetContextsByExternalIdsRequest, reader: jspb.BinaryReader): GetContextsByExternalIdsRequest; +} + +export namespace GetContextsByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetContextsByExternalIdsResponse extends jspb.Message { + getContextsList(): Array; + setContextsList(value: Array): GetContextsByExternalIdsResponse; + clearContextsList(): GetContextsByExternalIdsResponse; + addContexts(value?: ml_metadata_proto_metadata_store_pb.Context, index?: number): ml_metadata_proto_metadata_store_pb.Context; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetContextsByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetContextsByExternalIdsResponse): GetContextsByExternalIdsResponse.AsObject; + static serializeBinaryToWriter(message: GetContextsByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetContextsByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetContextsByExternalIdsResponse, reader: jspb.BinaryReader): GetContextsByExternalIdsResponse; +} + +export namespace GetContextsByExternalIdsResponse { + export type AsObject = { + contextsList: Array, + } +} + +export class GetArtifactTypesByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetArtifactTypesByExternalIdsRequest; + clearExternalIdsList(): GetArtifactTypesByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetArtifactTypesByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetArtifactTypesByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetArtifactTypesByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetArtifactTypesByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetArtifactTypesByExternalIdsRequest): GetArtifactTypesByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetArtifactTypesByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetArtifactTypesByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetArtifactTypesByExternalIdsRequest, reader: jspb.BinaryReader): GetArtifactTypesByExternalIdsRequest; +} + +export namespace GetArtifactTypesByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetArtifactTypesByExternalIdsResponse extends jspb.Message { + getArtifactTypesList(): Array; + setArtifactTypesList(value: Array): GetArtifactTypesByExternalIdsResponse; + clearArtifactTypesList(): GetArtifactTypesByExternalIdsResponse; + addArtifactTypes(value?: ml_metadata_proto_metadata_store_pb.ArtifactType, index?: number): ml_metadata_proto_metadata_store_pb.ArtifactType; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetArtifactTypesByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetArtifactTypesByExternalIdsResponse): GetArtifactTypesByExternalIdsResponse.AsObject; 
+ static serializeBinaryToWriter(message: GetArtifactTypesByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetArtifactTypesByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetArtifactTypesByExternalIdsResponse, reader: jspb.BinaryReader): GetArtifactTypesByExternalIdsResponse; +} + +export namespace GetArtifactTypesByExternalIdsResponse { + export type AsObject = { + artifactTypesList: Array, + } +} + +export class GetExecutionTypesByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetExecutionTypesByExternalIdsRequest; + clearExternalIdsList(): GetExecutionTypesByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetExecutionTypesByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetExecutionTypesByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetExecutionTypesByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetExecutionTypesByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetExecutionTypesByExternalIdsRequest): GetExecutionTypesByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetExecutionTypesByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetExecutionTypesByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetExecutionTypesByExternalIdsRequest, reader: jspb.BinaryReader): GetExecutionTypesByExternalIdsRequest; +} + +export namespace GetExecutionTypesByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetExecutionTypesByExternalIdsResponse extends jspb.Message { + getExecutionTypesList(): Array; + setExecutionTypesList(value: Array): GetExecutionTypesByExternalIdsResponse; + clearExecutionTypesList(): GetExecutionTypesByExternalIdsResponse; + addExecutionTypes(value?: ml_metadata_proto_metadata_store_pb.ExecutionType, index?: number): ml_metadata_proto_metadata_store_pb.ExecutionType; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetExecutionTypesByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetExecutionTypesByExternalIdsResponse): GetExecutionTypesByExternalIdsResponse.AsObject; + static serializeBinaryToWriter(message: GetExecutionTypesByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetExecutionTypesByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetExecutionTypesByExternalIdsResponse, reader: jspb.BinaryReader): GetExecutionTypesByExternalIdsResponse; +} + +export namespace GetExecutionTypesByExternalIdsResponse { + export type AsObject = { + executionTypesList: Array, + } +} + +export class GetContextTypesByExternalIdsRequest extends jspb.Message { + getExternalIdsList(): Array; + setExternalIdsList(value: Array): GetContextTypesByExternalIdsRequest; + clearExternalIdsList(): GetContextTypesByExternalIdsRequest; + addExternalIds(value: string, index?: number): GetContextTypesByExternalIdsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + 
setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetContextTypesByExternalIdsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetContextTypesByExternalIdsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetContextTypesByExternalIdsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetContextTypesByExternalIdsRequest): GetContextTypesByExternalIdsRequest.AsObject; + static serializeBinaryToWriter(message: GetContextTypesByExternalIdsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetContextTypesByExternalIdsRequest; + static deserializeBinaryFromReader(message: GetContextTypesByExternalIdsRequest, reader: jspb.BinaryReader): GetContextTypesByExternalIdsRequest; +} + +export namespace GetContextTypesByExternalIdsRequest { + export type AsObject = { + externalIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetContextTypesByExternalIdsResponse extends jspb.Message { + getContextTypesList(): Array; + setContextTypesList(value: Array): GetContextTypesByExternalIdsResponse; + clearContextTypesList(): GetContextTypesByExternalIdsResponse; + addContextTypes(value?: ml_metadata_proto_metadata_store_pb.ContextType, index?: number): ml_metadata_proto_metadata_store_pb.ContextType; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetContextTypesByExternalIdsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetContextTypesByExternalIdsResponse): GetContextTypesByExternalIdsResponse.AsObject; + static serializeBinaryToWriter(message: GetContextTypesByExternalIdsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetContextTypesByExternalIdsResponse; + static deserializeBinaryFromReader(message: GetContextTypesByExternalIdsResponse, reader: jspb.BinaryReader): GetContextTypesByExternalIdsResponse; +} + +export namespace GetContextTypesByExternalIdsResponse { + export type AsObject = { + contextTypesList: Array, + } +} + export class GetExecutionsByTypeRequest extends jspb.Message { getTypeName(): string; setTypeName(value: string): GetExecutionsByTypeRequest; @@ -2051,6 +2560,136 @@ export namespace GetChildrenContextsByContextResponse { } } +export class GetParentContextsByContextsRequest extends jspb.Message { + getContextIdsList(): Array; + setContextIdsList(value: Array): GetParentContextsByContextsRequest; + clearContextIdsList(): GetParentContextsByContextsRequest; + addContextIds(value: number, index?: number): GetParentContextsByContextsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetParentContextsByContextsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetParentContextsByContextsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetParentContextsByContextsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetParentContextsByContextsRequest): GetParentContextsByContextsRequest.AsObject; + static serializeBinaryToWriter(message: GetParentContextsByContextsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetParentContextsByContextsRequest; + static deserializeBinaryFromReader(message: GetParentContextsByContextsRequest, reader: 
jspb.BinaryReader): GetParentContextsByContextsRequest; +} + +export namespace GetParentContextsByContextsRequest { + export type AsObject = { + contextIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetParentContextsByContextsResponse extends jspb.Message { + getContextsMap(): jspb.Map; + clearContextsMap(): GetParentContextsByContextsResponse; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetParentContextsByContextsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetParentContextsByContextsResponse): GetParentContextsByContextsResponse.AsObject; + static serializeBinaryToWriter(message: GetParentContextsByContextsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetParentContextsByContextsResponse; + static deserializeBinaryFromReader(message: GetParentContextsByContextsResponse, reader: jspb.BinaryReader): GetParentContextsByContextsResponse; +} + +export namespace GetParentContextsByContextsResponse { + export type AsObject = { + contextsMap: Array<[number, GetParentContextsByContextsResponse.ParentContextsPerChild.AsObject]>, + } + + export class ParentContextsPerChild extends jspb.Message { + getParentContextsList(): Array; + setParentContextsList(value: Array): ParentContextsPerChild; + clearParentContextsList(): ParentContextsPerChild; + addParentContexts(value?: ml_metadata_proto_metadata_store_pb.Context, index?: number): ml_metadata_proto_metadata_store_pb.Context; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): ParentContextsPerChild.AsObject; + static toObject(includeInstance: boolean, msg: ParentContextsPerChild): ParentContextsPerChild.AsObject; + static serializeBinaryToWriter(message: ParentContextsPerChild, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): ParentContextsPerChild; + static deserializeBinaryFromReader(message: ParentContextsPerChild, reader: jspb.BinaryReader): ParentContextsPerChild; + } + + export namespace ParentContextsPerChild { + export type AsObject = { + parentContextsList: Array, + } + } + +} + +export class GetChildrenContextsByContextsRequest extends jspb.Message { + getContextIdsList(): Array; + setContextIdsList(value: Array): GetChildrenContextsByContextsRequest; + clearContextIdsList(): GetChildrenContextsByContextsRequest; + addContextIds(value: number, index?: number): GetChildrenContextsByContextsRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetChildrenContextsByContextsRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetChildrenContextsByContextsRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetChildrenContextsByContextsRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetChildrenContextsByContextsRequest): GetChildrenContextsByContextsRequest.AsObject; + static serializeBinaryToWriter(message: GetChildrenContextsByContextsRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetChildrenContextsByContextsRequest; + static deserializeBinaryFromReader(message: GetChildrenContextsByContextsRequest, reader: jspb.BinaryReader): GetChildrenContextsByContextsRequest; +} + +export namespace GetChildrenContextsByContextsRequest { + export type AsObject = { + 
contextIdsList: Array, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetChildrenContextsByContextsResponse extends jspb.Message { + getContextsMap(): jspb.Map; + clearContextsMap(): GetChildrenContextsByContextsResponse; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetChildrenContextsByContextsResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetChildrenContextsByContextsResponse): GetChildrenContextsByContextsResponse.AsObject; + static serializeBinaryToWriter(message: GetChildrenContextsByContextsResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetChildrenContextsByContextsResponse; + static deserializeBinaryFromReader(message: GetChildrenContextsByContextsResponse, reader: jspb.BinaryReader): GetChildrenContextsByContextsResponse; +} + +export namespace GetChildrenContextsByContextsResponse { + export type AsObject = { + contextsMap: Array<[number, GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.AsObject]>, + } + + export class ChildrenContextsPerParent extends jspb.Message { + getChildrenContextsList(): Array; + setChildrenContextsList(value: Array): ChildrenContextsPerParent; + clearChildrenContextsList(): ChildrenContextsPerParent; + addChildrenContexts(value?: ml_metadata_proto_metadata_store_pb.Context, index?: number): ml_metadata_proto_metadata_store_pb.Context; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): ChildrenContextsPerParent.AsObject; + static toObject(includeInstance: boolean, msg: ChildrenContextsPerParent): ChildrenContextsPerParent.AsObject; + static serializeBinaryToWriter(message: ChildrenContextsPerParent, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): ChildrenContextsPerParent; + static deserializeBinaryFromReader(message: ChildrenContextsPerParent, reader: jspb.BinaryReader): ChildrenContextsPerParent; + } + + export namespace ChildrenContextsPerParent { + export type AsObject = { + childrenContextsList: Array, + } + } + +} + export class GetArtifactsByContextRequest extends jspb.Message { getContextId(): number; setContextId(value: number): GetArtifactsByContextRequest; @@ -2211,3 +2850,55 @@ export namespace GetLineageGraphResponse { } } +export class GetLineageSubgraphRequest extends jspb.Message { + getLineageSubgraphQueryOptions(): ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions | undefined; + setLineageSubgraphQueryOptions(value?: ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions): GetLineageSubgraphRequest; + hasLineageSubgraphQueryOptions(): boolean; + clearLineageSubgraphQueryOptions(): GetLineageSubgraphRequest; + + getReadMask(): google_protobuf_field_mask_pb.FieldMask | undefined; + setReadMask(value?: google_protobuf_field_mask_pb.FieldMask): GetLineageSubgraphRequest; + hasReadMask(): boolean; + clearReadMask(): GetLineageSubgraphRequest; + + getTransactionOptions(): ml_metadata_proto_metadata_store_pb.TransactionOptions | undefined; + setTransactionOptions(value?: ml_metadata_proto_metadata_store_pb.TransactionOptions): GetLineageSubgraphRequest; + hasTransactionOptions(): boolean; + clearTransactionOptions(): GetLineageSubgraphRequest; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetLineageSubgraphRequest.AsObject; + static toObject(includeInstance: boolean, msg: GetLineageSubgraphRequest): GetLineageSubgraphRequest.AsObject; + static 
serializeBinaryToWriter(message: GetLineageSubgraphRequest, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetLineageSubgraphRequest; + static deserializeBinaryFromReader(message: GetLineageSubgraphRequest, reader: jspb.BinaryReader): GetLineageSubgraphRequest; +} + +export namespace GetLineageSubgraphRequest { + export type AsObject = { + lineageSubgraphQueryOptions?: ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions.AsObject, + readMask?: google_protobuf_field_mask_pb.FieldMask.AsObject, + transactionOptions?: ml_metadata_proto_metadata_store_pb.TransactionOptions.AsObject, + } +} + +export class GetLineageSubgraphResponse extends jspb.Message { + getLineageSubgraph(): ml_metadata_proto_metadata_store_pb.LineageGraph | undefined; + setLineageSubgraph(value?: ml_metadata_proto_metadata_store_pb.LineageGraph): GetLineageSubgraphResponse; + hasLineageSubgraph(): boolean; + clearLineageSubgraph(): GetLineageSubgraphResponse; + + serializeBinary(): Uint8Array; + toObject(includeInstance?: boolean): GetLineageSubgraphResponse.AsObject; + static toObject(includeInstance: boolean, msg: GetLineageSubgraphResponse): GetLineageSubgraphResponse.AsObject; + static serializeBinaryToWriter(message: GetLineageSubgraphResponse, writer: jspb.BinaryWriter): void; + static deserializeBinary(bytes: Uint8Array): GetLineageSubgraphResponse; + static deserializeBinaryFromReader(message: GetLineageSubgraphResponse, reader: jspb.BinaryReader): GetLineageSubgraphResponse; +} + +export namespace GetLineageSubgraphResponse { + export type AsObject = { + lineageSubgraph?: ml_metadata_proto_metadata_store_pb.LineageGraph.AsObject, + } +} + diff --git a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.js b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.js index c4fdbcaec2..79ddeebab6 100644 --- a/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.js +++ b/frontend/src/third_party/mlmd/generated/ml_metadata/proto/metadata_store_service_pb.js @@ -15,6 +15,8 @@ var jspb = require('google-protobuf'); var goog = jspb; var global = Function('return this')(); +var google_protobuf_field_mask_pb = require('google-protobuf/google/protobuf/field_mask_pb.js'); +goog.object.extend(proto, google_protobuf_field_mask_pb); var ml_metadata_proto_metadata_store_pb = require('../../ml_metadata/proto/metadata_store_pb.js'); goog.object.extend(proto, ml_metadata_proto_metadata_store_pb); goog.exportSymbol('proto.ml_metadata.ArtifactAndType', null, global); @@ -26,12 +28,16 @@ goog.exportSymbol('proto.ml_metadata.GetArtifactByTypeAndNameRequest', null, glo goog.exportSymbol('proto.ml_metadata.GetArtifactByTypeAndNameResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypeRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypeResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetArtifactTypesByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetArtifactTypesByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypesByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypesByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypesRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactTypesResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactsByContextRequest', null, global); 
goog.exportSymbol('proto.ml_metadata.GetArtifactsByContextResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetArtifactsByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetArtifactsByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactsByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactsByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactsByTypeRequest', null, global); @@ -42,10 +48,15 @@ goog.exportSymbol('proto.ml_metadata.GetArtifactsRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetArtifactsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetChildrenContextsByContextRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetChildrenContextsByContextResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetChildrenContextsByContextsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetChildrenContextsByContextsResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent', null, global); goog.exportSymbol('proto.ml_metadata.GetContextByTypeAndNameRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetContextByTypeAndNameResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextTypeRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetContextTypeResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetContextTypesByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetContextTypesByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextTypesByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetContextTypesByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextTypesRequest', null, global); @@ -54,6 +65,8 @@ goog.exportSymbol('proto.ml_metadata.GetContextsByArtifactRequest', null, global goog.exportSymbol('proto.ml_metadata.GetContextsByArtifactResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextsByExecutionRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetContextsByExecutionResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetContextsByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetContextsByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextsByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetContextsByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetContextsByTypeRequest', null, global); @@ -68,12 +81,16 @@ goog.exportSymbol('proto.ml_metadata.GetExecutionByTypeAndNameRequest', null, gl goog.exportSymbol('proto.ml_metadata.GetExecutionByTypeAndNameResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypeRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypeResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetExecutionTypesByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetExecutionTypesByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypesByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypesByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypesRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionTypesResponse', null, global); 
goog.exportSymbol('proto.ml_metadata.GetExecutionsByContextRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionsByContextResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetExecutionsByExternalIdsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetExecutionsByExternalIdsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionsByIDRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionsByIDResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionsByTypeRequest', null, global); @@ -82,8 +99,13 @@ goog.exportSymbol('proto.ml_metadata.GetExecutionsRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetExecutionsResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetLineageGraphRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetLineageGraphResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetLineageSubgraphRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetLineageSubgraphResponse', null, global); goog.exportSymbol('proto.ml_metadata.GetParentContextsByContextRequest', null, global); goog.exportSymbol('proto.ml_metadata.GetParentContextsByContextResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetParentContextsByContextsRequest', null, global); +goog.exportSymbol('proto.ml_metadata.GetParentContextsByContextsResponse', null, global); +goog.exportSymbol('proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild', null, global); goog.exportSymbol('proto.ml_metadata.PutArtifactTypeRequest', null, global); goog.exportSymbol('proto.ml_metadata.PutArtifactTypeResponse', null, global); goog.exportSymbol('proto.ml_metadata.PutArtifactsRequest', null, global); @@ -105,6 +127,10 @@ goog.exportSymbol('proto.ml_metadata.PutExecutionTypeRequest', null, global); goog.exportSymbol('proto.ml_metadata.PutExecutionTypeResponse', null, global); goog.exportSymbol('proto.ml_metadata.PutExecutionsRequest', null, global); goog.exportSymbol('proto.ml_metadata.PutExecutionsResponse', null, global); +goog.exportSymbol('proto.ml_metadata.PutLineageSubgraphRequest', null, global); +goog.exportSymbol('proto.ml_metadata.PutLineageSubgraphRequest.EventEdge', null, global); +goog.exportSymbol('proto.ml_metadata.PutLineageSubgraphRequest.Options', null, global); +goog.exportSymbol('proto.ml_metadata.PutLineageSubgraphResponse', null, global); goog.exportSymbol('proto.ml_metadata.PutParentContextsRequest', null, global); goog.exportSymbol('proto.ml_metadata.PutParentContextsResponse', null, global); goog.exportSymbol('proto.ml_metadata.PutTypesRequest', null, global); @@ -508,6 +534,90 @@ if (goog.DEBUG && !COMPILED) { */ proto.ml_metadata.PutExecutionResponse.displayName = 'proto.ml_metadata.PutExecutionResponse'; } +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. 
+ * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PutLineageSubgraphRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.PutLineageSubgraphRequest.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.PutLineageSubgraphRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PutLineageSubgraphRequest.displayName = 'proto.ml_metadata.PutLineageSubgraphRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.PutLineageSubgraphRequest.EventEdge, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.displayName = 'proto.ml_metadata.PutLineageSubgraphRequest.EventEdge'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.PutLineageSubgraphRequest.Options, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PutLineageSubgraphRequest.Options.displayName = 'proto.ml_metadata.PutLineageSubgraphRequest.Options'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.PutLineageSubgraphResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.PutLineageSubgraphResponse.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.PutLineageSubgraphResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.PutLineageSubgraphResponse.displayName = 'proto.ml_metadata.PutLineageSubgraphResponse'; +} /** * Generated by JsPbCodeGenerator. 
* @param {Array=} opt_data Optional initial data array, typically from a @@ -1148,16 +1258,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByTypeRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetArtifactsByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactsByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByTypeRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactsByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByTypeRequest.displayName = 'proto.ml_metadata.GetExecutionsByTypeRequest'; + proto.ml_metadata.GetArtifactsByExternalIdsRequest.displayName = 'proto.ml_metadata.GetArtifactsByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1169,16 +1279,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByTypeResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByTypeResponse.repeatedFields_, null); +proto.ml_metadata.GetArtifactsByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactsByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByTypeResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactsByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByTypeResponse.displayName = 'proto.ml_metadata.GetExecutionsByTypeResponse'; + proto.ml_metadata.GetArtifactsByExternalIdsResponse.displayName = 'proto.ml_metadata.GetArtifactsByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1190,16 +1300,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetExecutionsByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionByTypeAndNameRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionByTypeAndNameRequest.displayName = 'proto.ml_metadata.GetExecutionByTypeAndNameRequest'; + proto.ml_metadata.GetExecutionsByExternalIdsRequest.displayName = 'proto.ml_metadata.GetExecutionsByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. 
@@ -1211,16 +1321,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetExecutionsByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionByTypeAndNameResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionByTypeAndNameResponse.displayName = 'proto.ml_metadata.GetExecutionByTypeAndNameResponse'; + proto.ml_metadata.GetExecutionsByExternalIdsResponse.displayName = 'proto.ml_metadata.GetExecutionsByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1232,16 +1342,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByIDRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByIDRequest.repeatedFields_, null); +proto.ml_metadata.GetContextsByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByIDRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByIDRequest.displayName = 'proto.ml_metadata.GetExecutionsByIDRequest'; + proto.ml_metadata.GetContextsByExternalIdsRequest.displayName = 'proto.ml_metadata.GetContextsByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1253,16 +1363,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByIDResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByIDResponse.repeatedFields_, null); +proto.ml_metadata.GetContextsByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByIDResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByIDResponse.displayName = 'proto.ml_metadata.GetExecutionsByIDResponse'; + proto.ml_metadata.GetContextsByExternalIdsResponse.displayName = 'proto.ml_metadata.GetContextsByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1274,16 +1384,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionTypeRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionTypeRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactTypesByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionTypeRequest.displayName = 'proto.ml_metadata.GetExecutionTypeRequest'; + proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.displayName = 'proto.ml_metadata.GetArtifactTypesByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1295,16 +1405,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionTypeResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionTypeResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactTypesByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionTypeResponse.displayName = 'proto.ml_metadata.GetExecutionTypeResponse'; + proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.displayName = 'proto.ml_metadata.GetArtifactTypesByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1316,16 +1426,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetEventsByExecutionIDsRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByExecutionIDsRequest.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetEventsByExecutionIDsRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypesByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetEventsByExecutionIDsRequest.displayName = 'proto.ml_metadata.GetEventsByExecutionIDsRequest'; + proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.displayName = 'proto.ml_metadata.GetExecutionTypesByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. 
@@ -1337,16 +1447,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetEventsByExecutionIDsResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByExecutionIDsResponse.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetEventsByExecutionIDsResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypesByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetEventsByExecutionIDsResponse.displayName = 'proto.ml_metadata.GetEventsByExecutionIDsResponse'; + proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.displayName = 'proto.ml_metadata.GetExecutionTypesByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1358,16 +1468,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetEventsByArtifactIDsRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByArtifactIDsRequest.repeatedFields_, null); +proto.ml_metadata.GetContextTypesByExternalIdsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByExternalIdsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetEventsByArtifactIDsRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypesByExternalIdsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetEventsByArtifactIDsRequest.displayName = 'proto.ml_metadata.GetEventsByArtifactIDsRequest'; + proto.ml_metadata.GetContextTypesByExternalIdsRequest.displayName = 'proto.ml_metadata.GetContextTypesByExternalIdsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1379,16 +1489,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetEventsByArtifactIDsResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByArtifactIDsResponse.repeatedFields_, null); +proto.ml_metadata.GetContextTypesByExternalIdsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByExternalIdsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetEventsByArtifactIDsResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypesByExternalIdsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetEventsByArtifactIDsResponse.displayName = 'proto.ml_metadata.GetEventsByArtifactIDsResponse'; + proto.ml_metadata.GetContextTypesByExternalIdsResponse.displayName = 'proto.ml_metadata.GetContextTypesByExternalIdsResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1400,16 +1510,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetArtifactTypesByIDRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByIDRequest.repeatedFields_, null); +proto.ml_metadata.GetExecutionsByTypeRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetArtifactTypesByIDRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByTypeRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetArtifactTypesByIDRequest.displayName = 'proto.ml_metadata.GetArtifactTypesByIDRequest'; + proto.ml_metadata.GetExecutionsByTypeRequest.displayName = 'proto.ml_metadata.GetExecutionsByTypeRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1421,16 +1531,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetArtifactTypesByIDResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByIDResponse.repeatedFields_, null); +proto.ml_metadata.GetExecutionsByTypeResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByTypeResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetArtifactTypesByIDResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByTypeResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetArtifactTypesByIDResponse.displayName = 'proto.ml_metadata.GetArtifactTypesByIDResponse'; + proto.ml_metadata.GetExecutionsByTypeResponse.displayName = 'proto.ml_metadata.GetExecutionsByTypeResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1442,16 +1552,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionTypesByIDRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByIDRequest.repeatedFields_, null); +proto.ml_metadata.GetExecutionByTypeAndNameRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetExecutionTypesByIDRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionByTypeAndNameRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionTypesByIDRequest.displayName = 'proto.ml_metadata.GetExecutionTypesByIDRequest'; + proto.ml_metadata.GetExecutionByTypeAndNameRequest.displayName = 'proto.ml_metadata.GetExecutionByTypeAndNameRequest'; } /** * Generated by JsPbCodeGenerator. 
@@ -1463,16 +1573,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionTypesByIDResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByIDResponse.repeatedFields_, null); +proto.ml_metadata.GetExecutionByTypeAndNameResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetExecutionTypesByIDResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionByTypeAndNameResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionTypesByIDResponse.displayName = 'proto.ml_metadata.GetExecutionTypesByIDResponse'; + proto.ml_metadata.GetExecutionByTypeAndNameResponse.displayName = 'proto.ml_metadata.GetExecutionByTypeAndNameResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1484,16 +1594,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextTypeRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetExecutionsByIDRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByIDRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextTypeRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByIDRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextTypeRequest.displayName = 'proto.ml_metadata.GetContextTypeRequest'; + proto.ml_metadata.GetExecutionsByIDRequest.displayName = 'proto.ml_metadata.GetExecutionsByIDRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1505,16 +1615,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextTypeResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetExecutionsByIDResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByIDResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextTypeResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionsByIDResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextTypeResponse.displayName = 'proto.ml_metadata.GetContextTypeResponse'; + proto.ml_metadata.GetExecutionsByIDResponse.displayName = 'proto.ml_metadata.GetExecutionsByIDResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1526,16 +1636,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextTypesByIDRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByIDRequest.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypeRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetContextTypesByIDRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypeRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextTypesByIDRequest.displayName = 'proto.ml_metadata.GetContextTypesByIDRequest'; + proto.ml_metadata.GetExecutionTypeRequest.displayName = 'proto.ml_metadata.GetExecutionTypeRequest'; } /** * Generated by JsPbCodeGenerator. 
@@ -1547,16 +1657,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextTypesByIDResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByIDResponse.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypeResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetContextTypesByIDResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypeResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextTypesByIDResponse.displayName = 'proto.ml_metadata.GetContextTypesByIDResponse'; + proto.ml_metadata.GetExecutionTypeResponse.displayName = 'proto.ml_metadata.GetExecutionTypeResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1568,16 +1678,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetEventsByExecutionIDsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByExecutionIDsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetEventsByExecutionIDsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsRequest.displayName = 'proto.ml_metadata.GetContextsRequest'; + proto.ml_metadata.GetEventsByExecutionIDsRequest.displayName = 'proto.ml_metadata.GetEventsByExecutionIDsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1589,16 +1699,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsResponse.repeatedFields_, null); +proto.ml_metadata.GetEventsByExecutionIDsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByExecutionIDsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetEventsByExecutionIDsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsResponse.displayName = 'proto.ml_metadata.GetContextsResponse'; + proto.ml_metadata.GetEventsByExecutionIDsResponse.displayName = 'proto.ml_metadata.GetEventsByExecutionIDsResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1610,16 +1720,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByTypeRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetEventsByArtifactIDsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByArtifactIDsRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByTypeRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetEventsByArtifactIDsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByTypeRequest.displayName = 'proto.ml_metadata.GetContextsByTypeRequest'; + proto.ml_metadata.GetEventsByArtifactIDsRequest.displayName = 'proto.ml_metadata.GetEventsByArtifactIDsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1631,16 +1741,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByTypeResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByTypeResponse.repeatedFields_, null); +proto.ml_metadata.GetEventsByArtifactIDsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetEventsByArtifactIDsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByTypeResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetEventsByArtifactIDsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByTypeResponse.displayName = 'proto.ml_metadata.GetContextsByTypeResponse'; + proto.ml_metadata.GetEventsByArtifactIDsResponse.displayName = 'proto.ml_metadata.GetEventsByArtifactIDsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1652,16 +1762,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextByTypeAndNameRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetArtifactTypesByIDRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByIDRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextByTypeAndNameRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactTypesByIDRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextByTypeAndNameRequest.displayName = 'proto.ml_metadata.GetContextByTypeAndNameRequest'; + proto.ml_metadata.GetArtifactTypesByIDRequest.displayName = 'proto.ml_metadata.GetArtifactTypesByIDRequest'; } /** * Generated by JsPbCodeGenerator. 
@@ -1673,16 +1783,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextByTypeAndNameResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetArtifactTypesByIDResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactTypesByIDResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextByTypeAndNameResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetArtifactTypesByIDResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextByTypeAndNameResponse.displayName = 'proto.ml_metadata.GetContextByTypeAndNameResponse'; + proto.ml_metadata.GetArtifactTypesByIDResponse.displayName = 'proto.ml_metadata.GetArtifactTypesByIDResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1694,16 +1804,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByIDRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByIDRequest.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypesByIDRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByIDRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByIDRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypesByIDRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByIDRequest.displayName = 'proto.ml_metadata.GetContextsByIDRequest'; + proto.ml_metadata.GetExecutionTypesByIDRequest.displayName = 'proto.ml_metadata.GetExecutionTypesByIDRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1715,16 +1825,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByIDResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByIDResponse.repeatedFields_, null); +proto.ml_metadata.GetExecutionTypesByIDResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionTypesByIDResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByIDResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetExecutionTypesByIDResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByIDResponse.displayName = 'proto.ml_metadata.GetContextsByIDResponse'; + proto.ml_metadata.GetExecutionTypesByIDResponse.displayName = 'proto.ml_metadata.GetExecutionTypesByIDResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1736,16 +1846,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByArtifactRequest = function(opt_data) { +proto.ml_metadata.GetContextTypeRequest = function(opt_data) { jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetContextsByArtifactRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypeRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByArtifactRequest.displayName = 'proto.ml_metadata.GetContextsByArtifactRequest'; + proto.ml_metadata.GetContextTypeRequest.displayName = 'proto.ml_metadata.GetContextTypeRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1757,16 +1867,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByArtifactResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByArtifactResponse.repeatedFields_, null); +proto.ml_metadata.GetContextTypeResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetContextsByArtifactResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypeResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByArtifactResponse.displayName = 'proto.ml_metadata.GetContextsByArtifactResponse'; + proto.ml_metadata.GetContextTypeResponse.displayName = 'proto.ml_metadata.GetContextTypeResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1778,16 +1888,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByExecutionRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetContextTypesByIDRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByIDRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByExecutionRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypesByIDRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByExecutionRequest.displayName = 'proto.ml_metadata.GetContextsByExecutionRequest'; + proto.ml_metadata.GetContextTypesByIDRequest.displayName = 'proto.ml_metadata.GetContextTypesByIDRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1799,16 +1909,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetContextsByExecutionResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByExecutionResponse.repeatedFields_, null); +proto.ml_metadata.GetContextTypesByIDResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextTypesByIDResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetContextsByExecutionResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextTypesByIDResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetContextsByExecutionResponse.displayName = 'proto.ml_metadata.GetContextsByExecutionResponse'; + proto.ml_metadata.GetContextTypesByIDResponse.displayName = 'proto.ml_metadata.GetContextTypesByIDResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1820,16 +1930,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetParentContextsByContextRequest = function(opt_data) { +proto.ml_metadata.GetContextsRequest = function(opt_data) { jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetParentContextsByContextRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetParentContextsByContextRequest.displayName = 'proto.ml_metadata.GetParentContextsByContextRequest'; + proto.ml_metadata.GetContextsRequest.displayName = 'proto.ml_metadata.GetContextsRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1841,16 +1951,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetParentContextsByContextResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetParentContextsByContextResponse.repeatedFields_, null); +proto.ml_metadata.GetContextsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetParentContextsByContextResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetParentContextsByContextResponse.displayName = 'proto.ml_metadata.GetParentContextsByContextResponse'; + proto.ml_metadata.GetContextsResponse.displayName = 'proto.ml_metadata.GetContextsResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1862,16 +1972,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetChildrenContextsByContextRequest = function(opt_data) { +proto.ml_metadata.GetContextsByTypeRequest = function(opt_data) { jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetChildrenContextsByContextRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByTypeRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetChildrenContextsByContextRequest.displayName = 'proto.ml_metadata.GetChildrenContextsByContextRequest'; + proto.ml_metadata.GetContextsByTypeRequest.displayName = 'proto.ml_metadata.GetContextsByTypeRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1883,16 +1993,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetChildrenContextsByContextResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetChildrenContextsByContextResponse.repeatedFields_, null); +proto.ml_metadata.GetContextsByTypeResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByTypeResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetChildrenContextsByContextResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByTypeResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetChildrenContextsByContextResponse.displayName = 'proto.ml_metadata.GetChildrenContextsByContextResponse'; + proto.ml_metadata.GetContextsByTypeResponse.displayName = 'proto.ml_metadata.GetContextsByTypeResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1904,16 +2014,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetArtifactsByContextRequest = function(opt_data) { +proto.ml_metadata.GetContextByTypeAndNameRequest = function(opt_data) { jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetArtifactsByContextRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextByTypeAndNameRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetArtifactsByContextRequest.displayName = 'proto.ml_metadata.GetArtifactsByContextRequest'; + proto.ml_metadata.GetContextByTypeAndNameRequest.displayName = 'proto.ml_metadata.GetContextByTypeAndNameRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1925,16 +2035,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetArtifactsByContextResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactsByContextResponse.repeatedFields_, null); +proto.ml_metadata.GetContextByTypeAndNameResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetArtifactsByContextResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextByTypeAndNameResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetArtifactsByContextResponse.displayName = 'proto.ml_metadata.GetArtifactsByContextResponse'; + proto.ml_metadata.GetContextByTypeAndNameResponse.displayName = 'proto.ml_metadata.GetContextByTypeAndNameResponse'; } /** * Generated by JsPbCodeGenerator. @@ -1946,16 +2056,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByContextRequest = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetContextsByIDRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByIDRequest.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByContextRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByIDRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByContextRequest.displayName = 'proto.ml_metadata.GetExecutionsByContextRequest'; + proto.ml_metadata.GetContextsByIDRequest.displayName = 'proto.ml_metadata.GetContextsByIDRequest'; } /** * Generated by JsPbCodeGenerator. @@ -1967,16 +2077,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetExecutionsByContextResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByContextResponse.repeatedFields_, null); +proto.ml_metadata.GetContextsByIDResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByIDResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetExecutionsByContextResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByIDResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetExecutionsByContextResponse.displayName = 'proto.ml_metadata.GetExecutionsByContextResponse'; + proto.ml_metadata.GetContextsByIDResponse.displayName = 'proto.ml_metadata.GetContextsByIDResponse'; } /** * Generated by JsPbCodeGenerator. 
@@ -1988,16 +2098,16 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetLineageGraphRequest = function(opt_data) { +proto.ml_metadata.GetContextsByArtifactRequest = function(opt_data) { jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; -goog.inherits(proto.ml_metadata.GetLineageGraphRequest, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByArtifactRequest, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetLineageGraphRequest.displayName = 'proto.ml_metadata.GetLineageGraphRequest'; + proto.ml_metadata.GetContextsByArtifactRequest.displayName = 'proto.ml_metadata.GetContextsByArtifactRequest'; } /** * Generated by JsPbCodeGenerator. @@ -2009,170 +2119,6762 @@ if (goog.DEBUG && !COMPILED) { * @extends {jspb.Message} * @constructor */ -proto.ml_metadata.GetLineageGraphResponse = function(opt_data) { - jspb.Message.initialize(this, opt_data, 0, -1, null, null); +proto.ml_metadata.GetContextsByArtifactResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByArtifactResponse.repeatedFields_, null); }; -goog.inherits(proto.ml_metadata.GetLineageGraphResponse, jspb.Message); +goog.inherits(proto.ml_metadata.GetContextsByArtifactResponse, jspb.Message); if (goog.DEBUG && !COMPILED) { /** * @public * @override */ - proto.ml_metadata.GetLineageGraphResponse.displayName = 'proto.ml_metadata.GetLineageGraphResponse'; + proto.ml_metadata.GetContextsByArtifactResponse.displayName = 'proto.ml_metadata.GetContextsByArtifactResponse'; } - - - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor */ -proto.ml_metadata.ArtifactAndType.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ArtifactAndType.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByExecutionRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; - - +goog.inherits(proto.ml_metadata.GetContextsByExecutionRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetContextsByExecutionRequest.displayName = 'proto.ml_metadata.GetContextsByExecutionRequest'; +} /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ArtifactAndType} msg The msg instance to transform. 
- * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor */ -proto.ml_metadata.ArtifactAndType.toObject = function(includeInstance, msg) { - var f, obj = { - artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f), - type: (f = msg.getType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f) - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.GetContextsByExecutionResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetContextsByExecutionResponse.repeatedFields_, null); }; +goog.inherits(proto.ml_metadata.GetContextsByExecutionResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetContextsByExecutionResponse.displayName = 'proto.ml_metadata.GetContextsByExecutionResponse'; } - - /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.ArtifactAndType} + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor */ -proto.ml_metadata.ArtifactAndType.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ArtifactAndType; - return proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.GetParentContextsByContextRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); }; - - +goog.inherits(proto.ml_metadata.GetParentContextsByContextRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetParentContextsByContextRequest.displayName = 'proto.ml_metadata.GetParentContextsByContextRequest'; +} /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.ArtifactAndType} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.ArtifactAndType} + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. 
+ * @extends {jspb.Message} + * @constructor */ -proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.setArtifact(value); - break; - case 2: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.setType(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.GetParentContextsByContextResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetParentContextsByContextResponse.repeatedFields_, null); }; - - +goog.inherits(proto.ml_metadata.GetParentContextsByContextResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetParentContextsByContextResponse.displayName = 'proto.ml_metadata.GetParentContextsByContextResponse'; +} /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor */ -proto.ml_metadata.ArtifactAndType.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetChildrenContextsByContextRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetChildrenContextsByContextRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetChildrenContextsByContextRequest.displayName = 'proto.ml_metadata.GetChildrenContextsByContextRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetChildrenContextsByContextResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetChildrenContextsByContextResponse.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetChildrenContextsByContextResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetChildrenContextsByContextResponse.displayName = 'proto.ml_metadata.GetChildrenContextsByContextResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. 
The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetParentContextsByContextsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetParentContextsByContextsRequest.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetParentContextsByContextsRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetParentContextsByContextsRequest.displayName = 'proto.ml_metadata.GetParentContextsByContextsRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetParentContextsByContextsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetParentContextsByContextsResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetParentContextsByContextsResponse.displayName = 'proto.ml_metadata.GetParentContextsByContextsResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.displayName = 'proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetChildrenContextsByContextsRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetChildrenContextsByContextsRequest.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetChildrenContextsByContextsRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetChildrenContextsByContextsRequest.displayName = 'proto.ml_metadata.GetChildrenContextsByContextsRequest'; +} +/** + * Generated by JsPbCodeGenerator. 
+ * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetChildrenContextsByContextsResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetChildrenContextsByContextsResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetChildrenContextsByContextsResponse.displayName = 'proto.ml_metadata.GetChildrenContextsByContextsResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.displayName = 'proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetArtifactsByContextRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetArtifactsByContextRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetArtifactsByContextRequest.displayName = 'proto.ml_metadata.GetArtifactsByContextRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. 
+ * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetArtifactsByContextResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetArtifactsByContextResponse.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetArtifactsByContextResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetArtifactsByContextResponse.displayName = 'proto.ml_metadata.GetArtifactsByContextResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetExecutionsByContextRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetExecutionsByContextRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetExecutionsByContextRequest.displayName = 'proto.ml_metadata.GetExecutionsByContextRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetExecutionsByContextResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, proto.ml_metadata.GetExecutionsByContextResponse.repeatedFields_, null); +}; +goog.inherits(proto.ml_metadata.GetExecutionsByContextResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetExecutionsByContextResponse.displayName = 'proto.ml_metadata.GetExecutionsByContextResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetLineageGraphRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetLineageGraphRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetLineageGraphRequest.displayName = 'proto.ml_metadata.GetLineageGraphRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. 
+ * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetLineageGraphResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetLineageGraphResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetLineageGraphResponse.displayName = 'proto.ml_metadata.GetLineageGraphResponse'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetLineageSubgraphRequest = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetLineageSubgraphRequest, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetLineageSubgraphRequest.displayName = 'proto.ml_metadata.GetLineageSubgraphRequest'; +} +/** + * Generated by JsPbCodeGenerator. + * @param {Array=} opt_data Optional initial data array, typically from a + * server response, or constructed directly in Javascript. The array is used + * in place and becomes part of the constructed object. It is not cloned. + * If no data is provided, the constructed object will be empty, but still + * valid. + * @extends {jspb.Message} + * @constructor + */ +proto.ml_metadata.GetLineageSubgraphResponse = function(opt_data) { + jspb.Message.initialize(this, opt_data, 0, -1, null, null); +}; +goog.inherits(proto.ml_metadata.GetLineageSubgraphResponse, jspb.Message); +if (goog.DEBUG && !COMPILED) { + /** + * @public + * @override + */ + proto.ml_metadata.GetLineageSubgraphResponse.displayName = 'proto.ml_metadata.GetLineageSubgraphResponse'; +} + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ArtifactAndType.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ArtifactAndType.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ArtifactAndType} msg The msg instance to transform. 
+ * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ArtifactAndType.toObject = function(includeInstance, msg) { + var f, obj = { + artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f), + type: (f = msg.getType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ArtifactAndType} + */ +proto.ml_metadata.ArtifactAndType.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ArtifactAndType; + return proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ArtifactAndType} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.ArtifactAndType} + */ +proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.setArtifact(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.setType(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.ArtifactAndType.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
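+ *
+ * Illustrative round trip for this message, shown as a sketch for readers of
+ * the generated API (not part of the generator's output; the empty
+ * Artifact/ArtifactType payloads are placeholder assumptions):
+ *
+ *   var pair = new proto.ml_metadata.ArtifactAndType();
+ *   pair.setArtifact(new ml_metadata_proto_metadata_store_pb.Artifact());
+ *   pair.setType(new ml_metadata_proto_metadata_store_pb.ArtifactType());
+ *   var bytes = pair.serializeBinary();
+ *   var copy = proto.ml_metadata.ArtifactAndType.deserializeBinary(bytes);
+ *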
+ * @param {!proto.ml_metadata.ArtifactAndType} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifact(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = message.getType(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ); + } +}; + + +/** + * optional Artifact artifact = 1; + * @return {?proto.ml_metadata.Artifact} + */ +proto.ml_metadata.ArtifactAndType.prototype.getArtifact = function() { + return /** @type{?proto.ml_metadata.Artifact} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +}; + + +/** + * @param {?proto.ml_metadata.Artifact|undefined} value + * @return {!proto.ml_metadata.ArtifactAndType} returns this +*/ +proto.ml_metadata.ArtifactAndType.prototype.setArtifact = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ArtifactAndType} returns this + */ +proto.ml_metadata.ArtifactAndType.prototype.clearArtifact = function() { + return this.setArtifact(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ArtifactAndType.prototype.hasArtifact = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional ArtifactType type = 2; + * @return {?proto.ml_metadata.ArtifactType} + */ +proto.ml_metadata.ArtifactAndType.prototype.getType = function() { + return /** @type{?proto.ml_metadata.ArtifactType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 2)); +}; + + +/** + * @param {?proto.ml_metadata.ArtifactType|undefined} value + * @return {!proto.ml_metadata.ArtifactAndType} returns this +*/ +proto.ml_metadata.ArtifactAndType.prototype.setType = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ArtifactAndType} returns this + */ +proto.ml_metadata.ArtifactAndType.prototype.clearType = function() { + return this.setType(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ArtifactAndType.prototype.hasType = function() { + return jspb.Message.getField(this, 2) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ArtifactStructMap.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ArtifactStructMap.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include
+ * the JSPB instance for transitional soy proto support:
+ * http://goto/soy-param-migration
+ * @param {!proto.ml_metadata.ArtifactStructMap} msg The msg instance to transform.
+ * @return {!Object}
+ * @suppress {unusedLocalVariables} f is only used for nested messages
+ */
+proto.ml_metadata.ArtifactStructMap.toObject = function(includeInstance, msg) {
+  var f, obj = {
+    propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, proto.ml_metadata.ArtifactStruct.toObject) : []
+  };
+
+  if (includeInstance) {
+    obj.$jspbMessageInstance = msg;
+  }
+  return obj;
+};
+}
+
+
+/**
+ * Deserializes binary data (in protobuf wire format).
+ * @param {jspb.ByteSource} bytes The bytes to deserialize.
+ * @return {!proto.ml_metadata.ArtifactStructMap}
+ */
+proto.ml_metadata.ArtifactStructMap.deserializeBinary = function(bytes) {
+  var reader = new jspb.BinaryReader(bytes);
+  var msg = new proto.ml_metadata.ArtifactStructMap;
+  return proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader(msg, reader);
+};
+
+
+/**
+ * Deserializes binary data (in protobuf wire format) from the
+ * given reader into the given message object.
+ * @param {!proto.ml_metadata.ArtifactStructMap} msg The message object to deserialize into.
+ * @param {!jspb.BinaryReader} reader The BinaryReader to use.
+ * @return {!proto.ml_metadata.ArtifactStructMap}
+ */
+proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader = function(msg, reader) {
+  while (reader.nextField()) {
+    if (reader.isEndGroup()) {
+      break;
+    }
+    var field = reader.getFieldNumber();
+    switch (field) {
+    case 1:
+      var value = msg.getPropertiesMap();
+      reader.readMessage(value, function(message, reader) {
+        jspb.Map.deserializeBinary(message, reader, jspb.BinaryReader.prototype.readString, jspb.BinaryReader.prototype.readMessage, proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader, "", new proto.ml_metadata.ArtifactStruct());
+         });
+      break;
+    default:
+      reader.skipField();
+      break;
+    }
+  }
+  return msg;
+};
+
+
+/**
+ * Serializes the message to binary data (in protobuf wire format).
+ * @return {!Uint8Array}
+ */
+proto.ml_metadata.ArtifactStructMap.prototype.serializeBinary = function() {
+  var writer = new jspb.BinaryWriter();
+  proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter(this, writer);
+  return writer.getResultBuffer();
+};
+
+
+/**
+ * Serializes the given message to binary data (in protobuf wire
+ * format), writing to the given BinaryWriter.
+ * @param {!proto.ml_metadata.ArtifactStructMap} message
+ * @param {!jspb.BinaryWriter} writer
+ * @suppress {unusedLocalVariables} f is only used for nested messages
+ */
+proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter = function(message, writer) {
+  var f = undefined;
+  f = message.getPropertiesMap(true);
+  if (f && f.getLength() > 0) {
+    f.serializeBinary(1, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter);
+  }
+};
+
+
+/**
+ * map<string, ArtifactStruct> properties = 1;
+ * @param {boolean=} opt_noLazyCreate Do not create the map if
+ * empty, instead returning `undefined`
+ * @return {!jspb.Map<string,!proto.ml_metadata.ArtifactStruct>}
+ */
+proto.ml_metadata.ArtifactStructMap.prototype.getPropertiesMap = function(opt_noLazyCreate) {
+  return /** @type {!jspb.Map<string,!proto.ml_metadata.ArtifactStruct>} */ (
+      jspb.Message.getMapField(this, 1, opt_noLazyCreate,
+      proto.ml_metadata.ArtifactStruct));
+};
+
+
+/**
+ * Clears values from the map. The map will be non-null.
+ * @return {!proto.ml_metadata.ArtifactStructMap} returns this + */ +proto.ml_metadata.ArtifactStructMap.prototype.clearPropertiesMap = function() { + this.getPropertiesMap().clear(); + return this;}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.ArtifactStructList.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ArtifactStructList.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ArtifactStructList.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ArtifactStructList} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ArtifactStructList.toObject = function(includeInstance, msg) { + var f, obj = { + elementsList: jspb.Message.toObjectList(msg.getElementsList(), + proto.ml_metadata.ArtifactStruct.toObject, includeInstance) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ArtifactStructList} + */ +proto.ml_metadata.ArtifactStructList.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ArtifactStructList; + return proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ArtifactStructList} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.ArtifactStructList} + */ +proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new proto.ml_metadata.ArtifactStruct; + reader.readMessage(value,proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader); + msg.addElements(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). 
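+ *
+ * A minimal usage sketch (not generated output; the empty ArtifactStruct
+ * element is a placeholder assumption):
+ *
+ *   var list = new proto.ml_metadata.ArtifactStructList();
+ *   list.addElements(new proto.ml_metadata.ArtifactStruct());
+ *   var bytes = list.serializeBinary();
+ *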
+ * @return {!Uint8Array}
+ */
+proto.ml_metadata.ArtifactStructList.prototype.serializeBinary = function() {
+  var writer = new jspb.BinaryWriter();
+  proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter(this, writer);
+  return writer.getResultBuffer();
+};
+
+
+/**
+ * Serializes the given message to binary data (in protobuf wire
+ * format), writing to the given BinaryWriter.
+ * @param {!proto.ml_metadata.ArtifactStructList} message
+ * @param {!jspb.BinaryWriter} writer
+ * @suppress {unusedLocalVariables} f is only used for nested messages
+ */
+proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter = function(message, writer) {
+  var f = undefined;
+  f = message.getElementsList();
+  if (f.length > 0) {
+    writer.writeRepeatedMessage(
+      1,
+      f,
+      proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter
+    );
+  }
+};
+
+
+/**
+ * repeated ArtifactStruct elements = 1;
+ * @return {!Array<!proto.ml_metadata.ArtifactStruct>}
+ */
+proto.ml_metadata.ArtifactStructList.prototype.getElementsList = function() {
+  return /** @type{!Array<!proto.ml_metadata.ArtifactStruct>} */ (
+      jspb.Message.getRepeatedWrapperField(this, proto.ml_metadata.ArtifactStruct, 1));
+};
+
+
+/**
+ * @param {!Array<!proto.ml_metadata.ArtifactStruct>} value
+ * @return {!proto.ml_metadata.ArtifactStructList} returns this
+*/
+proto.ml_metadata.ArtifactStructList.prototype.setElementsList = function(value) {
+  return jspb.Message.setRepeatedWrapperField(this, 1, value);
+};
+
+
+/**
+ * @param {!proto.ml_metadata.ArtifactStruct=} opt_value
+ * @param {number=} opt_index
+ * @return {!proto.ml_metadata.ArtifactStruct}
+ */
+proto.ml_metadata.ArtifactStructList.prototype.addElements = function(opt_value, opt_index) {
+  return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactStruct, opt_index);
+};
+
+
+/**
+ * Clears the list making it empty but non-null.
+ * @return {!proto.ml_metadata.ArtifactStructList} returns this
+ */
+proto.ml_metadata.ArtifactStructList.prototype.clearElementsList = function() {
+  return this.setElementsList([]);
+};
+
+
+
+/**
+ * Oneof group definitions for this message. Each group defines the field
+ * numbers belonging to that group. When of these fields' value is set, all
+ * other fields in the group are cleared. During deserialization, if multiple
+ * fields are encountered for a group, only the last value seen will be kept.
+ * @private {!Array<!Array<number>>}
+ * @const
+ */
+proto.ml_metadata.ArtifactStruct.oneofGroups_ = [[1,2,3]];
+
+/**
+ * @enum {number}
+ */
+proto.ml_metadata.ArtifactStruct.ValueCase = {
+  VALUE_NOT_SET: 0,
+  ARTIFACT: 1,
+  MAP: 2,
+  LIST: 3
+};
+
+/**
+ * @return {proto.ml_metadata.ArtifactStruct.ValueCase}
+ */
+proto.ml_metadata.ArtifactStruct.prototype.getValueCase = function() {
+  return /** @type {proto.ml_metadata.ArtifactStruct.ValueCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.ArtifactStruct.oneofGroups_[0]));
+};
+
+
+
+if (jspb.Message.GENERATE_TO_OBJECT) {
+/**
+ * Creates an object representation of this proto.
+ * Field names that are reserved in JavaScript and will be renamed to pb_name.
+ * Optional fields that are not set will be set to undefined.
+ * To access a reserved field use, foo.pb_<name>, eg, foo.pb_default.
+ * For the list of reserved names please see:
+ * net/proto2/compiler/js/internal/generator.cc#kKeyword.
+ * @param {boolean=} opt_includeInstance Deprecated.
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.ArtifactStruct.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.ArtifactStruct.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.ArtifactStruct} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ArtifactStruct.toObject = function(includeInstance, msg) { + var f, obj = { + artifact: (f = msg.getArtifact()) && proto.ml_metadata.ArtifactAndType.toObject(includeInstance, f), + map: (f = msg.getMap()) && proto.ml_metadata.ArtifactStructMap.toObject(includeInstance, f), + list: (f = msg.getList()) && proto.ml_metadata.ArtifactStructList.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.ArtifactStruct} + */ +proto.ml_metadata.ArtifactStruct.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.ArtifactStruct; + return proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.ArtifactStruct} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.ArtifactStruct} + */ +proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new proto.ml_metadata.ArtifactAndType; + reader.readMessage(value,proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader); + msg.setArtifact(value); + break; + case 2: + var value = new proto.ml_metadata.ArtifactStructMap; + reader.readMessage(value,proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader); + msg.setMap(value); + break; + case 3: + var value = new proto.ml_metadata.ArtifactStructList; + reader.readMessage(value,proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader); + msg.setList(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.ArtifactStruct.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.ArtifactStruct} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifact(); + if (f != null) { + writer.writeMessage( + 1, + f, + proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter + ); + } + f = message.getMap(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter + ); + } + f = message.getList(); + if (f != null) { + writer.writeMessage( + 3, + f, + proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter + ); + } +}; + + +/** + * optional ArtifactAndType artifact = 1; + * @return {?proto.ml_metadata.ArtifactAndType} + */ +proto.ml_metadata.ArtifactStruct.prototype.getArtifact = function() { + return /** @type{?proto.ml_metadata.ArtifactAndType} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactAndType, 1)); +}; + + +/** + * @param {?proto.ml_metadata.ArtifactAndType|undefined} value + * @return {!proto.ml_metadata.ArtifactStruct} returns this +*/ +proto.ml_metadata.ArtifactStruct.prototype.setArtifact = function(value) { + return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ArtifactStruct} returns this + */ +proto.ml_metadata.ArtifactStruct.prototype.clearArtifact = function() { + return this.setArtifact(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ArtifactStruct.prototype.hasArtifact = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional ArtifactStructMap map = 2; + * @return {?proto.ml_metadata.ArtifactStructMap} + */ +proto.ml_metadata.ArtifactStruct.prototype.getMap = function() { + return /** @type{?proto.ml_metadata.ArtifactStructMap} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactStructMap, 2)); +}; + + +/** + * @param {?proto.ml_metadata.ArtifactStructMap|undefined} value + * @return {!proto.ml_metadata.ArtifactStruct} returns this +*/ +proto.ml_metadata.ArtifactStruct.prototype.setMap = function(value) { + return jspb.Message.setOneofWrapperField(this, 2, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ArtifactStruct} returns this + */ +proto.ml_metadata.ArtifactStruct.prototype.clearMap = function() { + return this.setMap(undefined); +}; + + +/** + * Returns whether this field is set. 
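+ *
+ * Note that artifact, map and list belong to one oneof group, so setting one
+ * member clears the others. A sketch (not generated output):
+ *
+ *   var s = new proto.ml_metadata.ArtifactStruct();
+ *   s.setMap(new proto.ml_metadata.ArtifactStructMap());
+ *   s.setList(new proto.ml_metadata.ArtifactStructList());
+ *   // hasMap() is now false; getValueCase() returns ValueCase.LIST.
+ *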
+ * @return {boolean} + */ +proto.ml_metadata.ArtifactStruct.prototype.hasMap = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional ArtifactStructList list = 3; + * @return {?proto.ml_metadata.ArtifactStructList} + */ +proto.ml_metadata.ArtifactStruct.prototype.getList = function() { + return /** @type{?proto.ml_metadata.ArtifactStructList} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactStructList, 3)); +}; + + +/** + * @param {?proto.ml_metadata.ArtifactStructList|undefined} value + * @return {!proto.ml_metadata.ArtifactStruct} returns this +*/ +proto.ml_metadata.ArtifactStruct.prototype.setList = function(value) { + return jspb.Message.setOneofWrapperField(this, 3, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.ArtifactStruct} returns this + */ +proto.ml_metadata.ArtifactStruct.prototype.clearList = function() { + return this.setList(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.ArtifactStruct.prototype.hasList = function() { + return jspb.Message.getField(this, 3) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutArtifactsRequest.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutArtifactsRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutArtifactsRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsRequest.toObject = function(includeInstance, msg) { + var f, obj = { + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + options: (f = msg.getOptions()) && proto.ml_metadata.PutArtifactsRequest.Options.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f), + updateMask: (f = msg.getUpdateMask()) && google_protobuf_field_mask_pb.FieldMask.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. 
+ * @return {!proto.ml_metadata.PutArtifactsRequest} + */ +proto.ml_metadata.PutArtifactsRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutArtifactsRequest; + return proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutArtifactsRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutArtifactsRequest} + */ +proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + case 2: + var value = new proto.ml_metadata.PutArtifactsRequest.Options; + reader.readMessage(value,proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + case 4: + var value = new google_protobuf_field_mask_pb.FieldMask; + reader.readMessage(value,google_protobuf_field_mask_pb.FieldMask.deserializeBinaryFromReader); + msg.setUpdateMask(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutArtifactsRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutArtifactsRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } + f = message.getUpdateMask(); + if (f != null) { + writer.writeMessage( + 4, + f, + google_protobuf_field_mask_pb.FieldMask.serializeBinaryToWriter + ); + } +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. 
+ * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutArtifactsRequest.Options.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutArtifactsRequest.Options} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsRequest.Options.toObject = function(includeInstance, msg) { + var f, obj = { + abortIfLatestUpdatedTimeChanged: (f = jspb.Message.getBooleanField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutArtifactsRequest.Options} + */ +proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutArtifactsRequest.Options; + return proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutArtifactsRequest.Options} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutArtifactsRequest.Options} + */ +proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setAbortIfLatestUpdatedTimeChanged(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutArtifactsRequest.Options} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {boolean} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeBool( + 1, + f + ); + } +}; + + +/** + * optional bool abort_if_latest_updated_time_changed = 1; + * @return {boolean} + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.getAbortIfLatestUpdatedTimeChanged = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 1, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutArtifactsRequest.Options} returns this + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.setAbortIfLatestUpdatedTimeChanged = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactsRequest.Options} returns this + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.clearAbortIfLatestUpdatedTimeChanged = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactsRequest.Options.prototype.hasAbortIfLatestUpdatedTimeChanged = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * repeated Artifact artifacts = 1; + * @return {!Array} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this +*/ +proto.ml_metadata.PutArtifactsRequest.prototype.setArtifactsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Artifact} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + */ +proto.ml_metadata.PutArtifactsRequest.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); +}; + + +/** + * optional Options options = 2; + * @return {?proto.ml_metadata.PutArtifactsRequest.Options} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.PutArtifactsRequest.Options} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.PutArtifactsRequest.Options, 2)); +}; + + +/** + * @param {?proto.ml_metadata.PutArtifactsRequest.Options|undefined} value + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this +*/ +proto.ml_metadata.PutArtifactsRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. 
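+ *
+ * Typical construction of this request, as a sketch (not generated output;
+ * the empty Artifact payload is a placeholder assumption):
+ *
+ *   var req = new proto.ml_metadata.PutArtifactsRequest();
+ *   req.addArtifacts(new ml_metadata_proto_metadata_store_pb.Artifact());
+ *   var opts = new proto.ml_metadata.PutArtifactsRequest.Options();
+ *   opts.setAbortIfLatestUpdatedTimeChanged(true);
+ *   req.setOptions(opts);
+ *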
+ * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + */ +proto.ml_metadata.PutArtifactsRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 3; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this +*/ +proto.ml_metadata.PutArtifactsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + */ +proto.ml_metadata.PutArtifactsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional google.protobuf.FieldMask update_mask = 4; + * @return {?proto.google.protobuf.FieldMask} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.getUpdateMask = function() { + return /** @type{?proto.google.protobuf.FieldMask} */ ( + jspb.Message.getWrapperField(this, google_protobuf_field_mask_pb.FieldMask, 4)); +}; + + +/** + * @param {?proto.google.protobuf.FieldMask|undefined} value + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this +*/ +proto.ml_metadata.PutArtifactsRequest.prototype.setUpdateMask = function(value) { + return jspb.Message.setWrapperField(this, 4, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + */ +proto.ml_metadata.PutArtifactsRequest.prototype.clearUpdateMask = function() { + return this.setUpdateMask(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactsRequest.prototype.hasUpdateMask = function() { + return jspb.Message.getField(this, 4) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutArtifactsResponse.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutArtifactsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutArtifactsResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutArtifactsResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsResponse.toObject = function(includeInstance, msg) { + var f, obj = { + artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutArtifactsResponse} + */ +proto.ml_metadata.PutArtifactsResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutArtifactsResponse; + return proto.ml_metadata.PutArtifactsResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutArtifactsResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutArtifactsResponse} + */ +proto.ml_metadata.PutArtifactsResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutArtifactsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutArtifactsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
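+ *
+ * On the reading side the accessor of interest is getArtifactIdsList(); a
+ * sketch (not generated output; `bytes` is assumed to hold a serialized
+ * response):
+ *
+ *   var resp = proto.ml_metadata.PutArtifactsResponse.deserializeBinary(bytes);
+ *   var ids = resp.getArtifactIdsList();  // repeated int64 artifact ids
+ *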
+ * @param {!proto.ml_metadata.PutArtifactsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 1, + f + ); + } +}; + + +/** + * repeated int64 artifact_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.PutArtifactsResponse.prototype.getArtifactIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + */ +proto.ml_metadata.PutArtifactsResponse.prototype.setArtifactIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + */ +proto.ml_metadata.PutArtifactsResponse.prototype.addArtifactIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + */ +proto.ml_metadata.PutArtifactsResponse.prototype.clearArtifactIdsList = function() { + return this.setArtifactIdsList([]); +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutArtifactTypeRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutArtifactTypeRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactTypeRequest.toObject = function(includeInstance, msg) { + var f, obj = { + artifactType: (f = msg.getArtifactType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f), + canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, + canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, + canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, + allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). 
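+ *
+ * A sketch of how this request is usually populated before being serialized
+ * and parsed back (not generated output; the empty ArtifactType payload is a
+ * placeholder assumption):
+ *
+ *   var req = new proto.ml_metadata.PutArtifactTypeRequest();
+ *   req.setArtifactType(new ml_metadata_proto_metadata_store_pb.ArtifactType());
+ *   req.setCanAddFields(true);
+ *   var copy = proto.ml_metadata.PutArtifactTypeRequest.deserializeBinary(
+ *       req.serializeBinary());
+ *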
+ * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} + */ +proto.ml_metadata.PutArtifactTypeRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutArtifactTypeRequest; + return proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutArtifactTypeRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} + */ +proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.setArtifactType(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanAddFields(value); + break; + case 5: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanOmitFields(value); + break; + case 3: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanDeleteFields(value); + break; + case 4: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setAllFieldsMatch(value); + break; + case 6: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutArtifactTypeRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutArtifactTypeRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactTypeRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactType(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeBool( + 2, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeBool( + 5, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeBool( + 3, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeBool( + 4, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 6, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * optional ArtifactType artifact_type = 1; + * @return {?proto.ml_metadata.ArtifactType} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getArtifactType = function() { + return /** @type{?proto.ml_metadata.ArtifactType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +}; + + +/** + * @param {?proto.ml_metadata.ArtifactType|undefined} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this +*/ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setArtifactType = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearArtifactType = function() { + return this.setArtifactType(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasArtifactType = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional bool can_add_fields = 2; + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanAddFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanAddFields = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanAddFields = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanAddFields = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional bool can_omit_fields = 5; + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanOmitFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanOmitFields = function(value) { + return jspb.Message.setField(this, 5, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanOmitFields = function() { + return jspb.Message.setField(this, 5, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanOmitFields = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional bool can_delete_fields = 3; + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanDeleteFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanDeleteFields = function(value) { + return jspb.Message.setField(this, 3, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanDeleteFields = function() { + return jspb.Message.setField(this, 3, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanDeleteFields = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional bool all_fields_match = 4; + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getAllFieldsMatch = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setAllFieldsMatch = function(value) { + return jspb.Message.setField(this, 4, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearAllFieldsMatch = function() { + return jspb.Message.setField(this, 4, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasAllFieldsMatch = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 6; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 6)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this +*/ +proto.ml_metadata.PutArtifactTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 6, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 6) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutArtifactTypeResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutArtifactTypeResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactTypeResponse.toObject = function(includeInstance, msg) { + var f, obj = { + typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutArtifactTypeResponse} + */ +proto.ml_metadata.PutArtifactTypeResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutArtifactTypeResponse; + return proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutArtifactTypeResponse} msg The message object to deserialize into. 
+ * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutArtifactTypeResponse} + */ +proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setTypeId(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutArtifactTypeResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutArtifactTypeResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutArtifactTypeResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } +}; + + +/** + * optional int64 type_id = 1; + * @return {number} + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.getTypeId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.PutArtifactTypeResponse} returns this + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.setTypeId = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutArtifactTypeResponse} returns this + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.clearTypeId = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutArtifactTypeResponse.prototype.hasTypeId = function() { + return jspb.Message.getField(this, 1) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutExecutionsRequest.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionsRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. 
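// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// The bindings above expose PutArtifactTypeRequest (artifact_type plus the
// can_add_fields / can_omit_fields / can_delete_fields / all_fields_match flags)
// and PutArtifactTypeResponse (type_id). A minimal, hedged usage sketch follows;
// the require() paths, type name and function name are assumptions for illustration.
function examplePutArtifactType() {
  const { PutArtifactTypeRequest } = require('./metadata_store_service_pb'); // assumed path
  const { ArtifactType } = require('./metadata_store_pb');                   // assumed path

  const artifactType = new ArtifactType();
  artifactType.setName('kfp.Dataset');          // hypothetical type name

  const request = new PutArtifactTypeRequest();
  request.setArtifactType(artifactType);        // artifact_type, field 1 (defined earlier in this file)
  request.setCanAddFields(true);                // allow adding new properties to an existing type
  request.setAllFieldsMatch(true);              // make the default matching behaviour explicit

  // The serialized bytes would be sent via the MetadataStoreService PutArtifactType RPC;
  // the stored type id then comes back through PutArtifactTypeResponse.getTypeId().
  return request.serializeBinary();
}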
Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionsRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionsRequest.toObject = function(includeInstance, msg) { + var f, obj = { + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f), + updateMask: (f = msg.getUpdateMask()) && google_protobuf_field_mask_pb.FieldMask.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionsRequest} + */ +proto.ml_metadata.PutExecutionsRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionsRequest; + return proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionsRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionsRequest} + */ +proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + case 3: + var value = new google_protobuf_field_mask_pb.FieldMask; + reader.readMessage(value,google_protobuf_field_mask_pb.FieldMask.deserializeBinaryFromReader); + msg.setUpdateMask(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionsRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutExecutionsRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionsRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } + f = message.getUpdateMask(); + if (f != null) { + writer.writeMessage( + 3, + f, + google_protobuf_field_mask_pb.FieldMask.serializeBinaryToWriter + ); + } +}; + + +/** + * repeated Execution executions = 1; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this +*/ +proto.ml_metadata.PutExecutionsRequest.prototype.setExecutionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Execution=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Execution} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this + */ +proto.ml_metadata.PutExecutionsRequest.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); +}; + + +/** + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this +*/ +proto.ml_metadata.PutExecutionsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this + */ +proto.ml_metadata.PutExecutionsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. 
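// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// PutExecutionsRequest, defined above, wraps a repeated Execution plus optional
// transaction_options. A hedged sketch of building and serializing one; the
// require() paths and the type id are assumptions for illustration.
function examplePutExecutions() {
  const { PutExecutionsRequest } = require('./metadata_store_service_pb'); // assumed path
  const { Execution } = require('./metadata_store_pb');                    // assumed path

  const execution = new Execution();
  execution.setTypeId(12);            // hypothetical execution type id

  const request = new PutExecutionsRequest();
  request.addExecutions(execution);   // appends to the repeated executions field (field 1)

  // serializeBinary() yields the protobuf wire format expected by the
  // MetadataStoreService PutExecutions RPC.
  return request.serializeBinary();
}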
+ * @return {boolean} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional google.protobuf.FieldMask update_mask = 3; + * @return {?proto.google.protobuf.FieldMask} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.getUpdateMask = function() { + return /** @type{?proto.google.protobuf.FieldMask} */ ( + jspb.Message.getWrapperField(this, google_protobuf_field_mask_pb.FieldMask, 3)); +}; + + +/** + * @param {?proto.google.protobuf.FieldMask|undefined} value + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this +*/ +proto.ml_metadata.PutExecutionsRequest.prototype.setUpdateMask = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionsRequest} returns this + */ +proto.ml_metadata.PutExecutionsRequest.prototype.clearUpdateMask = function() { + return this.setUpdateMask(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionsRequest.prototype.hasUpdateMask = function() { + return jspb.Message.getField(this, 3) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutExecutionsResponse.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionsResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionsResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionsResponse.toObject = function(includeInstance, msg) { + var f, obj = { + executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionsResponse} + */ +proto.ml_metadata.PutExecutionsResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionsResponse; + return proto.ml_metadata.PutExecutionsResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. 
+ * @param {!proto.ml_metadata.PutExecutionsResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionsResponse} + */ +proto.ml_metadata.PutExecutionsResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addExecutionIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutExecutionsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 1, + f + ); + } +}; + + +/** + * repeated int64 execution_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionsResponse.prototype.getExecutionIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + */ +proto.ml_metadata.PutExecutionsResponse.prototype.setExecutionIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + */ +proto.ml_metadata.PutExecutionsResponse.prototype.addExecutionIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + */ +proto.ml_metadata.PutExecutionsResponse.prototype.clearExecutionIdsList = function() { + return this.setExecutionIdsList([]); +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionTypeRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. 
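// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// The update_mask accessors above (field 3 of PutExecutionsRequest) and the repeated
// execution_ids on PutExecutionsResponse pair up naturally: the mask is intended to
// limit which Execution fields the write touches, and the response returns the ids of
// the stored executions. A hedged sketch; module paths and the field path are
// assumptions for illustration.
function examplePutExecutionsWithMask(responseBytes) {
  const { PutExecutionsRequest, PutExecutionsResponse } =
      require('./metadata_store_service_pb');                                  // assumed path
  const { FieldMask } = require('google-protobuf/google/protobuf/field_mask_pb');

  const mask = new FieldMask();
  mask.addPaths('last_known_state');   // only this Execution field should be updated

  const request = new PutExecutionsRequest();
  request.setUpdateMask(mask);

  // Decoding a response received from the server yields the stored ids:
  const response = PutExecutionsResponse.deserializeBinary(responseBytes);
  return response.getExecutionIdsList();   // array of execution ids
}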
+ * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionTypeRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionTypeRequest.toObject = function(includeInstance, msg) { + var f, obj = { + executionType: (f = msg.getExecutionType()) && ml_metadata_proto_metadata_store_pb.ExecutionType.toObject(includeInstance, f), + canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, + canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, + canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, + allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} + */ +proto.ml_metadata.PutExecutionTypeRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionTypeRequest; + return proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionTypeRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} + */ +proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); + msg.setExecutionType(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanAddFields(value); + break; + case 5: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanOmitFields(value); + break; + case 3: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanDeleteFields(value); + break; + case 4: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setAllFieldsMatch(value); + break; + case 6: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). 
+ * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionTypeRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutExecutionTypeRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionTypeRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionType(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeBool( + 2, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeBool( + 5, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeBool( + 3, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeBool( + 4, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 6, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * optional ExecutionType execution_type = 1; + * @return {?proto.ml_metadata.ExecutionType} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getExecutionType = function() { + return /** @type{?proto.ml_metadata.ExecutionType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); +}; + + +/** + * @param {?proto.ml_metadata.ExecutionType|undefined} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this +*/ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setExecutionType = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearExecutionType = function() { + return this.setExecutionType(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasExecutionType = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional bool can_add_fields = 2; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanAddFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanAddFields = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanAddFields = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanAddFields = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional bool can_omit_fields = 5; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanOmitFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanOmitFields = function(value) { + return jspb.Message.setField(this, 5, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanOmitFields = function() { + return jspb.Message.setField(this, 5, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanOmitFields = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional bool can_delete_fields = 3; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanDeleteFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanDeleteFields = function(value) { + return jspb.Message.setField(this, 3, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanDeleteFields = function() { + return jspb.Message.setField(this, 3, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanDeleteFields = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional bool all_fields_match = 4; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getAllFieldsMatch = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setAllFieldsMatch = function(value) { + return jspb.Message.setField(this, 4, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearAllFieldsMatch = function() { + return jspb.Message.setField(this, 4, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasAllFieldsMatch = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 6; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 6)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this +*/ +proto.ml_metadata.PutExecutionTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 6, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 6) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionTypeResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionTypeResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionTypeResponse.toObject = function(includeInstance, msg) { + var f, obj = { + typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionTypeResponse} + */ +proto.ml_metadata.PutExecutionTypeResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionTypeResponse; + return proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionTypeResponse} msg The message object to deserialize into. 
+ * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionTypeResponse} + */ +proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setTypeId(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionTypeResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutExecutionTypeResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionTypeResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } +}; + + +/** + * optional int64 type_id = 1; + * @return {number} + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.getTypeId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.PutExecutionTypeResponse} returns this + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.setTypeId = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionTypeResponse} returns this + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.clearTypeId = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionTypeResponse.prototype.hasTypeId = function() { + return jspb.Message.getField(this, 1) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutEventsRequest.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutEventsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutEventsRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. 
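// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// PutExecutionTypeRequest mirrors PutArtifactTypeRequest: an ExecutionType plus the
// same type-evolution flags, with the stored type id returned on
// PutExecutionTypeResponse. A hedged sketch; require() paths and the type name are
// assumptions for illustration.
function examplePutExecutionType() {
  const { PutExecutionTypeRequest } = require('./metadata_store_service_pb'); // assumed path
  const { ExecutionType } = require('./metadata_store_pb');                   // assumed path

  const executionType = new ExecutionType();
  executionType.setName('kfp.ContainerExecution');   // hypothetical type name

  const request = new PutExecutionTypeRequest();
  request.setExecutionType(executionType);   // execution_type, field 1
  request.setCanAddFields(true);             // tolerate adding properties to an existing type
  return request.serializeBinary();          // bytes for the PutExecutionType RPC
}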
Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutEventsRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutEventsRequest.toObject = function(includeInstance, msg) { + var f, obj = { + eventsList: jspb.Message.toObjectList(msg.getEventsList(), + ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutEventsRequest} + */ +proto.ml_metadata.PutEventsRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutEventsRequest; + return proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutEventsRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutEventsRequest} + */ +proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Event; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); + msg.addEvents(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutEventsRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutEventsRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutEventsRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutEventsRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getEventsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * repeated Event events = 1; + * @return {!Array} + */ +proto.ml_metadata.PutEventsRequest.prototype.getEventsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutEventsRequest} returns this +*/ +proto.ml_metadata.PutEventsRequest.prototype.setEventsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Event=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Event} + */ +proto.ml_metadata.PutEventsRequest.prototype.addEvents = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutEventsRequest} returns this + */ +proto.ml_metadata.PutEventsRequest.prototype.clearEventsList = function() { + return this.setEventsList([]); +}; + + +/** + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutEventsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutEventsRequest} returns this +*/ +proto.ml_metadata.PutEventsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutEventsRequest} returns this + */ +proto.ml_metadata.PutEventsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutEventsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
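// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// PutEventsRequest above simply carries a repeated Event (plus optional
// transaction_options). A hedged sketch of linking an existing artifact to an existing
// execution as an OUTPUT event; the ids and require() paths are assumptions.
function examplePutEvents() {
  const { PutEventsRequest } = require('./metadata_store_service_pb'); // assumed path
  const { Event } = require('./metadata_store_pb');                    // assumed path

  const event = new Event();
  event.setArtifactId(101);            // hypothetical existing artifact id
  event.setExecutionId(202);           // hypothetical existing execution id
  event.setType(Event.Type.OUTPUT);    // the execution produced the artifact

  const request = new PutEventsRequest();
  request.addEvents(event);
  return request.serializeBinary();    // bytes for the PutEvents RPC
}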
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutEventsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutEventsResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutEventsResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutEventsResponse.toObject = function(includeInstance, msg) { + var f, obj = { + + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutEventsResponse} + */ +proto.ml_metadata.PutEventsResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutEventsResponse; + return proto.ml_metadata.PutEventsResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutEventsResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutEventsResponse} + */ +proto.ml_metadata.PutEventsResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutEventsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutEventsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutEventsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutEventsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutExecutionRequest.repeatedFields_ = [2,3]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.toObject = function(includeInstance, msg) { + var f, obj = { + execution: (f = msg.getExecution()) && ml_metadata_proto_metadata_store_pb.Execution.toObject(includeInstance, f), + artifactEventPairsList: jspb.Message.toObjectList(msg.getArtifactEventPairsList(), + proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject, includeInstance), + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), + options: (f = msg.getOptions()) && proto.ml_metadata.PutExecutionRequest.Options.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionRequest} + */ +proto.ml_metadata.PutExecutionRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionRequest; + return proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.PutExecutionRequest} + */ +proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.setExecution(value); + break; + case 2: + var value = new proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent; + reader.readMessage(value,proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader); + msg.addArtifactEventPairs(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); + break; + case 4: + var value = new proto.ml_metadata.PutExecutionRequest.Options; + reader.readMessage(value,proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 5: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutExecutionRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecution(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ); + } + f = message.getArtifactEventPairsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 2, + f, + proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter + ); + } + f = message.getContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ); + } + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 4, + f, + proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 5, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. 
+ * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject = function(includeInstance, msg) { + var f, obj = { + artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f), + event: (f = msg.getEvent()) && ml_metadata_proto_metadata_store_pb.Event.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent; + return proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.setArtifact(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.Event; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); + msg.setEvent(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifact(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = message.getEvent(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ); + } +}; + + +/** + * optional Artifact artifact = 1; + * @return {?proto.ml_metadata.Artifact} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.getArtifact = function() { + return /** @type{?proto.ml_metadata.Artifact} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +}; + + +/** + * @param {?proto.ml_metadata.Artifact|undefined} value + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this +*/ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.setArtifact = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearArtifact = function() { + return this.setArtifact(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.hasArtifact = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional Event event = 2; + * @return {?proto.ml_metadata.Event} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.getEvent = function() { + return /** @type{?proto.ml_metadata.Event} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 2)); +}; + + +/** + * @param {?proto.ml_metadata.Event|undefined} value + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this +*/ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.setEvent = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearEvent = function() { + return this.setEvent(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.hasEvent = function() { + return jspb.Message.getField(this, 2) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
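// --- Editor's note: illustrative sketch only, not part of the generated file. ---
// PutExecutionRequest.ArtifactAndEvent, defined above, pairs a (possibly new) Artifact
// with the Event that attaches it to the execution being written. A hedged sketch; the
// uri, type id and require() paths are assumptions for illustration.
function exampleArtifactAndEvent() {
  const { PutExecutionRequest } = require('./metadata_store_service_pb'); // assumed path
  const { Artifact, Event } = require('./metadata_store_pb');             // assumed path

  const artifact = new Artifact();
  artifact.setTypeId(5);                                   // hypothetical artifact type id
  artifact.setUri('minio://mlpipeline/artifacts/model');   // hypothetical artifact location

  const event = new Event();
  event.setType(Event.Type.OUTPUT);   // ids are typically left unset here; the server can
                                      // fill them in for entities created by the same request

  const pair = new PutExecutionRequest.ArtifactAndEvent();
  pair.setArtifact(artifact);
  pair.setEvent(event);
  return pair;
}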
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionRequest.Options.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionRequest.Options} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.Options.toObject = function(includeInstance, msg) { + var f, obj = { + reuseContextIfAlreadyExist: (f = jspb.Message.getBooleanField(msg, 1)) == null ? undefined : f, + reuseArtifactIfAlreadyExistByExternalId: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutExecutionRequest.Options} + */ +proto.ml_metadata.PutExecutionRequest.Options.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionRequest.Options; + return proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionRequest.Options} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionRequest.Options} + */ +proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setReuseContextIfAlreadyExist(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setReuseArtifactIfAlreadyExistByExternalId(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutExecutionRequest.Options} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {boolean} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeBool( + 1, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeBool( + 2, + f + ); + } +}; + + +/** + * optional bool reuse_context_if_already_exist = 1; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.getReuseContextIfAlreadyExist = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 1, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.setReuseContextIfAlreadyExist = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.clearReuseContextIfAlreadyExist = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.hasReuseContextIfAlreadyExist = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional bool reuse_artifact_if_already_exist_by_external_id = 2; + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.getReuseArtifactIfAlreadyExistByExternalId = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.setReuseArtifactIfAlreadyExistByExternalId = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.clearReuseArtifactIfAlreadyExistByExternalId = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.Options.prototype.hasReuseArtifactIfAlreadyExistByExternalId = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional Execution execution = 1; + * @return {?proto.ml_metadata.Execution} + */ +proto.ml_metadata.PutExecutionRequest.prototype.getExecution = function() { + return /** @type{?proto.ml_metadata.Execution} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +}; + + +/** + * @param {?proto.ml_metadata.Execution|undefined} value + * @return {!proto.ml_metadata.PutExecutionRequest} returns this +*/ +proto.ml_metadata.PutExecutionRequest.prototype.setExecution = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.PutExecutionRequest} returns this + */ +proto.ml_metadata.PutExecutionRequest.prototype.clearExecution = function() { + return this.setExecution(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.prototype.hasExecution = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * repeated ArtifactAndEvent artifact_event_pairs = 2; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionRequest.prototype.getArtifactEventPairsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionRequest} returns this +*/ +proto.ml_metadata.PutExecutionRequest.prototype.setArtifactEventPairsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 2, value); +}; + + +/** + * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} + */ +proto.ml_metadata.PutExecutionRequest.prototype.addArtifactEventPairs = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionRequest} returns this + */ +proto.ml_metadata.PutExecutionRequest.prototype.clearArtifactEventPairsList = function() { + return this.setArtifactEventPairsList([]); +}; + + +/** + * repeated Context contexts = 3; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionRequest.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionRequest} returns this +*/ +proto.ml_metadata.PutExecutionRequest.prototype.setContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 3, value); +}; + + +/** + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} + */ +proto.ml_metadata.PutExecutionRequest.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 3, opt_value, proto.ml_metadata.Context, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionRequest} returns this + */ +proto.ml_metadata.PutExecutionRequest.prototype.clearContextsList = function() { + return this.setContextsList([]); +}; + + +/** + * optional Options options = 4; + * @return {?proto.ml_metadata.PutExecutionRequest.Options} + */ +proto.ml_metadata.PutExecutionRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.PutExecutionRequest.Options} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.PutExecutionRequest.Options, 4)); +}; + + +/** + * @param {?proto.ml_metadata.PutExecutionRequest.Options|undefined} value + * @return {!proto.ml_metadata.PutExecutionRequest} returns this +*/ +proto.ml_metadata.PutExecutionRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.PutExecutionRequest} returns this + */ +proto.ml_metadata.PutExecutionRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 5; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutExecutionRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 5)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutExecutionRequest} returns this +*/ +proto.ml_metadata.PutExecutionRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 5, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutExecutionRequest} returns this + */ +proto.ml_metadata.PutExecutionRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutExecutionRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 5) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutExecutionResponse.repeatedFields_ = [2,3]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutExecutionResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutExecutionResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutExecutionResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionResponse.toObject = function(includeInstance, msg) { + var f, obj = { + executionId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 3)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. 
+ * @return {!proto.ml_metadata.PutExecutionResponse} + */ +proto.ml_metadata.PutExecutionResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutExecutionResponse; + return proto.ml_metadata.PutExecutionResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutExecutionResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutExecutionResponse} + */ +proto.ml_metadata.PutExecutionResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt64()); + msg.setExecutionId(value); + break; + case 2: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactIds(values[i]); + } + break; + case 3: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutExecutionResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutExecutionResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutExecutionResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutExecutionResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( + 1, + f + ); + } + f = message.getArtifactIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 2, + f + ); + } + f = message.getContextIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 3, + f + ); + } +}; + + +/** + * optional int64 execution_id = 1; + * @return {number} + */ +proto.ml_metadata.PutExecutionResponse.prototype.getExecutionId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.setExecutionId = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.clearExecutionId = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutExecutionResponse.prototype.hasExecutionId = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * repeated int64 artifact_ids = 2; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionResponse.prototype.getArtifactIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.setArtifactIdsList = function(value) { + return jspb.Message.setField(this, 2, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.addArtifactIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 2, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.clearArtifactIdsList = function() { + return this.setArtifactIdsList([]); +}; + + +/** + * repeated int64 context_ids = 3; + * @return {!Array} + */ +proto.ml_metadata.PutExecutionResponse.prototype.getContextIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.setContextIdsList = function(value) { + return jspb.Message.setField(this, 3, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.addContextIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 3, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutExecutionResponse} returns this + */ +proto.ml_metadata.PutExecutionResponse.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutLineageSubgraphRequest.repeatedFields_ = [1,2,3,4]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutLineageSubgraphRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutLineageSubgraphRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.toObject = function(includeInstance, msg) { + var f, obj = { + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), + eventEdgesList: jspb.Message.toObjectList(msg.getEventEdgesList(), + proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.toObject, includeInstance), + options: (f = msg.getOptions()) && proto.ml_metadata.PutLineageSubgraphRequest.Options.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} + */ +proto.ml_metadata.PutLineageSubgraphRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutLineageSubgraphRequest; + return proto.ml_metadata.PutLineageSubgraphRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutLineageSubgraphRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphRequest} + */ +proto.ml_metadata.PutLineageSubgraphRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); + break; + case 4: + var value = new proto.ml_metadata.PutLineageSubgraphRequest.EventEdge; + reader.readMessage(value,proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.deserializeBinaryFromReader); + msg.addEventEdges(value); + break; + case 5: + var value = new proto.ml_metadata.PutLineageSubgraphRequest.Options; + reader.readMessage(value,proto.ml_metadata.PutLineageSubgraphRequest.Options.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 6: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutLineageSubgraphRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutLineageSubgraphRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ); + } + f = message.getArtifactsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = message.getContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ); + } + f = message.getEventEdgesList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 4, + f, + proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.serializeBinaryToWriter + ); + } + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 5, + f, + proto.ml_metadata.PutLineageSubgraphRequest.Options.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 6, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.toObject = function(includeInstance, msg) { + var f, obj = { + executionIndex: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + artifactIndex: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + event: (f = msg.getEvent()) && ml_metadata_proto_metadata_store_pb.Event.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutLineageSubgraphRequest.EventEdge; + return proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {number} */ (reader.readInt32()); + msg.setExecutionIndex(value); + break; + case 2: + var value = /** @type {number} */ (reader.readInt32()); + msg.setArtifactIndex(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.Event; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); + msg.setEvent(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt32( + 1, + f + ); + } + f = /** @type {number} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeInt32( + 2, + f + ); + } + f = message.getEvent(); + if (f != null) { + writer.writeMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ); + } +}; + + +/** + * optional int32 execution_index = 1; + * @return {number} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.getExecutionIndex = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.setExecutionIndex = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.clearExecutionIndex = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.hasExecutionIndex = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional int32 artifact_index = 2; + * @return {number} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.getArtifactIndex = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 2, 0)); +}; + + +/** + * @param {number} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.setArtifactIndex = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.clearArtifactIndex = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.hasArtifactIndex = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional Event event = 3; + * @return {?proto.ml_metadata.Event} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.getEvent = function() { + return /** @type{?proto.ml_metadata.Event} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 3)); +}; + + +/** + * @param {?proto.ml_metadata.Event|undefined} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.setEvent = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.clearEvent = function() { + return this.setEvent(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.EventEdge.prototype.hasEvent = function() { + return jspb.Message.getField(this, 3) != null; +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutLineageSubgraphRequest.Options.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. 
+ * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.Options} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.toObject = function(includeInstance, msg) { + var f, obj = { + reuseContextIfAlreadyExist: (f = jspb.Message.getBooleanField(msg, 1)) == null ? undefined : f, + reuseArtifactIfAlreadyExistByExternalId: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutLineageSubgraphRequest.Options; + return proto.ml_metadata.PutLineageSubgraphRequest.Options.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.Options} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setReuseContextIfAlreadyExist(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setReuseArtifactIfAlreadyExistByExternalId(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutLineageSubgraphRequest.Options.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.PutLineageSubgraphRequest.Options} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {boolean} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeBool( + 1, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeBool( + 2, + f + ); + } +}; + + +/** + * optional bool reuse_context_if_already_exist = 1; + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.getReuseContextIfAlreadyExist = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 1, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.setReuseContextIfAlreadyExist = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.clearReuseContextIfAlreadyExist = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.hasReuseContextIfAlreadyExist = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional bool reuse_artifact_if_already_exist_by_external_id = 2; + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.getReuseArtifactIfAlreadyExistByExternalId = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.setReuseArtifactIfAlreadyExistByExternalId = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.Options} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.clearReuseArtifactIfAlreadyExistByExternalId = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.Options.prototype.hasReuseArtifactIfAlreadyExistByExternalId = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * repeated Execution executions = 1; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setExecutionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Execution=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Execution} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); +}; + + +/** + * repeated Artifact artifacts = 2; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setArtifactsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 2, value); +}; + + +/** + * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Artifact} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.Artifact, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); +}; + + +/** + * repeated Context contexts = 3; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 3, value); +}; + + +/** + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 3, opt_value, proto.ml_metadata.Context, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearContextsList = function() { + return this.setContextsList([]); +}; + + +/** + * repeated EventEdge event_edges = 4; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getEventEdgesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, proto.ml_metadata.PutLineageSubgraphRequest.EventEdge, 4)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setEventEdgesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 4, value); +}; + + +/** + * @param {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutLineageSubgraphRequest.EventEdge} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.addEventEdges = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 4, opt_value, proto.ml_metadata.PutLineageSubgraphRequest.EventEdge, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearEventEdgesList = function() { + return this.setEventEdgesList([]); +}; + + +/** + * optional Options options = 5; + * @return {?proto.ml_metadata.PutLineageSubgraphRequest.Options} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.PutLineageSubgraphRequest.Options} */ ( + jspb.Message.getWrapperField(this, proto.ml_metadata.PutLineageSubgraphRequest.Options, 5)); +}; + + +/** + * @param {?proto.ml_metadata.PutLineageSubgraphRequest.Options|undefined} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 5, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 6; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 6)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 6, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphRequest} returns this + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutLineageSubgraphRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 6) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutLineageSubgraphResponse.repeatedFields_ = [1,2,3]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutLineageSubgraphResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutLineageSubgraphResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphResponse.toObject = function(includeInstance, msg) { + var f, obj = { + executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 3)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} + */ +proto.ml_metadata.PutLineageSubgraphResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutLineageSubgraphResponse; + return proto.ml_metadata.PutLineageSubgraphResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutLineageSubgraphResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} + */ +proto.ml_metadata.PutLineageSubgraphResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var values = /** @type {!Array} */ (reader.isDelimited() ? 
reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addExecutionIds(values[i]); + } + break; + case 2: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactIds(values[i]); + } + break; + case 3: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutLineageSubgraphResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutLineageSubgraphResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutLineageSubgraphResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionIdsList(); + if (f.length > 0) { + writer.writePackedInt64( + 1, + f + ); + } + f = message.getArtifactIdsList(); + if (f.length > 0) { + writer.writePackedInt64( + 2, + f + ); + } + f = message.getContextIdsList(); + if (f.length > 0) { + writer.writePackedInt64( + 3, + f + ); + } +}; + + +/** + * repeated int64 execution_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.getExecutionIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.setExecutionIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.addExecutionIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.clearExecutionIdsList = function() { + return this.setExecutionIdsList([]); +}; + + +/** + * repeated int64 artifact_ids = 2; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.getArtifactIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.setArtifactIdsList = function(value) { + return jspb.Message.setField(this, 2, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.addArtifactIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 2, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.clearArtifactIdsList = function() { + return this.setArtifactIdsList([]); +}; + + +/** + * repeated int64 context_ids = 3; + * @return {!Array} + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.getContextIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.setContextIdsList = function(value) { + return jspb.Message.setField(this, 3, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.addContextIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 3, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutLineageSubgraphResponse} returns this + */ +proto.ml_metadata.PutLineageSubgraphResponse.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutTypesRequest.repeatedFields_ = [1,2,3]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutTypesRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutTypesRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include
+ *     the JSPB instance for transitional soy proto support:
+ *     http://goto/soy-param-migration
+ * @param {!proto.ml_metadata.PutTypesRequest} msg The msg instance to transform.
+ * @return {!Object}
+ * @suppress {unusedLocalVariables} f is only used for nested messages
+ */
+proto.ml_metadata.PutTypesRequest.toObject = function(includeInstance, msg) {
+  var f, obj = {
+    artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(),
+    ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance),
+    executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(),
+    ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance),
+    contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(),
+    ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance),
+    canAddFields: (f = jspb.Message.getBooleanField(msg, 4)) == null ? undefined : f,
+    canOmitFields: (f = jspb.Message.getBooleanField(msg, 7)) == null ? undefined : f,
+    canDeleteFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f,
+    allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 6, true),
+    transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f)
+  };
+
+  if (includeInstance) {
+    obj.$jspbMessageInstance = msg;
+  }
+  return obj;
+};
+}
+
+
+/**
+ * Deserializes binary data (in protobuf wire format).
+ * @param {jspb.ByteSource} bytes The bytes to deserialize.
+ * @return {!proto.ml_metadata.PutTypesRequest}
+ */
+proto.ml_metadata.PutTypesRequest.deserializeBinary = function(bytes) {
+  var reader = new jspb.BinaryReader(bytes);
+  var msg = new proto.ml_metadata.PutTypesRequest;
+  return proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader(msg, reader);
+};
+
+
+/**
+ * Deserializes binary data (in protobuf wire format) from the
+ * given reader into the given message object.
+ * @param {!proto.ml_metadata.PutTypesRequest} msg The message object to deserialize into.
+ * @param {!jspb.BinaryReader} reader The BinaryReader to use.
+ * @return {!proto.ml_metadata.PutTypesRequest}
+ */
+proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader = function(msg, reader) {
+  while (reader.nextField()) {
+    if (reader.isEndGroup()) {
+      break;
+    }
+    var field = reader.getFieldNumber();
+    switch (field) {
+    case 1:
+      var value = new ml_metadata_proto_metadata_store_pb.ArtifactType;
+      reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader);
+      msg.addArtifactTypes(value);
+      break;
+    case 2:
+      var value = new ml_metadata_proto_metadata_store_pb.ExecutionType;
+      reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader);
+      msg.addExecutionTypes(value);
+      break;
+    case 3:
+      var value = new ml_metadata_proto_metadata_store_pb.ContextType;
+      reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader);
+      msg.addContextTypes(value);
+      break;
+    case 4:
+      var value = /** @type {boolean} */ (reader.readBool());
+      msg.setCanAddFields(value);
+      break;
+    case 7:
+      var value = /** @type {boolean} */ (reader.readBool());
+      msg.setCanOmitFields(value);
+      break;
+    case 5:
+      var value = /** @type {boolean} */ (reader.readBool());
+      msg.setCanDeleteFields(value);
+      break;
+    case 6:
+      var value = /** @type {boolean} */ (reader.readBool());
+      msg.setAllFieldsMatch(value);
+      break;
+    case 8:
+      var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions;
+      reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader);
+      msg.setTransactionOptions(value);
+      break;
+    default:
+      reader.skipField();
+      break;
+    }
+  }
+  return msg;
+};
+
+
+/**
+ * Serializes the message to binary data (in protobuf wire format).
+ * @return {!Uint8Array}
+ */
+proto.ml_metadata.PutTypesRequest.prototype.serializeBinary = function() {
+  var writer = new jspb.BinaryWriter();
+  proto.ml_metadata.PutTypesRequest.serializeBinaryToWriter(this, writer);
+  return writer.getResultBuffer();
+};
+
+
+/**
+ * Serializes the given message to binary data (in protobuf wire
+ * format), writing to the given BinaryWriter.
+ * @param {!proto.ml_metadata.PutTypesRequest} message
+ * @param {!jspb.BinaryWriter} writer
+ * @suppress {unusedLocalVariables} f is only used for nested messages
+ */
+proto.ml_metadata.PutTypesRequest.serializeBinaryToWriter = function(message, writer) {
+  var f = undefined;
+  f = message.getArtifactTypesList();
+  if (f.length > 0) {
+    writer.writeRepeatedMessage(
+      1,
+      f,
+      ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter
+    );
+  }
+  f = message.getExecutionTypesList();
+  if (f.length > 0) {
+    writer.writeRepeatedMessage(
+      2,
+      f,
+      ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter
+    );
+  }
+  f = message.getContextTypesList();
+  if (f.length > 0) {
+    writer.writeRepeatedMessage(
+      3,
+      f,
+      ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter
+    );
+  }
+  f = /** @type {boolean} */ (jspb.Message.getField(message, 4));
+  if (f != null) {
+    writer.writeBool(
+      4,
+      f
+    );
+  }
+  f = /** @type {boolean} */ (jspb.Message.getField(message, 7));
+  if (f != null) {
+    writer.writeBool(
+      7,
+      f
+    );
+  }
+  f = /** @type {boolean} */ (jspb.Message.getField(message, 5));
+  if (f != null) {
+    writer.writeBool(
+      5,
+      f
+    );
+  }
+  f = /** @type {boolean} */ (jspb.Message.getField(message, 6));
+  if (f != null) {
+    writer.writeBool(
+      6,
+      f
+    );
+  }
+  f = message.getTransactionOptions();
+  if (f != null) {
+    writer.writeMessage(
+      8,
+      f,
+      ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter
+    );
+  }
+};
+
+
+/**
+ * repeated ArtifactType artifact_types = 1;
+ * @return {!Array<!proto.ml_metadata.ArtifactType>}
+ */
+proto.ml_metadata.PutTypesRequest.prototype.getArtifactTypesList = function() {
+  return /** @type{!Array<!proto.ml_metadata.ArtifactType>} */ (
+    jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1));
+};
+
+
+/**
+ * @param {!Array<!proto.ml_metadata.ArtifactType>} value
+ * @return {!proto.ml_metadata.PutTypesRequest} returns this
+*/
+proto.ml_metadata.PutTypesRequest.prototype.setArtifactTypesList = function(value) {
+  return jspb.Message.setRepeatedWrapperField(this, 1, value);
+};
+
+
+/**
+ * @param {!proto.ml_metadata.ArtifactType=} opt_value
+ * @param {number=} opt_index
+ * @return {!proto.ml_metadata.ArtifactType}
+ */
+proto.ml_metadata.PutTypesRequest.prototype.addArtifactTypes = function(opt_value, opt_index) {
+  return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index);
+};
+
+
+/**
+ * Clears the list making it empty but non-null.
+ * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearArtifactTypesList = function() { + return this.setArtifactTypesList([]); +}; + + +/** + * repeated ExecutionType execution_types = 2; + * @return {!Array} + */ +proto.ml_metadata.PutTypesRequest.prototype.getExecutionTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this +*/ +proto.ml_metadata.PutTypesRequest.prototype.setExecutionTypesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 2, value); +}; + + +/** + * @param {!proto.ml_metadata.ExecutionType=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ExecutionType} + */ +proto.ml_metadata.PutTypesRequest.prototype.addExecutionTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.ExecutionType, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearExecutionTypesList = function() { + return this.setExecutionTypesList([]); +}; + + +/** + * repeated ContextType context_types = 3; + * @return {!Array} + */ +proto.ml_metadata.PutTypesRequest.prototype.getContextTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this +*/ +proto.ml_metadata.PutTypesRequest.prototype.setContextTypesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 3, value); +}; + + +/** + * @param {!proto.ml_metadata.ContextType=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ContextType} + */ +proto.ml_metadata.PutTypesRequest.prototype.addContextTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 3, opt_value, proto.ml_metadata.ContextType, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearContextTypesList = function() { + return this.setContextTypesList([]); +}; + + +/** + * optional bool can_add_fields = 4; + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.getCanAddFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.setCanAddFields = function(value) { + return jspb.Message.setField(this, 4, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearCanAddFields = function() { + return jspb.Message.setField(this, 4, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.hasCanAddFields = function() { + return jspb.Message.getField(this, 4) != null; +}; + + +/** + * optional bool can_omit_fields = 7; + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.getCanOmitFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 7, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.setCanOmitFields = function(value) { + return jspb.Message.setField(this, 7, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearCanOmitFields = function() { + return jspb.Message.setField(this, 7, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.hasCanOmitFields = function() { + return jspb.Message.getField(this, 7) != null; +}; + + +/** + * optional bool can_delete_fields = 5; + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.getCanDeleteFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.setCanDeleteFields = function(value) { + return jspb.Message.setField(this, 5, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearCanDeleteFields = function() { + return jspb.Message.setField(this, 5, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.hasCanDeleteFields = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional bool all_fields_match = 6; + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.getAllFieldsMatch = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 6, true)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.setAllFieldsMatch = function(value) { + return jspb.Message.setField(this, 6, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearAllFieldsMatch = function() { + return jspb.Message.setField(this, 6, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.hasAllFieldsMatch = function() { + return jspb.Message.getField(this, 6) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 8; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.PutTypesRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 8)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutTypesRequest} returns this +*/ +proto.ml_metadata.PutTypesRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 8, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutTypesRequest} returns this + */ +proto.ml_metadata.PutTypesRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutTypesRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 8) != null; +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutTypesResponse.repeatedFields_ = [1,2,3]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutTypesResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutTypesResponse.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutTypesResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutTypesResponse.toObject = function(includeInstance, msg) { + var f, obj = { + artifactTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + executionTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, + contextTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 3)) == null ? undefined : f + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. 
+ * @return {!proto.ml_metadata.PutTypesResponse} + */ +proto.ml_metadata.PutTypesResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutTypesResponse; + return proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutTypesResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutTypesResponse} + */ +proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactTypeIds(values[i]); + } + break; + case 2: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addExecutionTypeIds(values[i]); + } + break; + case 3: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextTypeIds(values[i]); + } + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutTypesResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutTypesResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutTypesResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutTypesResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactTypeIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 1, + f + ); + } + f = message.getExecutionTypeIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 2, + f + ); + } + f = message.getContextTypeIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 3, + f + ); + } +}; + + +/** + * repeated int64 artifact_type_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.PutTypesResponse.prototype.getArtifactTypeIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.setArtifactTypeIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.addArtifactTypeIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.clearArtifactTypeIdsList = function() { + return this.setArtifactTypeIdsList([]); +}; + + +/** + * repeated int64 execution_type_ids = 2; + * @return {!Array} + */ +proto.ml_metadata.PutTypesResponse.prototype.getExecutionTypeIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.setExecutionTypeIdsList = function(value) { + return jspb.Message.setField(this, 2, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.addExecutionTypeIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 2, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.clearExecutionTypeIdsList = function() { + return this.setExecutionTypeIdsList([]); +}; + + +/** + * repeated int64 context_type_ids = 3; + * @return {!Array} + */ +proto.ml_metadata.PutTypesResponse.prototype.getContextTypeIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 3)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.setContextTypeIdsList = function(value) { + return jspb.Message.setField(this, 3, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.addContextTypeIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 3, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutTypesResponse} returns this + */ +proto.ml_metadata.PutTypesResponse.prototype.clearContextTypeIdsList = function() { + return this.setContextTypeIdsList([]); +}; + + + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutContextTypeRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutContextTypeRequest} msg The msg instance to transform. 
+ * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.PutContextTypeRequest.toObject = function(includeInstance, msg) { + var f, obj = { + contextType: (f = msg.getContextType()) && ml_metadata_proto_metadata_store_pb.ContextType.toObject(includeInstance, f), + canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, + canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, + canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, + allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; +}; +} + + +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutContextTypeRequest} + */ +proto.ml_metadata.PutContextTypeRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutContextTypeRequest; + return proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader(msg, reader); +}; + + +/** + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutContextTypeRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutContextTypeRequest} + */ +proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ContextType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); + msg.setContextType(value); + break; + case 2: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanAddFields(value); + break; + case 5: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanOmitFields(value); + break; + case 3: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setCanDeleteFields(value); + break; + case 4: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setAllFieldsMatch(value); + break; + case 6: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; +}; + + +/** + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutContextTypeRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; - + /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.ArtifactAndType} message + * @param {!proto.ml_metadata.PutContextTypeRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutContextTypeRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifact(); + f = message.getContextType(); if (f != null) { writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter ); } - f = message.getType(); + f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeMessage( + writer.writeBool( 2, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); + if (f != null) { + writer.writeBool( + 5, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeBool( + 3, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); + if (f != null) { + writer.writeBool( + 4, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 6, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional Artifact artifact = 1; - * @return {?proto.ml_metadata.Artifact} + * optional ContextType context_type = 1; + * @return {?proto.ml_metadata.ContextType} */ -proto.ml_metadata.ArtifactAndType.prototype.getArtifact = function() { - return /** @type{?proto.ml_metadata.Artifact} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.PutContextTypeRequest.prototype.getContextType = function() { + return /** @type{?proto.ml_metadata.ContextType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); }; /** - * @param {?proto.ml_metadata.Artifact|undefined} value - * @return {!proto.ml_metadata.ArtifactAndType} returns this + * @param {?proto.ml_metadata.ContextType|undefined} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactAndType.prototype.setArtifact = function(value) { +proto.ml_metadata.PutContextTypeRequest.prototype.setContextType = function(value) { return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ArtifactAndType} returns this + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactAndType.prototype.clearArtifact = function() { - return this.setArtifact(undefined); +proto.ml_metadata.PutContextTypeRequest.prototype.clearContextType = function() { + return this.setContextType(undefined); }; @@ -2180,36 +8882,107 @@ proto.ml_metadata.ArtifactAndType.prototype.clearArtifact = function() { * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ArtifactAndType.prototype.hasArtifact = function() { +proto.ml_metadata.PutContextTypeRequest.prototype.hasContextType = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional ArtifactType type = 2; - * @return {?proto.ml_metadata.ArtifactType} + * optional bool can_add_fields = 2; + * @return {boolean} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.getCanAddFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + */ +proto.ml_metadata.PutContextTypeRequest.prototype.setCanAddFields = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + */ +proto.ml_metadata.PutContextTypeRequest.prototype.clearCanAddFields = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.hasCanAddFields = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional bool can_omit_fields = 5; + * @return {boolean} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.getCanOmitFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +}; + + +/** + * @param {boolean} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + */ +proto.ml_metadata.PutContextTypeRequest.prototype.setCanOmitFields = function(value) { + return jspb.Message.setField(this, 5, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + */ +proto.ml_metadata.PutContextTypeRequest.prototype.clearCanOmitFields = function() { + return jspb.Message.setField(this, 5, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.PutContextTypeRequest.prototype.hasCanOmitFields = function() { + return jspb.Message.getField(this, 5) != null; +}; + + +/** + * optional bool can_delete_fields = 3; + * @return {boolean} */ -proto.ml_metadata.ArtifactAndType.prototype.getType = function() { - return /** @type{?proto.ml_metadata.ArtifactType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 2)); +proto.ml_metadata.PutContextTypeRequest.prototype.getCanDeleteFields = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); }; /** - * @param {?proto.ml_metadata.ArtifactType|undefined} value - * @return {!proto.ml_metadata.ArtifactAndType} returns this -*/ -proto.ml_metadata.ArtifactAndType.prototype.setType = function(value) { - return jspb.Message.setWrapperField(this, 2, value); + * @param {boolean} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + */ +proto.ml_metadata.PutContextTypeRequest.prototype.setCanDeleteFields = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ArtifactAndType} returns this + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactAndType.prototype.clearType = function() { - return this.setType(undefined); +proto.ml_metadata.PutContextTypeRequest.prototype.clearCanDeleteFields = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -2217,151 +8990,84 @@ proto.ml_metadata.ArtifactAndType.prototype.clearType = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ArtifactAndType.prototype.hasType = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.PutContextTypeRequest.prototype.hasCanDeleteFields = function() { + return jspb.Message.getField(this, 3) != null; }; - - - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * optional bool all_fields_match = 4; + * @return {boolean} */ -proto.ml_metadata.ArtifactStructMap.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ArtifactStructMap.toObject(opt_includeInstance, this); +proto.ml_metadata.PutContextTypeRequest.prototype.getAllFieldsMatch = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ArtifactStructMap} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * @param {boolean} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactStructMap.toObject = function(includeInstance, msg) { - var f, obj = { - propertiesMap: (f = msg.getPropertiesMap()) ? f.toObject(includeInstance, proto.ml_metadata.ArtifactStruct.toObject) : [] - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.PutContextTypeRequest.prototype.setAllFieldsMatch = function(value) { + return jspb.Message.setField(this, 4, value); }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.ArtifactStructMap} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactStructMap.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ArtifactStructMap; - return proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.PutContextTypeRequest.prototype.clearAllFieldsMatch = function() { + return jspb.Message.setField(this, 4, undefined); }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. 
- * @param {!proto.ml_metadata.ArtifactStructMap} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.ArtifactStructMap} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = msg.getPropertiesMap(); - reader.readMessage(value, function(message, reader) { - jspb.Map.deserializeBinary(message, reader, jspb.BinaryReader.prototype.readString, jspb.BinaryReader.prototype.readMessage, proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader, "", new proto.ml_metadata.ArtifactStruct()); - }); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.PutContextTypeRequest.prototype.hasAllFieldsMatch = function() { + return jspb.Message.getField(this, 4) != null; }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * optional TransactionOptions transaction_options = 6; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.ArtifactStructMap.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.PutContextTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 6)); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.ArtifactStructMap} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getPropertiesMap(true); - if (f && f.getLength() > 0) { - f.serializeBinary(1, writer, jspb.BinaryWriter.prototype.writeString, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter); - } + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this +*/ +proto.ml_metadata.PutContextTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 6, value); }; /** - * map properties = 1; - * @param {boolean=} opt_noLazyCreate Do not create the map if - * empty, instead returning `undefined` - * @return {!jspb.Map} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutContextTypeRequest} returns this */ -proto.ml_metadata.ArtifactStructMap.prototype.getPropertiesMap = function(opt_noLazyCreate) { - return /** @type {!jspb.Map} */ ( - jspb.Message.getMapField(this, 1, opt_noLazyCreate, - proto.ml_metadata.ArtifactStruct)); +proto.ml_metadata.PutContextTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * Clears values from the map. The map will be non-null. - * @return {!proto.ml_metadata.ArtifactStructMap} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.ArtifactStructMap.prototype.clearPropertiesMap = function() { - this.getPropertiesMap().clear(); - return this;}; - +proto.ml_metadata.PutContextTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 6) != null; +}; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.ArtifactStructList.repeatedFields_ = [1]; @@ -2378,8 +9084,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.ArtifactStructList.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ArtifactStructList.toObject(opt_includeInstance, this); +proto.ml_metadata.PutContextTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutContextTypeResponse.toObject(opt_includeInstance, this); }; @@ -2388,14 +9094,13 @@ proto.ml_metadata.ArtifactStructList.prototype.toObject = function(opt_includeIn * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ArtifactStructList} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutContextTypeResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ArtifactStructList.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutContextTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - elementsList: jspb.Message.toObjectList(msg.getElementsList(), - proto.ml_metadata.ArtifactStruct.toObject, includeInstance) + typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f }; if (includeInstance) { @@ -2409,23 +9114,23 @@ proto.ml_metadata.ArtifactStructList.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.ArtifactStructList} + * @return {!proto.ml_metadata.PutContextTypeResponse} */ -proto.ml_metadata.ArtifactStructList.deserializeBinary = function(bytes) { +proto.ml_metadata.PutContextTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ArtifactStructList; - return proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutContextTypeResponse; + return proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.ArtifactStructList} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutContextTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.ArtifactStructList} + * @return {!proto.ml_metadata.PutContextTypeResponse} */ -proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -2433,9 +9138,8 @@ proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader = function(msg, var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new proto.ml_metadata.ArtifactStruct; - reader.readMessage(value,proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader); - msg.addElements(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setTypeId(value); break; default: reader.skipField(); @@ -2450,9 +9154,9 @@ proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader = function(msg, * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.ArtifactStructList.prototype.serializeBinary = function() { +proto.ml_metadata.PutContextTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PutContextTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -2460,88 +9164,65 @@ proto.ml_metadata.ArtifactStructList.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.ArtifactStructList} message + * @param {!proto.ml_metadata.PutContextTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutContextTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getElementsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( 1, - f, - proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter + f ); } }; /** - * repeated ArtifactStruct elements = 1; - * @return {!Array} - */ -proto.ml_metadata.ArtifactStructList.prototype.getElementsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, proto.ml_metadata.ArtifactStruct, 1)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.ArtifactStructList} returns this -*/ -proto.ml_metadata.ArtifactStructList.prototype.setElementsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); -}; - - -/** - * @param {!proto.ml_metadata.ArtifactStruct=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ArtifactStruct} + * optional int64 type_id = 1; + * @return {number} */ -proto.ml_metadata.ArtifactStructList.prototype.addElements = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactStruct, opt_index); +proto.ml_metadata.PutContextTypeResponse.prototype.getTypeId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.ArtifactStructList} returns this + * @param {number} value + * @return {!proto.ml_metadata.PutContextTypeResponse} returns this */ -proto.ml_metadata.ArtifactStructList.prototype.clearElementsList = function() { - return this.setElementsList([]); +proto.ml_metadata.PutContextTypeResponse.prototype.setTypeId = function(value) { + return jspb.Message.setField(this, 1, value); }; - /** - * Oneof group definitions for this message. Each group defines the field - * numbers belonging to that group. When of these fields' value is set, all - * other fields in the group are cleared. During deserialization, if multiple - * fields are encountered for a group, only the last value seen will be kept. - * @private {!Array>} - * @const + * Clears the field making it undefined. + * @return {!proto.ml_metadata.PutContextTypeResponse} returns this */ -proto.ml_metadata.ArtifactStruct.oneofGroups_ = [[1,2,3]]; +proto.ml_metadata.PutContextTypeResponse.prototype.clearTypeId = function() { + return jspb.Message.setField(this, 1, undefined); +}; + /** - * @enum {number} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.ArtifactStruct.ValueCase = { - VALUE_NOT_SET: 0, - ARTIFACT: 1, - MAP: 2, - LIST: 3 +proto.ml_metadata.PutContextTypeResponse.prototype.hasTypeId = function() { + return jspb.Message.getField(this, 1) != null; }; + + /** - * @return {proto.ml_metadata.ArtifactStruct.ValueCase} + * List of repeated fields within this message type. + * @private {!Array} + * @const */ -proto.ml_metadata.ArtifactStruct.prototype.getValueCase = function() { - return /** @type {proto.ml_metadata.ArtifactStruct.ValueCase} */(jspb.Message.computeOneofCase(this, proto.ml_metadata.ArtifactStruct.oneofGroups_[0])); -}; +proto.ml_metadata.PutContextsRequest.repeatedFields_ = [1]; @@ -2558,8 +9239,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.ArtifactStruct.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.ArtifactStruct.toObject(opt_includeInstance, this); +proto.ml_metadata.PutContextsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutContextsRequest.toObject(opt_includeInstance, this); }; @@ -2568,15 +9249,16 @@ proto.ml_metadata.ArtifactStruct.prototype.toObject = function(opt_includeInstan * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.ArtifactStruct} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutContextsRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ArtifactStruct.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutContextsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifact: (f = msg.getArtifact()) && proto.ml_metadata.ArtifactAndType.toObject(includeInstance, f), - map: (f = msg.getMap()) && proto.ml_metadata.ArtifactStructMap.toObject(includeInstance, f), - list: (f = msg.getList()) && proto.ml_metadata.ArtifactStructList.toObject(includeInstance, f) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f), + updateMask: (f = msg.getUpdateMask()) && google_protobuf_field_mask_pb.FieldMask.toObject(includeInstance, f) }; if (includeInstance) { @@ -2590,23 +9272,23 @@ proto.ml_metadata.ArtifactStruct.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.ArtifactStruct} + * @return {!proto.ml_metadata.PutContextsRequest} */ -proto.ml_metadata.ArtifactStruct.deserializeBinary = function(bytes) { +proto.ml_metadata.PutContextsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.ArtifactStruct; - return proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutContextsRequest; + return proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.ArtifactStruct} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutContextsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.ArtifactStruct} + * @return {!proto.ml_metadata.PutContextsRequest} */ -proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -2614,19 +9296,19 @@ proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader = function(msg, rea var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new proto.ml_metadata.ArtifactAndType; - reader.readMessage(value,proto.ml_metadata.ArtifactAndType.deserializeBinaryFromReader); - msg.setArtifact(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); break; case 2: - var value = new proto.ml_metadata.ArtifactStructMap; - reader.readMessage(value,proto.ml_metadata.ArtifactStructMap.deserializeBinaryFromReader); - msg.setMap(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; case 3: - var value = new proto.ml_metadata.ArtifactStructList; - reader.readMessage(value,proto.ml_metadata.ArtifactStructList.deserializeBinaryFromReader); - msg.setList(value); + var value = new google_protobuf_field_mask_pb.FieldMask; + reader.readMessage(value,google_protobuf_field_mask_pb.FieldMask.deserializeBinaryFromReader); + msg.setUpdateMask(value); break; default: reader.skipField(); @@ -2641,9 +9323,9 @@ proto.ml_metadata.ArtifactStruct.deserializeBinaryFromReader = function(msg, rea * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.ArtifactStruct.prototype.serializeBinary = function() { +proto.ml_metadata.PutContextsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PutContextsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -2651,101 +9333,102 @@ proto.ml_metadata.ArtifactStruct.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.ArtifactStruct} message + * @param {!proto.ml_metadata.PutContextsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.ArtifactStruct.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutContextsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifact(); - if (f != null) { - writer.writeMessage( + f = message.getContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, - proto.ml_metadata.ArtifactAndType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } - f = message.getMap(); + f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( 2, f, - proto.ml_metadata.ArtifactStructMap.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } - f = message.getList(); + f = message.getUpdateMask(); if (f != null) { writer.writeMessage( 3, f, - proto.ml_metadata.ArtifactStructList.serializeBinaryToWriter + google_protobuf_field_mask_pb.FieldMask.serializeBinaryToWriter ); } }; /** - * optional ArtifactAndType artifact = 1; - * @return {?proto.ml_metadata.ArtifactAndType} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.ArtifactStruct.prototype.getArtifact = function() { - return /** @type{?proto.ml_metadata.ArtifactAndType} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactAndType, 1)); +proto.ml_metadata.PutContextsRequest.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {?proto.ml_metadata.ArtifactAndType|undefined} value - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.setArtifact = function(value) { - return jspb.Message.setOneofWrapperField(this, 1, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +proto.ml_metadata.PutContextsRequest.prototype.setContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.ArtifactStruct.prototype.clearArtifact = function() { - return this.setArtifact(undefined); +proto.ml_metadata.PutContextsRequest.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.hasArtifact = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.PutContextsRequest.prototype.clearContextsList = function() { + return this.setContextsList([]); }; /** - * optional ArtifactStructMap map = 2; - * @return {?proto.ml_metadata.ArtifactStructMap} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.ArtifactStruct.prototype.getMap = function() { - return /** @type{?proto.ml_metadata.ArtifactStructMap} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactStructMap, 2)); +proto.ml_metadata.PutContextsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {?proto.ml_metadata.ArtifactStructMap|undefined} value - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.setMap = function(value) { - return jspb.Message.setOneofWrapperField(this, 2, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +proto.ml_metadata.PutContextsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.clearMap = function() { - return this.setMap(undefined); +proto.ml_metadata.PutContextsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -2753,36 +9436,36 @@ proto.ml_metadata.ArtifactStruct.prototype.clearMap = function() { * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.ArtifactStruct.prototype.hasMap = function() { +proto.ml_metadata.PutContextsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; /** - * optional ArtifactStructList list = 3; - * @return {?proto.ml_metadata.ArtifactStructList} + * optional google.protobuf.FieldMask update_mask = 3; + * @return {?proto.google.protobuf.FieldMask} */ -proto.ml_metadata.ArtifactStruct.prototype.getList = function() { - return /** @type{?proto.ml_metadata.ArtifactStructList} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.ArtifactStructList, 3)); +proto.ml_metadata.PutContextsRequest.prototype.getUpdateMask = function() { + return /** @type{?proto.google.protobuf.FieldMask} */ ( + jspb.Message.getWrapperField(this, google_protobuf_field_mask_pb.FieldMask, 3)); }; /** - * @param {?proto.ml_metadata.ArtifactStructList|undefined} value - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @param {?proto.google.protobuf.FieldMask|undefined} value + * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.setList = function(value) { - return jspb.Message.setOneofWrapperField(this, 3, proto.ml_metadata.ArtifactStruct.oneofGroups_[0], value); +proto.ml_metadata.PutContextsRequest.prototype.setUpdateMask = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.ArtifactStruct} returns this + * @return {!proto.ml_metadata.PutContextsRequest} returns this */ -proto.ml_metadata.ArtifactStruct.prototype.clearList = function() { - return this.setList(undefined); +proto.ml_metadata.PutContextsRequest.prototype.clearUpdateMask = function() { + return this.setUpdateMask(undefined); }; @@ -2790,7 +9473,7 @@ proto.ml_metadata.ArtifactStruct.prototype.clearList = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.ArtifactStruct.prototype.hasList = function() { +proto.ml_metadata.PutContextsRequest.prototype.hasUpdateMask = function() { return jspb.Message.getField(this, 3) != null; }; @@ -2801,7 +9484,7 @@ proto.ml_metadata.ArtifactStruct.prototype.hasList = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutArtifactsRequest.repeatedFields_ = [1]; +proto.ml_metadata.PutContextsResponse.repeatedFields_ = [1]; @@ -2818,8 +9501,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutArtifactsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutArtifactsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.PutContextsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutContextsResponse.toObject(opt_includeInstance, this); }; @@ -2828,15 +9511,13 @@ proto.ml_metadata.PutArtifactsRequest.prototype.toObject = function(opt_includeI * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutArtifactsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutContextsResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutContextsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), - options: (f = msg.getOptions()) && proto.ml_metadata.PutArtifactsRequest.Options.toObject(includeInstance, f) + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f }; if (includeInstance) { @@ -2850,23 +9531,23 @@ proto.ml_metadata.PutArtifactsRequest.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutArtifactsRequest} + * @return {!proto.ml_metadata.PutContextsResponse} */ -proto.ml_metadata.PutArtifactsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.PutContextsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutArtifactsRequest; - return proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutContextsResponse; + return proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutArtifactsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutContextsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutArtifactsRequest} + * @return {!proto.ml_metadata.PutContextsResponse} */ -proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -2874,14 +9555,10 @@ proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader = function(msg var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); - break; - case 2: - var value = new proto.ml_metadata.PutArtifactsRequest.Options; - reader.readMessage(value,proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader); - msg.setOptions(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextIds(values[i]); + } break; default: reader.skipField(); @@ -2896,9 +9573,9 @@ proto.ml_metadata.PutArtifactsRequest.deserializeBinaryFromReader = function(msg * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutArtifactsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.PutContextsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutArtifactsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PutContextsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -2906,31 +9583,66 @@ proto.ml_metadata.PutArtifactsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutArtifactsRequest} message + * @param {!proto.ml_metadata.PutContextsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutContextsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getContextIdsList(); if (f.length > 0) { - writer.writeRepeatedMessage( + writer.writeRepeatedInt64( 1, - f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter - ); - } - f = message.getOptions(); - if (f != null) { - writer.writeMessage( - 2, - f, - proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter + f ); } }; +/** + * repeated int64 context_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.PutContextsResponse.prototype.getContextIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.PutContextsResponse} returns this + */ +proto.ml_metadata.PutContextsResponse.prototype.setContextIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.PutContextsResponse} returns this + */ +proto.ml_metadata.PutContextsResponse.prototype.addContextIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.PutContextsResponse} returns this + */ +proto.ml_metadata.PutContextsResponse.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); +}; + + + +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.PutAttributionsAndAssociationsRequest.repeatedFields_ = [1,2]; @@ -2947,8 +9659,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutArtifactsRequest.Options.toObject(opt_includeInstance, this); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutAttributionsAndAssociationsRequest.toObject(opt_includeInstance, this); }; @@ -2957,13 +9669,17 @@ proto.ml_metadata.PutArtifactsRequest.Options.prototype.toObject = function(opt_ * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutArtifactsRequest.Options} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsRequest.Options.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutAttributionsAndAssociationsRequest.toObject = function(includeInstance, msg) { var f, obj = { - abortIfLatestUpdatedTimeChanged: (f = jspb.Message.getBooleanField(msg, 1)) == null ? undefined : f + attributionsList: jspb.Message.toObjectList(msg.getAttributionsList(), + ml_metadata_proto_metadata_store_pb.Attribution.toObject, includeInstance), + associationsList: jspb.Message.toObjectList(msg.getAssociationsList(), + ml_metadata_proto_metadata_store_pb.Association.toObject, includeInstance), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -2977,23 +9693,23 @@ proto.ml_metadata.PutArtifactsRequest.Options.toObject = function(includeInstanc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutArtifactsRequest.Options} + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} */ -proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinary = function(bytes) { +proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutArtifactsRequest.Options; - return proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutAttributionsAndAssociationsRequest; + return proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutArtifactsRequest.Options} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutArtifactsRequest.Options} + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} */ -proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -3001,8 +9717,19 @@ proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader = func var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setAbortIfLatestUpdatedTimeChanged(value); + var value = new ml_metadata_proto_metadata_store_pb.Attribution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Attribution.deserializeBinaryFromReader); + msg.addAttributions(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.Association; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Association.deserializeBinaryFromReader); + msg.addAssociations(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -3017,9 +9744,9 @@ proto.ml_metadata.PutArtifactsRequest.Options.deserializeBinaryFromReader = func * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.serializeBinary = function() { +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PutAttributionsAndAssociationsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -3027,121 +9754,140 @@ proto.ml_metadata.PutArtifactsRequest.Options.prototype.serializeBinary = functi /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutArtifactsRequest.Options} message + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsRequest.Options.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutAttributionsAndAssociationsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {boolean} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeBool( + f = message.getAttributionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.Attribution.serializeBinaryToWriter + ); + } + f = message.getAssociationsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.Association.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional bool abort_if_latest_updated_time_changed = 1; - * @return {boolean} + * repeated Attribution attributions = 1; + * @return {!Array} */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.getAbortIfLatestUpdatedTimeChanged = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 1, false)); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.getAttributionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Attribution, 1)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutArtifactsRequest.Options} returns this - */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.setAbortIfLatestUpdatedTimeChanged = function(value) { - return jspb.Message.setField(this, 1, value); + * @param {!Array} value + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this +*/ +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.setAttributionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactsRequest.Options} returns this + * @param {!proto.ml_metadata.Attribution=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Attribution} */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.clearAbortIfLatestUpdatedTimeChanged = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.addAttributions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Attribution, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this */ -proto.ml_metadata.PutArtifactsRequest.Options.prototype.hasAbortIfLatestUpdatedTimeChanged = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.clearAttributionsList = function() { + return this.setAttributionsList([]); }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated Association associations = 2; + * @return {!Array} */ -proto.ml_metadata.PutArtifactsRequest.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.getAssociationsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Association, 2)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this */ -proto.ml_metadata.PutArtifactsRequest.prototype.setArtifactsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.setAssociationsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 2, value); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {!proto.ml_metadata.Association=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.Association} */ -proto.ml_metadata.PutArtifactsRequest.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.addAssociations = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.Association, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this */ -proto.ml_metadata.PutArtifactsRequest.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.clearAssociationsList = function() { + return this.setAssociationsList([]); }; /** - * optional Options options = 2; - * @return {?proto.ml_metadata.PutArtifactsRequest.Options} + * optional TransactionOptions transaction_options = 3; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutArtifactsRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.PutArtifactsRequest.Options} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.PutArtifactsRequest.Options, 2)); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; /** - * @param {?proto.ml_metadata.PutArtifactsRequest.Options|undefined} value - * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this */ -proto.ml_metadata.PutArtifactsRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutArtifactsRequest} returns this + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this */ -proto.ml_metadata.PutArtifactsRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -3149,19 +9895,12 @@ proto.ml_metadata.PutArtifactsRequest.prototype.clearOptions = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutArtifactsRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.PutArtifactsResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -3177,8 +9916,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutArtifactsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutArtifactsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutAttributionsAndAssociationsResponse.toObject(opt_includeInstance, this); }; @@ -3187,13 +9926,13 @@ proto.ml_metadata.PutArtifactsResponse.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutArtifactsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutAttributionsAndAssociationsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f + }; if (includeInstance) { @@ -3207,111 +9946,68 @@ proto.ml_metadata.PutArtifactsResponse.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutArtifactsResponse} + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} */ -proto.ml_metadata.PutArtifactsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutArtifactsResponse; - return proto.ml_metadata.PutArtifactsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutAttributionsAndAssociationsResponse; + return proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutArtifactsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutArtifactsResponse} + * @return {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} */ -proto.ml_metadata.PutArtifactsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addArtifactIds(values[i]); - } - break; default: reader.skipField(); break; } } - return msg; -}; - - -/** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} - */ -proto.ml_metadata.PutArtifactsResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutArtifactsResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); -}; - - -/** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutArtifactsResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.PutArtifactsResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getArtifactIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 1, - f - ); - } -}; - - -/** - * repeated int64 artifact_ids = 1; - * @return {!Array} - */ -proto.ml_metadata.PutArtifactsResponse.prototype.getArtifactIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); + return msg; }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.PutArtifactsResponse.prototype.setArtifactIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); +proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutAttributionsAndAssociationsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactsResponse.prototype.addArtifactIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.PutAttributionsAndAssociationsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; }; + /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutArtifactsResponse} returns this + * List of repeated fields within this message type. + * @private {!Array} + * @const */ -proto.ml_metadata.PutArtifactsResponse.prototype.clearArtifactIdsList = function() { - return this.setArtifactIdsList([]); -}; - - +proto.ml_metadata.PutParentContextsRequest.repeatedFields_ = [1]; @@ -3328,8 +10024,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutArtifactTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.PutParentContextsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutParentContextsRequest.toObject(opt_includeInstance, this); }; @@ -3338,17 +10034,15 @@ proto.ml_metadata.PutArtifactTypeRequest.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutArtifactTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.PutParentContextsRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.PutParentContextsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactType: (f = msg.getArtifactType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f), - canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, - canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, - canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, - allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true) + parentContextsList: jspb.Message.toObjectList(msg.getParentContextsList(), + ml_metadata_proto_metadata_store_pb.ParentContext.toObject, includeInstance), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -3362,23 +10056,23 @@ proto.ml_metadata.PutArtifactTypeRequest.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} + * @return {!proto.ml_metadata.PutParentContextsRequest} */ -proto.ml_metadata.PutArtifactTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.PutParentContextsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutArtifactTypeRequest; - return proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.PutParentContextsRequest; + return proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutArtifactTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.PutParentContextsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutArtifactTypeRequest} + * @return {!proto.ml_metadata.PutParentContextsRequest} */ -proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -3386,25 +10080,14 @@ proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.setArtifactType(value); + var value = new ml_metadata_proto_metadata_store_pb.ParentContext; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ParentContext.deserializeBinaryFromReader); + msg.addParentContexts(value); break; case 2: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanAddFields(value); - break; - case 5: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanOmitFields(value); - break; - case 3: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanDeleteFields(value); - break; - case 4: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setAllFieldsMatch(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -3419,9 +10102,9 @@ proto.ml_metadata.PutArtifactTypeRequest.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.PutParentContextsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutArtifactTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.PutParentContextsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -3429,112 +10112,94 @@ proto.ml_metadata.PutArtifactTypeRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutArtifactTypeRequest} message + * @param {!proto.ml_metadata.PutParentContextsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.PutParentContextsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactType(); - if (f != null) { - writer.writeMessage( + f = message.getParentContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ParentContext.serializeBinaryToWriter ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); + f = message.getTransactionOptions(); if (f != null) { - writer.writeBool( + writer.writeMessage( 2, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); - if (f != null) { - writer.writeBool( - 5, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeBool( - 3, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); - if (f != null) { - writer.writeBool( - 4, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional ArtifactType artifact_type = 1; - * @return {?proto.ml_metadata.ArtifactType} + * repeated ParentContext parent_contexts = 1; + * @return {!Array} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.getArtifactType = function() { - return /** @type{?proto.ml_metadata.ArtifactType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +proto.ml_metadata.PutParentContextsRequest.prototype.getParentContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ParentContext, 1)); }; /** - * @param {?proto.ml_metadata.ArtifactType|undefined} value - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.PutParentContextsRequest} returns this */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.setArtifactType = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.PutParentContextsRequest.prototype.setParentContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * @param {!proto.ml_metadata.ParentContext=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ParentContext} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.clearArtifactType = function() { - return this.setArtifactType(undefined); +proto.ml_metadata.PutParentContextsRequest.prototype.addParentContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ParentContext, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.PutParentContextsRequest} returns this */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.hasArtifactType = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.PutParentContextsRequest.prototype.clearParentContextsList = function() { + return this.setParentContextsList([]); }; /** - * optional bool can_add_fields = 2; - * @return {boolean} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanAddFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +proto.ml_metadata.PutParentContextsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanAddFields = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.PutParentContextsRequest} returns this +*/ +proto.ml_metadata.PutParentContextsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.PutParentContextsRequest} returns this */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanAddFields = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.PutParentContextsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -3542,116 +10207,109 @@ proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanAddFields = function( * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanAddFields = function() { +proto.ml_metadata.PutParentContextsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; -/** - * optional bool can_omit_fields = 5; - * @return {boolean} - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanOmitFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); -}; - - -/** - * @param {boolean} value - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanOmitFields = function(value) { - return jspb.Message.setField(this, 5, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanOmitFields = function() { - return jspb.Message.setField(this, 5, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanOmitFields = function() { - return jspb.Message.getField(this, 5) != null; -}; - - -/** - * optional bool can_delete_fields = 3; - * @return {boolean} - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.getCanDeleteFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); -}; - -/** - * @param {boolean} value - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this - */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.setCanDeleteFields = function(value) { - return jspb.Message.setField(this, 3, value); -}; +if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.clearCanDeleteFields = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.PutParentContextsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.PutParentContextsResponse.toObject(opt_includeInstance, this); }; /** - * Returns whether this field is set. - * @return {boolean} + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.PutParentContextsResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.hasCanDeleteFields = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.PutParentContextsResponse.toObject = function(includeInstance, msg) { + var f, obj = { + + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * optional bool all_fields_match = 4; - * @return {boolean} + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.PutParentContextsResponse} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.getAllFieldsMatch = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); +proto.ml_metadata.PutParentContextsResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.PutParentContextsResponse; + return proto.ml_metadata.PutParentContextsResponse.deserializeBinaryFromReader(msg, reader); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.PutParentContextsResponse} msg The message object to deserialize into. 
+ * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.PutParentContextsResponse} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.setAllFieldsMatch = function(value) { - return jspb.Message.setField(this, 4, value); +proto.ml_metadata.PutParentContextsResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeRequest} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.clearAllFieldsMatch = function() { - return jspb.Message.setField(this, 4, undefined); +proto.ml_metadata.PutParentContextsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.PutParentContextsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.PutParentContextsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactTypeRequest.prototype.hasAllFieldsMatch = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.PutParentContextsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; }; @@ -3671,8 +10329,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutArtifactTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByTypeRequest.toObject(opt_includeInstance, this); }; @@ -3681,13 +10339,16 @@ proto.ml_metadata.PutArtifactTypeResponse.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutArtifactTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutArtifactTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? 
undefined : f, + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -3701,23 +10362,23 @@ proto.ml_metadata.PutArtifactTypeResponse.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutArtifactTypeResponse} + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} */ -proto.ml_metadata.PutArtifactTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutArtifactTypeResponse; - return proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByTypeRequest; + return proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutArtifactTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutArtifactTypeResponse} + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} */ -proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -3725,8 +10386,22 @@ proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setTypeId(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 4: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -3738,59 +10413,192 @@ proto.ml_metadata.PutArtifactTypeResponse.deserializeBinaryFromReader = function /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} - */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutArtifactTypeResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); + * Serializes the message to binary data (in protobuf wire format). 
+ * @return {!Uint8Array} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetArtifactsByTypeRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.GetArtifactsByTypeRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 4, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * optional string type_name = 1; + * @return {string} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional string type_version = 2; + * @return {string} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional ListOperationOptions options = 3; + * @return {?proto.ml_metadata.ListOperationOptions} + */ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutArtifactTypeResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this */ -proto.ml_metadata.PutArtifactTypeResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; /** - * optional int64 type_id = 1; - * @return {number} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.getTypeId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * @param {number} value - * @return {!proto.ml_metadata.PutArtifactTypeResponse} returns this + * optional TransactionOptions transaction_options = 4; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.setTypeId = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutArtifactTypeResponse} returns this + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.clearTypeId = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -3798,8 +10606,8 @@ proto.ml_metadata.PutArtifactTypeResponse.prototype.clearTypeId = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutArtifactTypeResponse.prototype.hasTypeId = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 4) != null; }; @@ -3809,7 +10617,7 @@ proto.ml_metadata.PutArtifactTypeResponse.prototype.hasTypeId = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutExecutionsRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactsByTypeResponse.repeatedFields_ = [1]; @@ -3826,8 +10634,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByTypeResponse.toObject(opt_includeInstance, this); }; @@ -3836,14 +10644,15 @@ proto.ml_metadata.PutExecutionsRequest.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), - ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance) + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -3857,23 +10666,23 @@ proto.ml_metadata.PutExecutionsRequest.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.PutExecutionsRequest} + * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} */ -proto.ml_metadata.PutExecutionsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionsRequest; - return proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByTypeResponse; + return proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutExecutionsRequest} + * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} */ -proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -3881,9 +10690,13 @@ proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader = function(ms var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.addExecutions(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -3898,9 +10711,9 @@ proto.ml_metadata.PutExecutionsRequest.deserializeBinaryFromReader = function(ms * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -3908,216 +10721,101 @@ proto.ml_metadata.PutExecutionsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutExecutionsRequest} message + * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionsList(); + f = message.getArtifactsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f ); } }; /** - * repeated Execution executions = 1; - * @return {!Array} + * repeated Artifact artifacts = 1; + * @return {!Array} */ -proto.ml_metadata.PutExecutionsRequest.prototype.getExecutionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionsRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this */ -proto.ml_metadata.PutExecutionsRequest.prototype.setExecutionsList = function(value) { +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.setArtifactsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Execution=} opt_value + * @param {!proto.ml_metadata.Artifact=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Execution} - */ -proto.ml_metadata.PutExecutionsRequest.prototype.addExecutions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutExecutionsRequest} returns this - */ -proto.ml_metadata.PutExecutionsRequest.prototype.clearExecutionsList = function() { - return this.setExecutionsList([]); -}; - - - -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.PutExecutionsResponse.repeatedFields_ = [1]; - - - -if (jspb.Message.GENERATE_TO_OBJECT) { -/** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} - */ -proto.ml_metadata.PutExecutionsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionsResponse.toObject(opt_includeInstance, this); -}; - - -/** - * Static version of the {@see toObject} method. 
- * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionsResponse} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.PutExecutionsResponse.toObject = function(includeInstance, msg) { - var f, obj = { - executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; -}; -} - - -/** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionsResponse} - */ -proto.ml_metadata.PutExecutionsResponse.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionsResponse; - return proto.ml_metadata.PutExecutionsResponse.deserializeBinaryFromReader(msg, reader); -}; - - -/** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionsResponse} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutExecutionsResponse} - */ -proto.ml_metadata.PutExecutionsResponse.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addExecutionIds(values[i]); - } - break; - default: - reader.skipField(); - break; - } - } - return msg; -}; - - -/** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * @return {!proto.ml_metadata.Artifact} */ -proto.ml_metadata.PutExecutionsResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionsResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutExecutionsResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this */ -proto.ml_metadata.PutExecutionsResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getExecutionIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 1, - f - ); - } +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); }; /** - * repeated int64 execution_ids = 1; - * @return {!Array} + * optional string next_page_token = 2; + * @return {string} */ -proto.ml_metadata.PutExecutionsResponse.prototype.getExecutionIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this */ -proto.ml_metadata.PutExecutionsResponse.prototype.setExecutionIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this */ -proto.ml_metadata.PutExecutionsResponse.prototype.addExecutionIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutExecutionsResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutExecutionsResponse.prototype.clearExecutionIdsList = function() { - return this.setExecutionIdsList([]); +proto.ml_metadata.GetArtifactsByTypeResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -4137,8 +10835,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactByTypeAndNameRequest.toObject(opt_includeInstance, this); }; @@ -4147,17 +10845,16 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.toObject = function(includeInstance, msg) { var f, obj = { - executionType: (f = msg.getExecutionType()) && ml_metadata_proto_metadata_store_pb.ExecutionType.toObject(includeInstance, f), - canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, - canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, - canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, - allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true) + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + artifactName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -4171,23 +10868,23 @@ proto.ml_metadata.PutExecutionTypeRequest.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionTypeRequest} + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} */ -proto.ml_metadata.PutExecutionTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionTypeRequest; - return proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactByTypeAndNameRequest; + return proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutExecutionTypeRequest} + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} */ -proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -4195,25 +10892,21 @@ proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); - msg.setExecutionType(value); - break; - case 2: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanAddFields(value); - break; - case 5: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanOmitFields(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); break; case 3: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanDeleteFields(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setArtifactName(value); break; case 4: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setAllFieldsMatch(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -4228,9 +10921,9 @@ proto.ml_metadata.PutExecutionTypeRequest.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactByTypeAndNameRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -4238,112 +10931,68 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutExecutionTypeRequest} message + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionType(); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeMessage( + writer.writeString( 1, - f, - ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeBool( - 2, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); + f = /** @type {string} */ (jspb.Message.getField(message, 3)); if (f != null) { - writer.writeBool( - 5, + writer.writeString( + 3, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeBool( - 3, + writer.writeString( + 2, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); + f = message.getTransactionOptions(); if (f != null) { - writer.writeBool( + writer.writeMessage( 4, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional ExecutionType execution_type = 1; - * @return {?proto.ml_metadata.ExecutionType} - */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.getExecutionType = function() { - return /** @type{?proto.ml_metadata.ExecutionType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); -}; - - -/** - * @param {?proto.ml_metadata.ExecutionType|undefined} value - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this -*/ -proto.ml_metadata.PutExecutionTypeRequest.prototype.setExecutionType = function(value) { - return jspb.Message.setWrapperField(this, 1, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this - */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.clearExecutionType = function() { - return this.setExecutionType(undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.hasExecutionType = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional bool can_add_fields = 2; - * @return {boolean} + * optional string type_name = 1; + * @return {string} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanAddFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanAddFields = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanAddFields = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -4351,35 +11000,35 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanAddFields = function * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanAddFields = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional bool can_omit_fields = 5; - * @return {boolean} + * optional string type_version = 3; + * @return {string} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanOmitFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanOmitFields = function(value) { - return jspb.Message.setField(this, 5, value); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 3, value); }; /** * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanOmitFields = function() { - return jspb.Message.setField(this, 5, undefined); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 3, undefined); }; @@ -4387,35 +11036,35 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanOmitFields = functio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanOmitFields = function() { - return jspb.Message.getField(this, 5) != null; +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional bool can_delete_fields = 3; - * @return {boolean} + * optional string artifact_name = 2; + * @return {string} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.getCanDeleteFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getArtifactName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.setCanDeleteFields = function(value) { - return jspb.Message.setField(this, 3, value); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setArtifactName = function(value) { + return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanDeleteFields = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearArtifactName = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -4423,35 +11072,36 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.clearCanDeleteFields = funct * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.hasCanDeleteFields = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasArtifactName = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * optional bool all_fields_match = 4; - * @return {boolean} + * optional TransactionOptions transaction_options = 4; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.getAllFieldsMatch = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this - */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.setAllFieldsMatch = function(value) { - return jspb.Message.setField(this, 4, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this +*/ +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionTypeRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.clearAllFieldsMatch = function() { - return jspb.Message.setField(this, 4, undefined); +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -4459,7 +11109,7 @@ proto.ml_metadata.PutExecutionTypeRequest.prototype.clearAllFieldsMatch = functi * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutExecutionTypeRequest.prototype.hasAllFieldsMatch = function() { +proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 4) != null; }; @@ -4480,8 +11130,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactByTypeAndNameResponse.toObject(opt_includeInstance, this); }; @@ -4490,13 +11140,13 @@ proto.ml_metadata.PutExecutionTypeResponse.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.toObject = function(includeInstance, msg) { var f, obj = { - typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f) }; if (includeInstance) { @@ -4510,23 +11160,23 @@ proto.ml_metadata.PutExecutionTypeResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionTypeResponse} + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} */ -proto.ml_metadata.PutExecutionTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionTypeResponse; - return proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactByTypeAndNameResponse; + return proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutExecutionTypeResponse} + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} */ -proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -4534,8 +11184,9 @@ proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setTypeId(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.setArtifact(value); break; default: reader.skipField(); @@ -4550,9 +11201,9 @@ proto.ml_metadata.PutExecutionTypeResponse.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactByTypeAndNameResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -4560,46 +11211,48 @@ proto.ml_metadata.PutExecutionTypeResponse.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutExecutionTypeResponse} message + * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); + f = message.getArtifact(); if (f != null) { - writer.writeInt64( + writer.writeMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter ); } }; /** - * optional int64 type_id = 1; - * @return {number} + * optional Artifact artifact = 1; + * @return {?proto.ml_metadata.Artifact} */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.getTypeId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.getArtifact = function() { + return /** @type{?proto.ml_metadata.Artifact} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {number} value - * @return {!proto.ml_metadata.PutExecutionTypeResponse} returns this - */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.setTypeId = function(value) { - return jspb.Message.setField(this, 1, value); + * @param {?proto.ml_metadata.Artifact|undefined} value + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} returns this +*/ +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.setArtifact = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionTypeResponse} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} returns this */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.clearTypeId = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.clearArtifact = function() { + return this.setArtifact(undefined); }; @@ -4607,7 +11260,7 @@ proto.ml_metadata.PutExecutionTypeResponse.prototype.clearTypeId = function() { * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutExecutionTypeResponse.prototype.hasTypeId = function() { +proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.hasArtifact = function() { return jspb.Message.getField(this, 1) != null; }; @@ -4618,7 +11271,7 @@ proto.ml_metadata.PutExecutionTypeResponse.prototype.hasTypeId = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutEventsRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactsByIDRequest.repeatedFields_ = [1]; @@ -4635,8 +11288,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutEventsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutEventsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByIDRequest.toObject(opt_includeInstance, this); }; @@ -4645,14 +11298,15 @@ proto.ml_metadata.PutEventsRequest.prototype.toObject = function(opt_includeInst * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutEventsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutEventsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { - eventsList: jspb.Message.toObjectList(msg.getEventsList(), - ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance) + artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + populateArtifactTypes: jspb.Message.getBooleanFieldWithDefault(msg, 3, false), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -4666,23 +11320,23 @@ proto.ml_metadata.PutEventsRequest.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutEventsRequest} + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} */ -proto.ml_metadata.PutEventsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutEventsRequest; - return proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByIDRequest; + return proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutEventsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutEventsRequest} + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} */ -proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -4690,9 +11344,19 @@ proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader = function(msg, r var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Event; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); - msg.addEvents(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactIds(values[i]); + } + break; + case 3: + var value = /** @type {boolean} */ (reader.readBool()); + msg.setPopulateArtifactTypes(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -4707,9 +11371,9 @@ proto.ml_metadata.PutEventsRequest.deserializeBinaryFromReader = function(msg, r * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutEventsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutEventsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -4717,159 +11381,144 @@ proto.ml_metadata.PutEventsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutEventsRequest} message + * @param {!proto.ml_metadata.GetArtifactsByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutEventsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getEventsList(); + f = message.getArtifactIdsList(); if (f.length > 0) { - writer.writeRepeatedMessage( + writer.writeRepeatedInt64( 1, + f + ); + } + f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeBool( + 3, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, f, - ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * repeated Event events = 1; - * @return {!Array} + * repeated int64 artifact_ids = 1; + * @return {!Array} */ -proto.ml_metadata.PutEventsRequest.prototype.getEventsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.getArtifactIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutEventsRequest} returns this -*/ -proto.ml_metadata.PutEventsRequest.prototype.setEventsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + */ +proto.ml_metadata.GetArtifactsByIDRequest.prototype.setArtifactIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * @param {!proto.ml_metadata.Event=} opt_value + * @param {number} value * @param {number=} opt_index - * @return {!proto.ml_metadata.Event} + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this */ -proto.ml_metadata.PutEventsRequest.prototype.addEvents = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.addArtifactIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutEventsRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this */ -proto.ml_metadata.PutEventsRequest.prototype.clearEventsList = function() { - return this.setEventsList([]); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearArtifactIdsList = function() { + return this.setArtifactIdsList([]); }; - - - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * optional bool populate_artifact_types = 3; + * @return {boolean} */ -proto.ml_metadata.PutEventsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutEventsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.getPopulateArtifactTypes = function() { + return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutEventsResponse} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * @param {boolean} value + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this */ -proto.ml_metadata.PutEventsResponse.toObject = function(includeInstance, msg) { - var f, obj = { +proto.ml_metadata.GetArtifactsByIDRequest.prototype.setPopulateArtifactTypes = function(value) { + return jspb.Message.setField(this, 3, value); +}; - }; - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + */ +proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearPopulateArtifactTypes = function() { + return jspb.Message.setField(this, 3, undefined); }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutEventsResponse} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutEventsResponse.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutEventsResponse; - return proto.ml_metadata.PutEventsResponse.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.hasPopulateArtifactTypes = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.PutEventsResponse} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutEventsResponse} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutEventsResponse.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.GetArtifactsByIDRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * Serializes the message to binary data (in protobuf wire format). 
- * @return {!Uint8Array} + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByIDRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this */ -proto.ml_metadata.PutEventsResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutEventsResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutEventsResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutEventsResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; +proto.ml_metadata.GetArtifactsByIDRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -4879,7 +11528,7 @@ proto.ml_metadata.PutEventsResponse.serializeBinaryToWriter = function(message, * @private {!Array} * @const */ -proto.ml_metadata.PutExecutionRequest.repeatedFields_ = [2,3]; +proto.ml_metadata.GetArtifactsByIDResponse.repeatedFields_ = [1,2]; @@ -4896,8 +11545,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByIDResponse.toObject(opt_includeInstance, this); }; @@ -4906,18 +11555,16 @@ proto.ml_metadata.PutExecutionRequest.prototype.toObject = function(opt_includeI * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByIDResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - execution: (f = msg.getExecution()) && ml_metadata_proto_metadata_store_pb.Execution.toObject(includeInstance, f), - artifactEventPairsList: jspb.Message.toObjectList(msg.getArtifactEventPairsList(), - proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject, includeInstance), - contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), - options: (f = msg.getOptions()) && proto.ml_metadata.PutExecutionRequest.Options.toObject(includeInstance, f) + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), + ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) }; if (includeInstance) { @@ -4931,23 +11578,23 @@ proto.ml_metadata.PutExecutionRequest.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionRequest} + * @return {!proto.ml_metadata.GetArtifactsByIDResponse} */ -proto.ml_metadata.PutExecutionRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionRequest; - return proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByIDResponse; + return proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutExecutionRequest} + * @return {!proto.ml_metadata.GetArtifactsByIDResponse} */ -proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -4955,24 +11602,14 @@ proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader = function(msg var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.setExecution(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); break; case 2: - var value = new proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent; - reader.readMessage(value,proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader); - msg.addArtifactEventPairs(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.Context; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); - break; - case 4: - var value = new proto.ml_metadata.PutExecutionRequest.Options; - reader.readMessage(value,proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader); - msg.setOptions(value); + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.addArtifactTypes(value); break; default: reader.skipField(); @@ -4987,9 +11624,9 @@ proto.ml_metadata.PutExecutionRequest.deserializeBinaryFromReader = function(msg * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -4997,47 +11634,107 @@ proto.ml_metadata.PutExecutionRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutExecutionRequest} message + * @param {!proto.ml_metadata.GetArtifactsByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecution(); - if (f != null) { - writer.writeMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter - ); - } - f = message.getArtifactEventPairsList(); + f = message.getArtifactsList(); if (f.length > 0) { writer.writeRepeatedMessage( - 2, + 1, f, - proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter ); } - f = message.getContextsList(); + f = message.getArtifactTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( - 3, - f, - ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter - ); - } - f = message.getOptions(); - if (f != null) { - writer.writeMessage( - 4, + 2, f, - proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter ); } }; +/** + * repeated Artifact artifacts = 1; + * @return {!Array} + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this +*/ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.setArtifactsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Artifact} + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); +}; + + +/** + * repeated ArtifactType artifact_types = 2; + * @return {!Array} + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.getArtifactTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 2)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this +*/ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.setArtifactTypesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 2, value); +}; + + +/** + * @param {!proto.ml_metadata.ArtifactType=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ArtifactType} + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.ArtifactType, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this + */ +proto.ml_metadata.GetArtifactsByIDResponse.prototype.clearArtifactTypesList = function() { + return this.setArtifactTypesList([]); +}; + + @@ -5054,8 +11751,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsRequest.toObject(opt_includeInstance, this); }; @@ -5064,14 +11761,14 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.toObject = func * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f), - event: (f = msg.getEvent()) && ml_metadata_proto_metadata_store_pb.Event.toObject(includeInstance, f) + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -5085,38 +11782,38 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.toObject = function(inclu /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} + * @return {!proto.ml_metadata.GetArtifactsRequest} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent; - return proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsRequest; + return proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} + * @return {!proto.ml_metadata.GetArtifactsRequest} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.setArtifact(value); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); break; case 2: - var value = new ml_metadata_proto_metadata_store_pb.Event; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); - msg.setEvent(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -5131,9 +11828,9 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.deserializeBinaryFromRead * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -5141,56 +11838,56 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.serializeBinary /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} message + * @param {!proto.ml_metadata.GetArtifactsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifact(); + f = message.getOptions(); if (f != null) { writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter ); } - f = message.getEvent(); + f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( 2, f, - ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional Artifact artifact = 1; - * @return {?proto.ml_metadata.Artifact} + * optional ListOperationOptions options = 1; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.getArtifact = function() { - return /** @type{?proto.ml_metadata.Artifact} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetArtifactsRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); }; /** - * @param {?proto.ml_metadata.Artifact|undefined} value - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsRequest} returns this */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.setArtifact = function(value) { +proto.ml_metadata.GetArtifactsRequest.prototype.setOptions = function(value) { return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + * @return {!proto.ml_metadata.GetArtifactsRequest} returns this */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearArtifact = function() { - return this.setArtifact(undefined); +proto.ml_metadata.GetArtifactsRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; @@ -5198,36 +11895,36 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearArtifact = * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.hasArtifact = function() { +proto.ml_metadata.GetArtifactsRequest.prototype.hasOptions = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional Event event = 2; - * @return {?proto.ml_metadata.Event} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.getEvent = function() { - return /** @type{?proto.ml_metadata.Event} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 2)); +proto.ml_metadata.GetArtifactsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {?proto.ml_metadata.Event|undefined} value - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsRequest} returns this */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.setEvent = function(value) { +proto.ml_metadata.GetArtifactsRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} returns this + * @return {!proto.ml_metadata.GetArtifactsRequest} returns this */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearEvent = function() { - return this.setEvent(undefined); +proto.ml_metadata.GetArtifactsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -5235,12 +11932,19 @@ proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.clearEvent = fu * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent.prototype.hasEvent = function() { +proto.ml_metadata.GetArtifactsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetArtifactsResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -5256,8 +11960,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionRequest.Options.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsResponse.toObject(opt_includeInstance, this); }; @@ -5266,13 +11970,15 @@ proto.ml_metadata.PutExecutionRequest.Options.prototype.toObject = function(opt_ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionRequest.Options} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.Options.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsResponse.toObject = function(includeInstance, msg) { var f, obj = { - reuseContextIfAlreadyExist: (f = jspb.Message.getBooleanField(msg, 1)) == null ? undefined : f + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -5286,23 +11992,23 @@ proto.ml_metadata.PutExecutionRequest.Options.toObject = function(includeInstanc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionRequest.Options} + * @return {!proto.ml_metadata.GetArtifactsResponse} */ -proto.ml_metadata.PutExecutionRequest.Options.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionRequest.Options; - return proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsResponse; + return proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionRequest.Options} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutExecutionRequest.Options} + * @return {!proto.ml_metadata.GetArtifactsResponse} */ -proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -5310,8 +12016,13 @@ proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader = func var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setReuseContextIfAlreadyExist(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -5326,9 +12037,9 @@ proto.ml_metadata.PutExecutionRequest.Options.deserializeBinaryFromReader = func * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -5336,16 +12047,24 @@ proto.ml_metadata.PutExecutionRequest.Options.prototype.serializeBinary = functi /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutExecutionRequest.Options} message + * @param {!proto.ml_metadata.GetArtifactsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {boolean} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeBool( + f = message.getArtifactsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, f ); } @@ -5353,179 +12072,67 @@ proto.ml_metadata.PutExecutionRequest.Options.serializeBinaryToWriter = function /** - * optional bool reuse_context_if_already_exist = 1; - * @return {boolean} - */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.getReuseContextIfAlreadyExist = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 1, false)); -}; - - -/** - * @param {boolean} value - * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this - */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.setReuseContextIfAlreadyExist = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionRequest.Options} returns this - */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.clearReuseContextIfAlreadyExist = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.PutExecutionRequest.Options.prototype.hasReuseContextIfAlreadyExist = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional Execution execution = 1; - * @return {?proto.ml_metadata.Execution} - */ -proto.ml_metadata.PutExecutionRequest.prototype.getExecution = function() { - return /** @type{?proto.ml_metadata.Execution} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); -}; - - -/** - * @param {?proto.ml_metadata.Execution|undefined} value - * @return {!proto.ml_metadata.PutExecutionRequest} returns this -*/ -proto.ml_metadata.PutExecutionRequest.prototype.setExecution = function(value) { - return jspb.Message.setWrapperField(this, 1, value); -}; - - -/** - * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.PutExecutionRequest} returns this - */ -proto.ml_metadata.PutExecutionRequest.prototype.clearExecution = function() { - return this.setExecution(undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.PutExecutionRequest.prototype.hasExecution = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * repeated ArtifactAndEvent artifact_event_pairs = 2; - * @return {!Array} - */ -proto.ml_metadata.PutExecutionRequest.prototype.getArtifactEventPairsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent, 2)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionRequest} returns this -*/ -proto.ml_metadata.PutExecutionRequest.prototype.setArtifactEventPairsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 2, value); -}; - - -/** - * @param {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent} - */ -proto.ml_metadata.PutExecutionRequest.prototype.addArtifactEventPairs = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.PutExecutionRequest.ArtifactAndEvent, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutExecutionRequest} returns this - */ -proto.ml_metadata.PutExecutionRequest.prototype.clearArtifactEventPairsList = function() { - return this.setArtifactEventPairsList([]); -}; - - -/** - * repeated Context contexts = 3; - * @return {!Array} + * repeated Artifact artifacts = 1; + * @return {!Array} */ -proto.ml_metadata.PutExecutionRequest.prototype.getContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 3)); +proto.ml_metadata.GetArtifactsResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsResponse} returns this */ -proto.ml_metadata.PutExecutionRequest.prototype.setContextsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 3, value); +proto.ml_metadata.GetArtifactsResponse.prototype.setArtifactsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Context=} opt_value + * @param {!proto.ml_metadata.Artifact=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} + * @return {!proto.ml_metadata.Artifact} */ -proto.ml_metadata.PutExecutionRequest.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 3, opt_value, proto.ml_metadata.Context, opt_index); +proto.ml_metadata.GetArtifactsResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.PutExecutionRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsResponse} returns this */ -proto.ml_metadata.PutExecutionRequest.prototype.clearContextsList = function() { - return this.setContextsList([]); +proto.ml_metadata.GetArtifactsResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); }; /** - * optional Options options = 4; - * @return {?proto.ml_metadata.PutExecutionRequest.Options} + * optional string next_page_token = 2; + * @return {string} */ -proto.ml_metadata.PutExecutionRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.PutExecutionRequest.Options} */ ( - jspb.Message.getWrapperField(this, proto.ml_metadata.PutExecutionRequest.Options, 4)); +proto.ml_metadata.GetArtifactsResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {?proto.ml_metadata.PutExecutionRequest.Options|undefined} value - * @return {!proto.ml_metadata.PutExecutionRequest} returns this -*/ -proto.ml_metadata.PutExecutionRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactsResponse} returns this + */ +proto.ml_metadata.GetArtifactsResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutExecutionRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsResponse} returns this */ -proto.ml_metadata.PutExecutionRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetArtifactsResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -5533,8 +12140,8 @@ proto.ml_metadata.PutExecutionRequest.prototype.clearOptions = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutExecutionRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetArtifactsResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -5544,7 +12151,7 @@ proto.ml_metadata.PutExecutionRequest.prototype.hasOptions = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutExecutionResponse.repeatedFields_ = [2,3]; +proto.ml_metadata.GetArtifactsByURIRequest.repeatedFields_ = [2]; @@ -5561,8 +12168,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutExecutionResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutExecutionResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByURIRequest.toObject(opt_includeInstance, this); }; @@ -5571,15 +12178,14 @@ proto.ml_metadata.PutExecutionResponse.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutExecutionResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetArtifactsByURIRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByURIRequest.toObject = function(includeInstance, msg) { var f, obj = { - executionId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, - contextIdsList: (f = jspb.Message.getRepeatedField(msg, 3)) == null ? undefined : f + urisList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -5593,44 +12199,37 @@ proto.ml_metadata.PutExecutionResponse.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutExecutionResponse} + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} */ -proto.ml_metadata.PutExecutionResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutExecutionResponse; - return proto.ml_metadata.PutExecutionResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByURIRequest; + return proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutExecutionResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByURIRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutExecutionResponse} + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} */ -proto.ml_metadata.PutExecutionResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setExecutionId(value); - break; case 2: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addArtifactIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.addUris(value); break; case 3: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addContextIds(values[i]); - } + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -5645,9 +12244,9 @@ proto.ml_metadata.PutExecutionResponse.deserializeBinaryFromReader = function(ms * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutExecutionResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByURIRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutExecutionResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByURIRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -5655,143 +12254,101 @@ proto.ml_metadata.PutExecutionResponse.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutExecutionResponse} message + * @param {!proto.ml_metadata.GetArtifactsByURIRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutExecutionResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByURIRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } - f = message.getArtifactIdsList(); + f = message.getUrisList(); if (f.length > 0) { - writer.writeRepeatedInt64( + writer.writeRepeatedString( 2, f ); } - f = message.getContextIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( 3, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional int64 execution_id = 1; - * @return {number} - */ -proto.ml_metadata.PutExecutionResponse.prototype.getExecutionId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.PutExecutionResponse} returns this - */ -proto.ml_metadata.PutExecutionResponse.prototype.setExecutionId = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutExecutionResponse} returns this - */ -proto.ml_metadata.PutExecutionResponse.prototype.clearExecutionId = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.PutExecutionResponse.prototype.hasExecutionId = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * repeated int64 artifact_ids = 2; - * @return {!Array} + * repeated string uris = 2; + * @return {!Array} */ -proto.ml_metadata.PutExecutionResponse.prototype.getArtifactIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.getUrisList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this */ -proto.ml_metadata.PutExecutionResponse.prototype.setArtifactIdsList = function(value) { +proto.ml_metadata.GetArtifactsByURIRequest.prototype.setUrisList = function(value) { return jspb.Message.setField(this, 2, value || []); }; /** - * @param {number} value + * @param {string} value * @param {number=} opt_index - * @return {!proto.ml_metadata.PutExecutionResponse} returns this + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this */ -proto.ml_metadata.PutExecutionResponse.prototype.addArtifactIds = function(value, opt_index) { +proto.ml_metadata.GetArtifactsByURIRequest.prototype.addUris = function(value, opt_index) { return jspb.Message.addToRepeatedField(this, 2, value, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutExecutionResponse} returns this + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this */ -proto.ml_metadata.PutExecutionResponse.prototype.clearArtifactIdsList = function() { - return this.setArtifactIdsList([]); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.clearUrisList = function() { + return this.setUrisList([]); }; /** - * repeated int64 context_ids = 3; - * @return {!Array} + * optional TransactionOptions transaction_options = 3; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutExecutionResponse.prototype.getContextIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 3)); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutExecutionResponse} returns this - */ -proto.ml_metadata.PutExecutionResponse.prototype.setContextIdsList = function(value) { - return jspb.Message.setField(this, 3, value || []); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByURIRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutExecutionResponse} returns this + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this */ -proto.ml_metadata.PutExecutionResponse.prototype.addContextIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 3, value, opt_index); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutExecutionResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutExecutionResponse.prototype.clearContextIdsList = function() { - return this.setContextIdsList([]); +proto.ml_metadata.GetArtifactsByURIRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; @@ -5801,7 +12358,7 @@ proto.ml_metadata.PutExecutionResponse.prototype.clearContextIdsList = function( * @private {!Array} * @const */ -proto.ml_metadata.PutTypesRequest.repeatedFields_ = [1,2,3]; +proto.ml_metadata.GetArtifactsByURIResponse.repeatedFields_ = [1]; @@ -5818,8 +12375,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutTypesRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutTypesRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByURIResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByURIResponse.toObject(opt_includeInstance, this); }; @@ -5828,22 +12385,14 @@ proto.ml_metadata.PutTypesRequest.prototype.toObject = function(opt_includeInsta * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutTypesRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByURIResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByURIResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), - ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance), - executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), - ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance), - contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), - ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance), - canAddFields: (f = jspb.Message.getBooleanField(msg, 4)) == null ? undefined : f, - canOmitFields: (f = jspb.Message.getBooleanField(msg, 7)) == null ? undefined : f, - canDeleteFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, - allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 6, true) + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance) }; if (includeInstance) { @@ -5857,23 +12406,23 @@ proto.ml_metadata.PutTypesRequest.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.PutTypesRequest} + * @return {!proto.ml_metadata.GetArtifactsByURIResponse} */ -proto.ml_metadata.PutTypesRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutTypesRequest; - return proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByURIResponse; + return proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutTypesRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByURIResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutTypesRequest} + * @return {!proto.ml_metadata.GetArtifactsByURIResponse} */ -proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -5881,35 +12430,9 @@ proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader = function(msg, re var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.addArtifactTypes(value); - break; - case 2: - var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); - msg.addExecutionTypes(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.ContextType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); - msg.addContextTypes(value); - break; - case 4: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanAddFields(value); - break; - case 7: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanOmitFields(value); - break; - case 5: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanDeleteFields(value); - break; - case 6: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setAllFieldsMatch(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); break; default: reader.skipField(); @@ -5924,9 +12447,9 @@ proto.ml_metadata.PutTypesRequest.deserializeBinaryFromReader = function(msg, re * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutTypesRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByURIResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutTypesRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByURIResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -5934,277 +12457,214 @@ proto.ml_metadata.PutTypesRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutTypesRequest} message + * @param {!proto.ml_metadata.GetArtifactsByURIResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByURIResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactTypesList(); + f = message.getArtifactsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter - ); - } - f = message.getExecutionTypesList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 2, - f, - ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter - ); - } - f = message.getContextTypesList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 3, - f, - ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); - if (f != null) { - writer.writeBool( - 4, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 7)); - if (f != null) { - writer.writeBool( - 7, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); - if (f != null) { - writer.writeBool( - 5, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 6)); - if (f != null) { - writer.writeBool( - 6, - f + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter ); } }; /** - * repeated ArtifactType artifact_types = 1; - * @return {!Array} + * repeated Artifact artifacts = 1; + * @return {!Array} */ -proto.ml_metadata.PutTypesRequest.prototype.getArtifactTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +proto.ml_metadata.GetArtifactsByURIResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByURIResponse} returns this */ -proto.ml_metadata.PutTypesRequest.prototype.setArtifactTypesList = function(value) { +proto.ml_metadata.GetArtifactsByURIResponse.prototype.setArtifactsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ArtifactType=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ArtifactType} - */ -proto.ml_metadata.PutTypesRequest.prototype.addArtifactTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, 
proto.ml_metadata.ArtifactType, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutTypesRequest} returns this - */ -proto.ml_metadata.PutTypesRequest.prototype.clearArtifactTypesList = function() { - return this.setArtifactTypesList([]); -}; - - -/** - * repeated ExecutionType execution_types = 2; - * @return {!Array} - */ -proto.ml_metadata.PutTypesRequest.prototype.getExecutionTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 2)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this -*/ -proto.ml_metadata.PutTypesRequest.prototype.setExecutionTypesList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 2, value); -}; - - -/** - * @param {!proto.ml_metadata.ExecutionType=} opt_value + * @param {!proto.ml_metadata.Artifact=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.ExecutionType} + * @return {!proto.ml_metadata.Artifact} */ -proto.ml_metadata.PutTypesRequest.prototype.addExecutionTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.ExecutionType, opt_index); +proto.ml_metadata.GetArtifactsByURIResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutTypesRequest} returns this - */ -proto.ml_metadata.PutTypesRequest.prototype.clearExecutionTypesList = function() { - return this.setExecutionTypesList([]); -}; - - -/** - * repeated ContextType context_types = 3; - * @return {!Array} - */ -proto.ml_metadata.PutTypesRequest.prototype.getContextTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 3)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this -*/ -proto.ml_metadata.PutTypesRequest.prototype.setContextTypesList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 3, value); -}; - - -/** - * @param {!proto.ml_metadata.ContextType=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ContextType} + * @return {!proto.ml_metadata.GetArtifactsByURIResponse} returns this */ -proto.ml_metadata.PutTypesRequest.prototype.addContextTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 3, opt_value, proto.ml_metadata.ContextType, opt_index); +proto.ml_metadata.GetArtifactsByURIResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); }; -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutTypesRequest} returns this - */ -proto.ml_metadata.PutTypesRequest.prototype.clearContextTypesList = function() { - return this.setContextTypesList([]); -}; - -/** - * optional bool can_add_fields = 4; - * @return {boolean} - */ -proto.ml_metadata.PutTypesRequest.prototype.getCanAddFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, false)); -}; +if (jspb.Message.GENERATE_TO_OBJECT) { /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Creates an object representation of this proto. 
+ * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} */ -proto.ml_metadata.PutTypesRequest.prototype.setCanAddFields = function(value) { - return jspb.Message.setField(this, 4, value); +proto.ml_metadata.GetExecutionsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsRequest.toObject(opt_includeInstance, this); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.GetExecutionsRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesRequest.prototype.clearCanAddFields = function() { - return jspb.Message.setField(this, 4, undefined); -}; - +proto.ml_metadata.GetExecutionsRequest.toObject = function(includeInstance, msg) { + var f, obj = { + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.PutTypesRequest.prototype.hasCanAddFields = function() { - return jspb.Message.getField(this, 4) != null; + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} -/** - * optional bool can_omit_fields = 7; - * @return {boolean} +/** + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.GetExecutionsRequest} */ -proto.ml_metadata.PutTypesRequest.prototype.getCanOmitFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 7, false)); +proto.ml_metadata.GetExecutionsRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.GetExecutionsRequest; + return proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader(msg, reader); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.GetExecutionsRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.GetExecutionsRequest} */ -proto.ml_metadata.PutTypesRequest.prototype.setCanOmitFields = function(value) { - return jspb.Message.setField(this, 7, value); +proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.PutTypesRequest.prototype.clearCanOmitFields = function() { - return jspb.Message.setField(this, 7, undefined); +proto.ml_metadata.GetExecutionsRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.GetExecutionsRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesRequest.prototype.hasCanOmitFields = function() { - return jspb.Message.getField(this, 7) != null; +proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } }; /** - * optional bool can_delete_fields = 5; - * @return {boolean} + * optional ListOperationOptions options = 1; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.PutTypesRequest.prototype.getCanDeleteFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +proto.ml_metadata.GetExecutionsRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this - */ -proto.ml_metadata.PutTypesRequest.prototype.setCanDeleteFields = function(value) { - return jspb.Message.setField(this, 5, value); + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsRequest} returns this +*/ +proto.ml_metadata.GetExecutionsRequest.prototype.setOptions = function(value) { + return 
jspb.Message.setWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsRequest} returns this */ -proto.ml_metadata.PutTypesRequest.prototype.clearCanDeleteFields = function() { - return jspb.Message.setField(this, 5, undefined); +proto.ml_metadata.GetExecutionsRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; @@ -6212,35 +12672,36 @@ proto.ml_metadata.PutTypesRequest.prototype.clearCanDeleteFields = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutTypesRequest.prototype.hasCanDeleteFields = function() { - return jspb.Message.getField(this, 5) != null; +proto.ml_metadata.GetExecutionsRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional bool all_fields_match = 6; - * @return {boolean} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutTypesRequest.prototype.getAllFieldsMatch = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 6, true)); +proto.ml_metadata.GetExecutionsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutTypesRequest} returns this - */ -proto.ml_metadata.PutTypesRequest.prototype.setAllFieldsMatch = function(value) { - return jspb.Message.setField(this, 6, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsRequest} returns this +*/ +proto.ml_metadata.GetExecutionsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutTypesRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsRequest} returns this */ -proto.ml_metadata.PutTypesRequest.prototype.clearAllFieldsMatch = function() { - return jspb.Message.setField(this, 6, undefined); +proto.ml_metadata.GetExecutionsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -6248,8 +12709,8 @@ proto.ml_metadata.PutTypesRequest.prototype.clearAllFieldsMatch = function() { * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutTypesRequest.prototype.hasAllFieldsMatch = function() { - return jspb.Message.getField(this, 6) != null; +proto.ml_metadata.GetExecutionsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -6259,7 +12720,7 @@ proto.ml_metadata.PutTypesRequest.prototype.hasAllFieldsMatch = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutTypesResponse.repeatedFields_ = [1,2,3]; +proto.ml_metadata.GetExecutionsResponse.repeatedFields_ = [1]; @@ -6276,8 +12737,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutTypesResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutTypesResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsResponse.toObject(opt_includeInstance, this); }; @@ -6286,15 +12747,15 @@ proto.ml_metadata.PutTypesResponse.prototype.toObject = function(opt_includeInst * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutTypesResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, - executionTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, - contextTypeIdsList: (f = jspb.Message.getRepeatedField(msg, 3)) == null ? undefined : f + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -6308,23 +12769,23 @@ proto.ml_metadata.PutTypesResponse.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutTypesResponse} + * @return {!proto.ml_metadata.GetExecutionsResponse} */ -proto.ml_metadata.PutTypesResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutTypesResponse; - return proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsResponse; + return proto.ml_metadata.GetExecutionsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutTypesResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutTypesResponse} + * @return {!proto.ml_metadata.GetExecutionsResponse} */ -proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -6332,22 +12793,13 @@ proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader = function(msg, r var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addArtifactTypeIds(values[i]); - } + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); break; case 2: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addExecutionTypeIds(values[i]); - } - break; - case 3: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addContextTypeIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -6362,154 +12814,111 @@ proto.ml_metadata.PutTypesResponse.deserializeBinaryFromReader = function(msg, r * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutTypesResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutTypesResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); -}; - - -/** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutTypesResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.PutTypesResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getArtifactTypeIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 1, - f - ); - } - f = message.getExecutionTypeIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 2, - f - ); - } - f = message.getContextTypeIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 3, - f - ); - } -}; - - -/** - * repeated int64 artifact_type_ids = 1; - * @return {!Array} - */ -proto.ml_metadata.PutTypesResponse.prototype.getArtifactTypeIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesResponse} returns this - */ -proto.ml_metadata.PutTypesResponse.prototype.setArtifactTypeIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); -}; - - -/** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutTypesResponse} returns this - */ -proto.ml_metadata.PutTypesResponse.prototype.addArtifactTypeIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetExecutionsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetExecutionsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutTypesResponse} returns this + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.GetExecutionsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutTypesResponse.prototype.clearArtifactTypeIdsList = function() { - return this.setArtifactTypeIdsList([]); +proto.ml_metadata.GetExecutionsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExecutionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } }; /** - * repeated int64 execution_type_ids = 2; - * @return {!Array} + * repeated Execution executions = 1; + * @return {!Array} */ -proto.ml_metadata.PutTypesResponse.prototype.getExecutionTypeIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +proto.ml_metadata.GetExecutionsResponse.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesResponse} returns this - */ -proto.ml_metadata.PutTypesResponse.prototype.setExecutionTypeIdsList = function(value) { - return jspb.Message.setField(this, 2, value || []); + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsResponse} returns this +*/ +proto.ml_metadata.GetExecutionsResponse.prototype.setExecutionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {number} value + * @param {!proto.ml_metadata.Execution=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.PutTypesResponse} returns this + * @return {!proto.ml_metadata.Execution} */ -proto.ml_metadata.PutTypesResponse.prototype.addExecutionTypeIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 2, value, opt_index); +proto.ml_metadata.GetExecutionsResponse.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.PutTypesResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsResponse} returns this */ -proto.ml_metadata.PutTypesResponse.prototype.clearExecutionTypeIdsList = function() { - return this.setExecutionTypeIdsList([]); +proto.ml_metadata.GetExecutionsResponse.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); }; /** - * repeated int64 context_type_ids = 3; - * @return {!Array} + * optional string next_page_token = 2; + * @return {string} */ -proto.ml_metadata.PutTypesResponse.prototype.getContextTypeIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 3)); +proto.ml_metadata.GetExecutionsResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutTypesResponse} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionsResponse} returns this */ -proto.ml_metadata.PutTypesResponse.prototype.setContextTypeIdsList = function(value) { - return jspb.Message.setField(this, 3, value || []); +proto.ml_metadata.GetExecutionsResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutTypesResponse} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsResponse} returns this */ -proto.ml_metadata.PutTypesResponse.prototype.addContextTypeIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 3, value, opt_index); +proto.ml_metadata.GetExecutionsResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutTypesResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutTypesResponse.prototype.clearContextTypeIdsList = function() { - return this.setContextTypeIdsList([]); +proto.ml_metadata.GetExecutionsResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -6529,8 +12938,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutContextTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutContextTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypeRequest.toObject(opt_includeInstance, this); }; @@ -6539,17 +12948,15 @@ proto.ml_metadata.PutContextTypeRequest.prototype.toObject = function(opt_includ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutContextTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypeRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { - contextType: (f = msg.getContextType()) && ml_metadata_proto_metadata_store_pb.ContextType.toObject(includeInstance, f), - canAddFields: (f = jspb.Message.getBooleanField(msg, 2)) == null ? undefined : f, - canOmitFields: (f = jspb.Message.getBooleanField(msg, 5)) == null ? undefined : f, - canDeleteFields: (f = jspb.Message.getBooleanField(msg, 3)) == null ? undefined : f, - allFieldsMatch: jspb.Message.getBooleanFieldWithDefault(msg, 4, true) + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -6563,23 +12970,23 @@ proto.ml_metadata.PutContextTypeRequest.toObject = function(includeInstance, msg /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutContextTypeRequest} + * @return {!proto.ml_metadata.GetArtifactTypeRequest} */ -proto.ml_metadata.PutContextTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutContextTypeRequest; - return proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypeRequest; + return proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutContextTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutContextTypeRequest} + * @return {!proto.ml_metadata.GetArtifactTypeRequest} */ -proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -6587,25 +12994,17 @@ proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader = function(m var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ContextType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); - msg.setContextType(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); break; case 2: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanAddFields(value); - break; - case 5: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanOmitFields(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); break; case 3: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setCanDeleteFields(value); - break; - case 4: - var value = /** @type {boolean} */ (reader.readBool()); - msg.setAllFieldsMatch(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -6620,9 +13019,9 @@ proto.ml_metadata.PutContextTypeRequest.deserializeBinaryFromReader = function(m * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutContextTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypeRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutContextTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypeRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -6630,76 +13029,61 @@ proto.ml_metadata.PutContextTypeRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutContextTypeRequest} message + * @param {!proto.ml_metadata.GetArtifactTypeRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypeRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextType(); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeMessage( + writer.writeString( 1, - f, - ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeBool( - 2, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 5)); + f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { - writer.writeBool( - 5, + writer.writeString( + 2, f ); } - f = /** @type {boolean} */ (jspb.Message.getField(message, 3)); + f = message.getTransactionOptions(); if (f != null) { - writer.writeBool( + writer.writeMessage( 3, - f - ); - } - f = /** @type {boolean} */ (jspb.Message.getField(message, 4)); - if (f != null) { - writer.writeBool( - 4, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional ContextType context_type = 1; - * @return {?proto.ml_metadata.ContextType} + * optional string type_name = 1; + * @return {string} */ -proto.ml_metadata.PutContextTypeRequest.prototype.getContextType = function() { - return /** @type{?proto.ml_metadata.ContextType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); +proto.ml_metadata.GetArtifactTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {?proto.ml_metadata.ContextType|undefined} value - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this -*/ -proto.ml_metadata.PutContextTypeRequest.prototype.setContextType = function(value) { - return jspb.Message.setWrapperField(this, 1, value); + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this */ -proto.ml_metadata.PutContextTypeRequest.prototype.clearContextType = function() { - return this.setContextType(undefined); +proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -6707,143 +13091,223 @@ proto.ml_metadata.PutContextTypeRequest.prototype.clearContextType = function() * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutContextTypeRequest.prototype.hasContextType = function() { +proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTypeName = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional bool can_add_fields = 2; - * @return {boolean} + * optional string type_version = 2; + * @return {string} */ -proto.ml_metadata.PutContextTypeRequest.prototype.getCanAddFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 2, false)); +proto.ml_metadata.GetArtifactTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this */ -proto.ml_metadata.PutContextTypeRequest.prototype.setCanAddFields = function(value) { +proto.ml_metadata.GetArtifactTypeRequest.prototype.setTypeVersion = function(value) { return jspb.Message.setField(this, 2, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this - */ -proto.ml_metadata.PutContextTypeRequest.prototype.clearCanAddFields = function() { - return jspb.Message.setField(this, 2, undefined); + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this + */ +proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 3; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.GetArtifactTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this +*/ +proto.ml_metadata.GetArtifactTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this */ -proto.ml_metadata.PutContextTypeRequest.prototype.hasCanAddFields = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * optional bool can_omit_fields = 5; + * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.PutContextTypeRequest.prototype.getCanOmitFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 5, false)); +proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; -/** - * @param {boolean} value - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this - */ -proto.ml_metadata.PutContextTypeRequest.prototype.setCanOmitFields = function(value) { - return jspb.Message.setField(this, 5, value); -}; + +if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} */ -proto.ml_metadata.PutContextTypeRequest.prototype.clearCanOmitFields = function() { - return jspb.Message.setField(this, 5, undefined); +proto.ml_metadata.GetArtifactTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypeResponse.toObject(opt_includeInstance, this); }; /** - * Returns whether this field is set. - * @return {boolean} + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.GetArtifactTypeResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeRequest.prototype.hasCanOmitFields = function() { - return jspb.Message.getField(this, 5) != null; +proto.ml_metadata.GetArtifactTypeResponse.toObject = function(includeInstance, msg) { + var f, obj = { + artifactType: (f = msg.getArtifactType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * optional bool can_delete_fields = 3; - * @return {boolean} + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.GetArtifactTypeResponse} */ -proto.ml_metadata.PutContextTypeRequest.prototype.getCanDeleteFields = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 3, false)); +proto.ml_metadata.GetArtifactTypeResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.GetArtifactTypeResponse; + return proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. 
+ * @param {!proto.ml_metadata.GetArtifactTypeResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.GetArtifactTypeResponse} */ -proto.ml_metadata.PutContextTypeRequest.prototype.setCanDeleteFields = function(value) { - return jspb.Message.setField(this, 3, value); +proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.setArtifactType(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.PutContextTypeRequest.prototype.clearCanDeleteFields = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.GetArtifactTypeResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetArtifactTypeResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.GetArtifactTypeResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeRequest.prototype.hasCanDeleteFields = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetArtifactTypeResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactType(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ); + } }; /** - * optional bool all_fields_match = 4; - * @return {boolean} + * optional ArtifactType artifact_type = 1; + * @return {?proto.ml_metadata.ArtifactType} */ -proto.ml_metadata.PutContextTypeRequest.prototype.getAllFieldsMatch = function() { - return /** @type {boolean} */ (jspb.Message.getBooleanFieldWithDefault(this, 4, true)); +proto.ml_metadata.GetArtifactTypeResponse.prototype.getArtifactType = function() { + return /** @type{?proto.ml_metadata.ArtifactType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); }; /** - * @param {boolean} value - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this - */ -proto.ml_metadata.PutContextTypeRequest.prototype.setAllFieldsMatch = function(value) { - return jspb.Message.setField(this, 4, value); + * @param {?proto.ml_metadata.ArtifactType|undefined} value + * @return {!proto.ml_metadata.GetArtifactTypeResponse} returns this +*/ +proto.ml_metadata.GetArtifactTypeResponse.prototype.setArtifactType = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.PutContextTypeRequest} returns this + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetArtifactTypeResponse} returns this */ -proto.ml_metadata.PutContextTypeRequest.prototype.clearAllFieldsMatch = function() { - return jspb.Message.setField(this, 4, undefined); +proto.ml_metadata.GetArtifactTypeResponse.prototype.clearArtifactType = function() { + return this.setArtifactType(undefined); }; @@ -6851,8 +13315,8 @@ proto.ml_metadata.PutContextTypeRequest.prototype.clearAllFieldsMatch = function * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutContextTypeRequest.prototype.hasAllFieldsMatch = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetArtifactTypeResponse.prototype.hasArtifactType = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -6872,8 +13336,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutContextTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutContextTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesRequest.toObject(opt_includeInstance, this); }; @@ -6882,13 +13346,13 @@ proto.ml_metadata.PutContextTypeResponse.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutContextTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -6902,23 +13366,23 @@ proto.ml_metadata.PutContextTypeResponse.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutContextTypeResponse} + * @return {!proto.ml_metadata.GetArtifactTypesRequest} */ -proto.ml_metadata.PutContextTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutContextTypeResponse; - return proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesRequest; + return proto.ml_metadata.GetArtifactTypesRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutContextTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.PutContextTypeResponse} + * @return {!proto.ml_metadata.GetArtifactTypesRequest} */ -proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -6926,8 +13390,9 @@ proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setTypeId(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -6942,9 +13407,9 @@ proto.ml_metadata.PutContextTypeResponse.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutContextTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutContextTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -6952,46 +13417,48 @@ proto.ml_metadata.PutContextTypeResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutContextTypeResponse} message + * @param {!proto.ml_metadata.GetArtifactTypesRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); + f = message.getTransactionOptions(); if (f != null) { - writer.writeInt64( + writer.writeMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional int64 type_id = 1; - * @return {number} + * optional TransactionOptions transaction_options = 1; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutContextTypeResponse.prototype.getTypeId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.GetArtifactTypesRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); }; /** - * @param {number} value - * @return {!proto.ml_metadata.PutContextTypeResponse} returns this - */ -proto.ml_metadata.PutContextTypeResponse.prototype.setTypeId = function(value) { - return jspb.Message.setField(this, 1, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactTypesRequest} returns this +*/ +proto.ml_metadata.GetArtifactTypesRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * Clears the field making 
it undefined. - * @return {!proto.ml_metadata.PutContextTypeResponse} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactTypesRequest} returns this */ -proto.ml_metadata.PutContextTypeResponse.prototype.clearTypeId = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetArtifactTypesRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -6999,7 +13466,7 @@ proto.ml_metadata.PutContextTypeResponse.prototype.clearTypeId = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.PutContextTypeResponse.prototype.hasTypeId = function() { +proto.ml_metadata.GetArtifactTypesRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 1) != null; }; @@ -7010,7 +13477,7 @@ proto.ml_metadata.PutContextTypeResponse.prototype.hasTypeId = function() { * @private {!Array} * @const */ -proto.ml_metadata.PutContextsRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactTypesResponse.repeatedFields_ = [1]; @@ -7027,8 +13494,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutContextsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutContextsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesResponse.toObject(opt_includeInstance, this); }; @@ -7037,14 +13504,14 @@ proto.ml_metadata.PutContextsRequest.prototype.toObject = function(opt_includeIn * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutContextsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) + artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), + ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) }; if (includeInstance) { @@ -7058,23 +13525,23 @@ proto.ml_metadata.PutContextsRequest.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.PutContextsRequest} + * @return {!proto.ml_metadata.GetArtifactTypesResponse} */ -proto.ml_metadata.PutContextsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutContextsRequest; - return proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesResponse; + return proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutContextsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutContextsRequest} + * @return {!proto.ml_metadata.GetArtifactTypesResponse} */ -proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -7082,9 +13549,9 @@ proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader = function(msg, var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Context; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.addArtifactTypes(value); break; default: reader.skipField(); @@ -7099,9 +13566,9 @@ proto.ml_metadata.PutContextsRequest.deserializeBinaryFromReader = function(msg, * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutContextsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutContextsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -7109,69 +13576,62 @@ proto.ml_metadata.PutContextsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutContextsRequest} message + * @param {!proto.ml_metadata.GetArtifactTypesResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextsList(); + f = message.getArtifactTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter ); } }; /** - * repeated Context contexts = 1; - * @return {!Array} + * repeated ArtifactType artifact_types = 1; + * @return {!Array} */ -proto.ml_metadata.PutContextsRequest.prototype.getContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); +proto.ml_metadata.GetArtifactTypesResponse.prototype.getArtifactTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutContextsRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactTypesResponse} returns this */ -proto.ml_metadata.PutContextsRequest.prototype.setContextsList = function(value) { +proto.ml_metadata.GetArtifactTypesResponse.prototype.setArtifactTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Context=} opt_value + * @param {!proto.ml_metadata.ArtifactType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} + * @return {!proto.ml_metadata.ArtifactType} */ -proto.ml_metadata.PutContextsRequest.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); +proto.ml_metadata.GetArtifactTypesResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutContextsRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesResponse} returns this */ -proto.ml_metadata.PutContextsRequest.prototype.clearContextsList = function() { - return this.setContextsList([]); +proto.ml_metadata.GetArtifactTypesResponse.prototype.clearArtifactTypesList = function() { + return this.setArtifactTypesList([]); }; -/** - * List of repeated fields within this message type. 
- * @private {!Array} - * @const - */ -proto.ml_metadata.PutContextsResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -7187,8 +13647,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutContextsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutContextsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesRequest.toObject(opt_includeInstance, this); }; @@ -7197,13 +13657,13 @@ proto.ml_metadata.PutContextsResponse.prototype.toObject = function(opt_includeI * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutContextsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypesRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesRequest.toObject = function(includeInstance, msg) { var f, obj = { - contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -7217,23 +13677,23 @@ proto.ml_metadata.PutContextsResponse.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutContextsResponse} + * @return {!proto.ml_metadata.GetExecutionTypesRequest} */ -proto.ml_metadata.PutContextsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutContextsResponse; - return proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesRequest; + return proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutContextsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutContextsResponse} + * @return {!proto.ml_metadata.GetExecutionTypesRequest} */ -proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -7241,10 +13701,9 @@ proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader = function(msg var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? 
reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addContextIds(values[i]); - } + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -7259,9 +13718,9 @@ proto.ml_metadata.PutContextsResponse.deserializeBinaryFromReader = function(msg * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutContextsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutContextsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -7269,56 +13728,57 @@ proto.ml_metadata.PutContextsResponse.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutContextsResponse} message + * @param {!proto.ml_metadata.GetExecutionTypesRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutContextsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * repeated int64 context_ids = 1; - * @return {!Array} + * optional TransactionOptions transaction_options = 1; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.PutContextsResponse.prototype.getContextIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetExecutionTypesRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutContextsResponse} returns this - */ -proto.ml_metadata.PutContextsResponse.prototype.setContextIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionTypesRequest} returns this +*/ +proto.ml_metadata.GetExecutionTypesRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.PutContextsResponse} returns this + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetExecutionTypesRequest} returns this */ -proto.ml_metadata.PutContextsResponse.prototype.addContextIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetExecutionTypesRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutContextsResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.PutContextsResponse.prototype.clearContextIdsList = function() { - return this.setContextIdsList([]); +proto.ml_metadata.GetExecutionTypesRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -7328,7 +13788,7 @@ proto.ml_metadata.PutContextsResponse.prototype.clearContextIdsList = function() * @private {!Array} * @const */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.repeatedFields_ = [1,2]; +proto.ml_metadata.GetExecutionTypesResponse.repeatedFields_ = [1]; @@ -7345,8 +13805,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutAttributionsAndAssociationsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesResponse.toObject(opt_includeInstance, this); }; @@ -7355,16 +13815,14 @@ proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.toObject = fun * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypesResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesResponse.toObject = function(includeInstance, msg) { var f, obj = { - attributionsList: jspb.Message.toObjectList(msg.getAttributionsList(), - ml_metadata_proto_metadata_store_pb.Attribution.toObject, includeInstance), - associationsList: jspb.Message.toObjectList(msg.getAssociationsList(), - ml_metadata_proto_metadata_store_pb.Association.toObject, includeInstance) + executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), + ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance) }; if (includeInstance) { @@ -7378,23 +13836,23 @@ proto.ml_metadata.PutAttributionsAndAssociationsRequest.toObject = function(incl /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} + * @return {!proto.ml_metadata.GetExecutionTypesResponse} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutAttributionsAndAssociationsRequest; - return proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesResponse; + return proto.ml_metadata.GetExecutionTypesResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} + * @return {!proto.ml_metadata.GetExecutionTypesResponse} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -7402,14 +13860,9 @@ proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromRea var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Attribution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Attribution.deserializeBinaryFromReader); - msg.addAttributions(value); - break; - case 2: - var value = new ml_metadata_proto_metadata_store_pb.Association; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Association.deserializeBinaryFromReader); - msg.addAssociations(value); + var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); + msg.addExecutionTypes(value); break; default: reader.skipField(); @@ -7424,9 +13877,9 @@ proto.ml_metadata.PutAttributionsAndAssociationsRequest.deserializeBinaryFromRea * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutAttributionsAndAssociationsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -7434,104 +13887,58 @@ proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.serializeBinar /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} message + * @param {!proto.ml_metadata.GetExecutionTypesResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getAttributionsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.Attribution.serializeBinaryToWriter - ); - } - f = message.getAssociationsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 2, - f, - ml_metadata_proto_metadata_store_pb.Association.serializeBinaryToWriter - ); - } -}; - - -/** - * repeated Attribution attributions = 1; - * @return {!Array} - */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.getAttributionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Attribution, 1)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this -*/ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.setAttributionsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); -}; - - -/** - * @param {!proto.ml_metadata.Attribution=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Attribution} - */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.addAttributions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Attribution, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this - */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.clearAttributionsList = function() { - return this.setAttributionsList([]); + f = message.getExecutionTypesList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter + ); + } }; /** - * repeated Association associations = 2; - * @return {!Array} + * repeated ExecutionType execution_types = 1; + * @return {!Array} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.getAssociationsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Association, 2)); +proto.ml_metadata.GetExecutionTypesResponse.prototype.getExecutionTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionTypesResponse} returns this */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.setAssociationsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 2, value); +proto.ml_metadata.GetExecutionTypesResponse.prototype.setExecutionTypesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Association=} opt_value + * @param {!proto.ml_metadata.ExecutionType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Association} + * @return {!proto.ml_metadata.ExecutionType} */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.addAssociations = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 2, opt_value, proto.ml_metadata.Association, opt_index); +proto.ml_metadata.GetExecutionTypesResponse.prototype.addExecutionTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ExecutionType, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypesResponse} returns this */ -proto.ml_metadata.PutAttributionsAndAssociationsRequest.prototype.clearAssociationsList = function() { - return this.setAssociationsList([]); +proto.ml_metadata.GetExecutionTypesResponse.prototype.clearExecutionTypesList = function() { + return this.setExecutionTypesList([]); }; @@ -7551,8 +13958,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutAttributionsAndAssociationsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesRequest.toObject(opt_includeInstance, this); }; @@ -7561,13 +13968,13 @@ proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.toObject = fu * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesRequest.toObject = function(includeInstance, msg) { var f, obj = { - + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -7581,29 +13988,34 @@ proto.ml_metadata.PutAttributionsAndAssociationsResponse.toObject = function(inc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} + * @return {!proto.ml_metadata.GetContextTypesRequest} */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutAttributionsAndAssociationsResponse; - return proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesRequest; + return proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} + * @return {!proto.ml_metadata.GetContextTypesRequest} */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; default: reader.skipField(); break; @@ -7617,9 +14029,9 @@ proto.ml_metadata.PutAttributionsAndAssociationsResponse.deserializeBinaryFromRe * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutAttributionsAndAssociationsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -7627,12 +14039,57 @@ proto.ml_metadata.PutAttributionsAndAssociationsResponse.prototype.serializeBina /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutAttributionsAndAssociationsResponse} message + * @param {!proto.ml_metadata.GetContextTypesRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutAttributionsAndAssociationsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * optional TransactionOptions transaction_options = 1; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.GetContextTypesRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetContextTypesRequest} returns this +*/ +proto.ml_metadata.GetContextTypesRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 1, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetContextTypesRequest} returns this + */ +proto.ml_metadata.GetContextTypesRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextTypesRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -7642,7 +14099,7 @@ proto.ml_metadata.PutAttributionsAndAssociationsResponse.serializeBinaryToWriter * @private {!Array} * @const */ -proto.ml_metadata.PutParentContextsRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetContextTypesResponse.repeatedFields_ = [1]; @@ -7659,8 +14116,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.PutParentContextsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutParentContextsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesResponse.toObject(opt_includeInstance, this); }; @@ -7669,14 +14126,14 @@ proto.ml_metadata.PutParentContextsRequest.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutParentContextsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutParentContextsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesResponse.toObject = function(includeInstance, msg) { var f, obj = { - parentContextsList: jspb.Message.toObjectList(msg.getParentContextsList(), - ml_metadata_proto_metadata_store_pb.ParentContext.toObject, includeInstance) + contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), + ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance) }; if (includeInstance) { @@ -7690,23 +14147,23 @@ proto.ml_metadata.PutParentContextsRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutParentContextsRequest} + * @return {!proto.ml_metadata.GetContextTypesResponse} */ -proto.ml_metadata.PutParentContextsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutParentContextsRequest; - return proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesResponse; + return proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.PutParentContextsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutParentContextsRequest} + * @return {!proto.ml_metadata.GetContextTypesResponse} */ -proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -7714,9 +14171,9 @@ proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ParentContext; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ParentContext.deserializeBinaryFromReader); - msg.addParentContexts(value); + var value = new ml_metadata_proto_metadata_store_pb.ContextType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); + msg.addContextTypes(value); break; default: reader.skipField(); @@ -7731,9 +14188,9 @@ proto.ml_metadata.PutParentContextsRequest.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.PutParentContextsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutParentContextsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -7741,162 +14198,68 @@ proto.ml_metadata.PutParentContextsRequest.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutParentContextsRequest} message + * @param {!proto.ml_metadata.GetContextTypesResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.PutParentContextsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getParentContextsList(); + f = message.getContextTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ParentContext.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter ); } }; /** - * repeated ParentContext parent_contexts = 1; - * @return {!Array} + * repeated ContextType context_types = 1; + * @return {!Array} */ -proto.ml_metadata.PutParentContextsRequest.prototype.getParentContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ParentContext, 1)); +proto.ml_metadata.GetContextTypesResponse.prototype.getContextTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.PutParentContextsRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextTypesResponse} returns this */ -proto.ml_metadata.PutParentContextsRequest.prototype.setParentContextsList = function(value) { +proto.ml_metadata.GetContextTypesResponse.prototype.setContextTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ParentContext=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ParentContext} - */ -proto.ml_metadata.PutParentContextsRequest.prototype.addParentContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ParentContext, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.PutParentContextsRequest} returns this - */ -proto.ml_metadata.PutParentContextsRequest.prototype.clearParentContextsList = function() { - return this.setParentContextsList([]); -}; - - - - - -if (jspb.Message.GENERATE_TO_OBJECT) { -/** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. 
- * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} - */ -proto.ml_metadata.PutParentContextsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.PutParentContextsResponse.toObject(opt_includeInstance, this); -}; - - -/** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.PutParentContextsResponse} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.PutParentContextsResponse.toObject = function(includeInstance, msg) { - var f, obj = { - - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; -}; -} - - -/** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.PutParentContextsResponse} - */ -proto.ml_metadata.PutParentContextsResponse.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.PutParentContextsResponse; - return proto.ml_metadata.PutParentContextsResponse.deserializeBinaryFromReader(msg, reader); -}; - - -/** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.PutParentContextsResponse} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.PutParentContextsResponse} + * @param {!proto.ml_metadata.ContextType=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ContextType} */ -proto.ml_metadata.PutParentContextsResponse.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.GetContextTypesResponse.prototype.addContextTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ContextType, opt_index); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetContextTypesResponse} returns this */ -proto.ml_metadata.PutParentContextsResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.PutParentContextsResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetContextTypesResponse.prototype.clearContextTypesList = function() { + return this.setContextTypesList([]); }; + /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.PutParentContextsResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * List of repeated fields within this message type. 
+ * @private {!Array} + * @const */ -proto.ml_metadata.PutParentContextsResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; -}; - - +proto.ml_metadata.GetArtifactsByExternalIdsRequest.repeatedFields_ = [1]; @@ -7913,8 +14276,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByExternalIdsRequest.toObject(opt_includeInstance, this); }; @@ -7923,15 +14286,13 @@ proto.ml_metadata.GetArtifactsByTypeRequest.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -7946,23 +14307,23 @@ proto.ml_metadata.GetArtifactsByTypeRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByTypeRequest; - return proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByExternalIdsRequest; + return proto.ml_metadata.GetArtifactsByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -7971,18 +14332,9 @@ proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader = functi switch (field) { case 1: var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); + msg.addExternalIds(value); break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); - msg.setOptions(value); - break; - case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -8000,9 +14352,9 @@ proto.ml_metadata.GetArtifactsByTypeRequest.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByExternalIdsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -8010,38 +14362,23 @@ proto.ml_metadata.GetArtifactsByTypeRequest.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByTypeRequest} message + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( + f = message.getExternalIdsList(); + if (f.length > 0) { + writer.writeRepeatedString( 1, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = message.getOptions(); - if (f != null) { - writer.writeMessage( - 3, - f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter - ); - } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 4, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -8050,65 +14387,67 @@ proto.ml_metadata.GetArtifactsByTypeRequest.serializeBinaryToWriter = function(m /** - * optional string type_name = 1; - * @return {string} + * repeated string external_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.setExternalIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * @param {string} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; /** - * optional string type_version = 2; - * @return {string} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this - */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -8116,82 +14455,168 @@ proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTypeVersion = functio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTypeVersion = function() { +proto.ml_metadata.GetArtifactsByExternalIdsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; + /** - * optional ListOperationOptions options = 3; - * @return {?proto.ml_metadata.ListOperationOptions} + * List of repeated fields within this message type. + * @private {!Array} + * @const */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 3)); +proto.ml_metadata.GetArtifactsByExternalIdsResponse.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. 
whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByExternalIdsResponse.toObject(opt_includeInstance, this); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this -*/ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.GetArtifactsByExternalIdsResponse.toObject = function(includeInstance, msg) { + var f, obj = { + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetArtifactsByExternalIdsResponse.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.GetArtifactsByExternalIdsResponse; + return proto.ml_metadata.GetArtifactsByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** - * Returns whether this field is set. - * @return {boolean} + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetArtifactsByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * optional TransactionOptions transaction_options = 4; - * @return {?proto.ml_metadata.TransactionOptions} + * Serializes the message to binary data (in protobuf wire format). 
+ * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetArtifactsByExternalIdsResponse.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.GetArtifactsByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getArtifactsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( + 1, + f, + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } +}; + + +/** + * repeated Artifact artifacts = 1; + * @return {!Array} + */ +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.setArtifactsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByTypeRequest} returns this + * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Artifact} */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetArtifactsByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetArtifactsByExternalIdsResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); }; @@ -8201,7 +14626,7 @@ proto.ml_metadata.GetArtifactsByTypeRequest.prototype.hasTransactionOptions = fu * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByTypeResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetExecutionsByExternalIdsRequest.repeatedFields_ = [1]; @@ -8218,8 +14643,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByExternalIdsRequest.toObject(opt_includeInstance, this); }; @@ -8228,15 +14653,14 @@ proto.ml_metadata.GetArtifactsByTypeResponse.prototype.toObject = function(opt_i * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -8250,23 +14674,23 @@ proto.ml_metadata.GetArtifactsByTypeResponse.toObject = function(includeInstance /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByTypeResponse; - return proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByExternalIdsRequest; + return proto.ml_metadata.GetExecutionsByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} msg The message object to deserialize into. 
+ * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -8274,13 +14698,13 @@ proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader = funct var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); + var value = /** @type {string} */ (reader.readString()); + msg.addExternalIds(value); break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -8295,9 +14719,9 @@ proto.ml_metadata.GetArtifactsByTypeResponse.deserializeBinaryFromReader = funct * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByExternalIdsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -8305,92 +14729,92 @@ proto.ml_metadata.GetArtifactsByTypeResponse.prototype.serializeBinary = functio /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByTypeResponse} message + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getExternalIdsList(); if (f.length > 0) { - writer.writeRepeatedMessage( + writer.writeRepeatedString( 1, - f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); + f = message.getTransactionOptions(); if (f != null) { - writer.writeString( + writer.writeMessage( 2, - f + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated string external_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this -*/ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.setArtifactsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} returns this + */ +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.setExternalIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {string} value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; /** - * optional string next_page_token = 2; - * @return {string} + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this - */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} returns this +*/ +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByTypeResponse} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -8398,12 +14822,19 @@ proto.ml_metadata.GetArtifactsByTypeResponse.prototype.clearNextPageToken = func * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByTypeResponse.prototype.hasNextPageToken = function() { +proto.ml_metadata.GetExecutionsByExternalIdsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionsByExternalIdsResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -8419,8 +14850,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactByTypeAndNameRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByExternalIdsResponse.toObject(opt_includeInstance, this); }; @@ -8429,16 +14860,14 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.toObject = function( * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByExternalIdsResponse.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - artifactName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance) }; if (includeInstance) { @@ -8452,23 +14881,23 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.toObject = function(includeIns /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByExternalIdsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactByTypeAndNameRequest; - return proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByExternalIdsResponse; + return proto.ml_metadata.GetExecutionsByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -8476,21 +14905,9 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader = var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setArtifactName(value); - break; - case 4: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); break; default: reader.skipField(); @@ -8505,9 +14922,9 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.deserializeBinaryFromReader = * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactByTypeAndNameRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByExternalIdsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -8515,176 +14932,255 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.serializeBinary = fu /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} message + * @param {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( + f = message.getExecutionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 4, f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter ); } }; /** - * optional string type_name = 1; - * @return {string} + * repeated Execution executions = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} returns this +*/ +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.setExecutionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); +}; + + +/** + * @param {!proto.ml_metadata.Execution=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Execution} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetExecutionsByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetExecutionsByExternalIdsResponse.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); }; + /** - * Returns whether this field is set. - * @return {boolean} + * List of repeated fields within this message type. 
+ * @private {!Array} + * @const */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetContextsByExternalIdsRequest.repeatedFields_ = [1]; + + + +if (jspb.Message.GENERATE_TO_OBJECT) { +/** + * Creates an object representation of this proto. + * Field names that are reserved in JavaScript and will be renamed to pb_name. + * Optional fields that are not set will be set to undefined. + * To access a reserved field use, foo.pb_, eg, foo.pb_default. + * For the list of reserved names please see: + * net/proto2/compiler/js/internal/generator.cc#kKeyword. + * @param {boolean=} opt_includeInstance Deprecated. whether to include the + * JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @return {!Object} + */ +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByExternalIdsRequest.toObject(opt_includeInstance, this); +}; + + +/** + * Static version of the {@see toObject} method. + * @param {boolean|undefined} includeInstance Deprecated. Whether to include + * the JSPB instance for transitional soy proto support: + * http://goto/soy-param-migration + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} msg The msg instance to transform. + * @return {!Object} + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.GetContextsByExternalIdsRequest.toObject = function(includeInstance, msg) { + var f, obj = { + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + }; + + if (includeInstance) { + obj.$jspbMessageInstance = msg; + } + return obj; }; +} /** - * optional string type_version = 3; - * @return {string} + * Deserializes binary data (in protobuf wire format). + * @param {jspb.ByteSource} bytes The bytes to deserialize. + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +proto.ml_metadata.GetContextsByExternalIdsRequest.deserializeBinary = function(bytes) { + var reader = new jspb.BinaryReader(bytes); + var msg = new proto.ml_metadata.GetContextsByExternalIdsRequest; + return proto.ml_metadata.GetContextsByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * Deserializes binary data (in protobuf wire format) from the + * given reader into the given message object. + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} msg The message object to deserialize into. + * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
+ * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 3, value); +proto.ml_metadata.GetContextsByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { + while (reader.nextField()) { + if (reader.isEndGroup()) { + break; + } + var field = reader.getFieldNumber(); + switch (field) { + case 1: + var value = /** @type {string} */ (reader.readString()); + msg.addExternalIds(value); + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; + default: + reader.skipField(); + break; + } + } + return msg; }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 3, undefined); +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetContextsByExternalIdsRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); }; /** - * Returns whether this field is set. - * @return {boolean} + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. + * @param {!proto.ml_metadata.GetContextsByExternalIdsRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetContextsByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = message.getExternalIdsList(); + if (f.length > 0) { + writer.writeRepeatedString( + 1, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } }; /** - * optional string artifact_name = 2; - * @return {string} + * repeated string external_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getArtifactName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setArtifactName = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.setExternalIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * @param {string} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearArtifactName = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasArtifactName = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; /** - * optional TransactionOptions transaction_options = 4; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetContextsByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -8693,12 +15189,19 @@ proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.clearTransactionOpti * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactByTypeAndNameRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetContextsByExternalIdsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. 
+ * @private {!Array} + * @const + */ +proto.ml_metadata.GetContextsByExternalIdsResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -8714,8 +15217,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactByTypeAndNameResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByExternalIdsResponse.toObject(opt_includeInstance, this); }; @@ -8724,13 +15227,14 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.toObject = function * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByExternalIdsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByExternalIdsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifact: (f = msg.getArtifact()) && ml_metadata_proto_metadata_store_pb.Artifact.toObject(includeInstance, f) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -8744,23 +15248,23 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.toObject = function(includeIn /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetContextsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByExternalIdsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactByTypeAndNameResponse; - return proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByExternalIdsResponse; + return proto.ml_metadata.GetContextsByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByExternalIdsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetContextsByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -8768,9 +15272,9 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader = var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.setArtifact(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); break; default: reader.skipField(); @@ -8785,9 +15289,9 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.deserializeBinaryFromReader = * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactByTypeAndNameResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByExternalIdsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -8795,57 +15299,58 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.serializeBinary = f /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} message + * @param {!proto.ml_metadata.GetContextsByExternalIdsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifact(); - if (f != null) { - writer.writeMessage( + f = message.getContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } }; /** - * optional Artifact artifact = 1; - * @return {?proto.ml_metadata.Artifact} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.getArtifact = function() { - return /** @type{?proto.ml_metadata.Artifact} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {?proto.ml_metadata.Artifact|undefined} value - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextsByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.setArtifact = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.setContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactByTypeAndNameResponse} returns this + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.clearArtifact = function() { - return this.setArtifact(undefined); +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetContextsByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.hasArtifact = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetContextsByExternalIdsResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); }; @@ -8855,7 +15360,7 @@ proto.ml_metadata.GetArtifactByTypeAndNameResponse.prototype.hasArtifact = funct * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByIDRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.repeatedFields_ = [1]; @@ -8872,8 +15377,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.toObject(opt_includeInstance, this); }; @@ -8882,13 +15387,13 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -8903,23 +15408,23 @@ proto.ml_metadata.GetArtifactsByIDRequest.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByIDRequest} + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByIDRequest; - return proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesByExternalIdsRequest; + return proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactsByIDRequest} + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -8927,10 +15432,8 @@ proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addArtifactIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.addExternalIds(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -8950,9 +15453,9 @@ proto.ml_metadata.GetArtifactsByIDRequest.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByIDRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -8960,15 +15463,15 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByIDRequest} message + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByIDRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactIdsList(); + f = message.getExternalIdsList(); if (f.length > 0) { - writer.writeRepeatedInt64( + writer.writeRepeatedString( 1, f ); @@ -8985,39 +15488,39 @@ proto.ml_metadata.GetArtifactsByIDRequest.serializeBinaryToWriter = function(mes /** - * repeated int64 artifact_ids = 1; - * @return {!Array} + * repeated string external_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.getArtifactIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.setArtifactIdsList = function(value) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.setExternalIdsList = function(value) { return jspb.Message.setField(this, 1, value || []); }; /** - * @param {number} value + * @param {string} value * @param {number=} opt_index - * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.addArtifactIds = function(value, opt_index) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearArtifactIdsList = function() { - return this.setArtifactIdsList([]); +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; @@ -9025,7 +15528,7 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearArtifactIdsList = funct * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -9033,18 +15536,18 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.getTransactionOptions = func /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -9053,7 +15556,7 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.clearTransactionOptions = fu * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByIDRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByExternalIdsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -9064,7 +15567,7 @@ proto.ml_metadata.GetArtifactsByIDRequest.prototype.hasTransactionOptions = func * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.repeatedFields_ = [1]; @@ -9081,8 +15584,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.toObject(opt_includeInstance, this); }; @@ -9091,14 +15594,14 @@ proto.ml_metadata.GetArtifactsByIDResponse.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByIDResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance) + artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), + ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) }; if (includeInstance) { @@ -9112,23 +15615,23 @@ proto.ml_metadata.GetArtifactsByIDResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByIDResponse} + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByIDResponse; - return proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesByExternalIdsResponse; + return proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByIDResponse} + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -9136,9 +15639,9 @@ proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.addArtifactTypes(value); break; default: reader.skipField(); @@ -9153,9 +15656,9 @@ proto.ml_metadata.GetArtifactsByIDResponse.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9163,62 +15666,69 @@ proto.ml_metadata.GetArtifactsByIDResponse.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactsByIDResponse} message + * @param {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getArtifactTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter ); } }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated ArtifactType artifact_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.getArtifactTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.setArtifactsList = function(value) { +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.setArtifactTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {!proto.ml_metadata.ArtifactType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.ArtifactType} */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByIDResponse} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByIDResponse.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); +proto.ml_metadata.GetArtifactTypesByExternalIdsResponse.prototype.clearArtifactTypesList = function() { + return this.setArtifactTypesList([]); }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -9234,8 +15744,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.toObject(opt_includeInstance, this); }; @@ -9244,13 +15754,13 @@ proto.ml_metadata.GetArtifactsRequest.prototype.toObject = function(opt_includeI * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.toObject = function(includeInstance, msg) { var f, obj = { - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -9265,23 +15775,23 @@ proto.ml_metadata.GetArtifactsRequest.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsRequest} + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsRequest; - return proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesByExternalIdsRequest; + return proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactsRequest} + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -9289,9 +15799,8 @@ proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader = function(msg var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); - msg.setOptions(value); + var value = /** @type {string} */ (reader.readString()); + msg.addExternalIds(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -9311,9 +15820,9 @@ proto.ml_metadata.GetArtifactsRequest.deserializeBinaryFromReader = function(msg * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9321,18 +15830,17 @@ proto.ml_metadata.GetArtifactsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsRequest} message + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getOptions(); - if (f != null) { - writer.writeMessage( + f = message.getExternalIdsList(); + if (f.length > 0) { + writer.writeRepeatedString( 1, - f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + f ); } f = message.getTransactionOptions(); @@ -9347,39 +15855,39 @@ proto.ml_metadata.GetArtifactsRequest.serializeBinaryToWriter = function(message /** - * optional ListOperationOptions options = 1; - * @return {?proto.ml_metadata.ListOperationOptions} + * repeated string external_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsRequest} returns this -*/ -proto.ml_metadata.GetArtifactsRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} returns this + */ +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.setExternalIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsRequest} returns this + * @param {string} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; @@ -9387,7 +15895,7 @@ proto.ml_metadata.GetArtifactsRequest.prototype.hasOptions = function() { * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -9395,18 +15903,18 @@ proto.ml_metadata.GetArtifactsRequest.prototype.getTransactionOptions = function /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -9415,7 +15923,7 @@ proto.ml_metadata.GetArtifactsRequest.prototype.clearTransactionOptions = functi * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypesByExternalIdsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -9426,7 +15934,7 @@ proto.ml_metadata.GetArtifactsRequest.prototype.hasTransactionOptions = function * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.repeatedFields_ = [1]; @@ -9443,8 +15951,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.toObject(opt_includeInstance, this); }; @@ -9453,15 +15961,14 @@ proto.ml_metadata.GetArtifactsResponse.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), + ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance) }; if (includeInstance) { @@ -9475,23 +15982,23 @@ proto.ml_metadata.GetArtifactsResponse.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsResponse} + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsResponse; - return proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesByExternalIdsResponse; + return proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsResponse} + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -9499,13 +16006,9 @@ proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader = function(ms var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); + var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); + msg.addExecutionTypes(value); break; default: reader.skipField(); @@ -9520,9 +16023,9 @@ proto.ml_metadata.GetArtifactsResponse.deserializeBinaryFromReader = function(ms * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9530,101 +16033,58 @@ proto.ml_metadata.GetArtifactsResponse.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactsResponse} message + * @param {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getExecutionTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f + ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter ); } }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated ExecutionType execution_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsResponse.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.getExecutionTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsResponse.prototype.setArtifactsList = function(value) { +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.setExecutionTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {!proto.ml_metadata.ExecutionType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.ExecutionType} */ -proto.ml_metadata.GetArtifactsResponse.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.addExecutionTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ExecutionType, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsResponse} returns this - */ -proto.ml_metadata.GetArtifactsResponse.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); -}; - - -/** - * optional string next_page_token = 2; - * @return {string} - */ -proto.ml_metadata.GetArtifactsResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactsResponse} returns this + * @return {!proto.ml_metadata.GetExecutionTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsResponse} returns this - */ -proto.ml_metadata.GetArtifactsResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetArtifactsResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetExecutionTypesByExternalIdsResponse.prototype.clearExecutionTypesList = function() { + return this.setExecutionTypesList([]); }; @@ -9634,7 +16094,7 @@ proto.ml_metadata.GetArtifactsResponse.prototype.hasNextPageToken = function() { * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByURIRequest.repeatedFields_ = [2]; +proto.ml_metadata.GetContextTypesByExternalIdsRequest.repeatedFields_ = [1]; @@ -9651,8 +16111,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByURIRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesByExternalIdsRequest.toObject(opt_includeInstance, this); }; @@ -9661,13 +16121,13 @@ proto.ml_metadata.GetArtifactsByURIRequest.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByURIRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByURIRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.toObject = function(includeInstance, msg) { var f, obj = { - urisList: (f = jspb.Message.getRepeatedField(msg, 2)) == null ? undefined : f, + externalIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -9682,34 +16142,34 @@ proto.ml_metadata.GetArtifactsByURIRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetArtifactsByURIRequest} + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByURIRequest; - return proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesByExternalIdsRequest; + return proto.ml_metadata.GetContextTypesByExternalIdsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByURIRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByURIRequest} + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} */ -proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 2: + case 1: var value = /** @type {string} */ (reader.readString()); - msg.addUris(value); + msg.addExternalIds(value); break; - case 3: + case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -9727,9 +16187,9 @@ proto.ml_metadata.GetArtifactsByURIRequest.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByURIRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesByExternalIdsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9737,23 +16197,23 @@ proto.ml_metadata.GetArtifactsByURIRequest.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByURIRequest} message + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByURIRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getUrisList(); + f = message.getExternalIdsList(); if (f.length > 0) { writer.writeRepeatedString( - 2, + 1, f ); } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 3, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -9762,66 +16222,66 @@ proto.ml_metadata.GetArtifactsByURIRequest.serializeBinaryToWriter = function(me /** - * repeated string uris = 2; + * repeated string external_ids = 1; * @return {!Array} */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.getUrisList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 2)); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.getExternalIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.setUrisList = function(value) { - return jspb.Message.setField(this, 2, value || []); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.setExternalIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** * @param {string} value * @param {number=} opt_index - * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.addUris = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 2, value, opt_index); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.addExternalIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.clearUrisList = function() { - return this.setUrisList([]); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.clearExternalIdsList = function() { + return this.setExternalIdsList([]); }; /** - * optional TransactionOptions transaction_options = 3; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByURIRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsRequest} returns this */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -9830,8 +16290,8 @@ proto.ml_metadata.GetArtifactsByURIRequest.prototype.clearTransactionOptions = f * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByURIRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetContextTypesByExternalIdsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -9841,7 +16301,7 @@ proto.ml_metadata.GetArtifactsByURIRequest.prototype.hasTransactionOptions = fun * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByURIResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextTypesByExternalIdsResponse.repeatedFields_ = [1]; @@ -9858,8 +16318,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByURIResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesByExternalIdsResponse.toObject(opt_includeInstance, this); }; @@ -9868,14 +16328,14 @@ proto.ml_metadata.GetArtifactsByURIResponse.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByURIResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByURIResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance) + contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), + ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance) }; if (includeInstance) { @@ -9889,33 +16349,33 @@ proto.ml_metadata.GetArtifactsByURIResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByURIResponse} + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByURIResponse; - return proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesByExternalIdsResponse; + return proto.ml_metadata.GetContextTypesByExternalIdsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByURIResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByURIResponse} + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} */ -proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); + case 1: + var value = new ml_metadata_proto_metadata_store_pb.ContextType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); + msg.addContextTypes(value); break; default: reader.skipField(); @@ -9930,9 +16390,9 @@ proto.ml_metadata.GetArtifactsByURIResponse.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByURIResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesByExternalIdsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -9940,58 +16400,58 @@ proto.ml_metadata.GetArtifactsByURIResponse.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactsByURIResponse} message + * @param {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByURIResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getContextTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter ); } }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated ContextType context_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.getContextTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByURIResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.setArtifactsList = function(value) { +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.setContextTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {!proto.ml_metadata.ContextType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.ContextType} */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.addContextTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ContextType, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByURIResponse} returns this + * @return {!proto.ml_metadata.GetContextTypesByExternalIdsResponse} returns this */ -proto.ml_metadata.GetArtifactsByURIResponse.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); +proto.ml_metadata.GetContextTypesByExternalIdsResponse.prototype.clearContextTypesList = function() { + return this.setContextTypesList([]); }; @@ -10011,8 +16471,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByTypeRequest.toObject(opt_includeInstance, this); }; @@ -10021,12 +16481,14 @@ proto.ml_metadata.GetExecutionsRequest.prototype.toObject = function(opt_include * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -10042,23 +16504,23 @@ proto.ml_metadata.GetExecutionsRequest.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsRequest} + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} */ -proto.ml_metadata.GetExecutionsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsRequest; - return proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByTypeRequest; + return proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionsRequest} + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} */ -proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -10066,11 +16528,19 @@ proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader = function(ms var field = reader.getFieldNumber(); switch (field) { case 1: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 3: var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); msg.setOptions(value); break; - case 2: + case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -10088,9 +16558,9 @@ proto.ml_metadata.GetExecutionsRequest.deserializeBinaryFromReader = function(ms * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByTypeRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -10098,16 +16568,30 @@ proto.ml_metadata.GetExecutionsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionsRequest} message + * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByTypeRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } f = message.getOptions(); if (f != null) { writer.writeMessage( - 1, + 3, f, ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter ); @@ -10115,7 +16599,7 @@ proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter = function(messag f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 2, + 4, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -10124,67 +16608,29 @@ proto.ml_metadata.GetExecutionsRequest.serializeBinaryToWriter = function(messag /** - * optional ListOperationOptions options = 1; - * @return {?proto.ml_metadata.ListOperationOptions} - */ -proto.ml_metadata.GetExecutionsRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); -}; - - -/** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsRequest} returns this -*/ -proto.ml_metadata.GetExecutionsRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsRequest} returns this - */ -proto.ml_metadata.GetExecutionsRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} + * optional string type_name = 1; + * @return {string} */ -proto.ml_metadata.GetExecutionsRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * optional TransactionOptions transaction_options = 2; - * @return {?proto.ml_metadata.TransactionOptions} + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsRequest.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); -}; - - -/** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsRequest} returns this -*/ -proto.ml_metadata.GetExecutionsRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetExecutionsRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsRequest.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -10192,207 +16638,109 @@ proto.ml_metadata.GetExecutionsRequest.prototype.clearTransactionOptions = funct * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; -}; - - - -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetExecutionsResponse.repeatedFields_ = [1]; - - - -if (jspb.Message.GENERATE_TO_OBJECT) { -/** - * Creates an object representation of this proto. - * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} - */ -proto.ml_metadata.GetExecutionsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsResponse.toObject(opt_includeInstance, this); -}; - - -/** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsResponse} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages - */ -proto.ml_metadata.GetExecutionsResponse.toObject = function(includeInstance, msg) { - var f, obj = { - executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), - ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsResponse} + * optional string type_version = 2; + * @return {string} */ -proto.ml_metadata.GetExecutionsResponse.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsResponse; - return proto.ml_metadata.GetExecutionsResponse.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; - - -/** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. 
- * @param {!proto.ml_metadata.GetExecutionsResponse} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionsResponse} - */ -proto.ml_metadata.GetExecutionsResponse.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.addExecutions(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this + */ +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsResponse.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsResponse.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 2, undefined); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionsResponse} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetExecutionsResponse.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getExecutionsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * repeated Execution executions = 1; - * @return {!Array} + * optional ListOperationOptions options = 3; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.GetExecutionsResponse.prototype.getExecutionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 3)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionsResponse} returns this + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsResponse.prototype.setExecutionsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** - * @param {!proto.ml_metadata.Execution=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Execution} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsResponse.prototype.addExecutions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetExecutionsResponse} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetExecutionsResponse.prototype.clearExecutionsList = function() { - return this.setExecutionsList([]); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * optional string next_page_token = 2; - * @return {string} + * optional TransactionOptions transaction_options = 4; + * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionsResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionsResponse} returns this - */ -proto.ml_metadata.GetExecutionsResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this +*/ +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsResponse} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionsResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; @@ -10400,12 +16748,19 @@ proto.ml_metadata.GetExecutionsResponse.prototype.clearNextPageToken = function( * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 4) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionsByTypeResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -10421,8 +16776,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByTypeResponse.toObject(opt_includeInstance, this); }; @@ -10431,15 +16786,15 @@ proto.ml_metadata.GetArtifactTypeRequest.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -10453,23 +16808,23 @@ proto.ml_metadata.GetArtifactTypeRequest.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactTypeRequest} + * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} */ -proto.ml_metadata.GetArtifactTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypeRequest; - return proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByTypeResponse; + return proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactTypeRequest} + * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} */ -proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -10477,17 +16832,13 @@ proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); break; case 2: var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -10502,9 +16853,9 @@ proto.ml_metadata.GetArtifactTypeRequest.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -10512,17 +16863,18 @@ proto.ml_metadata.GetArtifactTypeRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactTypeRequest} message + * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( + f = message.getExecutionsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter ); } f = /** @type {string} */ (jspb.Message.getField(message, 2)); @@ -10532,114 +16884,71 @@ proto.ml_metadata.GetArtifactTypeRequest.serializeBinaryToWriter = function(mess f ); } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 3, - f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter - ); - } -}; - - -/** - * optional string type_name = 1; - * @return {string} - */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this - */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this - */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; }; /** - * optional string type_version = 2; - * @return {string} + * repeated Execution executions = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this - */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 2, value); + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this +*/ +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.setExecutionsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this + * @param {!proto.ml_metadata.Execution=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Execution} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); }; /** - * optional TransactionOptions transaction_options = 3; - * @return {?proto.ml_metadata.TransactionOptions} + * optional string next_page_token = 2; + * @return {string} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this -*/ -proto.ml_metadata.GetArtifactTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this + */ +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypeRequest} returns this + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); }; @@ -10647,8 +16956,8 @@ proto.ml_metadata.GetArtifactTypeRequest.prototype.clearTransactionOptions = fun * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetExecutionsByTypeResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -10668,8 +16977,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionByTypeAndNameRequest.toObject(opt_includeInstance, this); }; @@ -10678,13 +16987,16 @@ proto.ml_metadata.GetArtifactTypeResponse.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactType: (f = msg.getArtifactType()) && ml_metadata_proto_metadata_store_pb.ArtifactType.toObject(includeInstance, f) + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + executionName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -10698,23 +17010,23 @@ proto.ml_metadata.GetArtifactTypeResponse.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetArtifactTypeResponse} + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} */ -proto.ml_metadata.GetArtifactTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypeResponse; - return proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionByTypeAndNameRequest; + return proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactTypeResponse} + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} */ -proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -10722,9 +17034,21 @@ proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.setArtifactType(value); + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setExecutionName(value); + break; + case 4: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -10739,9 +17063,9 @@ proto.ml_metadata.GetArtifactTypeResponse.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionByTypeAndNameRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -10749,48 +17073,68 @@ proto.ml_metadata.GetArtifactTypeResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactTypeResponse} message + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactType(); + f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeMessage( + writer.writeString( 1, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 4, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * optional ArtifactType artifact_type = 1; - * @return {?proto.ml_metadata.ArtifactType} + * optional string type_name = 1; + * @return {string} */ -proto.ml_metadata.GetArtifactTypeResponse.prototype.getArtifactType = function() { - return /** @type{?proto.ml_metadata.ArtifactType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {?proto.ml_metadata.ArtifactType|undefined} value - * @return {!proto.ml_metadata.GetArtifactTypeResponse} returns this -*/ -proto.ml_metadata.GetArtifactTypeResponse.prototype.setArtifactType = function(value) { - return jspb.Message.setWrapperField(this, 1, value); + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this + */ +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypeResponse} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypeResponse.prototype.clearArtifactType = function() { - return this.setArtifactType(undefined); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -10798,149 +17142,107 @@ proto.ml_metadata.GetArtifactTypeResponse.prototype.clearArtifactType = function * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactTypeResponse.prototype.hasArtifactType = function() { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTypeName = function() { return jspb.Message.getField(this, 1) != null; }; +/** + * optional string type_version = 3; + * @return {string} + */ +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +}; - -if (jspb.Message.GENERATE_TO_OBJECT) { /** - * Creates an object representation of this proto. 
- * Field names that are reserved in JavaScript and will be renamed to pb_name. - * Optional fields that are not set will be set to undefined. - * To access a reserved field use, foo.pb_, eg, foo.pb_default. - * For the list of reserved names please see: - * net/proto2/compiler/js/internal/generator.cc#kKeyword. - * @param {boolean=} opt_includeInstance Deprecated. whether to include the - * JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @return {!Object} + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypesRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Static version of the {@see toObject} method. - * @param {boolean|undefined} includeInstance Deprecated. Whether to include - * the JSPB instance for transitional soy proto support: - * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypesRequest} msg The msg instance to transform. - * @return {!Object} - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.toObject = function(includeInstance, msg) { - var f, obj = { - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) - }; - - if (includeInstance) { - obj.$jspbMessageInstance = msg; - } - return obj; +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 3, undefined); }; -} /** - * Deserializes binary data (in protobuf wire format). - * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactTypesRequest} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.GetArtifactTypesRequest.deserializeBinary = function(bytes) { - var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypesRequest; - return proto.ml_metadata.GetArtifactTypesRequest.deserializeBinaryFromReader(msg, reader); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 3) != null; }; /** - * Deserializes binary data (in protobuf wire format) from the - * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypesRequest} msg The message object to deserialize into. - * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactTypesRequest} + * optional string execution_name = 2; + * @return {string} */ -proto.ml_metadata.GetArtifactTypesRequest.deserializeBinaryFromReader = function(msg, reader) { - while (reader.nextField()) { - if (reader.isEndGroup()) { - break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); - break; - default: - reader.skipField(); - break; - } - } - return msg; +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getExecutionName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypesRequest.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setExecutionName = function(value) { + return jspb.Message.setField(this, 2, value); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactTypesRequest} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter - ); - } +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearExecutionName = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasExecutionName = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * optional TransactionOptions transaction_options = 1; + * optional TransactionOptions transaction_options = 4; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -10949,19 +17251,12 @@ proto.ml_metadata.GetArtifactTypesRequest.prototype.clearTransactionOptions = fu * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactTypesRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 4) != null; }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetArtifactTypesResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -10977,8 +17272,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypesResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionByTypeAndNameResponse.toObject(opt_includeInstance, this); }; @@ -10987,14 +17282,13 @@ proto.ml_metadata.GetArtifactTypesResponse.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypesResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionByTypeAndNameResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), - ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) + execution: (f = msg.getExecution()) && ml_metadata_proto_metadata_store_pb.Execution.toObject(includeInstance, f) }; if (includeInstance) { @@ -11008,23 +17302,23 @@ proto.ml_metadata.GetArtifactTypesResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactTypesResponse} + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} */ -proto.ml_metadata.GetArtifactTypesResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypesResponse; - return proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionByTypeAndNameResponse; + return proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypesResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactTypesResponse} + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} */ -proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11032,9 +17326,9 @@ proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.addArtifactTypes(value); + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.setExecution(value); break; default: reader.skipField(); @@ -11049,9 +17343,9 @@ proto.ml_metadata.GetArtifactTypesResponse.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypesResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionByTypeAndNameResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11059,62 +17353,68 @@ proto.ml_metadata.GetArtifactTypesResponse.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactTypesResponse} message + * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactTypesList(); - if (f.length > 0) { - writer.writeRepeatedMessage( + f = message.getExecution(); + if (f != null) { + writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter ); } }; /** - * repeated ArtifactType artifact_types = 1; - * @return {!Array} + * optional Execution execution = 1; + * @return {?proto.ml_metadata.Execution} */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.getArtifactTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.getExecution = function() { + return /** @type{?proto.ml_metadata.Execution} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactTypesResponse} returns this + * @param {?proto.ml_metadata.Execution|undefined} value + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} returns this */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.setArtifactTypesList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.setExecution = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ArtifactType=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ArtifactType} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} returns this */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index); +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.clearExecution = function() { + return this.setExecution(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetArtifactTypesResponse} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetArtifactTypesResponse.prototype.clearArtifactTypesList = function() { - return this.setArtifactTypesList([]); +proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.hasExecution = function() { + return jspb.Message.getField(this, 1) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionsByIDRequest.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -11130,8 +17430,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypesRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByIDRequest.toObject(opt_includeInstance, this); }; @@ -11140,12 +17440,13 @@ proto.ml_metadata.GetExecutionTypesRequest.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypesRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { + executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -11160,23 +17461,23 @@ proto.ml_metadata.GetExecutionTypesRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionTypesRequest} + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} */ -proto.ml_metadata.GetExecutionTypesRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypesRequest; - return proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByIDRequest; + return proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypesRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionTypesRequest} + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} */ -proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11184,6 +17485,12 @@ proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addExecutionIds(values[i]); + } + break; + case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -11201,9 +17508,9 @@ proto.ml_metadata.GetExecutionTypesRequest.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypesRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11211,16 +17518,23 @@ proto.ml_metadata.GetExecutionTypesRequest.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionTypesRequest} message + * @param {!proto.ml_metadata.GetExecutionsByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; + f = message.getExecutionIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 1, + f + ); + } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 1, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -11229,29 +17543,66 @@ proto.ml_metadata.GetExecutionTypesRequest.serializeBinaryToWriter = function(me /** - * optional TransactionOptions transaction_options = 1; + * repeated int64 execution_ids = 1; + * @return {!Array} + */ +proto.ml_metadata.GetExecutionsByIDRequest.prototype.getExecutionIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +}; + + +/** + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + */ +proto.ml_metadata.GetExecutionsByIDRequest.prototype.setExecutionIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); +}; + + +/** + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + */ +proto.ml_metadata.GetExecutionsByIDRequest.prototype.addExecutionIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + */ +proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearExecutionIdsList = function() { + return this.setExecutionIdsList([]); +}; + + +/** + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionsByIDRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionsByIDRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetExecutionTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -11260,8 +17611,8 @@ proto.ml_metadata.GetExecutionTypesRequest.prototype.clearTransactionOptions = f * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionTypesRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionsByIDRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -11271,7 +17622,7 @@ proto.ml_metadata.GetExecutionTypesRequest.prototype.hasTransactionOptions = fun * @private {!Array} * @const */ -proto.ml_metadata.GetExecutionTypesResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetExecutionsByIDResponse.repeatedFields_ = [1]; @@ -11288,8 +17639,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypesResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByIDResponse.toObject(opt_includeInstance, this); }; @@ -11298,14 +17649,14 @@ proto.ml_metadata.GetExecutionTypesResponse.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypesResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByIDResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), - ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance) + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance) }; if (includeInstance) { @@ -11319,33 +17670,33 @@ proto.ml_metadata.GetExecutionTypesResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetExecutionTypesResponse} + * @return {!proto.ml_metadata.GetExecutionsByIDResponse} */ -proto.ml_metadata.GetExecutionTypesResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypesResponse; - return proto.ml_metadata.GetExecutionTypesResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByIDResponse; + return proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypesResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionTypesResponse} + * @return {!proto.ml_metadata.GetExecutionsByIDResponse} */ -proto.ml_metadata.GetExecutionTypesResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); - msg.addExecutionTypes(value); + switch (field) { + case 1: + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); break; default: reader.skipField(); @@ -11360,9 +17711,9 @@ proto.ml_metadata.GetExecutionTypesResponse.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypesResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11370,58 +17721,58 @@ proto.ml_metadata.GetExecutionTypesResponse.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionTypesResponse} message + * @param {!proto.ml_metadata.GetExecutionsByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionTypesList(); + f = message.getExecutionsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter ); } }; /** - * repeated ExecutionType execution_types = 1; - * @return {!Array} + * repeated Execution executions = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.getExecutionTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); +proto.ml_metadata.GetExecutionsByIDResponse.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionTypesResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByIDResponse} returns this */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.setExecutionTypesList = function(value) { +proto.ml_metadata.GetExecutionsByIDResponse.prototype.setExecutionsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ExecutionType=} opt_value + * @param {!proto.ml_metadata.Execution=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.ExecutionType} + * @return {!proto.ml_metadata.Execution} */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.addExecutionTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ExecutionType, opt_index); +proto.ml_metadata.GetExecutionsByIDResponse.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetExecutionTypesResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsByIDResponse} returns this */ -proto.ml_metadata.GetExecutionTypesResponse.prototype.clearExecutionTypesList = function() { - return this.setExecutionTypesList([]); +proto.ml_metadata.GetExecutionsByIDResponse.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); }; @@ -11441,8 +17792,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypesRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypesRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypeRequest.toObject(opt_includeInstance, this); }; @@ -11451,12 +17802,14 @@ proto.ml_metadata.GetContextTypesRequest.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypesRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypeRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -11471,23 +17824,23 @@ proto.ml_metadata.GetContextTypesRequest.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextTypesRequest} + * @return {!proto.ml_metadata.GetExecutionTypeRequest} */ -proto.ml_metadata.GetContextTypesRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypesRequest; - return proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypeRequest; + return proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypesRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextTypesRequest} + * @return {!proto.ml_metadata.GetExecutionTypeRequest} */ -proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11495,6 +17848,14 @@ proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 3: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -11512,9 +17873,9 @@ proto.ml_metadata.GetContextTypesRequest.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypesRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypeRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypesRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypeRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11522,16 +17883,30 @@ proto.ml_metadata.GetContextTypesRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextTypesRequest} message + * @param {!proto.ml_metadata.GetExecutionTypeRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypeRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 1, + 3, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -11540,29 +17915,101 @@ proto.ml_metadata.GetContextTypesRequest.serializeBinaryToWriter = function(mess /** - * optional TransactionOptions transaction_options = 1; + * optional string type_name = 1; + * @return {string} + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional string type_version = 2; + * @return {string} + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 3; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextTypesRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypeRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 1)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this */ -proto.ml_metadata.GetContextTypesRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextTypesRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this */ -proto.ml_metadata.GetContextTypesRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -11571,19 +18018,12 @@ proto.ml_metadata.GetContextTypesRequest.prototype.clearTransactionOptions = fun * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextTypesRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; -/** - * List of repeated fields within this message type. 
- * @private {!Array} - * @const - */ -proto.ml_metadata.GetContextTypesResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -11599,8 +18039,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypesResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypesResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypeResponse.toObject(opt_includeInstance, this); }; @@ -11609,14 +18049,13 @@ proto.ml_metadata.GetContextTypesResponse.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypesResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypeResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), - ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance) + executionType: (f = msg.getExecutionType()) && ml_metadata_proto_metadata_store_pb.ExecutionType.toObject(includeInstance, f) }; if (includeInstance) { @@ -11630,23 +18069,23 @@ proto.ml_metadata.GetContextTypesResponse.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextTypesResponse} + * @return {!proto.ml_metadata.GetExecutionTypeResponse} */ -proto.ml_metadata.GetContextTypesResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypesResponse; - return proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypeResponse; + return proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypesResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextTypesResponse} + * @return {!proto.ml_metadata.GetExecutionTypeResponse} */ -proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11654,9 +18093,9 @@ proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ContextType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); - msg.addContextTypes(value); + var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); + msg.setExecutionType(value); break; default: reader.skipField(); @@ -11671,9 +18110,9 @@ proto.ml_metadata.GetContextTypesResponse.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypesResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypesResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11681,62 +18120,68 @@ proto.ml_metadata.GetContextTypesResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextTypesResponse} message + * @param {!proto.ml_metadata.GetExecutionTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextTypesList(); - if (f.length > 0) { - writer.writeRepeatedMessage( + f = message.getExecutionType(); + if (f != null) { + writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter ); } }; /** - * repeated ContextType context_types = 1; - * @return {!Array} - */ -proto.ml_metadata.GetContextTypesResponse.prototype.getContextTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); + * optional ExecutionType execution_type = 1; + * @return {?proto.ml_metadata.ExecutionType} + */ +proto.ml_metadata.GetExecutionTypeResponse.prototype.getExecutionType = function() { + return /** @type{?proto.ml_metadata.ExecutionType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetContextTypesResponse} returns this + * @param {?proto.ml_metadata.ExecutionType|undefined} value + * @return {!proto.ml_metadata.GetExecutionTypeResponse} returns this */ -proto.ml_metadata.GetContextTypesResponse.prototype.setContextTypesList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionTypeResponse.prototype.setExecutionType = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ContextType=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.ContextType} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionTypeResponse} returns this */ -proto.ml_metadata.GetContextTypesResponse.prototype.addContextTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ContextType, opt_index); +proto.ml_metadata.GetExecutionTypeResponse.prototype.clearExecutionType = function() { + return this.setExecutionType(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextTypesResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.GetContextTypesResponse.prototype.clearContextTypesList = function() { - return this.setContextTypesList([]); +proto.ml_metadata.GetExecutionTypeResponse.prototype.hasExecutionType = function() { + return jspb.Message.getField(this, 1) != null; }; +/** + * List of repeated fields within this message type. 
+ * @private {!Array} + * @const + */ +proto.ml_metadata.GetEventsByExecutionIDsRequest.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -11752,8 +18197,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetEventsByExecutionIDsRequest.toObject(opt_includeInstance, this); }; @@ -11762,15 +18207,13 @@ proto.ml_metadata.GetExecutionsByTypeRequest.prototype.toObject = function(opt_i * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetEventsByExecutionIDsRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -11785,23 +18228,23 @@ proto.ml_metadata.GetExecutionsByTypeRequest.toObject = function(includeInstance /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} */ -proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByTypeRequest; - return proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetEventsByExecutionIDsRequest; + return proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} */ -proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -11809,19 +18252,12 @@ proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader = funct var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addExecutionIds(values[i]); + } break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); - msg.setOptions(value); - break; - case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -11839,9 +18275,9 @@ proto.ml_metadata.GetExecutionsByTypeRequest.deserializeBinaryFromReader = funct * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetEventsByExecutionIDsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -11849,38 +18285,23 @@ proto.ml_metadata.GetExecutionsByTypeRequest.prototype.serializeBinary = functio /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionsByTypeRequest} message + * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetEventsByExecutionIDsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( + f = message.getExecutionIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( 1, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = message.getOptions(); - if (f != null) { - writer.writeMessage( - 3, - f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter - ); - } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 4, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -11889,138 +18310,66 @@ proto.ml_metadata.GetExecutionsByTypeRequest.serializeBinaryToWriter = function( /** - * optional string type_name = 1; - * @return {string} - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional string type_version = 2; - * @return {string} - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} + * repeated int64 execution_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.getExecutionIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * optional ListOperationOptions options = 3; - * @return {?proto.ml_metadata.ListOperationOptions} + * @param {!Array} value + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 3)); -}; - - -/** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this -*/ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.setExecutionIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.addExecutionIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.clearExecutionIdsList = function() { + return this.setExecutionIdsList([]); }; /** - * optional TransactionOptions transaction_options = 4; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetExecutionsByTypeRequest} returns this + * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -12029,8 +18378,8 @@ proto.ml_metadata.GetExecutionsByTypeRequest.prototype.clearTransactionOptions = * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -12040,7 +18389,7 @@ proto.ml_metadata.GetExecutionsByTypeRequest.prototype.hasTransactionOptions = f * @private {!Array} * @const */ -proto.ml_metadata.GetExecutionsByTypeResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetEventsByExecutionIDsResponse.repeatedFields_ = [1]; @@ -12057,8 +18406,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetEventsByExecutionIDsResponse.toObject(opt_includeInstance, this); }; @@ -12067,15 +18416,14 @@ proto.ml_metadata.GetExecutionsByTypeResponse.prototype.toObject = function(opt_ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetEventsByExecutionIDsResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), - ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + eventsList: jspb.Message.toObjectList(msg.getEventsList(), + ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance) }; if (includeInstance) { @@ -12089,23 +18437,23 @@ proto.ml_metadata.GetExecutionsByTypeResponse.toObject = function(includeInstanc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} + * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} */ -proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByTypeResponse; - return proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetEventsByExecutionIDsResponse; + return proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} + * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} */ -proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12113,13 +18461,9 @@ proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader = func var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.addExecutions(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); + var value = new ml_metadata_proto_metadata_store_pb.Event; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); + msg.addEvents(value); break; default: reader.skipField(); @@ -12134,9 +18478,9 @@ proto.ml_metadata.GetExecutionsByTypeResponse.deserializeBinaryFromReader = func * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetEventsByExecutionIDsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -12144,104 +18488,68 @@ proto.ml_metadata.GetExecutionsByTypeResponse.prototype.serializeBinary = functi /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionsByTypeResponse} message + * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetEventsByExecutionIDsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionsList(); + f = message.getEventsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f + ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter ); } }; /** - * repeated Execution executions = 1; - * @return {!Array} + * repeated Event events = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.getExecutionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.getEventsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} returns this */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.setExecutionsList = function(value) { +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.setEventsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Execution=} opt_value + * @param {!proto.ml_metadata.Event=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Execution} + * @return {!proto.ml_metadata.Event} */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.addExecutions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.addEvents = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this - */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.clearExecutionsList = function() { - return this.setExecutionsList([]); -}; - - -/** - * optional string next_page_token = 2; - * @return {string} - */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this + * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} returns this */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.clearEventsList = function() { + return this.setEventsList([]); }; -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByTypeResponse} returns this - */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); -}; - /** - * Returns whether this field is set. - * @return {boolean} + * List of repeated fields within this message type. + * @private {!Array} + * @const */ -proto.ml_metadata.GetExecutionsByTypeResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; -}; - - +proto.ml_metadata.GetEventsByArtifactIDsRequest.repeatedFields_ = [1]; @@ -12258,8 +18566,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionByTypeAndNameRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetEventsByArtifactIDsRequest.toObject(opt_includeInstance, this); }; @@ -12268,15 +18576,13 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.toObject = function * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetEventsByArtifactIDsRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - executionName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -12291,23 +18597,23 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.toObject = function(includeIn /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionByTypeAndNameRequest; - return proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetEventsByArtifactIDsRequest; + return proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12315,18 +18621,12 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader = var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addArtifactIds(values[i]); + } break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setExecutionName(value); - break; - case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -12344,9 +18644,9 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.deserializeBinaryFromReader = * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionByTypeAndNameRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetEventsByArtifactIDsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -12354,176 +18654,91 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.serializeBinary = f /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} message + * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetEventsByArtifactIDsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( - 1, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 4, - f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter - ); - } -}; - - -/** - * optional string type_name = 1; - * @return {string} - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional string type_version = 3; - * @return {string} - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 3, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 3, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 3) != null; + f = message.getArtifactIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( + 1, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } }; /** - * optional string execution_name = 2; - * @return {string} + * repeated int64 artifact_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getExecutionName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.getArtifactIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setExecutionName = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.setArtifactIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearExecutionName = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.addArtifactIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasExecutionName = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearArtifactIdsList = function() { + return this.setArtifactIdsList([]); }; /** - * optional TransactionOptions transaction_options = 4; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -12532,12 +18747,19 @@ proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.clearTransactionOpt * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionByTypeAndNameRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetEventsByArtifactIDsResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -12553,8 +18775,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionByTypeAndNameResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetEventsByArtifactIDsResponse.toObject(opt_includeInstance, this); }; @@ -12563,13 +18785,14 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.toObject = functio * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetEventsByArtifactIDsResponse.toObject = function(includeInstance, msg) { var f, obj = { - execution: (f = msg.getExecution()) && ml_metadata_proto_metadata_store_pb.Execution.toObject(includeInstance, f) + eventsList: jspb.Message.toObjectList(msg.getEventsList(), + ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance) }; if (includeInstance) { @@ -12583,23 +18806,23 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.toObject = function(includeI /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionByTypeAndNameResponse; - return proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetEventsByArtifactIDsResponse; + return proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12607,9 +18830,9 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.setExecution(value); + var value = new ml_metadata_proto_metadata_store_pb.Event; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); + msg.addEvents(value); break; default: reader.skipField(); @@ -12624,9 +18847,9 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.deserializeBinaryFromReader * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionByTypeAndNameResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetEventsByArtifactIDsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -12634,57 +18857,58 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.serializeBinary = /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} message + * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetEventsByArtifactIDsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecution(); - if (f != null) { - writer.writeMessage( + f = message.getEventsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter ); } }; /** - * optional Execution execution = 1; - * @return {?proto.ml_metadata.Execution} + * repeated Event events = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.getExecution = function() { - return /** @type{?proto.ml_metadata.Execution} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.getEventsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); }; /** - * @param {?proto.ml_metadata.Execution|undefined} value - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.setExecution = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.setEventsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionByTypeAndNameResponse} returns this + * @param {!proto.ml_metadata.Event=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Event} */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.clearExecution = function() { - return this.setExecution(undefined); +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.addEvents = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} returns this */ -proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.hasExecution = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.clearEventsList = function() { + return this.setEventsList([]); }; @@ -12694,7 +18918,7 @@ proto.ml_metadata.GetExecutionByTypeAndNameResponse.prototype.hasExecution = fun * @private {!Array} * @const */ -proto.ml_metadata.GetExecutionsByIDRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactTypesByIDRequest.repeatedFields_ = [1]; @@ -12711,8 +18935,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesByIDRequest.toObject(opt_includeInstance, this); }; @@ -12721,13 +18945,13 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { - executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -12742,23 +18966,23 @@ proto.ml_metadata.GetExecutionsByIDRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} */ -proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByIDRequest; - return proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesByIDRequest; + return proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionsByIDRequest} + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} */ -proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12768,7 +18992,7 @@ proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader = functio case 1: var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); for (var i = 0; i < values.length; i++) { - msg.addExecutionIds(values[i]); + msg.addTypeIds(values[i]); } break; case 2: @@ -12789,9 +19013,9 @@ proto.ml_metadata.GetExecutionsByIDRequest.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByIDRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -12799,13 +19023,13 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionsByIDRequest} message + * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByIDRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionIdsList(); + f = message.getTypeIdsList(); if (f.length > 0) { writer.writeRepeatedInt64( 1, @@ -12824,19 +19048,19 @@ proto.ml_metadata.GetExecutionsByIDRequest.serializeBinaryToWriter = function(me /** - * repeated int64 execution_ids = 1; + * repeated int64 type_ids = 1; * @return {!Array} */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.getExecutionIdsList = function() { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.getTypeIdsList = function() { return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.setExecutionIdsList = function(value) { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.setTypeIdsList = function(value) { return jspb.Message.setField(this, 1, value || []); }; @@ -12844,19 +19068,19 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.setExecutionIdsList = funct /** * @param {number} value * @param {number=} opt_index - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.addExecutionIds = function(value, opt_index) { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.addTypeIds = function(value, opt_index) { return 
jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearExecutionIdsList = function() { - return this.setExecutionIdsList([]); +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTypeIdsList = function() { + return this.setTypeIdsList([]); }; @@ -12864,7 +19088,7 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearExecutionIdsList = fun * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -12872,18 +19096,18 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.getTransactionOptions = fun /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByIDRequest} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -12892,7 +19116,7 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.clearTransactionOptions = f * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsByIDRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -12903,7 +19127,7 @@ proto.ml_metadata.GetExecutionsByIDRequest.prototype.hasTransactionOptions = fun * @private {!Array} * @const */ -proto.ml_metadata.GetExecutionsByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactTypesByIDResponse.repeatedFields_ = [1]; @@ -12920,8 +19144,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactTypesByIDResponse.toObject(opt_includeInstance, this); }; @@ -12930,14 +19154,14 @@ proto.ml_metadata.GetExecutionsByIDResponse.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByIDResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactTypesByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), - ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance) + artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), + ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) }; if (includeInstance) { @@ -12951,23 +19175,23 @@ proto.ml_metadata.GetExecutionsByIDResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsByIDResponse} + * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} */ -proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByIDResponse; - return proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactTypesByIDResponse; + return proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionsByIDResponse} + * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} */ -proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -12975,9 +19199,9 @@ proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader = functi var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.addExecutions(value); + var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); + msg.addArtifactTypes(value); break; default: reader.skipField(); @@ -12992,9 +19216,9 @@ proto.ml_metadata.GetExecutionsByIDResponse.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactTypesByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13002,62 +19226,69 @@ proto.ml_metadata.GetExecutionsByIDResponse.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionsByIDResponse} message + * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionsList(); + f = message.getArtifactTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter ); } }; /** - * repeated Execution executions = 1; - * @return {!Array} + * repeated ArtifactType artifact_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.getExecutionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.getArtifactTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionsByIDResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} returns this */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.setExecutionsList = function(value) { +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.setArtifactTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Execution=} opt_value + * @param {!proto.ml_metadata.ArtifactType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Execution} + * @return {!proto.ml_metadata.ArtifactType} */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.addExecutions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetExecutionsByIDResponse} returns this + * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} returns this */ -proto.ml_metadata.GetExecutionsByIDResponse.prototype.clearExecutionsList = function() { - return this.setExecutionsList([]); +proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.clearArtifactTypesList = function() { + return this.setArtifactTypesList([]); }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionTypesByIDRequest.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -13073,8 +19304,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesByIDRequest.toObject(opt_includeInstance, this); }; @@ -13083,14 +19314,13 @@ proto.ml_metadata.GetExecutionTypeRequest.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -13105,23 +19335,23 @@ proto.ml_metadata.GetExecutionTypeRequest.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionTypeRequest} + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} */ -proto.ml_metadata.GetExecutionTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypeRequest; - return proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesByIDRequest; + return proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionTypeRequest} + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} */ -proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -13129,14 +19359,12 @@ proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader = function var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addTypeIds(values[i]); + } break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 3: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -13154,9 +19382,9 @@ proto.ml_metadata.GetExecutionTypeRequest.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13164,30 +19392,23 @@ proto.ml_metadata.GetExecutionTypeRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionTypeRequest} message + * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeString( + f = message.getTypeIdsList(); + if (f.length > 0) { + writer.writeRepeatedInt64( 1, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 3, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -13196,101 +19417,66 @@ proto.ml_metadata.GetExecutionTypeRequest.serializeBinaryToWriter = function(mes /** - * optional string type_name = 1; - * @return {string} - */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.setTypeName = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this - */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTypeName = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTypeName = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional string type_version = 2; - * @return {string} + * repeated int64 type_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.getTypeIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.setTypeIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.addTypeIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. 
- * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.clearTypeIdsList = function() { + return this.setTypeIdsList([]); }; /** - * optional TransactionOptions transaction_options = 3; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionTypeRequest} returns this + * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -13299,12 +19485,19 @@ proto.ml_metadata.GetExecutionTypeRequest.prototype.clearTransactionOptions = fu * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetExecutionTypesByIDResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -13320,8 +19513,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionTypesByIDResponse.toObject(opt_includeInstance, this); }; @@ -13330,13 +19523,14 @@ proto.ml_metadata.GetExecutionTypeResponse.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypeResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionTypesByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionType: (f = msg.getExecutionType()) && ml_metadata_proto_metadata_store_pb.ExecutionType.toObject(includeInstance, f) + executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), + ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance) }; if (includeInstance) { @@ -13350,23 +19544,23 @@ proto.ml_metadata.GetExecutionTypeResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionTypeResponse} + * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} */ -proto.ml_metadata.GetExecutionTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypeResponse; - return proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionTypesByIDResponse; + return proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionTypeResponse} + * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} */ -proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -13376,7 +19570,7 @@ proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader = functio case 1: var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); - msg.setExecutionType(value); + msg.addExecutionTypes(value); break; default: reader.skipField(); @@ -13391,9 +19585,9 @@ proto.ml_metadata.GetExecutionTypeResponse.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionTypesByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13401,15 +19595,15 @@ proto.ml_metadata.GetExecutionTypeResponse.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionTypeResponse} message + * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionType(); - if (f != null) { - writer.writeMessage( + f = message.getExecutionTypesList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter @@ -13419,50 +19613,44 @@ proto.ml_metadata.GetExecutionTypeResponse.serializeBinaryToWriter = function(me /** - * optional ExecutionType execution_type = 1; - * @return {?proto.ml_metadata.ExecutionType} + * repeated ExecutionType execution_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.getExecutionType = function() { - return /** @type{?proto.ml_metadata.ExecutionType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.getExecutionTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); }; /** - * @param {?proto.ml_metadata.ExecutionType|undefined} value - * @return {!proto.ml_metadata.GetExecutionTypeResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} returns this */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.setExecutionType = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.setExecutionTypesList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionTypeResponse} returns this + * @param {!proto.ml_metadata.ExecutionType=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.ExecutionType} */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.clearExecutionType = function() { - return this.setExecutionType(undefined); +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.addExecutionTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ExecutionType, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} returns this */ -proto.ml_metadata.GetExecutionTypeResponse.prototype.hasExecutionType = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.clearExecutionTypesList = function() { + return this.setExecutionTypesList([]); }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -13478,8 +19666,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetEventsByExecutionIDsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypeRequest.toObject(opt_includeInstance, this); }; @@ -13488,13 +19676,14 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.toObject = function(o * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypeRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { - executionIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -13509,23 +19698,23 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.toObject = function(includeInst /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} + * @return {!proto.ml_metadata.GetContextTypeRequest} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetEventsByExecutionIDsRequest; - return proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypeRequest; + return proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} + * @return {!proto.ml_metadata.GetContextTypeRequest} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -13533,12 +19722,14 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader = f var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addExecutionIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); break; case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 3: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -13556,9 +19747,9 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.deserializeBinaryFromReader = f * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypeRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetEventsByExecutionIDsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13566,23 +19757,30 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.serializeBinary = fun /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetEventsByExecutionIDsRequest} message + * @param {!proto.ml_metadata.GetContextTypeRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( 1, f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f + ); + } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 2, + 3, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -13591,66 +19789,101 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.serializeBinaryToWriter = funct /** - * repeated int64 execution_ids = 1; - * @return {!Array} + * optional string type_name = 1; + * @return {string} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.getExecutionIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetContextTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.setExecutionIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); +proto.ml_metadata.GetContextTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.addExecutionIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.clearExecutionIdsList = function() { - return this.setExecutionIdsList([]); +proto.ml_metadata.GetContextTypeRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional TransactionOptions transaction_options = 2; + * optional string type_version = 2; + * @return {string} + */ +proto.ml_metadata.GetContextTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + */ +proto.ml_metadata.GetContextTypeRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + */ +proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 3; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextTypeRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetContextTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetEventsByExecutionIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypeRequest} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextTypeRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -13659,19 +19892,12 @@ proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.clearTransactionOptio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetEventsByExecutionIDsRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetContextTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; -/** - * List of repeated fields within this message type. 
- * @private {!Array} - * @const - */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -13687,8 +19913,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetEventsByExecutionIDsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypeResponse.toObject(opt_includeInstance, this); }; @@ -13697,14 +19923,13 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.toObject = function( * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypeResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - eventsList: jspb.Message.toObjectList(msg.getEventsList(), - ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance) + contextType: (f = msg.getContextType()) && ml_metadata_proto_metadata_store_pb.ContextType.toObject(includeInstance, f) }; if (includeInstance) { @@ -13718,23 +19943,23 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.toObject = function(includeIns /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} + * @return {!proto.ml_metadata.GetContextTypeResponse} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetEventsByExecutionIDsResponse; - return proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypeResponse; + return proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} + * @return {!proto.ml_metadata.GetContextTypeResponse} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -13742,9 +19967,9 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader = var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Event; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); - msg.addEvents(value); + var value = new ml_metadata_proto_metadata_store_pb.ContextType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); + msg.setContextType(value); break; default: reader.skipField(); @@ -13759,9 +19984,9 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.deserializeBinaryFromReader = * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetEventsByExecutionIDsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13769,58 +19994,57 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.serializeBinary = fu /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetEventsByExecutionIDsResponse} message + * @param {!proto.ml_metadata.GetContextTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getEventsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( + f = message.getContextType(); + if (f != null) { + writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter ); } }; /** - * repeated Event events = 1; - * @return {!Array} + * optional ContextType context_type = 1; + * @return {?proto.ml_metadata.ContextType} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.getEventsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); +proto.ml_metadata.GetContextTypeResponse.prototype.getContextType = function() { + return /** @type{?proto.ml_metadata.ContextType} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} returns this + * @param {?proto.ml_metadata.ContextType|undefined} value + * @return {!proto.ml_metadata.GetContextTypeResponse} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.setEventsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); +proto.ml_metadata.GetContextTypeResponse.prototype.setContextType = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Event=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Event} + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetContextTypeResponse} returns this */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.addEvents = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); +proto.ml_metadata.GetContextTypeResponse.prototype.clearContextType = function() { + return this.setContextType(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetEventsByExecutionIDsResponse} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.clearEventsList = function() { - return this.setEventsList([]); +proto.ml_metadata.GetContextTypeResponse.prototype.hasContextType = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -13830,7 +20054,7 @@ proto.ml_metadata.GetEventsByExecutionIDsResponse.prototype.clearEventsList = fu * @private {!Array} * @const */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetContextTypesByIDRequest.repeatedFields_ = [1]; @@ -13847,8 +20071,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetEventsByArtifactIDsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesByIDRequest.toObject(opt_includeInstance, this); }; @@ -13857,13 +20081,13 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -13878,23 +20102,23 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetEventsByArtifactIDsRequest; - return proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesByIDRequest; + return proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -13904,7 +20128,7 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader = fu case 1: var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); for (var i = 0; i < values.length; i++) { - msg.addArtifactIds(values[i]); + msg.addTypeIds(values[i]); } break; case 2: @@ -13925,9 +20149,9 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetEventsByArtifactIDsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -13935,13 +20159,13 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetEventsByArtifactIDsRequest} message + * @param {!proto.ml_metadata.GetContextTypesByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactIdsList(); + f = message.getTypeIdsList(); if (f.length > 0) { writer.writeRepeatedInt64( 1, @@ -13960,19 +20184,19 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.serializeBinaryToWriter = functi /** - * repeated int64 artifact_ids = 1; + * repeated int64 type_ids = 1; * @return {!Array} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.getArtifactIdsList = function() { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.getTypeIdsList = function() { return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** * @param {!Array} value - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.setArtifactIdsList = function(value) { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.setTypeIdsList = function(value) { return jspb.Message.setField(this, 1, value || []); }; @@ -13980,19 +20204,19 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.setArtifactIdsList = f /** * @param {number} value * @param {number=} opt_index - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.addArtifactIds = function(value, opt_index) { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.addTypeIds = function(value, 
opt_index) { return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearArtifactIdsList = function() { - return this.setArtifactIdsList([]); +proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTypeIdsList = function() { + return this.setTypeIdsList([]); }; @@ -14000,7 +20224,7 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearArtifactIdsList = * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -14008,18 +20232,18 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.getTransactionOptions /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetEventsByArtifactIDsRequest} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -14028,7 +20252,7 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.clearTransactionOption * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetContextTypesByIDRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -14039,7 +20263,7 @@ proto.ml_metadata.GetEventsByArtifactIDsRequest.prototype.hasTransactionOptions * @private {!Array} * @const */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextTypesByIDResponse.repeatedFields_ = [1]; @@ -14056,8 +20280,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetEventsByArtifactIDsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextTypesByIDResponse.toObject(opt_includeInstance, this); }; @@ -14066,14 +20290,14 @@ proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.toObject = function(o * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextTypesByIDResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextTypesByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - eventsList: jspb.Message.toObjectList(msg.getEventsList(), - ml_metadata_proto_metadata_store_pb.Event.toObject, includeInstance) + contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), + ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance) }; if (includeInstance) { @@ -14087,23 +20311,23 @@ proto.ml_metadata.GetEventsByArtifactIDsResponse.toObject = function(includeInst /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} + * @return {!proto.ml_metadata.GetContextTypesByIDResponse} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetEventsByArtifactIDsResponse; - return proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextTypesByIDResponse; + return proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextTypesByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} + * @return {!proto.ml_metadata.GetContextTypesByIDResponse} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -14111,9 +20335,9 @@ proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader = f var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Event; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Event.deserializeBinaryFromReader); - msg.addEvents(value); + var value = new ml_metadata_proto_metadata_store_pb.ContextType; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); + msg.addContextTypes(value); break; default: reader.skipField(); @@ -14128,9 +20352,9 @@ proto.ml_metadata.GetEventsByArtifactIDsResponse.deserializeBinaryFromReader = f * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextTypesByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetEventsByArtifactIDsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextTypesByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -14138,69 +20362,62 @@ proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.serializeBinary = fun /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetEventsByArtifactIDsResponse} message + * @param {!proto.ml_metadata.GetContextTypesByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getEventsList(); + f = message.getContextTypesList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Event.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter ); } }; /** - * repeated Event events = 1; - * @return {!Array} + * repeated ContextType context_types = 1; + * @return {!Array} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.getEventsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Event, 1)); +proto.ml_metadata.GetContextTypesByIDResponse.prototype.getContextTypesList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextTypesByIDResponse} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.setEventsList = function(value) { +proto.ml_metadata.GetContextTypesByIDResponse.prototype.setContextTypesList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Event=} opt_value + * @param {!proto.ml_metadata.ContextType=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Event} + * @return {!proto.ml_metadata.ContextType} */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.addEvents = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Event, opt_index); +proto.ml_metadata.GetContextTypesByIDResponse.prototype.addContextTypes = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ContextType, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetEventsByArtifactIDsResponse} returns this + * @return {!proto.ml_metadata.GetContextTypesByIDResponse} returns this */ -proto.ml_metadata.GetEventsByArtifactIDsResponse.prototype.clearEventsList = function() { - return this.setEventsList([]); +proto.ml_metadata.GetContextTypesByIDResponse.prototype.clearContextTypesList = function() { + return this.setContextTypesList([]); }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetArtifactTypesByIDRequest.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -14216,8 +20433,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypesByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsRequest.toObject(opt_includeInstance, this); }; @@ -14226,13 +20443,13 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.toObject = function(opt_ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -14247,23 +20464,23 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.toObject = function(includeInstanc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsRequest} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypesByIDRequest; - return proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsRequest; + return proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsRequest} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -14271,10 +20488,9 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader = func var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addTypeIds(values[i]); - } + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -14294,9 +20510,9 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.deserializeBinaryFromReader = func * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypesByIDRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -14304,17 +20520,18 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.serializeBinary = functi /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactTypesByIDRequest} message + * @param {!proto.ml_metadata.GetContextsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getTypeIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( + f = message.getOptions(); + if (f != null) { + writer.writeMessage( 1, - f + f, + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter ); } f = message.getTransactionOptions(); @@ -14329,39 +20546,39 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.serializeBinaryToWriter = function /** - * repeated int64 type_ids = 1; - * @return {!Array} + * optional ListOperationOptions options = 1; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.getTypeIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetContextsRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this - */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.setTypeIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetContextsRequest} returns this +*/ +proto.ml_metadata.GetContextsRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetContextsRequest} returns this */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.addTypeIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetContextsRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTypeIdsList = function() { - return this.setTypeIdsList([]); +proto.ml_metadata.GetContextsRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -14369,7 +20586,7 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTypeIdsList = funct * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -14377,18 +20594,18 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.getTransactionOptions = /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsRequest} returns this */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetContextsRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsRequest} returns this */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -14397,7 +20614,7 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.clearTransactionOptions * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetContextsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -14408,7 +20625,7 @@ proto.ml_metadata.GetArtifactTypesByIDRequest.prototype.hasTransactionOptions = * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactTypesByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsResponse.repeatedFields_ = [1]; @@ -14425,8 +20642,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactTypesByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsResponse.toObject(opt_includeInstance, this); }; @@ -14435,14 +20652,15 @@ proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.toObject = function(opt * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactTypesList: jspb.Message.toObjectList(msg.getArtifactTypesList(), - ml_metadata_proto_metadata_store_pb.ArtifactType.toObject, includeInstance) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -14456,23 +20674,23 @@ proto.ml_metadata.GetArtifactTypesByIDResponse.toObject = function(includeInstan /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsResponse} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactTypesByIDResponse; - return proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsResponse; + return proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsResponse} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -14480,9 +20698,13 @@ proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader = fun var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ArtifactType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ArtifactType.deserializeBinaryFromReader); - msg.addArtifactTypes(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -14497,9 +20719,9 @@ proto.ml_metadata.GetArtifactTypesByIDResponse.deserializeBinaryFromReader = fun * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactTypesByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -14507,68 +20729,104 @@ proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.serializeBinary = funct /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetArtifactTypesByIDResponse} message + * @param {!proto.ml_metadata.GetContextsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactTypesList(); + f = message.getContextsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ArtifactType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f ); } }; /** - * repeated ArtifactType artifact_types = 1; - * @return {!Array} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.getArtifactTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ArtifactType, 1)); +proto.ml_metadata.GetContextsResponse.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextsResponse} returns this */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.setArtifactTypesList = function(value) { +proto.ml_metadata.GetContextsResponse.prototype.setContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ArtifactType=} opt_value + * @param {!proto.ml_metadata.Context=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.ArtifactType} + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.addArtifactTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ArtifactType, opt_index); +proto.ml_metadata.GetContextsResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactTypesByIDResponse} returns this + * @return {!proto.ml_metadata.GetContextsResponse} returns this */ -proto.ml_metadata.GetArtifactTypesByIDResponse.prototype.clearArtifactTypesList = function() { - return this.setArtifactTypesList([]); +proto.ml_metadata.GetContextsResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); +}; + + +/** + * optional string next_page_token = 2; + * @return {string} + */ +proto.ml_metadata.GetContextsResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; +/** + * @param {string} value + * @return {!proto.ml_metadata.GetContextsResponse} returns this + */ +proto.ml_metadata.GetContextsResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); +}; + /** - * List of repeated fields within this message type. - * @private {!Array} - * @const + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextsResponse} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextsResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; +}; + + @@ -14585,8 +20843,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypesByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByTypeRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByTypeRequest.toObject(opt_includeInstance, this); }; @@ -14595,13 +20853,15 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.toObject = function(opt * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByTypeRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByTypeRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -14616,23 +20876,23 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.toObject = function(includeInstan /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsByTypeRequest} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByTypeRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypesByIDRequest; - return proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByTypeRequest; + return proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByTypeRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsByTypeRequest} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -14640,12 +20900,19 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader = fun var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addTypeIds(values[i]); - } + var value = /** @type {string} */ (reader.readString()); + msg.setTypeName(value); break; case 2: + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 3: + var value = /** @type {string} */ (reader.readString()); + msg.setTypeVersion(value); + break; + case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -14660,104 +20927,191 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.deserializeBinaryFromReader = fun /** - * Serializes the message to binary data (in protobuf wire format). - * @return {!Uint8Array} + * Serializes the message to binary data (in protobuf wire format). + * @return {!Uint8Array} + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.serializeBinary = function() { + var writer = new jspb.BinaryWriter(); + proto.ml_metadata.GetContextsByTypeRequest.serializeBinaryToWriter(this, writer); + return writer.getResultBuffer(); +}; + + +/** + * Serializes the given message to binary data (in protobuf wire + * format), writing to the given BinaryWriter. 
+ * @param {!proto.ml_metadata.GetContextsByTypeRequest} message + * @param {!jspb.BinaryWriter} writer + * @suppress {unusedLocalVariables} f is only used for nested messages + */ +proto.ml_metadata.GetContextsByTypeRequest.serializeBinaryToWriter = function(message, writer) { + var f = undefined; + f = /** @type {string} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeString( + 1, + f + ); + } + f = message.getOptions(); + if (f != null) { + writer.writeMessage( + 2, + f, + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 4, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } +}; + + +/** + * optional string type_name = 1; + * @return {string} + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.getTypeName = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.setTypeName = function(value) { + return jspb.Message.setField(this, 1, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTypeName = function() { + return jspb.Message.setField(this, 1, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTypeName = function() { + return jspb.Message.getField(this, 1) != null; +}; + + +/** + * optional ListOperationOptions options = 2; + * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.serializeBinary = function() { - var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypesByIDRequest.serializeBinaryToWriter(this, writer); - return writer.getResultBuffer(); +proto.ml_metadata.GetContextsByTypeRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 2)); }; /** - * Serializes the given message to binary data (in protobuf wire - * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionTypesByIDRequest} message - * @param {!jspb.BinaryWriter} writer - * @suppress {unusedLocalVariables} f is only used for nested messages + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this +*/ +proto.ml_metadata.GetContextsByTypeRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { - var f = undefined; - f = message.getTypeIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( - 1, - f - ); - } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 2, - f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter - ); - } +proto.ml_metadata.GetContextsByTypeRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); }; /** - * repeated int64 type_ids = 1; - * @return {!Array} + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.getTypeIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetContextsByTypeRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 2) != null; }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this + * optional string type_version = 3; + * @return {string} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.setTypeIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); +proto.ml_metadata.GetContextsByTypeRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this + * @param {string} value + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.addTypeIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetContextsByTypeRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 3, value); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.clearTypeIdsList = function() { - return this.setTypeIdsList([]); +proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 3, undefined); }; /** - * optional TransactionOptions transaction_options = 2; + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 4; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsByTypeRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetContextsByTypeRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -14766,8 +21120,8 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.clearTransactionOptions * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 4) != null; }; @@ -14777,7 +21131,7 @@ proto.ml_metadata.GetExecutionTypesByIDRequest.prototype.hasTransactionOptions = * @private {!Array} * @const */ -proto.ml_metadata.GetExecutionTypesByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsByTypeResponse.repeatedFields_ = [1]; @@ -14794,8 +21148,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionTypesByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByTypeResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByTypeResponse.toObject(opt_includeInstance, this); }; @@ -14804,14 +21158,15 @@ proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByTypeResponse} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByTypeResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionTypesList: jspb.Message.toObjectList(msg.getExecutionTypesList(), - ml_metadata_proto_metadata_store_pb.ExecutionType.toObject, includeInstance) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -14825,23 +21180,23 @@ proto.ml_metadata.GetExecutionTypesByIDResponse.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsByTypeResponse} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByTypeResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionTypesByIDResponse; - return proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByTypeResponse; + return proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByTypeResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsByTypeResponse} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -14849,9 +21204,13 @@ proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader = fu var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ExecutionType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ExecutionType.deserializeBinaryFromReader); - msg.addExecutionTypes(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -14866,9 +21225,9 @@ proto.ml_metadata.GetExecutionTypesByIDResponse.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByTypeResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionTypesByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -14876,58 +21235,101 @@ proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetExecutionTypesByIDResponse} message + * @param {!proto.ml_metadata.GetContextsByTypeResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionTypesList(); + f = message.getContextsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ExecutionType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f ); } }; /** - * repeated ExecutionType execution_types = 1; - * @return {!Array} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.getExecutionTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ExecutionType, 1)); +proto.ml_metadata.GetContextsByTypeResponse.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.setExecutionTypesList = function(value) { +proto.ml_metadata.GetContextsByTypeResponse.prototype.setContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ExecutionType=} opt_value + * @param {!proto.ml_metadata.Context=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.ExecutionType} + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.addExecutionTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ExecutionType, opt_index); +proto.ml_metadata.GetContextsByTypeResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetExecutionTypesByIDResponse} returns this + * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this */ -proto.ml_metadata.GetExecutionTypesByIDResponse.prototype.clearExecutionTypesList = function() { - return this.setExecutionTypesList([]); +proto.ml_metadata.GetContextsByTypeResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); +}; + + +/** + * optional string next_page_token = 2; + * @return {string} + */ +proto.ml_metadata.GetContextsByTypeResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this + */ +proto.ml_metadata.GetContextsByTypeResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this + */ +proto.ml_metadata.GetContextsByTypeResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextsByTypeResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -14947,8 +21349,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextByTypeAndNameRequest.toObject(opt_includeInstance, this); }; @@ -14957,14 +21359,15 @@ proto.ml_metadata.GetContextTypeRequest.prototype.toObject = function(opt_includ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextByTypeAndNameRequest.toObject = function(includeInstance, msg) { var f, obj = { typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + contextName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -14979,23 +21382,23 @@ proto.ml_metadata.GetContextTypeRequest.toObject = function(includeInstance, msg /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetContextTypeRequest} + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} */ -proto.ml_metadata.GetContextTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypeRequest; - return proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextByTypeAndNameRequest; + return proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextTypeRequest} + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} */ -proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15006,11 +21409,15 @@ proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader = function(m var value = /** @type {string} */ (reader.readString()); msg.setTypeName(value); break; - case 2: + case 3: var value = /** @type {string} */ (reader.readString()); msg.setTypeVersion(value); break; - case 3: + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setContextName(value); + break; + case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -15028,9 +21435,9 @@ proto.ml_metadata.GetContextTypeRequest.deserializeBinaryFromReader = function(m * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextByTypeAndNameRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -15038,11 +21445,11 @@ proto.ml_metadata.GetContextTypeRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextTypeRequest} message + * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = /** @type {string} */ (jspb.Message.getField(message, 1)); if (f != null) { @@ -15051,6 +21458,13 @@ proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter = function(messa f ); } + f = /** @type {string} */ (jspb.Message.getField(message, 3)); + if (f != null) { + writer.writeString( + 3, + f + ); + } f = /** @type {string} */ (jspb.Message.getField(message, 2)); if (f != null) { writer.writeString( @@ -15061,7 +21475,7 @@ proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter = function(messa f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 3, + 4, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -15073,25 +21487,25 @@ proto.ml_metadata.GetContextTypeRequest.serializeBinaryToWriter = function(messa * optional string type_name = 1; * @return {string} */ -proto.ml_metadata.GetContextTypeRequest.prototype.getTypeName = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTypeName = function() { return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); }; /** * @param {string} value - * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.setTypeName = function(value) { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTypeName = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeName = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTypeName = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -15100,34 +21514,70 @@ proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeName = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextTypeRequest.prototype.hasTypeName = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTypeName = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional string type_version = 2; + * optional string type_version = 3; * @return {string} */ -proto.ml_metadata.GetContextTypeRequest.prototype.getTypeVersion = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTypeVersion = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + */ +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTypeVersion = function(value) { + return jspb.Message.setField(this, 3, value); +}; + + +/** + * Clears the field making it undefined. 
+ * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + */ +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTypeVersion = function() { + return jspb.Message.setField(this, 3, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTypeVersion = function() { + return jspb.Message.getField(this, 3) != null; +}; + + +/** + * optional string context_name = 2; + * @return {string} + */ +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getContextName = function() { return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** * @param {string} value - * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.setTypeVersion = function(value) { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setContextName = function(value) { return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeVersion = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearContextName = function() { return jspb.Message.setField(this, 2, undefined); }; @@ -15136,35 +21586,35 @@ proto.ml_metadata.GetContextTypeRequest.prototype.clearTypeVersion = function() * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextTypeRequest.prototype.hasTypeVersion = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasContextName = function() { return jspb.Message.getField(this, 2) != null; }; /** - * optional TransactionOptions transaction_options = 3; + * optional TransactionOptions transaction_options = 4; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextTypeRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 4, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetContextTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this */ -proto.ml_metadata.GetContextTypeRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -15173,8 +21623,8 @@ proto.ml_metadata.GetContextTypeRequest.prototype.clearTransactionOptions = func * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 4) != null; }; @@ -15194,8 +21644,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextByTypeAndNameResponse.toObject(opt_includeInstance, this); }; @@ -15204,13 +21654,13 @@ proto.ml_metadata.GetContextTypeResponse.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextByTypeAndNameResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextType: (f = msg.getContextType()) && ml_metadata_proto_metadata_store_pb.ContextType.toObject(includeInstance, f) + context: (f = msg.getContext()) && ml_metadata_proto_metadata_store_pb.Context.toObject(includeInstance, f) }; if (includeInstance) { @@ -15224,23 +21674,23 @@ proto.ml_metadata.GetContextTypeResponse.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextTypeResponse} + * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} */ -proto.ml_metadata.GetContextTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypeResponse; - return proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextByTypeAndNameResponse; + return proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} msg The message object to deserialize into. 
* @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextTypeResponse} + * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} */ -proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15248,9 +21698,9 @@ proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ContextType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); - msg.setContextType(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.setContext(value); break; default: reader.skipField(); @@ -15265,9 +21715,9 @@ proto.ml_metadata.GetContextTypeResponse.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextByTypeAndNameResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -15275,48 +21725,48 @@ proto.ml_metadata.GetContextTypeResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextTypeResponse} message + * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextType(); + f = message.getContext(); if (f != null) { writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } }; /** - * optional ContextType context_type = 1; - * @return {?proto.ml_metadata.ContextType} + * optional Context context = 1; + * @return {?proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextTypeResponse.prototype.getContextType = function() { - return /** @type{?proto.ml_metadata.ContextType} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.getContext = function() { + return /** @type{?proto.ml_metadata.Context} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {?proto.ml_metadata.ContextType|undefined} value - * @return {!proto.ml_metadata.GetContextTypeResponse} returns this + * @param {?proto.ml_metadata.Context|undefined} value + * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} returns this */ -proto.ml_metadata.GetContextTypeResponse.prototype.setContextType = function(value) { +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.setContext = function(value) { return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextTypeResponse} returns this + * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} returns this */ -proto.ml_metadata.GetContextTypeResponse.prototype.clearContextType = function() { - return this.setContextType(undefined); +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.clearContext = function() { + return this.setContext(undefined); }; @@ -15324,7 +21774,7 @@ proto.ml_metadata.GetContextTypeResponse.prototype.clearContextType = function() * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetContextTypeResponse.prototype.hasContextType = function() { +proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.hasContext = function() { return jspb.Message.getField(this, 1) != null; }; @@ -15335,7 +21785,7 @@ proto.ml_metadata.GetContextTypeResponse.prototype.hasContextType = function() { * @private {!Array} * @const */ -proto.ml_metadata.GetContextTypesByIDRequest.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsByIDRequest.repeatedFields_ = [1]; @@ -15352,8 +21802,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypesByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByIDRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByIDRequest.toObject(opt_includeInstance, this); }; @@ -15362,13 +21812,13 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.toObject = function(opt_i * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypesByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByIDRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByIDRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -15383,23 +21833,23 @@ proto.ml_metadata.GetContextTypesByIDRequest.toObject = function(includeInstance /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsByIDRequest} */ -proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByIDRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypesByIDRequest; - return proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByIDRequest; + return proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypesByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByIDRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextTypesByIDRequest} + * @return {!proto.ml_metadata.GetContextsByIDRequest} */ -proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15409,7 +21859,7 @@ proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader = funct case 1: var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); for (var i = 0; i < values.length; i++) { - msg.addTypeIds(values[i]); + msg.addContextIds(values[i]); } break; case 2: @@ -15430,9 +21880,9 @@ proto.ml_metadata.GetContextTypesByIDRequest.deserializeBinaryFromReader = funct * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByIDRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypesByIDRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByIDRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -15440,13 +21890,13 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.serializeBinary = functio /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextTypesByIDRequest} message + * @param {!proto.ml_metadata.GetContextsByIDRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesByIDRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByIDRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getTypeIdsList(); + f = message.getContextIdsList(); if (f.length > 0) { writer.writeRepeatedInt64( 1, @@ -15465,19 +21915,19 @@ proto.ml_metadata.GetContextTypesByIDRequest.serializeBinaryToWriter = function( /** - * repeated int64 type_ids = 1; + * repeated int64 context_ids = 1; * @return {!Array} */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.getTypeIdsList = function() { +proto.ml_metadata.GetContextsByIDRequest.prototype.getContextIdsList = function() { return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** * @param {!Array} value - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.setTypeIdsList = function(value) { +proto.ml_metadata.GetContextsByIDRequest.prototype.setContextIdsList = function(value) { return jspb.Message.setField(this, 1, value || []); }; @@ -15485,19 +21935,19 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.setTypeIdsList = function /** * @param {number} value * @param {number=} opt_index - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.addTypeIds = function(value, opt_index) { +proto.ml_metadata.GetContextsByIDRequest.prototype.addContextIds = function(value, opt_index) { return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; 
/** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTypeIdsList = function() { - return this.setTypeIdsList([]); +proto.ml_metadata.GetContextsByIDRequest.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); }; @@ -15505,7 +21955,7 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTypeIdsList = functi * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsByIDRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -15513,18 +21963,18 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.getTransactionOptions = f /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetContextsByIDRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextTypesByIDRequest} returns this + * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsByIDRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -15533,7 +21983,7 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.clearTransactionOptions = * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextTypesByIDRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetContextsByIDRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -15544,7 +21994,7 @@ proto.ml_metadata.GetContextTypesByIDRequest.prototype.hasTransactionOptions = f * @private {!Array} * @const */ -proto.ml_metadata.GetContextTypesByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsByIDResponse.repeatedFields_ = [1]; @@ -15561,8 +22011,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextTypesByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByIDResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByIDResponse.toObject(opt_includeInstance, this); }; @@ -15571,14 +22021,14 @@ proto.ml_metadata.GetContextTypesByIDResponse.prototype.toObject = function(opt_ * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextTypesByIDResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetContextsByIDResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByIDResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextTypesList: jspb.Message.toObjectList(msg.getContextTypesList(), - ml_metadata_proto_metadata_store_pb.ContextType.toObject, includeInstance) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -15592,23 +22042,23 @@ proto.ml_metadata.GetContextTypesByIDResponse.toObject = function(includeInstanc /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsByIDResponse} */ -proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByIDResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextTypesByIDResponse; - return proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByIDResponse; + return proto.ml_metadata.GetContextsByIDResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextTypesByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByIDResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextTypesByIDResponse} + * @return {!proto.ml_metadata.GetContextsByIDResponse} */ -proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15616,9 +22066,9 @@ proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader = func var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ContextType; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ContextType.deserializeBinaryFromReader); - msg.addContextTypes(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addContexts(value); break; default: reader.skipField(); @@ -15633,9 +22083,9 @@ proto.ml_metadata.GetContextTypesByIDResponse.deserializeBinaryFromReader = func * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByIDResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextTypesByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByIDResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -15643,58 +22093,58 @@ proto.ml_metadata.GetContextTypesByIDResponse.prototype.serializeBinary = functi /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextTypesByIDResponse} message + * @param {!proto.ml_metadata.GetContextsByIDResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextTypesByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByIDResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextTypesList(); + f = message.getContextsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.ContextType.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } }; /** - * repeated ContextType context_types = 1; - * @return {!Array} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.getContextTypesList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.ContextType, 1)); +proto.ml_metadata.GetContextsByIDResponse.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetContextTypesByIDResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetContextsByIDResponse} returns this */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.setContextTypesList = function(value) { +proto.ml_metadata.GetContextsByIDResponse.prototype.setContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.ContextType=} opt_value + * @param {!proto.ml_metadata.Context=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.ContextType} + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.addContextTypes = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.ContextType, opt_index); +proto.ml_metadata.GetContextsByIDResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetContextTypesByIDResponse} returns this + * @return {!proto.ml_metadata.GetContextsByIDResponse} returns this */ -proto.ml_metadata.GetContextTypesByIDResponse.prototype.clearContextTypesList = function() { - return this.setContextTypesList([]); +proto.ml_metadata.GetContextsByIDResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); }; @@ -15714,8 +22164,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByArtifactRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByArtifactRequest.toObject(opt_includeInstance, this); }; @@ -15724,13 +22174,13 @@ proto.ml_metadata.GetContextsRequest.prototype.toObject = function(opt_includeIn * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByArtifactRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByArtifactRequest.toObject = function(includeInstance, msg) { var f, obj = { - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + artifactId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -15745,23 +22195,23 @@ proto.ml_metadata.GetContextsRequest.toObject = function(includeInstance, msg) { /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsRequest} + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} */ -proto.ml_metadata.GetContextsRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsRequest; - return proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByArtifactRequest; + return proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByArtifactRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextsRequest} + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} */ -proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15769,9 +22219,8 @@ proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader = function(msg, var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); - msg.setOptions(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setArtifactId(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -15791,9 +22240,9 @@ proto.ml_metadata.GetContextsRequest.deserializeBinaryFromReader = function(msg, * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByArtifactRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -15801,18 +22250,17 @@ proto.ml_metadata.GetContextsRequest.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsRequest} message + * @param {!proto.ml_metadata.GetContextsByArtifactRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByArtifactRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getOptions(); + f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeMessage( + writer.writeInt64( 1, - f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + f ); } f = message.getTransactionOptions(); @@ -15827,30 +22275,29 @@ proto.ml_metadata.GetContextsRequest.serializeBinaryToWriter = function(message, /** - * optional ListOperationOptions options = 1; - * @return {?proto.ml_metadata.ListOperationOptions} + * optional int64 artifact_id = 1; + * @return {number} */ -proto.ml_metadata.GetContextsRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 1)); +proto.ml_metadata.GetContextsByArtifactRequest.prototype.getArtifactId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsRequest} returns this -*/ -proto.ml_metadata.GetContextsRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 1, value); + * @param {number} value + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this + 
*/ +proto.ml_metadata.GetContextsByArtifactRequest.prototype.setArtifactId = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this */ -proto.ml_metadata.GetContextsRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetContextsByArtifactRequest.prototype.clearArtifactId = function() { + return jspb.Message.setField(this, 1, undefined); }; @@ -15858,7 +22305,7 @@ proto.ml_metadata.GetContextsRequest.prototype.clearOptions = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextsRequest.prototype.hasOptions = function() { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.hasArtifactId = function() { return jspb.Message.getField(this, 1) != null; }; @@ -15867,7 +22314,7 @@ proto.ml_metadata.GetContextsRequest.prototype.hasOptions = function() { * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextsRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -15875,18 +22322,18 @@ proto.ml_metadata.GetContextsRequest.prototype.getTransactionOptions = function( /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsRequest} returns this + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this */ -proto.ml_metadata.GetContextsRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsRequest} returns this + * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this */ -proto.ml_metadata.GetContextsRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -15895,7 +22342,7 @@ proto.ml_metadata.GetContextsRequest.prototype.clearTransactionOptions = functio * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetContextsRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetContextsByArtifactRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -15906,7 +22353,7 @@ proto.ml_metadata.GetContextsRequest.prototype.hasTransactionOptions = function( * @private {!Array} * @const */ -proto.ml_metadata.GetContextsResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsByArtifactResponse.repeatedFields_ = [1]; @@ -15923,8 +22370,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByArtifactResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByArtifactResponse.toObject(opt_includeInstance, this); }; @@ -15933,15 +22380,14 @@ proto.ml_metadata.GetContextsResponse.prototype.toObject = function(opt_includeI * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByArtifactResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByArtifactResponse.toObject = function(includeInstance, msg) { var f, obj = { contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -15955,23 +22401,23 @@ proto.ml_metadata.GetContextsResponse.toObject = function(includeInstance, msg) /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsResponse} + * @return {!proto.ml_metadata.GetContextsByArtifactResponse} */ -proto.ml_metadata.GetContextsResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsResponse; - return proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByArtifactResponse; + return proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByArtifactResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextsResponse} + * @return {!proto.ml_metadata.GetContextsByArtifactResponse} */ -proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -15983,10 +22429,6 @@ proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader = function(msg reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); msg.addContexts(value); break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); - break; default: reader.skipField(); break; @@ -16000,9 +22442,9 @@ proto.ml_metadata.GetContextsResponse.deserializeBinaryFromReader = function(msg * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByArtifactResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByArtifactResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -16010,11 +22452,11 @@ proto.ml_metadata.GetContextsResponse.prototype.serializeBinary = function() { /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsResponse} message + * @param {!proto.ml_metadata.GetContextsByArtifactResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByArtifactResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = message.getContextsList(); if (f.length > 0) { @@ -16024,13 +22466,6 @@ proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter = function(message ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } }; @@ -16038,7 +22473,7 @@ proto.ml_metadata.GetContextsResponse.serializeBinaryToWriter = function(message * repeated Context contexts = 1; * @return {!Array} */ -proto.ml_metadata.GetContextsResponse.prototype.getContextsList = function() { +proto.ml_metadata.GetContextsByArtifactResponse.prototype.getContextsList = function() { return /** @type{!Array} */ ( jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; @@ -16046,9 +22481,9 @@ proto.ml_metadata.GetContextsResponse.prototype.getContextsList = function() { /** * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsResponse} returns this + * @return {!proto.ml_metadata.GetContextsByArtifactResponse} returns this */ -proto.ml_metadata.GetContextsResponse.prototype.setContextsList = function(value) { +proto.ml_metadata.GetContextsByArtifactResponse.prototype.setContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; @@ -16058,56 +22493,20 @@ proto.ml_metadata.GetContextsResponse.prototype.setContextsList = function(value * @param {number=} opt_index * @return {!proto.ml_metadata.Context} */ 
-proto.ml_metadata.GetContextsResponse.prototype.addContexts = function(opt_value, opt_index) { +proto.ml_metadata.GetContextsByArtifactResponse.prototype.addContexts = function(opt_value, opt_index) { return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsResponse} returns this + * @return {!proto.ml_metadata.GetContextsByArtifactResponse} returns this */ -proto.ml_metadata.GetContextsResponse.prototype.clearContextsList = function() { +proto.ml_metadata.GetContextsByArtifactResponse.prototype.clearContextsList = function() { return this.setContextsList([]); }; -/** - * optional string next_page_token = 2; - * @return {string} - */ -proto.ml_metadata.GetContextsResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetContextsResponse} returns this - */ -proto.ml_metadata.GetContextsResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextsResponse} returns this - */ -proto.ml_metadata.GetContextsResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetContextsResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; -}; - - @@ -16124,8 +22523,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByTypeRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByExecutionRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByExecutionRequest.toObject(opt_includeInstance, this); }; @@ -16134,15 +22533,13 @@ proto.ml_metadata.GetContextsByTypeRequest.prototype.toObject = function(opt_inc * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByTypeRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByExecutionRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByTypeRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByExecutionRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), - typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, + executionId: (f = jspb.Message.getField(msg, 1)) == null ? 
undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -16157,23 +22554,23 @@ proto.ml_metadata.GetContextsByTypeRequest.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByTypeRequest} + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} */ -proto.ml_metadata.GetContextsByTypeRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByTypeRequest; - return proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByExecutionRequest; + return proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByTypeRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByExecutionRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByTypeRequest} + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} */ -proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -16181,19 +22578,10 @@ proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader = functio var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setExecutionId(value); break; case 2: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); - msg.setOptions(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); - break; - case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -16211,9 +22599,9 @@ proto.ml_metadata.GetContextsByTypeRequest.deserializeBinaryFromReader = functio * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByTypeRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByExecutionRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -16221,38 +22609,23 @@ proto.ml_metadata.GetContextsByTypeRequest.prototype.serializeBinary = function( /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsByTypeRequest} message + * @param {!proto.ml_metadata.GetContextsByExecutionRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByTypeRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByExecutionRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); + f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeString( + writer.writeInt64( 1, f ); } - f = message.getOptions(); - if (f != null) { - writer.writeMessage( - 2, - f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 4, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -16261,28 +22634,28 @@ proto.ml_metadata.GetContextsByTypeRequest.serializeBinaryToWriter = function(me /** - * optional string type_name = 1; - * @return {string} + * optional int64 execution_id = 1; + * @return {number} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.GetContextsByExecutionRequest.prototype.getExecutionId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + * @param {number} value + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.setTypeName = function(value) { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.setExecutionId = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTypeName = function() { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.clearExecutionId = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -16291,108 +22664,35 @@ proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTypeName = function() * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTypeName = function() { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.hasExecutionId = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional ListOperationOptions options = 2; - * @return {?proto.ml_metadata.ListOperationOptions} - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 2)); -}; - - -/** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this -*/ -proto.ml_metadata.GetContextsByTypeRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 2) != null; -}; - - -/** - * optional string type_version = 3; - * @return {string} - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 3, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 3, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 3) != null; -}; - - -/** - * optional TransactionOptions transaction_options = 4; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetContextsByExecutionRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetContextsByTypeRequest} returns this + * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetContextsByExecutionRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -16401,8 +22701,8 @@ proto.ml_metadata.GetContextsByTypeRequest.prototype.clearTransactionOptions = f * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetContextsByExecutionRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -16412,7 +22712,7 @@ proto.ml_metadata.GetContextsByTypeRequest.prototype.hasTransactionOptions = fun * @private {!Array} * @const */ -proto.ml_metadata.GetContextsByTypeResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetContextsByExecutionResponse.repeatedFields_ = [1]; @@ -16429,8 +22729,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByTypeResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetContextsByExecutionResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetContextsByExecutionResponse.toObject(opt_includeInstance, this); }; @@ -16439,15 +22739,14 @@ proto.ml_metadata.GetContextsByTypeResponse.prototype.toObject = function(opt_in * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByTypeResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetContextsByExecutionResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByTypeResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetContextsByExecutionResponse.toObject = function(includeInstance, msg) { var f, obj = { contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -16461,23 +22760,23 @@ proto.ml_metadata.GetContextsByTypeResponse.toObject = function(includeInstance, /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetContextsByTypeResponse} + * @return {!proto.ml_metadata.GetContextsByExecutionResponse} */ -proto.ml_metadata.GetContextsByTypeResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByTypeResponse; - return proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetContextsByExecutionResponse; + return proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByTypeResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetContextsByExecutionResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByTypeResponse} + * @return {!proto.ml_metadata.GetContextsByExecutionResponse} */ -proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -16489,10 +22788,6 @@ proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader = functi reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); msg.addContexts(value); break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); - break; default: reader.skipField(); break; @@ -16506,9 +22801,9 @@ proto.ml_metadata.GetContextsByTypeResponse.deserializeBinaryFromReader = functi * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetContextsByExecutionResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetContextsByExecutionResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -16516,11 +22811,11 @@ proto.ml_metadata.GetContextsByTypeResponse.prototype.serializeBinary = function /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextsByTypeResponse} message + * @param {!proto.ml_metadata.GetContextsByExecutionResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetContextsByExecutionResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = message.getContextsList(); if (f.length > 0) { @@ -16530,13 +22825,6 @@ proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter = function(m ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } }; @@ -16544,7 +22832,7 @@ proto.ml_metadata.GetContextsByTypeResponse.serializeBinaryToWriter = function(m * repeated Context contexts = 1; * @return {!Array} */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.getContextsList = function() { +proto.ml_metadata.GetContextsByExecutionResponse.prototype.getContextsList = function() { return /** @type{!Array} */ ( jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; @@ -16552,65 +22840,29 @@ proto.ml_metadata.GetContextsByTypeResponse.prototype.getContextsList = function /** * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this + * @return {!proto.ml_metadata.GetContextsByExecutionResponse} returns this */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.setContextsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); -}; - - -/** - * @param {!proto.ml_metadata.Context=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} - */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this - */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.clearContextsList = function() { - return this.setContextsList([]); -}; - - -/** - * optional string next_page_token = 2; - * @return {string} - */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this - */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); +proto.ml_metadata.GetContextsByExecutionResponse.prototype.setContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.GetContextsByTypeResponse} returns this + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); +proto.ml_metadata.GetContextsByExecutionResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetContextsByExecutionResponse} returns this */ -proto.ml_metadata.GetContextsByTypeResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetContextsByExecutionResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); }; @@ -16630,8 +22882,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextByTypeAndNameRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetParentContextsByContextRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetParentContextsByContextRequest.toObject(opt_includeInstance, this); }; @@ -16640,15 +22892,13 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.toObject = function(o * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetParentContextsByContextRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextByTypeAndNameRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetParentContextsByContextRequest.toObject = function(includeInstance, msg) { var f, obj = { - typeName: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - typeVersion: (f = jspb.Message.getField(msg, 3)) == null ? undefined : f, - contextName: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -16663,23 +22913,23 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.toObject = function(includeInst /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextByTypeAndNameRequest; - return proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetParentContextsByContextRequest; + return proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetParentContextsByContextRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -16687,18 +22937,10 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader = f var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeName(value); - break; - case 3: - var value = /** @type {string} */ (reader.readString()); - msg.setTypeVersion(value); + var value = /** @type {number} */ (reader.readInt64()); + msg.setContextId(value); break; case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setContextName(value); - break; - case 4: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -16716,9 +22958,9 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.deserializeBinaryFromReader = f * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextByTypeAndNameRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetParentContextsByContextRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -16726,37 +22968,23 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.serializeBinary = fun /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextByTypeAndNameRequest} message + * @param {!proto.ml_metadata.GetParentContextsByContextRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextByTypeAndNameRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetParentContextsByContextRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {string} */ (jspb.Message.getField(message, 1)); + f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { - writer.writeString( + writer.writeInt64( 1, f ); } - f = /** @type {string} */ (jspb.Message.getField(message, 3)); - if (f != null) { - writer.writeString( - 3, - f - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 4, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -16765,28 +22993,28 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.serializeBinaryToWriter = funct /** - * optional string type_name = 1; - * @return {string} + * optional int64 context_id = 1; + * @return {number} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTypeName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 1, "")); +proto.ml_metadata.GetParentContextsByContextRequest.prototype.getContextId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * @param {string} value - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + * @param {number} value + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTypeName = function(value) { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.setContextId = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTypeName = function() { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.clearContextId = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -16795,107 +23023,35 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTypeName = funct * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTypeName = function() { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.hasContextId = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional string type_version = 3; - * @return {string} - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTypeVersion = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 3, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTypeVersion = function(value) { - return jspb.Message.setField(this, 3, value); -}; - - -/** - * Clears the field making it undefined. 
- * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTypeVersion = function() { - return jspb.Message.setField(this, 3, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTypeVersion = function() { - return jspb.Message.getField(this, 3) != null; -}; - - -/** - * optional string context_name = 2; - * @return {string} - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getContextName = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setContextName = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearContextName = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasContextName = function() { - return jspb.Message.getField(this, 2) != null; -}; - - -/** - * optional TransactionOptions transaction_options = 4; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 4)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 4, value); +proto.ml_metadata.GetParentContextsByContextRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextByTypeAndNameRequest} returns this + * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetParentContextsByContextRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -16904,12 +23060,19 @@ proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.clearTransactionOptio * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetContextByTypeAndNameRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 4) != null; +proto.ml_metadata.GetParentContextsByContextRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetParentContextsByContextResponse.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -16925,8 +23088,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextByTypeAndNameResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetParentContextsByContextResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetParentContextsByContextResponse.toObject(opt_includeInstance, this); }; @@ -16935,13 +23098,14 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.toObject = function( * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetParentContextsByContextResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextByTypeAndNameResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetParentContextsByContextResponse.toObject = function(includeInstance, msg) { var f, obj = { - context: (f = msg.getContext()) && ml_metadata_proto_metadata_store_pb.Context.toObject(includeInstance, f) + contextsList: jspb.Message.toObjectList(msg.getContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -16955,23 +23119,23 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.toObject = function(includeIns /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetParentContextsByContextResponse} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextByTypeAndNameResponse; - return proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetParentContextsByContextResponse; + return proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetParentContextsByContextResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} + * @return {!proto.ml_metadata.GetParentContextsByContextResponse} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -16981,7 +23145,7 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader = case 1: var value = new ml_metadata_proto_metadata_store_pb.Context; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.setContext(value); + msg.addContexts(value); break; default: reader.skipField(); @@ -16996,9 +23160,9 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.deserializeBinaryFromReader = * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetParentContextsByContextResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextByTypeAndNameResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetParentContextsByContextResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17006,15 +23170,15 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.serializeBinary = fu /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextByTypeAndNameResponse} message + * @param {!proto.ml_metadata.GetParentContextsByContextResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextByTypeAndNameResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetParentContextsByContextResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContext(); - if (f != null) { - writer.writeMessage( + f = message.getContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, f, ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter @@ -17024,50 +23188,44 @@ proto.ml_metadata.GetContextByTypeAndNameResponse.serializeBinaryToWriter = func /** - * optional Context context = 1; - * @return {?proto.ml_metadata.Context} + * repeated Context contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.getContext = function() { - return /** @type{?proto.ml_metadata.Context} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); +proto.ml_metadata.GetParentContextsByContextResponse.prototype.getContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * @param {?proto.ml_metadata.Context|undefined} value - * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetParentContextsByContextResponse} returns this */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.setContext = function(value) { - return jspb.Message.setWrapperField(this, 1, value); +proto.ml_metadata.GetParentContextsByContextResponse.prototype.setContextsList = function(value) { + return
jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextByTypeAndNameResponse} returns this + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.clearContext = function() { - return this.setContext(undefined); +proto.ml_metadata.GetParentContextsByContextResponse.prototype.addContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. + * @return {!proto.ml_metadata.GetParentContextsByContextResponse} returns this */ -proto.ml_metadata.GetContextByTypeAndNameResponse.prototype.hasContext = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetParentContextsByContextResponse.prototype.clearContextsList = function() { + return this.setContextsList([]); }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetContextsByIDRequest.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -17083,8 +23241,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByIDRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetChildrenContextsByContextRequest.toObject(opt_includeInstance, this); }; @@ -17093,13 +23251,13 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByIDRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByIDRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetChildrenContextsByContextRequest.toObject = function(includeInstance, msg) { var f, obj = { - contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -17114,23 +23272,23 @@ proto.ml_metadata.GetContextsByIDRequest.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetContextsByIDRequest} + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} */ -proto.ml_metadata.GetContextsByIDRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByIDRequest; - return proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetChildrenContextsByContextRequest; + return proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByIDRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByIDRequest} + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} */ -proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -17138,10 +23296,8 @@ proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); - for (var i = 0; i < values.length; i++) { - msg.addContextIds(values[i]); - } + var value = /** @type {number} */ (reader.readInt64()); + msg.setContextId(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -17161,9 +23317,9 @@ proto.ml_metadata.GetContextsByIDRequest.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByIDRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17171,15 +23327,15 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextsByIDRequest} message + * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByIDRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextIdsList(); - if (f.length > 0) { - writer.writeRepeatedInt64( + f = /** @type {number} */ (jspb.Message.getField(message, 1)); + if (f != null) { + writer.writeInt64( 1, f ); @@ -17196,39 +23352,38 @@ proto.ml_metadata.GetContextsByIDRequest.serializeBinaryToWriter = function(mess /** - * repeated int64 context_ids = 1; - * @return {!Array} + * optional int64 context_id = 1; + * @return {number} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.getContextIdsList = function() { - return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.getContextId = function() { + return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this + * @param {number} value + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextsByIDRequest.prototype.setContextIdsList = function(value) { - return jspb.Message.setField(this, 1, value || []); +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.setContextId = function(value) { + return jspb.Message.setField(this, 1, value); }; /** - * @param {number} value - * @param {number=} opt_index - * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextsByIDRequest.prototype.addContextIds = function(value, opt_index) { - return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearContextId = function() { + return jspb.Message.setField(this, 1, undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this + * Returns whether this field is set. 
+ * @return {boolean} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.clearContextIdsList = function() { - return this.setContextIdsList([]); +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.hasContextId = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -17236,7 +23391,7 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.clearContextIdsList = functio * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -17244,18 +23399,18 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.getTransactionOptions = funct /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextsByIDRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsByIDRequest} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this */ -proto.ml_metadata.GetContextsByIDRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -17264,7 +23419,7 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.clearTransactionOptions = fun * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextsByIDRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -17275,7 +23430,7 @@ proto.ml_metadata.GetContextsByIDRequest.prototype.hasTransactionOptions = funct * @private {!Array} * @const */ -proto.ml_metadata.GetContextsByIDResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetChildrenContextsByContextResponse.repeatedFields_ = [1]; @@ -17292,8 +23447,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByIDResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByIDResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetChildrenContextsByContextResponse.toObject(opt_includeInstance, this); }; @@ -17302,11 +23457,11 @@ proto.ml_metadata.GetContextsByIDResponse.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByIDResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByIDResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetChildrenContextsByContextResponse.toObject = function(includeInstance, msg) { var f, obj = { contextsList: jspb.Message.toObjectList(msg.getContextsList(), ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) @@ -17323,23 +23478,23 @@ proto.ml_metadata.GetContextsByIDResponse.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByIDResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} */ -proto.ml_metadata.GetContextsByIDResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByIDResponse; - return proto.ml_metadata.GetContextsByIDResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetChildrenContextsByContextResponse; + return proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByIDResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByIDResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} */ -proto.ml_metadata.GetContextsByIDResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -17364,9 +23519,9 @@ proto.ml_metadata.GetContextsByIDResponse.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByIDResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByIDResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetChildrenContextsByContextResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17374,11 +23529,11 @@ proto.ml_metadata.GetContextsByIDResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextsByIDResponse} message + * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByIDResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetChildrenContextsByContextResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = message.getContextsList(); if (f.length > 0) { @@ -17395,7 +23550,7 @@ proto.ml_metadata.GetContextsByIDResponse.serializeBinaryToWriter = function(mes * repeated Context contexts = 1; * @return {!Array} */ -proto.ml_metadata.GetContextsByIDResponse.prototype.getContextsList = function() { +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.getContextsList = function() { return /** @type{!Array} */ ( jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; @@ -17403,9 +23558,9 @@ proto.ml_metadata.GetContextsByIDResponse.prototype.getContextsList = function() /** * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsByIDResponse} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} returns this */ -proto.ml_metadata.GetContextsByIDResponse.prototype.setContextsList = function(value) { +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.setContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; @@ -17415,21 +23570,28 @@ proto.ml_metadata.GetContextsByIDResponse.prototype.setContextsList = function(v * @param {number=} opt_index * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextsByIDResponse.prototype.addContexts = function(opt_value, opt_index) { +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.addContexts = function(opt_value, opt_index) { return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsByIDResponse} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} returns this */ -proto.ml_metadata.GetContextsByIDResponse.prototype.clearContextsList = function() { +proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.clearContextsList = function() { return this.setContextsList([]); }; +/** + * List of repeated fields within this message type. + * @private {!Array} + * @const + */ +proto.ml_metadata.GetParentContextsByContextsRequest.repeatedFields_ = [1]; + if (jspb.Message.GENERATE_TO_OBJECT) { @@ -17445,8 +23607,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByArtifactRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetParentContextsByContextsRequest.toObject(opt_includeInstance, this); }; @@ -17455,13 +23617,13 @@ proto.ml_metadata.GetContextsByArtifactRequest.prototype.toObject = function(opt * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByArtifactRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByArtifactRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetParentContextsByContextsRequest.toObject = function(includeInstance, msg) { var f, obj = { - artifactId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -17476,23 +23638,23 @@ proto.ml_metadata.GetContextsByArtifactRequest.toObject = function(includeInstan /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} */ -proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetParentContextsByContextsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByArtifactRequest; - return proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetParentContextsByContextsRequest; + return proto.ml_metadata.GetParentContextsByContextsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByArtifactRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} */ -proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetParentContextsByContextsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -17500,8 +23662,10 @@ proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader = fun var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setArtifactId(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextIds(values[i]); + } break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -17521,9 +23685,9 @@ proto.ml_metadata.GetContextsByArtifactRequest.deserializeBinaryFromReader = fun * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByArtifactRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetParentContextsByContextsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17531,15 +23695,15 @@ proto.ml_metadata.GetContextsByArtifactRequest.prototype.serializeBinary = funct /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsByArtifactRequest} message + * @param {!proto.ml_metadata.GetParentContextsByContextsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByArtifactRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetParentContextsByContextsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( + f = message.getContextIdsList(); + if (f.length > 0) { + writer.writePackedInt64( 1, f ); @@ -17556,38 +23720,39 @@ proto.ml_metadata.GetContextsByArtifactRequest.serializeBinaryToWriter = functio /** - * optional int64 artifact_id = 1; - * @return {number} + * repeated int64 context_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.getArtifactId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.getContextIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {number} value - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.setArtifactId = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.setContextIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this + * @param {number} value + * @param {number=} opt_index + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.clearArtifactId = function() { - return jspb.Message.setField(this, 1, undefined); +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.addContextIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.hasArtifactId = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); }; @@ -17595,7 +23760,7 @@ proto.ml_metadata.GetContextsByArtifactRequest.prototype.hasArtifactId = functio * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -17603,18 +23768,18 @@ proto.ml_metadata.GetContextsByArtifactRequest.prototype.getTransactionOptions = /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsByArtifactRequest} returns this + * @return {!proto.ml_metadata.GetParentContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -17623,19 +23788,12 @@ proto.ml_metadata.GetContextsByArtifactRequest.prototype.clearTransactionOptions * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetContextsByArtifactRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetParentContextsByContextsRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; -/** - * List of repeated fields within this message type. - * @private {!Array} - * @const - */ -proto.ml_metadata.GetContextsByArtifactResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -17651,8 +23809,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByArtifactResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetParentContextsByContextsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetParentContextsByContextsResponse.toObject(opt_includeInstance, this); }; @@ -17661,14 +23819,13 @@ proto.ml_metadata.GetContextsByArtifactResponse.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByArtifactResponse} msg The msg instance to transform. 
+ * @param {!proto.ml_metadata.GetParentContextsByContextsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByArtifactResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetParentContextsByContextsResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) + contextsMap: (f = msg.getContextsMap()) ? f.toObject(includeInstance, proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.toObject) : [] }; if (includeInstance) { @@ -17682,33 +23839,34 @@ proto.ml_metadata.GetContextsByArtifactResponse.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByArtifactResponse} + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse} */ -proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetParentContextsByContextsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByArtifactResponse; - return proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetParentContextsByContextsResponse; + return proto.ml_metadata.GetParentContextsByContextsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByArtifactResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetParentContextsByContextsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByArtifactResponse} + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse} */ -proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetParentContextsByContextsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; - } - var field = reader.getFieldNumber(); - switch (field) { - case 1: - var value = new ml_metadata_proto_metadata_store_pb.Context; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); + } + var field = reader.getFieldNumber(); + switch (field) { + case 2: + var value = msg.getContextsMap(); + reader.readMessage(value, function(message, reader) { + jspb.Map.deserializeBinary(message, reader, jspb.BinaryReader.prototype.readInt64, jspb.BinaryReader.prototype.readMessage, proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.deserializeBinaryFromReader, 0, new proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild()); + }); break; default: reader.skipField(); @@ -17723,9 +23881,9 @@ proto.ml_metadata.GetContextsByArtifactResponse.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetParentContextsByContextsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByArtifactResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetParentContextsByContextsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17733,61 +23891,26 @@ proto.ml_metadata.GetContextsByArtifactResponse.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsByArtifactResponse} message + * @param {!proto.ml_metadata.GetParentContextsByContextsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByArtifactResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetParentContextsByContextsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter - ); + f = message.getContextsMap(true); + if (f && f.getLength() > 0) { + f.serializeBinary(2, writer, jspb.BinaryWriter.prototype.writeInt64, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.serializeBinaryToWriter); } }; -/** - * repeated Context contexts = 1; - * @return {!Array} - */ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.getContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsByArtifactResponse} returns this -*/ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.setContextsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); -}; - - -/** - * @param {!proto.ml_metadata.Context=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} - */ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); -}; - /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsByArtifactResponse} returns this + * List of repeated fields within this message type. 
+ * @private {!Array} + * @const */ -proto.ml_metadata.GetContextsByArtifactResponse.prototype.clearContextsList = function() { - return this.setContextsList([]); -}; - - +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.repeatedFields_ = [1]; @@ -17804,8 +23927,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByExecutionRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.toObject(opt_includeInstance, this); }; @@ -17814,14 +23937,14 @@ proto.ml_metadata.GetContextsByExecutionRequest.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByExecutionRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByExecutionRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.toObject = function(includeInstance, msg) { var f, obj = { - executionId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + parentContextsList: jspb.Message.toObjectList(msg.getParentContextsList(), + ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; if (includeInstance) { @@ -17835,23 +23958,23 @@ proto.ml_metadata.GetContextsByExecutionRequest.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByExecutionRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} */ -proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByExecutionRequest; - return proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild; + return proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByExecutionRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetContextsByExecutionRequest} + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} */ -proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -17859,13 +23982,9 @@ proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader = fu var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setExecutionId(value); - break; - case 2: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); + var value = new ml_metadata_proto_metadata_store_pb.Context; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); + msg.addParentContexts(value); break; default: reader.skipField(); @@ -17880,9 +23999,9 @@ proto.ml_metadata.GetContextsByExecutionRequest.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByExecutionRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -17890,101 +24009,81 @@ proto.ml_metadata.GetContextsByExecutionRequest.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetContextsByExecutionRequest} message + * @param {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByExecutionRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( + f = message.getParentContextsList(); + if (f.length > 0) { + writer.writeRepeatedMessage( 1, - f - ); - } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 2, f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter ); } }; /** - * optional int64 execution_id = 1; - * @return {number} - */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.getExecutionId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this + * repeated Context parent_contexts = 1; + * @return {!Array} */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.setExecutionId = function(value) { - return jspb.Message.setField(this, 1, value); +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.getParentContextsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; /** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this - */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.clearExecutionId = function() { - return jspb.Message.setField(this, 1, undefined); + * @param {!Array} value + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} returns this +*/ +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.setParentContextsList = function(value) { + return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * Returns whether this field is set. - * @return {boolean} + * @param {!proto.ml_metadata.Context=} opt_value + * @param {number=} opt_index + * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.hasExecutionId = function() { - return jspb.Message.getField(this, 1) != null; +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.addParentContexts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** - * optional TransactionOptions transaction_options = 2; - * @return {?proto.ml_metadata.TransactionOptions} + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild} returns this */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); -}; - - -/** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this -*/ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.prototype.clearParentContextsList = function() { + return this.setParentContextsList([]); }; /** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetContextsByExecutionRequest} returns this + * map contexts = 2; + * @param {boolean=} opt_noLazyCreate Do not create the map if + * empty, instead returning `undefined` + * @return {!jspb.Map} */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); +proto.ml_metadata.GetParentContextsByContextsResponse.prototype.getContextsMap = function(opt_noLazyCreate) { + return /** @type {!jspb.Map} */ ( + jspb.Message.getMapField(this, 2, opt_noLazyCreate, + proto.ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild)); }; /** - * Returns whether this field is set. - * @return {boolean} + * Clears values from the map. The map will be non-null. + * @return {!proto.ml_metadata.GetParentContextsByContextsResponse} returns this */ -proto.ml_metadata.GetContextsByExecutionRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; -}; +proto.ml_metadata.GetParentContextsByContextsResponse.prototype.clearContextsMap = function() { + this.getContextsMap().clear(); + return this;}; @@ -17993,7 +24092,7 @@ proto.ml_metadata.GetContextsByExecutionRequest.prototype.hasTransactionOptions * @private {!Array} * @const */ -proto.ml_metadata.GetContextsByExecutionResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetChildrenContextsByContextsRequest.repeatedFields_ = [1]; @@ -18010,8 +24109,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetContextsByExecutionResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetChildrenContextsByContextsRequest.toObject(opt_includeInstance, this); }; @@ -18020,14 +24119,14 @@ proto.ml_metadata.GetContextsByExecutionResponse.prototype.toObject = function(o * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetContextsByExecutionResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByExecutionResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetChildrenContextsByContextsRequest.toObject = function(includeInstance, msg) { var f, obj = { - contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) + contextIdsList: (f = jspb.Message.getRepeatedField(msg, 1)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -18041,23 +24140,23 @@ proto.ml_metadata.GetContextsByExecutionResponse.toObject = function(includeInst /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetContextsByExecutionResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} */ -proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetChildrenContextsByContextsRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetContextsByExecutionResponse; - return proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetChildrenContextsByContextsRequest; + return proto.ml_metadata.GetChildrenContextsByContextsRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetContextsByExecutionResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetContextsByExecutionResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} */ -proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetChildrenContextsByContextsRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -18065,9 +24164,15 @@ proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader = f var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Context; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); + var values = /** @type {!Array} */ (reader.isDelimited() ? reader.readPackedInt64() : [reader.readInt64()]); + for (var i = 0; i < values.length; i++) { + msg.addContextIds(values[i]); + } + break; + case 2: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); break; default: reader.skipField(); @@ -18082,9 +24187,9 @@ proto.ml_metadata.GetContextsByExecutionResponse.deserializeBinaryFromReader = f * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetContextsByExecutionResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetChildrenContextsByContextsRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18092,58 +24197,101 @@ proto.ml_metadata.GetContextsByExecutionResponse.prototype.serializeBinary = fun /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetContextsByExecutionResponse} message + * @param {!proto.ml_metadata.GetChildrenContextsByContextsRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetContextsByExecutionResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetChildrenContextsByContextsRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextsList(); + f = message.getContextIdsList(); if (f.length > 0) { - writer.writeRepeatedMessage( + writer.writePackedInt64( 1, + f + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 2, f, - ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } }; /** - * repeated Context contexts = 1; - * @return {!Array} + * repeated int64 context_ids = 1; + * @return {!Array} */ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.getContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.getContextIdsList = function() { + return /** @type {!Array} */ (jspb.Message.getRepeatedField(this, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetContextsByExecutionResponse} returns this -*/ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.setContextsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); + * @param {!Array} value + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} returns this + */ +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.setContextIdsList = function(value) { + return jspb.Message.setField(this, 1, value || []); }; /** - * @param {!proto.ml_metadata.Context=} opt_value + * @param {number} value * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} returns this */ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.addContextIds = function(value, opt_index) { + return jspb.Message.addToRepeatedField(this, 1, value, opt_index); +}; + + +/** + * Clears the list making it empty but non-null. 
+ * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} returns this + */ +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.clearContextIdsList = function() { + return this.setContextIdsList([]); +}; + + +/** + * optional TransactionOptions transaction_options = 2; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} returns this +*/ +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetChildrenContextsByContextsRequest} returns this + */ +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); }; /** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetContextsByExecutionResponse} returns this + * Returns whether this field is set. + * @return {boolean} */ -proto.ml_metadata.GetContextsByExecutionResponse.prototype.clearContextsList = function() { - return this.setContextsList([]); +proto.ml_metadata.GetChildrenContextsByContextsRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -18163,8 +24311,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetParentContextsByContextRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetChildrenContextsByContextsResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetChildrenContextsByContextsResponse.toObject(opt_includeInstance, this); }; @@ -18173,14 +24321,13 @@ proto.ml_metadata.GetParentContextsByContextRequest.prototype.toObject = functio * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetParentContextsByContextRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetParentContextsByContextRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + contextsMap: (f = msg.getContextsMap()) ? 
f.toObject(includeInstance, proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.toObject) : [] }; if (includeInstance) { @@ -18194,37 +24341,34 @@ proto.ml_metadata.GetParentContextsByContextRequest.toObject = function(includeI /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse} */ -proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetParentContextsByContextRequest; - return proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetChildrenContextsByContextsResponse; + return proto.ml_metadata.GetChildrenContextsByContextsResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetParentContextsByContextRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse} */ -proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; } var field = reader.getFieldNumber(); switch (field) { - case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setContextId(value); - break; case 2: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); + var value = msg.getContextsMap(); + reader.readMessage(value, function(message, reader) { + jspb.Map.deserializeBinary(message, reader, jspb.BinaryReader.prototype.readInt64, jspb.BinaryReader.prototype.readMessage, proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.deserializeBinaryFromReader, 0, new proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent()); + }); break; default: reader.skipField(); @@ -18239,9 +24383,9 @@ proto.ml_metadata.GetParentContextsByContextRequest.deserializeBinaryFromReader * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetChildrenContextsByContextsResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetParentContextsByContextRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetChildrenContextsByContextsResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18249,110 +24393,26 @@ proto.ml_metadata.GetParentContextsByContextRequest.prototype.serializeBinary = /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetParentContextsByContextRequest} message + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetParentContextsByContextRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } - f = message.getTransactionOptions(); - if (f != null) { - writer.writeMessage( - 2, - f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter - ); + f = message.getContextsMap(true); + if (f && f.getLength() > 0) { + f.serializeBinary(2, writer, jspb.BinaryWriter.prototype.writeInt64, jspb.BinaryWriter.prototype.writeMessage, proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.serializeBinaryToWriter); } }; -/** - * optional int64 context_id = 1; - * @return {number} - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.getContextId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.setContextId = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.clearContextId = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.hasContextId = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional TransactionOptions transaction_options = 2; - * @return {?proto.ml_metadata.TransactionOptions} - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); -}; - - -/** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this -*/ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); -}; - - -/** - * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetParentContextsByContextRequest} returns this - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetParentContextsByContextRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; -}; - - /** * List of repeated fields within this message type. * @private {!Array} * @const */ -proto.ml_metadata.GetParentContextsByContextResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.repeatedFields_ = [1]; @@ -18369,8 +24429,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetParentContextsByContextResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.toObject(opt_includeInstance, this); }; @@ -18379,13 +24439,13 @@ proto.ml_metadata.GetParentContextsByContextResponse.prototype.toObject = functi * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetParentContextsByContextResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetParentContextsByContextResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.toObject = function(includeInstance, msg) { var f, obj = { - contextsList: jspb.Message.toObjectList(msg.getContextsList(), + childrenContextsList: jspb.Message.toObjectList(msg.getChildrenContextsList(), ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) }; @@ -18400,23 +24460,23 @@ proto.ml_metadata.GetParentContextsByContextResponse.toObject = function(include /** * Deserializes binary data (in protobuf wire format). 
* @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetParentContextsByContextResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} */ -proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetParentContextsByContextResponse; - return proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent; + return proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetParentContextsByContextResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetParentContextsByContextResponse} + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} */ -proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -18426,7 +24486,7 @@ proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader case 1: var value = new ml_metadata_proto_metadata_store_pb.Context; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); + msg.addChildrenContexts(value); break; default: reader.skipField(); @@ -18441,9 +24501,9 @@ proto.ml_metadata.GetParentContextsByContextResponse.deserializeBinaryFromReader * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetParentContextsByContextResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18451,13 +24511,13 @@ proto.ml_metadata.GetParentContextsByContextResponse.prototype.serializeBinary = /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetParentContextsByContextResponse} message + * @param {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetParentContextsByContextResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextsList(); + f = message.getChildrenContextsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, @@ -18469,10 +24529,10 @@ proto.ml_metadata.GetParentContextsByContextResponse.serializeBinaryToWriter = f /** - * repeated Context contexts = 1; + * repeated Context children_contexts = 1; * @return {!Array} */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.getContextsList = function() { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.getChildrenContextsList = function() { return /** @type{!Array} */ ( jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); }; @@ -18480,9 +24540,9 @@ proto.ml_metadata.GetParentContextsByContextResponse.prototype.getContextsList = /** * @param {!Array} value - * @return {!proto.ml_metadata.GetParentContextsByContextResponse} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} returns this */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.setContextsList = function(value) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.setChildrenContextsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; @@ -18492,20 +24552,42 @@ proto.ml_metadata.GetParentContextsByContextResponse.prototype.setContextsList = * @param {number=} opt_index * @return {!proto.ml_metadata.Context} */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.addContexts = function(opt_value, opt_index) { +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.addChildrenContexts = function(opt_value, opt_index) { return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); }; /** * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetParentContextsByContextResponse} returns this + * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent} returns this */ -proto.ml_metadata.GetParentContextsByContextResponse.prototype.clearContextsList = function() { - return this.setContextsList([]); +proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.prototype.clearChildrenContextsList = function() { + return this.setChildrenContextsList([]); +}; + + +/** + * map contexts = 2; + * @param {boolean=} opt_noLazyCreate Do not create the map if + * empty, instead returning `undefined` + * @return {!jspb.Map} + */ +proto.ml_metadata.GetChildrenContextsByContextsResponse.prototype.getContextsMap = function(opt_noLazyCreate) { + return /** @type {!jspb.Map} */ ( + jspb.Message.getMapField(this, 2, opt_noLazyCreate, + proto.ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent)); }; +/** + * Clears values from the map. The map will be non-null. 
+ * @return {!proto.ml_metadata.GetChildrenContextsByContextsResponse} returns this + */ +proto.ml_metadata.GetChildrenContextsByContextsResponse.prototype.clearContextsMap = function() { + this.getContextsMap().clear(); + return this;}; + + @@ -18522,8 +24604,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetChildrenContextsByContextRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByContextRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByContextRequest.toObject(opt_includeInstance, this); }; @@ -18532,13 +24614,14 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.toObject = funct * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByContextRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetChildrenContextsByContextRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByContextRequest.toObject = function(includeInstance, msg) { var f, obj = { contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -18553,23 +24636,23 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.toObject = function(includ /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetChildrenContextsByContextRequest; - return proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByContextRequest; + return proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByContextRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -18581,6 +24664,11 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReade msg.setContextId(value); break; case 2: + var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + msg.setOptions(value); + break; + case 3: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -18598,9 +24686,9 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.deserializeBinaryFromReade * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByContextRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18608,11 +24696,11 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.serializeBinary /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetChildrenContextsByContextRequest} message + * @param {!proto.ml_metadata.GetArtifactsByContextRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByContextRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { @@ -18621,11 +24709,19 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter = f ); } - f = message.getTransactionOptions(); + f = message.getOptions(); if (f != null) { writer.writeMessage( 2, f, + ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + ); + } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); } @@ -18636,25 +24732,25 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.serializeBinaryToWriter = * optional int64 context_id = 1; * @return {number} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.getContextId = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.getContextId = function() { return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** * @param {number} value - * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.setContextId = function(value) { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.setContextId = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearContextId = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearContextId = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -18663,35 +24759,72 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearContextId = * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.hasContextId = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasContextId = function() { return jspb.Message.getField(this, 1) != null; }; /** - * optional TransactionOptions transaction_options = 2; + * optional ListOperationOptions options = 2; + * @return {?proto.ml_metadata.ListOperationOptions} + */ +proto.ml_metadata.GetArtifactsByContextRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 2)); +}; + + +/** + * @param {?proto.ml_metadata.ListOperationOptions|undefined} value + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this +*/ +proto.ml_metadata.GetArtifactsByContextRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); +}; + + +/** + * Clears the message field making it undefined. 
+ * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + */ +proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearOptions = function() { + return this.setOptions(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 2) != null; +}; + + +/** + * optional TransactionOptions transaction_options = 3; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetArtifactsByContextRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetChildrenContextsByContextRequest} returns this + * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -18700,8 +24833,8 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.clearTransaction * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; }; @@ -18711,7 +24844,7 @@ proto.ml_metadata.GetChildrenContextsByContextRequest.prototype.hasTransactionOp * @private {!Array} * @const */ -proto.ml_metadata.GetChildrenContextsByContextResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetArtifactsByContextResponse.repeatedFields_ = [1]; @@ -18728,8 +24861,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetChildrenContextsByContextResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetArtifactsByContextResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetArtifactsByContextResponse.toObject(opt_includeInstance, this); }; @@ -18738,14 +24871,15 @@ proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.toObject = func * @param {boolean|undefined} includeInstance Deprecated. 
Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetArtifactsByContextResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetChildrenContextsByContextResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetArtifactsByContextResponse.toObject = function(includeInstance, msg) { var f, obj = { - contextsList: jspb.Message.toObjectList(msg.getContextsList(), - ml_metadata_proto_metadata_store_pb.Context.toObject, includeInstance) + artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), + ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f }; if (includeInstance) { @@ -18759,23 +24893,23 @@ proto.ml_metadata.GetChildrenContextsByContextResponse.toObject = function(inclu /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetChildrenContextsByContextResponse; - return proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetArtifactsByContextResponse; + return proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetArtifactsByContextResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -18783,9 +24917,13 @@ proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromRead var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Context; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Context.deserializeBinaryFromReader); - msg.addContexts(value); + var value = new ml_metadata_proto_metadata_store_pb.Artifact; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); + msg.addArtifacts(value); + break; + case 2: + var value = /** @type {string} */ (reader.readString()); + msg.setNextPageToken(value); break; default: reader.skipField(); @@ -18800,9 +24938,9 @@ proto.ml_metadata.GetChildrenContextsByContextResponse.deserializeBinaryFromRead * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetArtifactsByContextResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetChildrenContextsByContextResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetArtifactsByContextResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18810,58 +24948,101 @@ proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.serializeBinary /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetChildrenContextsByContextResponse} message + * @param {!proto.ml_metadata.GetArtifactsByContextResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetChildrenContextsByContextResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetArtifactsByContextResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getContextsList(); + f = message.getArtifactsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Context.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ); + } + f = /** @type {string} */ (jspb.Message.getField(message, 2)); + if (f != null) { + writer.writeString( + 2, + f ); } }; /** - * repeated Context contexts = 1; - * @return {!Array} + * repeated Artifact artifacts = 1; + * @return {!Array} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.getContextsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Context, 1)); +proto.ml_metadata.GetArtifactsByContextResponse.prototype.getArtifactsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.setContextsList = function(value) { +proto.ml_metadata.GetArtifactsByContextResponse.prototype.setArtifactsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Context=} opt_value + * @param {!proto.ml_metadata.Artifact=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Context} + * @return {!proto.ml_metadata.Artifact} */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.addContexts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Context, opt_index); +proto.ml_metadata.GetArtifactsByContextResponse.prototype.addArtifacts = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetChildrenContextsByContextResponse} returns this + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this */ -proto.ml_metadata.GetChildrenContextsByContextResponse.prototype.clearContextsList = function() { - return this.setContextsList([]); +proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearArtifactsList = function() { + return this.setArtifactsList([]); +}; + + +/** + * optional string next_page_token = 2; + * @return {string} + */ +proto.ml_metadata.GetArtifactsByContextResponse.prototype.getNextPageToken = function() { + return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); +}; + + +/** + * @param {string} value + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + */ +proto.ml_metadata.GetArtifactsByContextResponse.prototype.setNextPageToken = function(value) { + return jspb.Message.setField(this, 2, value); +}; + + +/** + * Clears the field making it undefined. + * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + */ +proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearNextPageToken = function() { + return jspb.Message.setField(this, 2, undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetArtifactsByContextResponse.prototype.hasNextPageToken = function() { + return jspb.Message.getField(this, 2) != null; }; @@ -18881,8 +25062,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByContextRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByContextRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByContextRequest.toObject(opt_includeInstance, this); }; @@ -18891,11 +25072,11 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.toObject = function(opt * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByContextRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByContextRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByContextRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByContextRequest.toObject = function(includeInstance, msg) { var f, obj = { contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), @@ -18913,23 +25094,23 @@ proto.ml_metadata.GetArtifactsByContextRequest.toObject = function(includeInstan /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. 
- * @return {!proto.ml_metadata.GetArtifactsByContextRequest} + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} */ -proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByContextRequest; - return proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByContextRequest; + return proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetArtifactsByContextRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByContextRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} */ -proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -18963,9 +25144,9 @@ proto.ml_metadata.GetArtifactsByContextRequest.deserializeBinaryFromReader = fun * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByContextRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByContextRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -18973,11 +25154,11 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.serializeBinary = funct /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByContextRequest} message + * @param {!proto.ml_metadata.GetExecutionsByContextRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByContextRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByContextRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; f = /** @type {number} */ (jspb.Message.getField(message, 1)); if (f != null) { @@ -19009,25 +25190,25 @@ proto.ml_metadata.GetArtifactsByContextRequest.serializeBinaryToWriter = functio * optional int64 context_id = 1; * @return {number} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.getContextId = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.getContextId = function() { return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); }; /** * @param {number} value - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.setContextId = function(value) { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.setContextId = function(value) { return jspb.Message.setField(this, 1, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearContextId = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearContextId = function() { return jspb.Message.setField(this, 1, undefined); }; @@ -19036,7 +25217,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearContextId = functi * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasContextId = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasContextId = function() { return jspb.Message.getField(this, 1) != null; }; @@ -19045,7 +25226,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasContextId = function * optional ListOperationOptions options = 2; * @return {?proto.ml_metadata.ListOperationOptions} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.getOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.getOptions = function() { return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 2)); }; @@ -19053,18 +25234,18 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.getOptions = function() /** * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.setOptions = function(value) { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.setOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearOptions = function() { return this.setOptions(undefined); }; @@ -19073,7 +25254,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearOptions = function * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -19082,7 +25263,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasOptions = function() * optional TransactionOptions transaction_options = 3; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); }; @@ -19090,18 +25271,18 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.getTransactionOptions = /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 3, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByContextRequest} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -19110,7 +25291,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.clearTransactionOptions * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 3) != null; }; @@ -19121,7 +25302,7 @@ proto.ml_metadata.GetArtifactsByContextRequest.prototype.hasTransactionOptions = * @private {!Array} * @const */ -proto.ml_metadata.GetArtifactsByContextResponse.repeatedFields_ = [1]; +proto.ml_metadata.GetExecutionsByContextResponse.repeatedFields_ = [1]; @@ -19138,8 +25319,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetArtifactsByContextResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetExecutionsByContextResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetExecutionsByContextResponse.toObject(opt_includeInstance, this); }; @@ -19148,15 +25329,16 @@ proto.ml_metadata.GetArtifactsByContextResponse.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetArtifactsByContextResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetExecutionsByContextResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByContextResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetExecutionsByContextResponse.toObject = function(includeInstance, msg) { var f, obj = { - artifactsList: jspb.Message.toObjectList(msg.getArtifactsList(), - ml_metadata_proto_metadata_store_pb.Artifact.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f + executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), + ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), + nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, + transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; if (includeInstance) { @@ -19170,23 +25352,23 @@ proto.ml_metadata.GetArtifactsByContextResponse.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetArtifactsByContextResponse} + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} */ -proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetArtifactsByContextResponse; - return proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetExecutionsByContextResponse; + return proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. 
- * @param {!proto.ml_metadata.GetArtifactsByContextResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetExecutionsByContextResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetArtifactsByContextResponse} + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} */ -proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -19194,14 +25376,19 @@ proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader = fu var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Artifact; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Artifact.deserializeBinaryFromReader); - msg.addArtifacts(value); + var value = new ml_metadata_proto_metadata_store_pb.Execution; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); + msg.addExecutions(value); break; case 2: var value = /** @type {string} */ (reader.readString()); msg.setNextPageToken(value); break; + case 3: + var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); + msg.setTransactionOptions(value); + break; default: reader.skipField(); break; @@ -19215,9 +25402,9 @@ proto.ml_metadata.GetArtifactsByContextResponse.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetArtifactsByContextResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetExecutionsByContextResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -19225,18 +25412,18 @@ proto.ml_metadata.GetArtifactsByContextResponse.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetArtifactsByContextResponse} message + * @param {!proto.ml_metadata.GetExecutionsByContextResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetArtifactsByContextResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetExecutionsByContextResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getArtifactsList(); + f = message.getExecutionsList(); if (f.length > 0) { writer.writeRepeatedMessage( 1, f, - ml_metadata_proto_metadata_store_pb.Artifact.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter ); } f = /** @type {string} */ (jspb.Message.getField(message, 2)); @@ -19246,44 +25433,52 @@ proto.ml_metadata.GetArtifactsByContextResponse.serializeBinaryToWriter = functi f ); } + f = message.getTransactionOptions(); + if (f != null) { + writer.writeMessage( + 3, + f, + ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ); + } }; /** - * repeated Artifact artifacts = 1; - * @return {!Array} + * repeated Execution executions = 1; + * @return {!Array} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.getArtifactsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Artifact, 1)); +proto.ml_metadata.GetExecutionsByContextResponse.prototype.getExecutionsList = function() { + return /** @type{!Array} */ ( + jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); }; /** - * @param {!Array} value - * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + * @param {!Array} value + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.setArtifactsList = function(value) { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.setExecutionsList = function(value) { return jspb.Message.setRepeatedWrapperField(this, 1, value); }; /** - * @param {!proto.ml_metadata.Artifact=} opt_value + * @param {!proto.ml_metadata.Execution=} opt_value * @param {number=} opt_index - * @return {!proto.ml_metadata.Artifact} + * @return {!proto.ml_metadata.Execution} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.addArtifacts = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Artifact, opt_index); +proto.ml_metadata.GetExecutionsByContextResponse.prototype.addExecutions = function(opt_value, opt_index) { + return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); }; /** * Clears the list making it empty but non-null. 
- * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearArtifactsList = function() { - return this.setArtifactsList([]); +proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearExecutionsList = function() { + return this.setExecutionsList([]); }; @@ -19291,25 +25486,25 @@ proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearArtifactsList = f * optional string next_page_token = 2; * @return {string} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.getNextPageToken = function() { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.getNextPageToken = function() { return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); }; /** * @param {string} value - * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.setNextPageToken = function(value) { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.setNextPageToken = function(value) { return jspb.Message.setField(this, 2, value); }; /** * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetArtifactsByContextResponse} returns this + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearNextPageToken = function() { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearNextPageToken = function() { return jspb.Message.setField(this, 2, undefined); }; @@ -19318,11 +25513,48 @@ proto.ml_metadata.GetArtifactsByContextResponse.prototype.clearNextPageToken = f * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetArtifactsByContextResponse.prototype.hasNextPageToken = function() { +proto.ml_metadata.GetExecutionsByContextResponse.prototype.hasNextPageToken = function() { return jspb.Message.getField(this, 2) != null; }; +/** + * optional TransactionOptions transaction_options = 3; + * @return {?proto.ml_metadata.TransactionOptions} + */ +proto.ml_metadata.GetExecutionsByContextResponse.prototype.getTransactionOptions = function() { + return /** @type{?proto.ml_metadata.TransactionOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); +}; + + +/** + * @param {?proto.ml_metadata.TransactionOptions|undefined} value + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this +*/ +proto.ml_metadata.GetExecutionsByContextResponse.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this + */ +proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearTransactionOptions = function() { + return this.setTransactionOptions(undefined); +}; + + +/** + * Returns whether this field is set. 
+ * @return {boolean} + */ +proto.ml_metadata.GetExecutionsByContextResponse.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 3) != null; +}; + + @@ -19339,8 +25571,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByContextRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetLineageGraphRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetLineageGraphRequest.toObject(opt_includeInstance, this); }; @@ -19349,14 +25581,13 @@ proto.ml_metadata.GetExecutionsByContextRequest.prototype.toObject = function(op * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByContextRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetLineageGraphRequest} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByContextRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetLineageGraphRequest.toObject = function(includeInstance, msg) { var f, obj = { - contextId: (f = jspb.Message.getField(msg, 1)) == null ? undefined : f, - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.ListOperationOptions.toObject(includeInstance, f), + options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.toObject(includeInstance, f), transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -19371,23 +25602,23 @@ proto.ml_metadata.GetExecutionsByContextRequest.toObject = function(includeInsta /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} + * @return {!proto.ml_metadata.GetLineageGraphRequest} */ -proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetLineageGraphRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByContextRequest; - return proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetLineageGraphRequest; + return proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByContextRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetLineageGraphRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionsByContextRequest} + * @return {!proto.ml_metadata.GetLineageGraphRequest} */ -proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -19395,15 +25626,11 @@ proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader = fu var field = reader.getFieldNumber(); switch (field) { case 1: - var value = /** @type {number} */ (reader.readInt64()); - msg.setContextId(value); - break; - case 2: - var value = new ml_metadata_proto_metadata_store_pb.ListOperationOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.ListOperationOptions.deserializeBinaryFromReader); + var value = new ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.deserializeBinaryFromReader); msg.setOptions(value); break; - case 3: + case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); msg.setTransactionOptions(value); @@ -19421,9 +25648,9 @@ proto.ml_metadata.GetExecutionsByContextRequest.deserializeBinaryFromReader = fu * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetLineageGraphRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByContextRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetLineageGraphRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -19431,31 +25658,24 @@ proto.ml_metadata.GetExecutionsByContextRequest.prototype.serializeBinary = func /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionsByContextRequest} message + * @param {!proto.ml_metadata.GetLineageGraphRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByContextRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetLineageGraphRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = /** @type {number} */ (jspb.Message.getField(message, 1)); - if (f != null) { - writer.writeInt64( - 1, - f - ); - } f = message.getOptions(); if (f != null) { writer.writeMessage( - 2, + 1, f, - ml_metadata_proto_metadata_store_pb.ListOperationOptions.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.serializeBinaryToWriter ); } f = message.getTransactionOptions(); if (f != null) { writer.writeMessage( - 3, + 2, f, ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter ); @@ -19464,65 +25684,29 @@ proto.ml_metadata.GetExecutionsByContextRequest.serializeBinaryToWriter = functi /** - * optional int64 context_id = 1; - * @return {number} - */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.getContextId = function() { - return /** @type {number} */ (jspb.Message.getFieldWithDefault(this, 1, 0)); -}; - - -/** - * @param {number} value - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this - */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.setContextId = function(value) { - return jspb.Message.setField(this, 1, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this - */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearContextId = function() { - return jspb.Message.setField(this, 1, undefined); -}; - - -/** - * Returns whether this field is set. - * @return {boolean} - */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasContextId = function() { - return jspb.Message.getField(this, 1) != null; -}; - - -/** - * optional ListOperationOptions options = 2; - * @return {?proto.ml_metadata.ListOperationOptions} + * optional LineageGraphQueryOptions options = 1; + * @return {?proto.ml_metadata.LineageGraphQueryOptions} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.ListOperationOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.ListOperationOptions, 2)); +proto.ml_metadata.GetLineageGraphRequest.prototype.getOptions = function() { + return /** @type{?proto.ml_metadata.LineageGraphQueryOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions, 1)); }; /** - * @param {?proto.ml_metadata.ListOperationOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this + * @param {?proto.ml_metadata.LineageGraphQueryOptions|undefined} value + * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.setOptions = function(value) { - return jspb.Message.setWrapperField(this, 2, value); +proto.ml_metadata.GetLineageGraphRequest.prototype.setOptions = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this + * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearOptions = function() { +proto.ml_metadata.GetLineageGraphRequest.prototype.clearOptions = function() { return this.setOptions(undefined); }; @@ -19531,35 +25715,35 @@ proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearOptions = functio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasOptions = function() { - return jspb.Message.getField(this, 2) != null; +proto.ml_metadata.GetLineageGraphRequest.prototype.hasOptions = function() { + return jspb.Message.getField(this, 1) != null; }; /** - * optional TransactionOptions transaction_options = 3; + * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetLineageGraphRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this + * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetLineageGraphRequest.prototype.setTransactionOptions = function(value) { + return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByContextRequest} returns this + * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetLineageGraphRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -19568,19 +25752,12 @@ proto.ml_metadata.GetExecutionsByContextRequest.prototype.clearTransactionOption * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsByContextRequest.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetLineageGraphRequest.prototype.hasTransactionOptions = function() { + return jspb.Message.getField(this, 2) != null; }; -/** - * List of repeated fields within this message type. 
- * @private {!Array} - * @const - */ -proto.ml_metadata.GetExecutionsByContextResponse.repeatedFields_ = [1]; - if (jspb.Message.GENERATE_TO_OBJECT) { @@ -19596,8 +25773,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetExecutionsByContextResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetLineageGraphResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetLineageGraphResponse.toObject(opt_includeInstance, this); }; @@ -19606,16 +25783,13 @@ proto.ml_metadata.GetExecutionsByContextResponse.prototype.toObject = function(o * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetExecutionsByContextResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetLineageGraphResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByContextResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetLineageGraphResponse.toObject = function(includeInstance, msg) { var f, obj = { - executionsList: jspb.Message.toObjectList(msg.getExecutionsList(), - ml_metadata_proto_metadata_store_pb.Execution.toObject, includeInstance), - nextPageToken: (f = jspb.Message.getField(msg, 2)) == null ? undefined : f, - transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) + subgraph: (f = msg.getSubgraph()) && ml_metadata_proto_metadata_store_pb.LineageGraph.toObject(includeInstance, f) }; if (includeInstance) { @@ -19629,23 +25803,23 @@ proto.ml_metadata.GetExecutionsByContextResponse.toObject = function(includeInst /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} + * @return {!proto.ml_metadata.GetLineageGraphResponse} */ -proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetLineageGraphResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetExecutionsByContextResponse; - return proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetLineageGraphResponse; + return proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetExecutionsByContextResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetLineageGraphResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetExecutionsByContextResponse} + * @return {!proto.ml_metadata.GetLineageGraphResponse} */ -proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -19653,18 +25827,9 @@ proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader = f var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.Execution; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.Execution.deserializeBinaryFromReader); - msg.addExecutions(value); - break; - case 2: - var value = /** @type {string} */ (reader.readString()); - msg.setNextPageToken(value); - break; - case 3: - var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.TransactionOptions.deserializeBinaryFromReader); - msg.setTransactionOptions(value); + var value = new ml_metadata_proto_metadata_store_pb.LineageGraph; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.LineageGraph.deserializeBinaryFromReader); + msg.setSubgraph(value); break; default: reader.skipField(); @@ -19679,9 +25844,9 @@ proto.ml_metadata.GetExecutionsByContextResponse.deserializeBinaryFromReader = f * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetLineageGraphResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetExecutionsByContextResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetLineageGraphResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -19689,137 +25854,48 @@ proto.ml_metadata.GetExecutionsByContextResponse.prototype.serializeBinary = fun /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. 
- * @param {!proto.ml_metadata.GetExecutionsByContextResponse} message + * @param {!proto.ml_metadata.GetLineageGraphResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetExecutionsByContextResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetLineageGraphResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getExecutionsList(); - if (f.length > 0) { - writer.writeRepeatedMessage( - 1, - f, - ml_metadata_proto_metadata_store_pb.Execution.serializeBinaryToWriter - ); - } - f = /** @type {string} */ (jspb.Message.getField(message, 2)); - if (f != null) { - writer.writeString( - 2, - f - ); - } - f = message.getTransactionOptions(); + f = message.getSubgraph(); if (f != null) { writer.writeMessage( - 3, + 1, f, - ml_metadata_proto_metadata_store_pb.TransactionOptions.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.LineageGraph.serializeBinaryToWriter ); } }; /** - * repeated Execution executions = 1; - * @return {!Array} - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.getExecutionsList = function() { - return /** @type{!Array} */ ( - jspb.Message.getRepeatedWrapperField(this, ml_metadata_proto_metadata_store_pb.Execution, 1)); -}; - - -/** - * @param {!Array} value - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this -*/ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.setExecutionsList = function(value) { - return jspb.Message.setRepeatedWrapperField(this, 1, value); -}; - - -/** - * @param {!proto.ml_metadata.Execution=} opt_value - * @param {number=} opt_index - * @return {!proto.ml_metadata.Execution} - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.addExecutions = function(opt_value, opt_index) { - return jspb.Message.addToRepeatedWrapperField(this, 1, opt_value, proto.ml_metadata.Execution, opt_index); -}; - - -/** - * Clears the list making it empty but non-null. - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearExecutionsList = function() { - return this.setExecutionsList([]); -}; - - -/** - * optional string next_page_token = 2; - * @return {string} - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.getNextPageToken = function() { - return /** @type {string} */ (jspb.Message.getFieldWithDefault(this, 2, "")); -}; - - -/** - * @param {string} value - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.setNextPageToken = function(value) { - return jspb.Message.setField(this, 2, value); -}; - - -/** - * Clears the field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearNextPageToken = function() { - return jspb.Message.setField(this, 2, undefined); -}; - - -/** - * Returns whether this field is set. 
- * @return {boolean} - */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.hasNextPageToken = function() { - return jspb.Message.getField(this, 2) != null; -}; - - -/** - * optional TransactionOptions transaction_options = 3; - * @return {?proto.ml_metadata.TransactionOptions} + * optional LineageGraph subgraph = 1; + * @return {?proto.ml_metadata.LineageGraph} */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.getTransactionOptions = function() { - return /** @type{?proto.ml_metadata.TransactionOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 3)); +proto.ml_metadata.GetLineageGraphResponse.prototype.getSubgraph = function() { + return /** @type{?proto.ml_metadata.LineageGraph} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.LineageGraph, 1)); }; /** - * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this + * @param {?proto.ml_metadata.LineageGraph|undefined} value + * @return {!proto.ml_metadata.GetLineageGraphResponse} returns this */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.setTransactionOptions = function(value) { - return jspb.Message.setWrapperField(this, 3, value); +proto.ml_metadata.GetLineageGraphResponse.prototype.setSubgraph = function(value) { + return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetExecutionsByContextResponse} returns this + * @return {!proto.ml_metadata.GetLineageGraphResponse} returns this */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearTransactionOptions = function() { - return this.setTransactionOptions(undefined); +proto.ml_metadata.GetLineageGraphResponse.prototype.clearSubgraph = function() { + return this.setSubgraph(undefined); }; @@ -19827,8 +25903,8 @@ proto.ml_metadata.GetExecutionsByContextResponse.prototype.clearTransactionOptio * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetExecutionsByContextResponse.prototype.hasTransactionOptions = function() { - return jspb.Message.getField(this, 3) != null; +proto.ml_metadata.GetLineageGraphResponse.prototype.hasSubgraph = function() { + return jspb.Message.getField(this, 1) != null; }; @@ -19848,8 +25924,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetLineageGraphRequest.toObject(opt_includeInstance, this); +proto.ml_metadata.GetLineageSubgraphRequest.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetLineageSubgraphRequest.toObject(opt_includeInstance, this); }; @@ -19858,13 +25934,14 @@ proto.ml_metadata.GetLineageGraphRequest.prototype.toObject = function(opt_inclu * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetLineageGraphRequest} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} msg The msg instance to transform. 
* @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetLineageGraphRequest.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetLineageSubgraphRequest.toObject = function(includeInstance, msg) { var f, obj = { - options: (f = msg.getOptions()) && ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.toObject(includeInstance, f), + lineageSubgraphQueryOptions: (f = msg.getLineageSubgraphQueryOptions()) && ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions.toObject(includeInstance, f), + readMask: (f = msg.getReadMask()) && google_protobuf_field_mask_pb.FieldMask.toObject(includeInstance, f), transactionOptions: (f = msg.getTransactionOptions()) && ml_metadata_proto_metadata_store_pb.TransactionOptions.toObject(includeInstance, f) }; @@ -19879,23 +25956,23 @@ proto.ml_metadata.GetLineageGraphRequest.toObject = function(includeInstance, ms /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetLineageGraphRequest} + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} */ -proto.ml_metadata.GetLineageGraphRequest.deserializeBinary = function(bytes) { +proto.ml_metadata.GetLineageSubgraphRequest.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetLineageGraphRequest; - return proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetLineageSubgraphRequest; + return proto.ml_metadata.GetLineageSubgraphRequest.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetLineageGraphRequest} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. - * @return {!proto.ml_metadata.GetLineageGraphRequest} + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} */ -proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetLineageSubgraphRequest.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -19903,9 +25980,14 @@ proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader = function( var field = reader.getFieldNumber(); switch (field) { case 1: - var value = new ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions; - reader.readMessage(value,ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.deserializeBinaryFromReader); - msg.setOptions(value); + var value = new ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions; + reader.readMessage(value,ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions.deserializeBinaryFromReader); + msg.setLineageSubgraphQueryOptions(value); + break; + case 3: + var value = new google_protobuf_field_mask_pb.FieldMask; + reader.readMessage(value,google_protobuf_field_mask_pb.FieldMask.deserializeBinaryFromReader); + msg.setReadMask(value); break; case 2: var value = new ml_metadata_proto_metadata_store_pb.TransactionOptions; @@ -19925,9 +26007,9 @@ proto.ml_metadata.GetLineageGraphRequest.deserializeBinaryFromReader = function( * Serializes the message to binary data (in protobuf wire format). 
* @return {!Uint8Array} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.serializeBinary = function() { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetLineageGraphRequest.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetLineageSubgraphRequest.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -19935,18 +26017,26 @@ proto.ml_metadata.GetLineageGraphRequest.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetLineageGraphRequest} message + * @param {!proto.ml_metadata.GetLineageSubgraphRequest} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetLineageGraphRequest.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetLineageSubgraphRequest.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getOptions(); + f = message.getLineageSubgraphQueryOptions(); if (f != null) { writer.writeMessage( 1, f, - ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions.serializeBinaryToWriter + ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions.serializeBinaryToWriter + ); + } + f = message.getReadMask(); + if (f != null) { + writer.writeMessage( + 3, + f, + google_protobuf_field_mask_pb.FieldMask.serializeBinaryToWriter ); } f = message.getTransactionOptions(); @@ -19961,30 +26051,30 @@ proto.ml_metadata.GetLineageGraphRequest.serializeBinaryToWriter = function(mess /** - * optional LineageGraphQueryOptions options = 1; - * @return {?proto.ml_metadata.LineageGraphQueryOptions} + * optional LineageSubgraphQueryOptions lineage_subgraph_query_options = 1; + * @return {?proto.ml_metadata.LineageSubgraphQueryOptions} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.getOptions = function() { - return /** @type{?proto.ml_metadata.LineageGraphQueryOptions} */ ( - jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.LineageGraphQueryOptions, 1)); +proto.ml_metadata.GetLineageSubgraphRequest.prototype.getLineageSubgraphQueryOptions = function() { + return /** @type{?proto.ml_metadata.LineageSubgraphQueryOptions} */ ( + jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.LineageSubgraphQueryOptions, 1)); }; /** - * @param {?proto.ml_metadata.LineageGraphQueryOptions|undefined} value - * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this + * @param {?proto.ml_metadata.LineageSubgraphQueryOptions|undefined} value + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this */ -proto.ml_metadata.GetLineageGraphRequest.prototype.setOptions = function(value) { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.setLineageSubgraphQueryOptions = function(value) { return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this */ -proto.ml_metadata.GetLineageGraphRequest.prototype.clearOptions = function() { - return this.setOptions(undefined); +proto.ml_metadata.GetLineageSubgraphRequest.prototype.clearLineageSubgraphQueryOptions = function() { + return this.setLineageSubgraphQueryOptions(undefined); }; @@ -19992,16 +26082,53 @@ proto.ml_metadata.GetLineageGraphRequest.prototype.clearOptions = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.hasOptions = function() { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.hasLineageSubgraphQueryOptions = function() { return jspb.Message.getField(this, 1) != null; }; +/** + * optional google.protobuf.FieldMask read_mask = 3; + * @return {?proto.google.protobuf.FieldMask} + */ +proto.ml_metadata.GetLineageSubgraphRequest.prototype.getReadMask = function() { + return /** @type{?proto.google.protobuf.FieldMask} */ ( + jspb.Message.getWrapperField(this, google_protobuf_field_mask_pb.FieldMask, 3)); +}; + + +/** + * @param {?proto.google.protobuf.FieldMask|undefined} value + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this +*/ +proto.ml_metadata.GetLineageSubgraphRequest.prototype.setReadMask = function(value) { + return jspb.Message.setWrapperField(this, 3, value); +}; + + +/** + * Clears the message field making it undefined. + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this + */ +proto.ml_metadata.GetLineageSubgraphRequest.prototype.clearReadMask = function() { + return this.setReadMask(undefined); +}; + + +/** + * Returns whether this field is set. + * @return {boolean} + */ +proto.ml_metadata.GetLineageSubgraphRequest.prototype.hasReadMask = function() { + return jspb.Message.getField(this, 3) != null; +}; + + /** * optional TransactionOptions transaction_options = 2; * @return {?proto.ml_metadata.TransactionOptions} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.getTransactionOptions = function() { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.getTransactionOptions = function() { return /** @type{?proto.ml_metadata.TransactionOptions} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.TransactionOptions, 2)); }; @@ -20009,18 +26136,18 @@ proto.ml_metadata.GetLineageGraphRequest.prototype.getTransactionOptions = funct /** * @param {?proto.ml_metadata.TransactionOptions|undefined} value - * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this */ -proto.ml_metadata.GetLineageGraphRequest.prototype.setTransactionOptions = function(value) { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.setTransactionOptions = function(value) { return jspb.Message.setWrapperField(this, 2, value); }; /** * Clears the message field making it undefined. - * @return {!proto.ml_metadata.GetLineageGraphRequest} returns this + * @return {!proto.ml_metadata.GetLineageSubgraphRequest} returns this */ -proto.ml_metadata.GetLineageGraphRequest.prototype.clearTransactionOptions = function() { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.clearTransactionOptions = function() { return this.setTransactionOptions(undefined); }; @@ -20029,7 +26156,7 @@ proto.ml_metadata.GetLineageGraphRequest.prototype.clearTransactionOptions = fun * Returns whether this field is set. 
* @return {boolean} */ -proto.ml_metadata.GetLineageGraphRequest.prototype.hasTransactionOptions = function() { +proto.ml_metadata.GetLineageSubgraphRequest.prototype.hasTransactionOptions = function() { return jspb.Message.getField(this, 2) != null; }; @@ -20050,8 +26177,8 @@ if (jspb.Message.GENERATE_TO_OBJECT) { * http://goto/soy-param-migration * @return {!Object} */ -proto.ml_metadata.GetLineageGraphResponse.prototype.toObject = function(opt_includeInstance) { - return proto.ml_metadata.GetLineageGraphResponse.toObject(opt_includeInstance, this); +proto.ml_metadata.GetLineageSubgraphResponse.prototype.toObject = function(opt_includeInstance) { + return proto.ml_metadata.GetLineageSubgraphResponse.toObject(opt_includeInstance, this); }; @@ -20060,13 +26187,13 @@ proto.ml_metadata.GetLineageGraphResponse.prototype.toObject = function(opt_incl * @param {boolean|undefined} includeInstance Deprecated. Whether to include * the JSPB instance for transitional soy proto support: * http://goto/soy-param-migration - * @param {!proto.ml_metadata.GetLineageGraphResponse} msg The msg instance to transform. + * @param {!proto.ml_metadata.GetLineageSubgraphResponse} msg The msg instance to transform. * @return {!Object} * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetLineageGraphResponse.toObject = function(includeInstance, msg) { +proto.ml_metadata.GetLineageSubgraphResponse.toObject = function(includeInstance, msg) { var f, obj = { - subgraph: (f = msg.getSubgraph()) && ml_metadata_proto_metadata_store_pb.LineageGraph.toObject(includeInstance, f) + lineageSubgraph: (f = msg.getLineageSubgraph()) && ml_metadata_proto_metadata_store_pb.LineageGraph.toObject(includeInstance, f) }; if (includeInstance) { @@ -20080,23 +26207,23 @@ proto.ml_metadata.GetLineageGraphResponse.toObject = function(includeInstance, m /** * Deserializes binary data (in protobuf wire format). * @param {jspb.ByteSource} bytes The bytes to deserialize. - * @return {!proto.ml_metadata.GetLineageGraphResponse} + * @return {!proto.ml_metadata.GetLineageSubgraphResponse} */ -proto.ml_metadata.GetLineageGraphResponse.deserializeBinary = function(bytes) { +proto.ml_metadata.GetLineageSubgraphResponse.deserializeBinary = function(bytes) { var reader = new jspb.BinaryReader(bytes); - var msg = new proto.ml_metadata.GetLineageGraphResponse; - return proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader(msg, reader); + var msg = new proto.ml_metadata.GetLineageSubgraphResponse; + return proto.ml_metadata.GetLineageSubgraphResponse.deserializeBinaryFromReader(msg, reader); }; /** * Deserializes binary data (in protobuf wire format) from the * given reader into the given message object. - * @param {!proto.ml_metadata.GetLineageGraphResponse} msg The message object to deserialize into. + * @param {!proto.ml_metadata.GetLineageSubgraphResponse} msg The message object to deserialize into. * @param {!jspb.BinaryReader} reader The BinaryReader to use. 
- * @return {!proto.ml_metadata.GetLineageGraphResponse} + * @return {!proto.ml_metadata.GetLineageSubgraphResponse} */ -proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader = function(msg, reader) { +proto.ml_metadata.GetLineageSubgraphResponse.deserializeBinaryFromReader = function(msg, reader) { while (reader.nextField()) { if (reader.isEndGroup()) { break; @@ -20106,7 +26233,7 @@ proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader = function case 1: var value = new ml_metadata_proto_metadata_store_pb.LineageGraph; reader.readMessage(value,ml_metadata_proto_metadata_store_pb.LineageGraph.deserializeBinaryFromReader); - msg.setSubgraph(value); + msg.setLineageSubgraph(value); break; default: reader.skipField(); @@ -20121,9 +26248,9 @@ proto.ml_metadata.GetLineageGraphResponse.deserializeBinaryFromReader = function * Serializes the message to binary data (in protobuf wire format). * @return {!Uint8Array} */ -proto.ml_metadata.GetLineageGraphResponse.prototype.serializeBinary = function() { +proto.ml_metadata.GetLineageSubgraphResponse.prototype.serializeBinary = function() { var writer = new jspb.BinaryWriter(); - proto.ml_metadata.GetLineageGraphResponse.serializeBinaryToWriter(this, writer); + proto.ml_metadata.GetLineageSubgraphResponse.serializeBinaryToWriter(this, writer); return writer.getResultBuffer(); }; @@ -20131,13 +26258,13 @@ proto.ml_metadata.GetLineageGraphResponse.prototype.serializeBinary = function() /** * Serializes the given message to binary data (in protobuf wire * format), writing to the given BinaryWriter. - * @param {!proto.ml_metadata.GetLineageGraphResponse} message + * @param {!proto.ml_metadata.GetLineageSubgraphResponse} message * @param {!jspb.BinaryWriter} writer * @suppress {unusedLocalVariables} f is only used for nested messages */ -proto.ml_metadata.GetLineageGraphResponse.serializeBinaryToWriter = function(message, writer) { +proto.ml_metadata.GetLineageSubgraphResponse.serializeBinaryToWriter = function(message, writer) { var f = undefined; - f = message.getSubgraph(); + f = message.getLineageSubgraph(); if (f != null) { writer.writeMessage( 1, @@ -20149,10 +26276,10 @@ proto.ml_metadata.GetLineageGraphResponse.serializeBinaryToWriter = function(mes /** - * optional LineageGraph subgraph = 1; + * optional LineageGraph lineage_subgraph = 1; * @return {?proto.ml_metadata.LineageGraph} */ -proto.ml_metadata.GetLineageGraphResponse.prototype.getSubgraph = function() { +proto.ml_metadata.GetLineageSubgraphResponse.prototype.getLineageSubgraph = function() { return /** @type{?proto.ml_metadata.LineageGraph} */ ( jspb.Message.getWrapperField(this, ml_metadata_proto_metadata_store_pb.LineageGraph, 1)); }; @@ -20160,19 +26287,19 @@ proto.ml_metadata.GetLineageGraphResponse.prototype.getSubgraph = function() { /** * @param {?proto.ml_metadata.LineageGraph|undefined} value - * @return {!proto.ml_metadata.GetLineageGraphResponse} returns this + * @return {!proto.ml_metadata.GetLineageSubgraphResponse} returns this */ -proto.ml_metadata.GetLineageGraphResponse.prototype.setSubgraph = function(value) { +proto.ml_metadata.GetLineageSubgraphResponse.prototype.setLineageSubgraph = function(value) { return jspb.Message.setWrapperField(this, 1, value); }; /** * Clears the message field making it undefined. 
- * @return {!proto.ml_metadata.GetLineageGraphResponse} returns this + * @return {!proto.ml_metadata.GetLineageSubgraphResponse} returns this */ -proto.ml_metadata.GetLineageGraphResponse.prototype.clearSubgraph = function() { - return this.setSubgraph(undefined); +proto.ml_metadata.GetLineageSubgraphResponse.prototype.clearLineageSubgraph = function() { + return this.setLineageSubgraph(undefined); }; @@ -20180,7 +26307,7 @@ proto.ml_metadata.GetLineageGraphResponse.prototype.clearSubgraph = function() { * Returns whether this field is set. * @return {boolean} */ -proto.ml_metadata.GetLineageGraphResponse.prototype.hasSubgraph = function() { +proto.ml_metadata.GetLineageSubgraphResponse.prototype.hasLineageSubgraph = function() { return jspb.Message.getField(this, 1) != null; }; diff --git a/go.mod b/go.mod index d88deba061..5a6fffb148 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/cenkalti/backoff v2.2.1+incompatible github.com/eapache/go-resiliency v1.2.0 github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a // indirect - github.com/emicklei/go-restful v2.15.0+incompatible // indirect github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5 // indirect github.com/fsnotify/fsnotify v1.5.1 github.com/go-openapi/errors v0.20.2 @@ -32,7 +31,7 @@ require ( github.com/jinzhu/now v1.1.4 // indirect github.com/kubeflow/pipelines/api v0.0.0-20230331215358-758c91f76784 github.com/kubeflow/pipelines/kubernetes_platform v0.0.0-20230404213301-bd9f74e34de6 - github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20220118175555-e78ed557ddcb + github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20230810215105-e1f0c010f800 github.com/lestrrat-go/strftime v1.0.4 github.com/mattn/go-sqlite3 v1.14.16 github.com/minio/minio-go/v6 v6.0.57 diff --git a/go.sum b/go.sum index 70508caa62..d349781102 100644 --- a/go.sum +++ b/go.sum @@ -359,9 +359,8 @@ github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a h1:A4wNiqeKqU56Zht github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.12.0+incompatible h1:SIvoTSbsMEwuM3dzFirLwKc4BH6VXP5CNf+G1FfJVr4= github.com/emicklei/go-restful v2.12.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.15.0+incompatible h1:8KpYO/Xl/ZudZs5RNOEhWMBY4hmzlZhhRd9cu+jrZP4= -github.com/emicklei/go-restful v2.15.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful/v3 v3.8.0 h1:eCZ8ulSerjdAiaNpF7GxXIE7ZCMo1moN1qX+S609eVw= github.com/emicklei/go-restful/v3 v3.8.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= @@ -938,8 +937,8 @@ github.com/kubeflow/pipelines/api v0.0.0-20230331215358-758c91f76784 h1:ZVCoqnKn github.com/kubeflow/pipelines/api v0.0.0-20230331215358-758c91f76784/go.mod h1:T7TOQB36gGe97yUdfVAnYK5uuT0+uQbLNHDUHxYkmE4= github.com/kubeflow/pipelines/kubernetes_platform v0.0.0-20230404213301-bd9f74e34de6 h1:ApWW5ZH45ruvQCmkp7RewHlPKGwqBNSSRxEHGJFiAOA= github.com/kubeflow/pipelines/kubernetes_platform v0.0.0-20230404213301-bd9f74e34de6/go.mod h1:CJkKr356RlpZP/gQRuHf3Myrn1qJtoUVe4EMCmtwarg= 
-github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20220118175555-e78ed557ddcb h1:i0RzcKBlfGHueIwrUlKB+AvVZPuMUJIYe1g8nvhwgbo= -github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20220118175555-e78ed557ddcb/go.mod h1:chIDffBaVQ/asNl1pTTdbAymYcuBKf8BR3YtSP+3FEU= +github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20230810215105-e1f0c010f800 h1:YAW+X9xCW8Yq5tQaBBQaLTNU9CJj8Nr7lx1+k66ZHJ0= +github.com/kubeflow/pipelines/third_party/ml-metadata v0.0.0-20230810215105-e1f0c010f800/go.mod h1:chIDffBaVQ/asNl1pTTdbAymYcuBKf8BR3YtSP+3FEU= github.com/labstack/echo v3.2.1+incompatible/go.mod h1:0INS7j/VjnFxD4E2wkz67b8cVwCLbBmJyDaka6Cmk1s= github.com/labstack/gommon v0.2.7/go.mod h1:/tj9csK2iPSBvn+3NLM9e52usepMtrd5ilFYA+wQNJ4= github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw= diff --git a/hack/update-all-requirements.sh b/hack/update-all-requirements.sh index 3a71bf81e9..8b49c0f434 100755 --- a/hack/update-all-requirements.sh +++ b/hack/update-all-requirements.sh @@ -18,6 +18,5 @@ set -euo pipefail DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)" REPO_ROOT="${DIR}/.." -cd "${REPO_ROOT}/backend" && bash update_requirements.sh cd "${REPO_ROOT}/backend/src/apiserver/visualization" && bash update_requirements.sh cd "${REPO_ROOT}/test/sample-test/hack" && bash update_requirements.sh diff --git a/manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml b/manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml index 4ed344491d..1ea10488d6 100644 --- a/manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml +++ b/manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml @@ -22,7 +22,7 @@ spec: # * .cloudbuild.yaml and .release.cloudbuild.yaml # * manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml # * test/tag_for_hosted.sh - image: gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0 + image: gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0 env: - name: DBCONFIG_USER valueFrom: diff --git a/test/tag_for_hosted.sh b/test/tag_for_hosted.sh index ba0ce64e88..8a50cd65f2 100755 --- a/test/tag_for_hosted.sh +++ b/test/tag_for_hosted.sh @@ -100,8 +100,8 @@ docker push gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA:$MM_VER # * .cloudbuild.yaml and .release.cloudbuild.yaml # * manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml # * test/tag_for_hosted.sh -docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0 gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$SEM_VER -docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.5.0 gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$MM_VER +docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0 gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$SEM_VER +docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:1.14.0 gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$MM_VER docker push gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$SEM_VER docker push gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/metadataserver:$MM_VER diff --git a/third_party/ml-metadata/README.md b/third_party/ml-metadata/README.md index 4267604a94..1af380896f 100644 --- a/third_party/ml-metadata/README.md +++ b/third_party/ml-metadata/README.md @@ -30,6 +30,14 @@ Make sure the generated files are as expected. Update clients as described below Make sure you have installed tools and packages in [grpc golang prerequisites](https://grpc.io/docs/languages/go/quickstart/#prerequisites). 
+NOTE: The versions for tools are important, following is a record for when the version combination works successfully. + +```bash +apt install -y protobuf-compiler=3.15.8 +go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26 +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1 +``` + #### Command ```bash diff --git a/third_party/ml-metadata/VERSION b/third_party/ml-metadata/VERSION index 3e1ad720b1..cd99d386a8 100644 --- a/third_party/ml-metadata/VERSION +++ b/third_party/ml-metadata/VERSION @@ -1 +1 @@ -1.5.0 \ No newline at end of file +1.14.0 \ No newline at end of file diff --git a/third_party/ml-metadata/go.mod b/third_party/ml-metadata/go.mod index a9e89505af..e7853a4a68 100644 --- a/third_party/ml-metadata/go.mod +++ b/third_party/ml-metadata/go.mod @@ -3,7 +3,6 @@ module github.com/kubeflow/pipelines/third_party/ml-metadata go 1.16 require ( - github.com/golang/protobuf v1.5.2 golang.org/x/net v0.0.0-20211216030914-fe4d6282115f // indirect golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect golang.org/x/text v0.3.7 // indirect diff --git a/third_party/ml-metadata/go.sum b/third_party/ml-metadata/go.sum index 60f21d7c35..d4502821e9 100644 --- a/third_party/ml-metadata/go.sum +++ b/third_party/ml-metadata/go.sum @@ -68,7 +68,6 @@ golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211216030914-fe4d6282115f h1:hEYJvxw1lSnWIl8X9ofsYMklzaDs90JI2az5YMd4fPM= @@ -82,7 +81,6 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -91,7 +89,6 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -111,7 +108,6 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20211221231510-d629cc9a93d5 h1:v7aOwCaINsgis88/5e6DEZ6TlP7vXueAw/Ftqd5rm+w= google.golang.org/genproto v0.0.0-20211221231510-d629cc9a93d5/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= @@ -122,8 +118,6 @@ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= -google.golang.org/grpc v1.42.0 h1:XT2/MFpuPFsEX2fWh3YQtHkZ+WYZFQRfaUgLZYj/p6A= -google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.43.0 h1:Eeu7bZtDZ2DpRCsLhUlcrLnvYaMK1Gz86a+hMVvELmM= google.golang.org/grpc v1.43.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= diff --git a/third_party/ml-metadata/go/ml_metadata/metadata_store.pb.go b/third_party/ml-metadata/go/ml_metadata/metadata_store.pb.go index c2988f5e0c..ed2fa881d6 100644 --- a/third_party/ml-metadata/go/ml_metadata/metadata_store.pb.go +++ b/third_party/ml-metadata/go/ml_metadata/metadata_store.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.26.0 -// protoc v3.17.3 +// protoc v3.15.8 // source: ml_metadata/proto/metadata_store.proto package ml_metadata @@ -26,6 +26,7 @@ import ( protoiface "google.golang.org/protobuf/runtime/protoiface" protoimpl "google.golang.org/protobuf/runtime/protoimpl" descriptorpb "google.golang.org/protobuf/types/descriptorpb" + anypb "google.golang.org/protobuf/types/known/anypb" structpb "google.golang.org/protobuf/types/known/structpb" reflect "reflect" sync "sync" @@ -46,7 +47,11 @@ const ( PropertyType_INT PropertyType = 1 PropertyType_DOUBLE PropertyType = 2 PropertyType_STRING PropertyType = 3 + // Prefer to use `PROTO` to store structed data since this option has + // inefficient database storage usage. PropertyType_STRUCT PropertyType = 4 + PropertyType_PROTO PropertyType = 5 + PropertyType_BOOLEAN PropertyType = 6 ) // Enum value maps for PropertyType. @@ -57,6 +62,8 @@ var ( 2: "DOUBLE", 3: "STRING", 4: "STRUCT", + 5: "PROTO", + 6: "BOOLEAN", } PropertyType_value = map[string]int32{ "UNKNOWN": 0, @@ -64,6 +71,8 @@ var ( "DOUBLE": 2, "STRING": 3, "STRUCT": 4, + "PROTO": 5, + "BOOLEAN": 6, } ) @@ -117,6 +126,17 @@ const ( Artifact_MARKED_FOR_DELETION Artifact_State = 3 // A state indicating that the artifact has been deleted. 
Artifact_DELETED Artifact_State = 4 + // A state indicating that the artifact has been abandoned, which may be + // due to a failed or cancelled execution. + Artifact_ABANDONED Artifact_State = 5 + // A state indicating that the artifact is a reference artifact. At + // execution start time, the orchestrator produces an output artifact for + // each output key with state PENDING. However, for an intermediate + // artifact, this first artifact's state will be REFERENCE. Intermediate + // artifacts emitted during a component's execution will copy the REFERENCE + // artifact's attributes. At the end of an execution, the artifact state + // should remain REFERENCE instead of being changed to LIVE. + Artifact_REFERENCE Artifact_State = 6 ) // Enum value maps for Artifact_State. @@ -127,6 +147,8 @@ var ( 2: "LIVE", 3: "MARKED_FOR_DELETION", 4: "DELETED", + 5: "ABANDONED", + 6: "REFERENCE", } Artifact_State_value = map[string]int32{ "UNKNOWN": 0, @@ -134,6 +156,8 @@ var ( "LIVE": 2, "MARKED_FOR_DELETION": 3, "DELETED": 4, + "ABANDONED": 5, + "REFERENCE": 6, } ) @@ -254,6 +278,7 @@ const ( Event_OUTPUT Event_Type = 4 // An output of the execution. Event_INTERNAL_INPUT Event_Type = 5 // An internal input of the execution. Event_INTERNAL_OUTPUT Event_Type = 6 // An internal output of the execution. + Event_PENDING_OUTPUT Event_Type = 7 // A pending output of the execution. ) // Enum value maps for Event_Type. @@ -266,6 +291,7 @@ var ( 4: "OUTPUT", 5: "INTERNAL_INPUT", 6: "INTERNAL_OUTPUT", + 7: "PENDING_OUTPUT", } Event_Type_value = map[string]int32{ "UNKNOWN": 0, @@ -275,6 +301,7 @@ var ( "OUTPUT": 4, "INTERNAL_INPUT": 5, "INTERNAL_OUTPUT": 6, + "PENDING_OUTPUT": 7, } ) @@ -646,7 +673,74 @@ func (x *ListOperationOptions_OrderByField_Field) UnmarshalJSON(b []byte) error // Deprecated: Use ListOperationOptions_OrderByField_Field.Descriptor instead. func (ListOperationOptions_OrderByField_Field) EnumDescriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{30, 0, 0} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{31, 0, 0} +} + +type LineageSubgraphQueryOptions_Direction int32 + +const ( + // Direction is by defult DIRECTION_UNSPECIFIED, which is equivalent to + // BIDIRECTIONAL. + LineageSubgraphQueryOptions_DIRECTION_UNSPECIFIED LineageSubgraphQueryOptions_Direction = 0 + // Indicates tracing the lineage graph by hops in upstream direction. + LineageSubgraphQueryOptions_UPSTREAM LineageSubgraphQueryOptions_Direction = 1 + // Indicates tracing the lineage graph by hops in downstream direction. + LineageSubgraphQueryOptions_DOWNSTREAM LineageSubgraphQueryOptions_Direction = 2 + // Indicates tracing the lineage graph in both directions. + LineageSubgraphQueryOptions_BIDIRECTIONAL LineageSubgraphQueryOptions_Direction = 3 +) + +// Enum value maps for LineageSubgraphQueryOptions_Direction. 
+var ( + LineageSubgraphQueryOptions_Direction_name = map[int32]string{ + 0: "DIRECTION_UNSPECIFIED", + 1: "UPSTREAM", + 2: "DOWNSTREAM", + 3: "BIDIRECTIONAL", + } + LineageSubgraphQueryOptions_Direction_value = map[string]int32{ + "DIRECTION_UNSPECIFIED": 0, + "UPSTREAM": 1, + "DOWNSTREAM": 2, + "BIDIRECTIONAL": 3, + } +) + +func (x LineageSubgraphQueryOptions_Direction) Enum() *LineageSubgraphQueryOptions_Direction { + p := new(LineageSubgraphQueryOptions_Direction) + *p = x + return p +} + +func (x LineageSubgraphQueryOptions_Direction) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (LineageSubgraphQueryOptions_Direction) Descriptor() protoreflect.EnumDescriptor { + return file_ml_metadata_proto_metadata_store_proto_enumTypes[9].Descriptor() +} + +func (LineageSubgraphQueryOptions_Direction) Type() protoreflect.EnumType { + return &file_ml_metadata_proto_metadata_store_proto_enumTypes[9] +} + +func (x LineageSubgraphQueryOptions_Direction) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *LineageSubgraphQueryOptions_Direction) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = LineageSubgraphQueryOptions_Direction(num) + return nil +} + +// Deprecated: Use LineageSubgraphQueryOptions_Direction.Descriptor instead. +func (LineageSubgraphQueryOptions_Direction) EnumDescriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{35, 0} } type SystemTypeExtension struct { @@ -708,6 +802,8 @@ type Value struct { // *Value_DoubleValue // *Value_StringValue // *Value_StructValue + // *Value_ProtoValue + // *Value_BoolValue Value isValue_Value `protobuf_oneof:"value"` } @@ -778,6 +874,20 @@ func (x *Value) GetStructValue() *structpb.Struct { return nil } +func (x *Value) GetProtoValue() *anypb.Any { + if x, ok := x.GetValue().(*Value_ProtoValue); ok { + return x.ProtoValue + } + return nil +} + +func (x *Value) GetBoolValue() bool { + if x, ok := x.GetValue().(*Value_BoolValue); ok { + return x.BoolValue + } + return false +} + type isValue_Value interface { isValue_Value() } @@ -798,6 +908,14 @@ type Value_StructValue struct { StructValue *structpb.Struct `protobuf:"bytes,4,opt,name=struct_value,json=structValue,oneof"` } +type Value_ProtoValue struct { + ProtoValue *anypb.Any `protobuf:"bytes,5,opt,name=proto_value,json=protoValue,oneof"` +} + +type Value_BoolValue struct { + BoolValue bool `protobuf:"varint,6,opt,name=bool_value,json=boolValue,oneof"` +} + func (*Value_IntValue) isValue_Value() {} func (*Value_DoubleValue) isValue_Value() {} @@ -806,12 +924,16 @@ func (*Value_StringValue) isValue_Value() {} func (*Value_StructValue) isValue_Value() {} +func (*Value_ProtoValue) isValue_Value() {} + +func (*Value_BoolValue) isValue_Value() {} + type Artifact struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Output only. The globally unique server generated id of the artifact. + // Output only. The unique server generated id of the artifact. Id *int64 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` // The client provided name of the artifact. This field is optional. If set, // it must be unique among all the artifacts of the same artifact type within @@ -825,6 +947,9 @@ type Artifact struct { // The uniform resource identifier of the physical artifact. 
// May be empty if there is no physical artifact. Uri *string `protobuf:"bytes,3,opt,name=uri" json:"uri,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all artifacts within a database instance. + ExternalId *string `protobuf:"bytes,11,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // Properties of the artifact. // Properties must be specified in the ArtifactType. Properties map[string]*Value `protobuf:"bytes,4,rep,name=properties" json:"properties,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` @@ -837,6 +962,8 @@ type Artifact struct { // Output only. Last update time of the artifact since epoch in millisecond // since epoch. LastUpdateTimeSinceEpoch *int64 `protobuf:"varint,10,opt,name=last_update_time_since_epoch,json=lastUpdateTimeSinceEpoch" json:"last_update_time_since_epoch,omitempty"` + // Output only. + SystemMetadata *anypb.Any `protobuf:"bytes,12,opt,name=system_metadata,json=systemMetadata" json:"system_metadata,omitempty"` } func (x *Artifact) Reset() { @@ -906,6 +1033,13 @@ func (x *Artifact) GetUri() string { return "" } +func (x *Artifact) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *Artifact) GetProperties() map[string]*Value { if x != nil { return x.Properties @@ -941,6 +1075,13 @@ func (x *Artifact) GetLastUpdateTimeSinceEpoch() int64 { return 0 } +func (x *Artifact) GetSystemMetadata() *anypb.Any { + if x != nil { + return x.SystemMetadata + } + return nil +} + type ArtifactType struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -955,6 +1096,10 @@ type ArtifactType struct { Version *string `protobuf:"bytes,4,opt,name=version" json:"version,omitempty"` // An optional description about the type. Description *string `protobuf:"bytes,5,opt,name=description" json:"description,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all artifact types within a database + // instance. + ExternalId *string `protobuf:"bytes,7,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // The schema of the type. // Properties are always optional in the artifact. // Properties of an artifact type can be expanded but not contracted (i.e., @@ -1026,6 +1171,13 @@ func (x *ArtifactType) GetDescription() string { return "" } +func (x *ArtifactType) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *ArtifactType) GetProperties() map[string]PropertyType { if x != nil { return x.Properties @@ -1074,40 +1226,57 @@ func (x *ArtifactType) GetBaseType() ArtifactType_SystemDefinedBaseType { // type:DECLARED_OUTPUT, // path:{step:[{"key":"my_result"}]} // } -// Other event types include INPUT/OUTPUT and INTERNAL_INPUT/_OUTPUT. +// +// Other event types include INPUT/OUTPUT, INTERNAL_INPUT/_OUTPUT and +// PENDING_OUTPUT: +// // * The INPUT/OUTPUT is an event that actually reads/writes an artifact by an // execution. The input/output artifacts may not declared in the signature, // For example, the trainer may output multiple caches of the parameters // (as an OUTPUT), then finally write the SavedModel as a DECLARED_OUTPUT. +// // * The INTERNAL_INPUT/_OUTPUT are event types which are only meaningful to // an orchestration system to keep track of the details for later debugging. 
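// --- Illustrative sketch (not generated code): attaching the new external_id
// field to an ArtifactType and an Artifact so they can be looked up by the id
// used in the client's own system. The import path, type name, URI and the
// example ids are assumptions.
package example

import (
	"google.golang.org/protobuf/proto"

	mlmd "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata"
)

func newDatasetWithExternalID() (*mlmd.ArtifactType, *mlmd.Artifact) {
	datasetType := &mlmd.ArtifactType{
		Name:       proto.String("system.Dataset"),
		ExternalId: proto.String("kfp-type/system.Dataset"), // must be unique per database instance
		Properties: map[string]mlmd.PropertyType{
			"rows": mlmd.PropertyType_INT,
		},
	}
	dataset := &mlmd.Artifact{
		Uri:        proto.String("minio://mlpipeline/v2/artifacts/iris.csv"),
		ExternalId: proto.String("kfp-artifact/run-123/iris"), // unique among all artifacts
		State:      mlmd.Artifact_LIVE.Enum(),
		Properties: map[string]*mlmd.Value{
			"rows": {Value: &mlmd.Value_IntValue{IntValue: 150}},
		},
	}
	return datasetType, dataset
}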
// For example, a fork happened conditioning on an artifact, then an execution -// is triggered, such fork implementating may need to log the read and write +// is triggered, such fork implementing may need to log the read and write // of artifacts and may not be worth displaying to the users. // -// For instance, in the above example, +// For instance, in the above example, // -// my_result = my_execution({"data":[3,7],"schema":8}) +// my_result = my_execution({"data":[3,7],"schema":8}) // -// there is another execution (id: 15), which represents a `garbage_collection` -// step in an orchestration system +// there is another execution (id: 15), which represents a +// `garbage_collection` step in an orchestration system // -// gc_result = garbage_collection(my_result) +// gc_result = garbage_collection(my_result) // -// that cleans `my_result` if needed. The details should be invisible to the -// end users and lineage tracking. The orchestrator can emit following events: +// that cleans `my_result` if needed. The details should be invisible to the +// end users and lineage tracking. The orchestrator can emit following events: // -// { -// artifact_id: 15, -// execution_id: 15, -// type:INTERNAL_INPUT, -// } -// { -// artifact_id:16, // New artifact containing the GC job result. -// execution_id: 15, -// type:INTERNAL_OUTPUT, -// path:{step:[{"key":"gc_result"}]} -// } +// { +// artifact_id: 15, +// execution_id: 15, +// type:INTERNAL_INPUT, +// } +// { +// artifact_id:16, // New artifact containing the GC job result. +// execution_id: 15, +// type:INTERNAL_OUTPUT, +// path:{step:[{"key":"gc_result"}]} +// } +// +// * The PENDING_OUTPUT event is used to indicate that an artifact is +// tentatively associated with an active execution which has not yet been +// finalized. For example, an orchestration system can register output +// artifacts of a running execution with PENDING_OUTPUT events to indicate +// the output artifacts the execution is expected to produce. When the +// execution is finished, the final set of output artifacts can be associated +// with the exeution using OUTPUT events, and any unused artifacts which were +// previously registered with PENDING_OUTPUT events can be updated to set +// their Artifact.State to ABANDONED. +// +// Events are unique of the same +// (artifact_id, execution_id, type) combination within a metadata store. type Event struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1126,6 +1295,8 @@ type Event struct { // Time the event occurred // Epoch is Jan 1, 1970, UTC MillisecondsSinceEpoch *int64 `protobuf:"varint,5,opt,name=milliseconds_since_epoch,json=millisecondsSinceEpoch" json:"milliseconds_since_epoch,omitempty"` + // Output only. + SystemMetadata *anypb.Any `protobuf:"bytes,6,opt,name=system_metadata,json=systemMetadata" json:"system_metadata,omitempty"` } func (x *Event) Reset() { @@ -1195,12 +1366,19 @@ func (x *Event) GetMillisecondsSinceEpoch() int64 { return 0 } +func (x *Event) GetSystemMetadata() *anypb.Any { + if x != nil { + return x.SystemMetadata + } + return nil +} + type Execution struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Output only. The globally unique server generated id of the execution. + // Output only. The unique server generated id of the execution. Id *int64 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` // The client provided name of the execution. This field is optional. 
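// --- Illustrative sketch (not generated code) of the PENDING_OUTPUT flow
// described in the Event comment above: register tentative outputs while an
// execution is running, then emit OUTPUT events for the artifacts it actually
// produced and flip the leftovers to the new ABANDONED state. The import path
// and the helper names are assumptions; only message construction is shown.
package example

import (
	"google.golang.org/protobuf/proto"

	mlmd "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata"
)

// declarePendingOutput links a not-yet-finalized artifact to a running
// execution with the new PENDING_OUTPUT event type.
func declarePendingOutput(executionID, artifactID int64) *mlmd.Event {
	return &mlmd.Event{
		ExecutionId: proto.Int64(executionID),
		ArtifactId:  proto.Int64(artifactID),
		Type:        mlmd.Event_PENDING_OUTPUT.Enum(),
	}
}

// finalizeOutputs runs once the execution finishes: produced artifacts get
// OUTPUT events, and unused pending artifacts are marked ABANDONED.
func finalizeOutputs(executionID int64, produced, unused []*mlmd.Artifact) []*mlmd.Event {
	var events []*mlmd.Event
	for _, a := range produced {
		events = append(events, &mlmd.Event{
			ExecutionId: proto.Int64(executionID),
			ArtifactId:  proto.Int64(a.GetId()),
			Type:        mlmd.Event_OUTPUT.Enum(),
		})
	}
	for _, a := range unused {
		a.State = mlmd.Artifact_ABANDONED.Enum()
	}
	return events
}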
If set, // it must be unique among all the executions of the same execution type @@ -1212,6 +1390,9 @@ type Execution struct { TypeId *int64 `protobuf:"varint,2,opt,name=type_id,json=typeId" json:"type_id,omitempty"` // Output only. The name of an ExecutionType. Type *string `protobuf:"bytes,7,opt,name=type" json:"type,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all executions within a database instance. + ExternalId *string `protobuf:"bytes,10,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // The last known state of an execution in the system. LastKnownState *Execution_State `protobuf:"varint,3,opt,name=last_known_state,json=lastKnownState,enum=ml_metadata.Execution_State" json:"last_known_state,omitempty"` // Properties of the Execution. @@ -1223,6 +1404,8 @@ type Execution struct { CreateTimeSinceEpoch *int64 `protobuf:"varint,8,opt,name=create_time_since_epoch,json=createTimeSinceEpoch" json:"create_time_since_epoch,omitempty"` // Output only. Last update time of the execution in millisecond since epoch. LastUpdateTimeSinceEpoch *int64 `protobuf:"varint,9,opt,name=last_update_time_since_epoch,json=lastUpdateTimeSinceEpoch" json:"last_update_time_since_epoch,omitempty"` + // Output only. + SystemMetadata *anypb.Any `protobuf:"bytes,11,opt,name=system_metadata,json=systemMetadata" json:"system_metadata,omitempty"` } func (x *Execution) Reset() { @@ -1285,6 +1468,13 @@ func (x *Execution) GetType() string { return "" } +func (x *Execution) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *Execution) GetLastKnownState() Execution_State { if x != nil && x.LastKnownState != nil { return *x.LastKnownState @@ -1320,6 +1510,13 @@ func (x *Execution) GetLastUpdateTimeSinceEpoch() int64 { return 0 } +func (x *Execution) GetSystemMetadata() *anypb.Any { + if x != nil { + return x.SystemMetadata + } + return nil +} + type ExecutionType struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1334,6 +1531,10 @@ type ExecutionType struct { Version *string `protobuf:"bytes,6,opt,name=version" json:"version,omitempty"` // An optional description about the type. Description *string `protobuf:"bytes,7,opt,name=description" json:"description,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all execution types within a database + // instance. + ExternalId *string `protobuf:"bytes,9,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // The schema of the type. // Properties are always optional in the execution. Properties map[string]PropertyType `protobuf:"bytes,3,rep,name=properties" json:"properties,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"varint,2,opt,name=value,enum=ml_metadata.PropertyType"` @@ -1424,6 +1625,13 @@ func (x *ExecutionType) GetDescription() string { return "" } +func (x *ExecutionType) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *ExecutionType) GetProperties() map[string]PropertyType { if x != nil { return x.Properties @@ -1466,6 +1674,10 @@ type ContextType struct { Version *string `protobuf:"bytes,4,opt,name=version" json:"version,omitempty"` // An optional description about the type. 
Description *string `protobuf:"bytes,5,opt,name=description" json:"description,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all context types within a database + // instance. + ExternalId *string `protobuf:"bytes,7,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // The schema of the type, e.g., name: string, owner: string // Properties are always optional in the context. // Properties of an context type can be expanded but not contracted (i.e., @@ -1538,6 +1750,13 @@ func (x *ContextType) GetDescription() string { return "" } +func (x *ContextType) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *ContextType) GetProperties() map[string]PropertyType { if x != nil { return x.Properties @@ -1557,7 +1776,7 @@ type Context struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Output Only. The globally unique server generated id of the context. + // Output Only. The unique server generated id of the context. Id *int64 `protobuf:"varint,1,opt,name=id" json:"id,omitempty"` // The client provided name of the context. It must be unique within a // database instance. @@ -1567,6 +1786,9 @@ type Context struct { TypeId *int64 `protobuf:"varint,2,opt,name=type_id,json=typeId" json:"type_id,omitempty"` // Output only. The name of a ContextType. Type *string `protobuf:"bytes,6,opt,name=type" json:"type,omitempty"` + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all contexts within a virtual database. + ExternalId *string `protobuf:"bytes,9,opt,name=external_id,json=externalId" json:"external_id,omitempty"` // Values of the properties, which must be specified in the ContextType. Properties map[string]*Value `protobuf:"bytes,4,rep,name=properties" json:"properties,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` // User provided custom properties which are not defined by its type. @@ -1575,6 +1797,8 @@ type Context struct { CreateTimeSinceEpoch *int64 `protobuf:"varint,7,opt,name=create_time_since_epoch,json=createTimeSinceEpoch" json:"create_time_since_epoch,omitempty"` // Output only. Last update time of the context in millisecond since epoch. LastUpdateTimeSinceEpoch *int64 `protobuf:"varint,8,opt,name=last_update_time_since_epoch,json=lastUpdateTimeSinceEpoch" json:"last_update_time_since_epoch,omitempty"` + // Output only system metadata. + SystemMetadata *anypb.Any `protobuf:"bytes,10,opt,name=system_metadata,json=systemMetadata" json:"system_metadata,omitempty"` } func (x *Context) Reset() { @@ -1637,6 +1861,13 @@ func (x *Context) GetType() string { return "" } +func (x *Context) GetExternalId() string { + if x != nil && x.ExternalId != nil { + return *x.ExternalId + } + return "" +} + func (x *Context) GetProperties() map[string]*Value { if x != nil { return x.Properties @@ -1665,6 +1896,13 @@ func (x *Context) GetLastUpdateTimeSinceEpoch() int64 { return 0 } +func (x *Context) GetSystemMetadata() *anypb.Any { + if x != nil { + return x.SystemMetadata + } + return nil +} + // the Attribution edges between Context and Artifact instances. type Attribution struct { state protoimpl.MessageState @@ -2564,7 +2802,7 @@ type MySQLDatabaseConfig struct { Host *string `protobuf:"bytes,1,opt,name=host" json:"host,omitempty"` // The TCP Port number that the MYSQL server accepts connections on. 
// If unspecified, the default MYSQL port (3306) is used. - Port *uint32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` + Port *int64 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` // The database to connect to. Must be specified. // After connecting to the MYSQL server, this database is created if not // already present unless skip_db_creation is set. @@ -2627,7 +2865,7 @@ func (x *MySQLDatabaseConfig) GetHost() string { return "" } -func (x *MySQLDatabaseConfig) GetPort() uint32 { +func (x *MySQLDatabaseConfig) GetPort() int64 { if x != nil && x.Port != nil { return *x.Port } @@ -2743,6 +2981,133 @@ func (x *SqliteMetadataSourceConfig) GetConnectionMode() SqliteMetadataSourceCon return SqliteMetadataSourceConfig_UNKNOWN } +// A config contains the parameters when using with PostgreSQLMetadatSource. +// Next index: 10 +type PostgreSQLDatabaseConfig struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Name of host to connect to. If the host name starts with /, it is taken as + // a Unix-domain socket in the abstract namespace. + Host *string `protobuf:"bytes,1,opt,name=host" json:"host,omitempty"` + // Numeric IP address of host to connect to. If this field is provided, `host` + // field is ignored. + Hostaddr *string `protobuf:"bytes,2,opt,name=hostaddr" json:"hostaddr,omitempty"` + // Port number to connect to at the server host, or socket file name extension + // for Unix-domain connections. + Port *string `protobuf:"bytes,3,opt,name=port" json:"port,omitempty"` + // PostgreSQL user name to connect as. Defaults to be the same as the + // operating system name of the user running the application. + User *string `protobuf:"bytes,4,opt,name=user" json:"user,omitempty"` + // Password to be used if the server demands password authentication. + Password *string `protobuf:"bytes,5,opt,name=password" json:"password,omitempty"` + // Specifies the name of the file used to store passwords. + Passfile *string `protobuf:"bytes,6,opt,name=passfile" json:"passfile,omitempty"` + // The database name. Defaults to be the same as the user name. + Dbname *string `protobuf:"bytes,7,opt,name=dbname" json:"dbname,omitempty"` + // A config to skip the database creation if not exist when connecting the + // db instance. It is useful when the db creation is handled by an admin + // process, while the lib user should not issue db creation clauses. + SkipDbCreation *bool `protobuf:"varint,8,opt,name=skip_db_creation,json=skipDbCreation" json:"skip_db_creation,omitempty"` + Ssloption *PostgreSQLDatabaseConfig_SSLOptions `protobuf:"bytes,9,opt,name=ssloption" json:"ssloption,omitempty"` +} + +func (x *PostgreSQLDatabaseConfig) Reset() { + *x = PostgreSQLDatabaseConfig{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PostgreSQLDatabaseConfig) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PostgreSQLDatabaseConfig) ProtoMessage() {} + +func (x *PostgreSQLDatabaseConfig) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[24] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PostgreSQLDatabaseConfig.ProtoReflect.Descriptor instead. 
+func (*PostgreSQLDatabaseConfig) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{24} +} + +func (x *PostgreSQLDatabaseConfig) GetHost() string { + if x != nil && x.Host != nil { + return *x.Host + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetHostaddr() string { + if x != nil && x.Hostaddr != nil { + return *x.Hostaddr + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetPort() string { + if x != nil && x.Port != nil { + return *x.Port + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetUser() string { + if x != nil && x.User != nil { + return *x.User + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetPassword() string { + if x != nil && x.Password != nil { + return *x.Password + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetPassfile() string { + if x != nil && x.Passfile != nil { + return *x.Passfile + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetDbname() string { + if x != nil && x.Dbname != nil { + return *x.Dbname + } + return "" +} + +func (x *PostgreSQLDatabaseConfig) GetSkipDbCreation() bool { + if x != nil && x.SkipDbCreation != nil { + return *x.SkipDbCreation + } + return false +} + +func (x *PostgreSQLDatabaseConfig) GetSsloption() *PostgreSQLDatabaseConfig_SSLOptions { + if x != nil { + return x.Ssloption + } + return nil +} + type MigrationOptions struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -2778,7 +3143,7 @@ const ( func (x *MigrationOptions) Reset() { *x = MigrationOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[24] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[25] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2791,7 +3156,7 @@ func (x *MigrationOptions) String() string { func (*MigrationOptions) ProtoMessage() {} func (x *MigrationOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[24] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[25] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2804,7 +3169,7 @@ func (x *MigrationOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use MigrationOptions.ProtoReflect.Descriptor instead. 
func (*MigrationOptions) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{24} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{25} } func (x *MigrationOptions) GetEnableUpgradeMigration() bool { @@ -2833,7 +3198,7 @@ type RetryOptions struct { func (x *RetryOptions) Reset() { *x = RetryOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[25] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[26] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2846,7 +3211,7 @@ func (x *RetryOptions) String() string { func (*RetryOptions) ProtoMessage() {} func (x *RetryOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[25] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[26] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2859,7 +3224,7 @@ func (x *RetryOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use RetryOptions.ProtoReflect.Descriptor instead. func (*RetryOptions) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{25} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{26} } func (x *RetryOptions) GetMaxNumRetries() int64 { @@ -2880,6 +3245,7 @@ type ConnectionConfig struct { // *ConnectionConfig_FakeDatabase // *ConnectionConfig_Mysql // *ConnectionConfig_Sqlite + // *ConnectionConfig_Postgresql Config isConnectionConfig_Config `protobuf_oneof:"config"` // Options for overwriting the default retry setting when MLMD transactions // returning Aborted error. @@ -2891,7 +3257,7 @@ type ConnectionConfig struct { func (x *ConnectionConfig) Reset() { *x = ConnectionConfig{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[26] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[27] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2904,7 +3270,7 @@ func (x *ConnectionConfig) String() string { func (*ConnectionConfig) ProtoMessage() {} func (x *ConnectionConfig) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[26] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[27] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2917,7 +3283,7 @@ func (x *ConnectionConfig) ProtoReflect() protoreflect.Message { // Deprecated: Use ConnectionConfig.ProtoReflect.Descriptor instead. 
func (*ConnectionConfig) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{26} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{27} } func (m *ConnectionConfig) GetConfig() isConnectionConfig_Config { @@ -2948,6 +3314,13 @@ func (x *ConnectionConfig) GetSqlite() *SqliteMetadataSourceConfig { return nil } +func (x *ConnectionConfig) GetPostgresql() *PostgreSQLDatabaseConfig { + if x, ok := x.GetConfig().(*ConnectionConfig_Postgresql); ok { + return x.Postgresql + } + return nil +} + func (x *ConnectionConfig) GetRetryOptions() *RetryOptions { if x != nil { return x.RetryOptions @@ -2971,12 +3344,19 @@ type ConnectionConfig_Sqlite struct { Sqlite *SqliteMetadataSourceConfig `protobuf:"bytes,3,opt,name=sqlite,oneof"` } +type ConnectionConfig_Postgresql struct { + // PostgreSQL database connection config. + Postgresql *PostgreSQLDatabaseConfig `protobuf:"bytes,5,opt,name=postgresql,oneof"` +} + func (*ConnectionConfig_FakeDatabase) isConnectionConfig_Config() {} func (*ConnectionConfig_Mysql) isConnectionConfig_Config() {} func (*ConnectionConfig_Sqlite) isConnectionConfig_Config() {} +func (*ConnectionConfig_Postgresql) isConnectionConfig_Config() {} + // A list of supported GRPC arguments defined in: // https://grpc.github.io/grpc/core/group__grpc__arg__keys.html type GrpcChannelArguments struct { @@ -2994,7 +3374,7 @@ type GrpcChannelArguments struct { func (x *GrpcChannelArguments) Reset() { *x = GrpcChannelArguments{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[27] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[28] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3007,7 +3387,7 @@ func (x *GrpcChannelArguments) String() string { func (*GrpcChannelArguments) ProtoMessage() {} func (x *GrpcChannelArguments) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[27] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[28] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3020,7 +3400,7 @@ func (x *GrpcChannelArguments) ProtoReflect() protoreflect.Message { // Deprecated: Use GrpcChannelArguments.ProtoReflect.Descriptor instead. func (*GrpcChannelArguments) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{27} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{28} } func (x *GrpcChannelArguments) GetMaxReceiveMessageLength() int64 { @@ -3047,7 +3427,7 @@ type MetadataStoreClientConfig struct { Host *string `protobuf:"bytes,1,opt,name=host" json:"host,omitempty"` // The TCP Port number that the gRPC server accepts connections on. // Must be specified. - Port *uint32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` + Port *int64 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` // Configuration for a secure gRPC channel. // If not given, insecure connection is used. 
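// --- Illustrative sketch (not generated code): wiring the new PostgreSQL
// backend into a ConnectionConfig via the postgresql oneof case added above.
// The import path, host, credentials and certificate paths are placeholders.
package example

import (
	"google.golang.org/protobuf/proto"

	mlmd "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata"
)

func newPostgresConnectionConfig() *mlmd.ConnectionConfig {
	pg := &mlmd.PostgreSQLDatabaseConfig{
		Host:           proto.String("metadata-postgres.kubeflow"),
		Port:           proto.String("5432"), // the PostgreSQL config carries the port as a string
		User:           proto.String("mlmd"),
		Password:       proto.String("change-me"),
		Dbname:         proto.String("metadb"),
		SkipDbCreation: proto.Bool(false), // let MLMD create the database if it does not exist
		Ssloption: &mlmd.PostgreSQLDatabaseConfig_SSLOptions{
			Sslmode:     proto.String("verify-full"),
			Sslrootcert: proto.String("/etc/mlmd/certs/root.crt"),
		},
	}
	return &mlmd.ConnectionConfig{
		Config: &mlmd.ConnectionConfig_Postgresql{Postgresql: pg},
		RetryOptions: &mlmd.RetryOptions{
			MaxNumRetries: proto.Int64(3), // retries transactions that return Aborted
		},
	}
}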
SslConfig *MetadataStoreClientConfig_SSLConfig `protobuf:"bytes,3,opt,name=ssl_config,json=sslConfig" json:"ssl_config,omitempty"` @@ -3063,7 +3443,7 @@ type MetadataStoreClientConfig struct { func (x *MetadataStoreClientConfig) Reset() { *x = MetadataStoreClientConfig{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[28] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[29] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3076,7 +3456,7 @@ func (x *MetadataStoreClientConfig) String() string { func (*MetadataStoreClientConfig) ProtoMessage() {} func (x *MetadataStoreClientConfig) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[28] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[29] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3089,7 +3469,7 @@ func (x *MetadataStoreClientConfig) ProtoReflect() protoreflect.Message { // Deprecated: Use MetadataStoreClientConfig.ProtoReflect.Descriptor instead. func (*MetadataStoreClientConfig) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{28} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{29} } func (x *MetadataStoreClientConfig) GetHost() string { @@ -3099,7 +3479,7 @@ func (x *MetadataStoreClientConfig) GetHost() string { return "" } -func (x *MetadataStoreClientConfig) GetPort() uint32 { +func (x *MetadataStoreClientConfig) GetPort() int64 { if x != nil && x.Port != nil { return *x.Port } @@ -3145,7 +3525,7 @@ type MetadataStoreServerConfig struct { func (x *MetadataStoreServerConfig) Reset() { *x = MetadataStoreServerConfig{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[29] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[30] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3158,7 +3538,7 @@ func (x *MetadataStoreServerConfig) String() string { func (*MetadataStoreServerConfig) ProtoMessage() {} func (x *MetadataStoreServerConfig) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[29] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[30] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3171,7 +3551,7 @@ func (x *MetadataStoreServerConfig) ProtoReflect() protoreflect.Message { // Deprecated: Use MetadataStoreServerConfig.ProtoReflect.Descriptor instead. func (*MetadataStoreServerConfig) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{29} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{30} } func (x *MetadataStoreServerConfig) GetConnectionConfig() *ConnectionConfig { @@ -3212,12 +3592,14 @@ type ListOperationOptions struct { // Identifies the next page of results. NextPageToken *string `protobuf:"bytes,3,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` // A boolean expression in SQL syntax that is used to specify the conditions - // on nodes' attributes and 1-hop neighborhood. + // on node attributes and directly connected assets. 
// // In the current implementation, filtering Artifact/Execution/Context with // the following attributes and neighborhood is supported: // - // Attributes: id:int64, type_id:int64, type:string, uri:string, name: string, + // Attributes: + // id:int64, type_id:int64, type:string, + // uri:string, name: string, external_id: string, // create_time_since_epoch:int64, last_update_time_since_epoch:int64 // state:ENUM (Artifact only) last_known_state:ENUM (Execution only) // @@ -3227,6 +3609,7 @@ type ListOperationOptions struct { // custom_properties.$name ($name is the custom property name) // attributes: the following attributes can be used // int_value: int64, double_value: double, string_value: string + // bool_value: bool // // - Context (for Artifact and Execution): // syntax: contexts_$alias ($alias can be [0-9A-Za-z_]) @@ -3254,6 +3637,7 @@ type ListOperationOptions struct { // - type = 'my_type_name' // - name = 'foo' // - type = 'bar' AND name LIKE 'foo%' + // - external_id = 'my_external_id' // - NOT(create_time_since_epoch < 1 OR last_update_time_since_epoch < 1) // // b) to filter artifacts' uri @@ -3264,10 +3648,12 @@ type ListOperationOptions struct { // c) to filter artifact's state or execution's last_known_state // - state = LIVE // - state IS NULL + // - state IN (PENDING, LIVE) // - last_known_state = RUNNING // - last_known_state != RUNNING + // - last_known_state NOT IN (FAILED, CANCELED) // - // d) to filter nodes having a specific context + // d) to filter nodes having a specific context, artifact, or execution // - contexts_a.id = 5 // - contexts_a.type = 'RunContext' // - contexts_a.name = 'my_run' @@ -3275,6 +3661,29 @@ type ListOperationOptions struct { // - contexts_a.last_update_time_since_epoch = 1626761453 // To filter nodes with conditions on multiple contexts: // - contexts_a.name = 'my_run' AND contexts_b.name = 'my_pipeline' + // To filter context with artifacts: + // - artifacts_a.id = 5 + // - artifacts_a.type = 'Dataset' + // - artifacts_a.name = 'my_dataset' + // - artifacts_a.uri = 'exact_path_string' + // - artifacts_a.state = LIVE + // - artifacts_a.state IN (PENDING, LIVE) + // - artifacts_a.external_id = "my_external_id" + // - artifacts_a.create_time_since_epoch = 1626761453 + // - artifacts_a.last_update_time_since_epoch = 1626761453 + // To filter contexts with conditions on multiple artifacts: + // - artifacts_a.name = 'my_run' AND artifacts_b.name = 'my_pipeline' + // To filter context with executions: + // - executions_a.id = 5 + // - executions_a.type = 'Dataset' + // - executions_a.name = 'my_dataset' + // - executions_a.last_known_state = RUNNING + //. 
- executions_a.last_known_state IN (NEW, RUNNING) + // - executions_a.external_id = "my_external_id" + // - executions_a.create_time_since_epoch = 1626761453 + // - executions_a.last_update_time_since_epoch = 1626761453 + // To filter contexts with conditions on multiple executions: + // - executions_a.name = 'my_run' AND executions_b.name = 'my_pipeline' // // e) to filter nodes condition on their properties // - properties.accuracy.double_value > 0.95 @@ -3283,7 +3692,7 @@ type ListOperationOptions struct { // other than [0-9A-Za-z_], then the name need to be backquoted, // e.g., // - properties.`my property`.int_value > 0 - // - custom_properties.`my:custom.property`.string_value = 'foo' + // - custom_properties.`my:custom.property`.bool_value = true // // f) complex query to filter both node attributes and neighborhood // - type = 'DataSet' AND @@ -3301,7 +3710,7 @@ type ListOperationOptions struct { // - events_0.milliseconds_since_epoch = 1 // to filter Executions on Events // - events_0.artifact_id = 1 - // - events_0.type = INPUT + // - events_0.type IN (INPUT, INTERNAL_INPUT) // - events_0.milliseconds_since_epoch = 1 // TODO(b/145945460) Support filtering on event step fields. FilterQuery *string `protobuf:"bytes,4,opt,name=filter_query,json=filterQuery" json:"filter_query,omitempty"` @@ -3315,7 +3724,7 @@ const ( func (x *ListOperationOptions) Reset() { *x = ListOperationOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[30] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[31] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3328,7 +3737,7 @@ func (x *ListOperationOptions) String() string { func (*ListOperationOptions) ProtoMessage() {} func (x *ListOperationOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[30] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[31] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3341,7 +3750,7 @@ func (x *ListOperationOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use ListOperationOptions.ProtoReflect.Descriptor instead. 
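// --- Illustrative sketch (not generated code) of the expanded filter_query
// surface documented above: filtering by the new external_id attribute, by a
// set of states, and by the new bool_value of a custom property. The import
// path and the concrete query strings are assumptions; the OrderByField field
// and the CREATE_TIME enum value follow the usual generated naming and are not
// shown in the excerpt above.
package example

import (
	"google.golang.org/protobuf/proto"

	mlmd "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata"
)

func newArtifactListOptions() *mlmd.ListOperationOptions {
	return &mlmd.ListOperationOptions{
		MaxResultSize: proto.Int32(50),
		FilterQuery: proto.String(
			"type = 'system.Model' AND state IN (PENDING, LIVE) " +
				"AND external_id = 'kfp-artifact/run-123/model' " +
				"AND custom_properties.is_blessed.bool_value = true"),
		OrderByField: &mlmd.ListOperationOptions_OrderByField{
			Field: mlmd.ListOperationOptions_OrderByField_CREATE_TIME.Enum(), // assumed enum value name
			IsAsc: proto.Bool(false),                                         // newest first
		},
	}
}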
func (*ListOperationOptions) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{30} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{31} } func (x *ListOperationOptions) GetMaxResultSize() int32 { @@ -3403,7 +3812,7 @@ type ListOperationNextPageToken struct { func (x *ListOperationNextPageToken) Reset() { *x = ListOperationNextPageToken{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[31] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[32] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3416,7 +3825,7 @@ func (x *ListOperationNextPageToken) String() string { func (*ListOperationNextPageToken) ProtoMessage() {} func (x *ListOperationNextPageToken) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[31] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[32] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3429,7 +3838,7 @@ func (x *ListOperationNextPageToken) ProtoReflect() protoreflect.Message { // Deprecated: Use ListOperationNextPageToken.ProtoReflect.Descriptor instead. func (*ListOperationNextPageToken) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{31} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{32} } func (x *ListOperationNextPageToken) GetIdOffset() int64 { @@ -3467,12 +3876,15 @@ type TransactionOptions struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields extensionFields protoimpl.ExtensionFields + + // Transaction tag for debug use only. + Tag *string `protobuf:"bytes,1,opt,name=tag" json:"tag,omitempty"` } func (x *TransactionOptions) Reset() { *x = TransactionOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[32] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[33] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3485,7 +3897,7 @@ func (x *TransactionOptions) String() string { func (*TransactionOptions) ProtoMessage() {} func (x *TransactionOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[32] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[33] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3498,7 +3910,7 @@ func (x *TransactionOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use TransactionOptions.ProtoReflect.Descriptor instead. func (*TransactionOptions) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{32} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{33} } var extRange_TransactionOptions = []protoiface.ExtensionRangeV1{ @@ -3510,29 +3922,56 @@ func (*TransactionOptions) ExtensionRangeArray() []protoiface.ExtensionRangeV1 { return extRange_TransactionOptions } -// The query options for list lineage graph operation. It allows specifying the -// `query_nodes` of interests and the `stop_conditions` when querying a -// lineage graph. The query option is used for exporting provenance information -// from a source MLMD instance. 
+func (x *TransactionOptions) GetTag() string { + if x != nil && x.Tag != nil { + return *x.Tag + } + return "" +} + +// TODO(b/283852485): Deprecate GetLineageGraph API after migration to +// GetLineageSubgraph API. +// The query options for `get_lineage_graph` operation. +// `query_nodes` is a list of nodes of interest. +// Currently only artifacts are supported as `query_nodes`. +// `stop_conditions` defines the filtering rules when querying a lineage graph. +// `max_node_size` defines the total number of artifacts and executions returned +// in the subgraph. type LineageGraphQueryOptions struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // A query to specify the nodes of interests. + // A query to specify the nodes of interest. + // `ListOperationOptions.max_result_size` sets the maximum number of nodes to + // begin with the graph search. // TODO(b/178491112) Support query_nodes for Executions. // // Types that are assignable to QueryNodes: // *LineageGraphQueryOptions_ArtifactsOptions QueryNodes isLineageGraphQueryOptions_QueryNodes `protobuf_oneof:"query_nodes"` - // A constraint option to define the boundary of the returned subgraph. + // A constraint option to define the filtering rules when querying a lineage + // graph. StopConditions *LineageGraphQueryOptions_BoundaryConstraint `protobuf:"bytes,2,opt,name=stop_conditions,json=stopConditions" json:"stop_conditions,omitempty"` + // Maximum total number of artifacts and executions in the whole returned + // lineage graph. + // If set to 0 or below, all related nodes will be returned without any + // number limitation. + // The number counts toward Artifacts and Executions. Nothing else considered. + // + // NOTE: There is no pagination supported. + MaxNodeSize *int64 `protobuf:"varint,3,opt,name=max_node_size,json=maxNodeSize,def=20" json:"max_node_size,omitempty"` } -func (x *LineageGraphQueryOptions) Reset() { - *x = LineageGraphQueryOptions{} +// Default values for LineageGraphQueryOptions fields. +const ( + Default_LineageGraphQueryOptions_MaxNodeSize = int64(20) +) + +func (x *LineageGraphQueryOptions) Reset() { + *x = LineageGraphQueryOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[33] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[34] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3545,7 +3984,7 @@ func (x *LineageGraphQueryOptions) String() string { func (*LineageGraphQueryOptions) ProtoMessage() {} func (x *LineageGraphQueryOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[33] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[34] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3558,7 +3997,7 @@ func (x *LineageGraphQueryOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use LineageGraphQueryOptions.ProtoReflect.Descriptor instead. 
func (*LineageGraphQueryOptions) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{33} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{34} } func (m *LineageGraphQueryOptions) GetQueryNodes() isLineageGraphQueryOptions_QueryNodes { @@ -3582,6 +4021,13 @@ func (x *LineageGraphQueryOptions) GetStopConditions() *LineageGraphQueryOptions return nil } +func (x *LineageGraphQueryOptions) GetMaxNodeSize() int64 { + if x != nil && x.MaxNodeSize != nil { + return *x.MaxNodeSize + } + return Default_LineageGraphQueryOptions_MaxNodeSize +} + type isLineageGraphQueryOptions_QueryNodes interface { isLineageGraphQueryOptions_QueryNodes() } @@ -3592,6 +4038,120 @@ type LineageGraphQueryOptions_ArtifactsOptions struct { func (*LineageGraphQueryOptions_ArtifactsOptions) isLineageGraphQueryOptions_QueryNodes() {} +// The query options for lineage graph tracing from a list of interested nodes. +type LineageSubgraphQueryOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to StartingNodes: + // *LineageSubgraphQueryOptions_StartingArtifacts + // *LineageSubgraphQueryOptions_StartingExecutions + StartingNodes isLineageSubgraphQueryOptions_StartingNodes `protobuf_oneof:"starting_nodes"` + // The maximum number of hops from the `starting_nodes` to traverse. + // A hop is defined as a jump to the next node following the path of + // node -> event -> next_node. + // For example, in the lineage graph a_1 -> e_1 -> a_2: + // a_2 is 2 hops away from a_1, and e_1 is 1 hop away from a_1. + // `max_num_hops` should be non-negative. + // When its value is set to 0, only the `starting_nodes` are returned. + MaxNumHops *int64 `protobuf:"varint,3,opt,name=max_num_hops,json=maxNumHops" json:"max_num_hops,omitempty"` + // The direction of lineage graph tracing, which means the direction of all + // hops in the tracing. + // An UPSTREAM hop means an expansion following the path of + // execution -> output_event -> artifact or + // artifact -> input_event -> execution + // A DOWNSTREAM hop means an expansion following the path of + // execution -> input_event -> artifact or + // artifact -> output_event -> execution + // Please refer to `Direction` for more details. + Direction *LineageSubgraphQueryOptions_Direction `protobuf:"varint,4,opt,name=direction,enum=ml_metadata.LineageSubgraphQueryOptions_Direction" json:"direction,omitempty"` +} + +func (x *LineageSubgraphQueryOptions) Reset() { + *x = LineageSubgraphQueryOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[35] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LineageSubgraphQueryOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LineageSubgraphQueryOptions) ProtoMessage() {} + +func (x *LineageSubgraphQueryOptions) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[35] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LineageSubgraphQueryOptions.ProtoReflect.Descriptor instead. 
+func (*LineageSubgraphQueryOptions) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{35} +} + +func (m *LineageSubgraphQueryOptions) GetStartingNodes() isLineageSubgraphQueryOptions_StartingNodes { + if m != nil { + return m.StartingNodes + } + return nil +} + +func (x *LineageSubgraphQueryOptions) GetStartingArtifacts() *LineageSubgraphQueryOptions_StartingNodes { + if x, ok := x.GetStartingNodes().(*LineageSubgraphQueryOptions_StartingArtifacts); ok { + return x.StartingArtifacts + } + return nil +} + +func (x *LineageSubgraphQueryOptions) GetStartingExecutions() *LineageSubgraphQueryOptions_StartingNodes { + if x, ok := x.GetStartingNodes().(*LineageSubgraphQueryOptions_StartingExecutions); ok { + return x.StartingExecutions + } + return nil +} + +func (x *LineageSubgraphQueryOptions) GetMaxNumHops() int64 { + if x != nil && x.MaxNumHops != nil { + return *x.MaxNumHops + } + return 0 +} + +func (x *LineageSubgraphQueryOptions) GetDirection() LineageSubgraphQueryOptions_Direction { + if x != nil && x.Direction != nil { + return *x.Direction + } + return LineageSubgraphQueryOptions_DIRECTION_UNSPECIFIED +} + +type isLineageSubgraphQueryOptions_StartingNodes interface { + isLineageSubgraphQueryOptions_StartingNodes() +} + +type LineageSubgraphQueryOptions_StartingArtifacts struct { + StartingArtifacts *LineageSubgraphQueryOptions_StartingNodes `protobuf:"bytes,1,opt,name=starting_artifacts,json=startingArtifacts,oneof"` +} + +type LineageSubgraphQueryOptions_StartingExecutions struct { + StartingExecutions *LineageSubgraphQueryOptions_StartingNodes `protobuf:"bytes,2,opt,name=starting_executions,json=startingExecutions,oneof"` +} + +func (*LineageSubgraphQueryOptions_StartingArtifacts) isLineageSubgraphQueryOptions_StartingNodes() {} + +func (*LineageSubgraphQueryOptions_StartingExecutions) isLineageSubgraphQueryOptions_StartingNodes() { +} + // A simple path (e.g. {step{key:"foo"}}) can name an artifact in the context // of an execution. 
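// --- Illustrative sketch (not generated code): building a lineage subgraph
// query with the options defined above. It starts from artifacts selected by a
// filter_query on the new external_id attribute and walks at most two hops
// upstream. The import path and the filter string are assumptions.
package example

import (
	"google.golang.org/protobuf/proto"

	mlmd "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata"
)

func newUpstreamSubgraphQuery(modelExternalID string) *mlmd.LineageSubgraphQueryOptions {
	return &mlmd.LineageSubgraphQueryOptions{
		StartingNodes: &mlmd.LineageSubgraphQueryOptions_StartingArtifacts{
			StartingArtifacts: &mlmd.LineageSubgraphQueryOptions_StartingNodes{
				FilterQuery: proto.String("external_id = '" + modelExternalID + "'"),
			},
		},
		// Two hops reach the producer execution and that execution's inputs.
		MaxNumHops: proto.Int64(2),
		Direction:  mlmd.LineageSubgraphQueryOptions_UPSTREAM.Enum(),
	}
}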
type Event_Path struct { @@ -3607,7 +4167,7 @@ type Event_Path struct { func (x *Event_Path) Reset() { *x = Event_Path{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[37] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[39] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3620,7 +4180,7 @@ func (x *Event_Path) String() string { func (*Event_Path) ProtoMessage() {} func (x *Event_Path) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[37] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[39] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3657,7 +4217,7 @@ type Event_Path_Step struct { func (x *Event_Path_Step) Reset() { *x = Event_Path_Step{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[38] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[40] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3670,7 +4230,7 @@ func (x *Event_Path_Step) String() string { func (*Event_Path_Step) ProtoMessage() {} func (x *Event_Path_Step) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[38] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[40] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3747,7 +4307,7 @@ type MySQLDatabaseConfig_SSLOptions struct { func (x *MySQLDatabaseConfig_SSLOptions) Reset() { *x = MySQLDatabaseConfig_SSLOptions{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[46] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[48] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3760,7 +4320,7 @@ func (x *MySQLDatabaseConfig_SSLOptions) String() string { func (*MySQLDatabaseConfig_SSLOptions) ProtoMessage() {} func (x *MySQLDatabaseConfig_SSLOptions) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[46] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[48] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3818,6 +4378,102 @@ func (x *MySQLDatabaseConfig_SSLOptions) GetVerifyServerCert() bool { return false } +type PostgreSQLDatabaseConfig_SSLOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // disable, allow, verify-ca, verify-full, etc. Reference: + // https://www.postgresql.org/docs/current/libpq-connect.html + Sslmode *string `protobuf:"bytes,1,opt,name=sslmode" json:"sslmode,omitempty"` + // This parameter specifies the file name of the client SSL certificate, + // replacing the default ~/.postgresql/postgresql.crt. This parameter is + // ignored if an SSL connection is not made. + Sslcert *string `protobuf:"bytes,2,opt,name=sslcert" json:"sslcert,omitempty"` + // This parameter specifies the location for the secret key used for the + // client certificate. It can either specify a file name that will be used + // instead of the default ~/.postgresql/postgresql.key, this parameter is + // ignored if an SSL connection is not made. 
+ Sslkey *string `protobuf:"bytes,3,opt,name=sslkey" json:"sslkey,omitempty"` + // This parameter specifies the password for the secret key specified in + // sslkey, allowing client certificate private keys to be stored in + // encrypted form on disk even when interactive passphrase input is not + // practical. + Sslpassword *string `protobuf:"bytes,4,opt,name=sslpassword" json:"sslpassword,omitempty"` + // This parameter specifies the name of a file containing SSL certificate + // authority (CA) certificate(s). If the file exists, the server's + // certificate will be verified to be signed by one of these authorities. + // The default is ~/.postgresql/root.crt. + Sslrootcert *string `protobuf:"bytes,5,opt,name=sslrootcert" json:"sslrootcert,omitempty"` +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) Reset() { + *x = PostgreSQLDatabaseConfig_SSLOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[49] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PostgreSQLDatabaseConfig_SSLOptions) ProtoMessage() {} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[49] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PostgreSQLDatabaseConfig_SSLOptions.ProtoReflect.Descriptor instead. +func (*PostgreSQLDatabaseConfig_SSLOptions) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{24, 0} +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) GetSslmode() string { + if x != nil && x.Sslmode != nil { + return *x.Sslmode + } + return "" +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) GetSslcert() string { + if x != nil && x.Sslcert != nil { + return *x.Sslcert + } + return "" +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) GetSslkey() string { + if x != nil && x.Sslkey != nil { + return *x.Sslkey + } + return "" +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) GetSslpassword() string { + if x != nil && x.Sslpassword != nil { + return *x.Sslpassword + } + return "" +} + +func (x *PostgreSQLDatabaseConfig_SSLOptions) GetSslrootcert() string { + if x != nil && x.Sslrootcert != nil { + return *x.Sslrootcert + } + return "" +} + type MetadataStoreClientConfig_SSLConfig struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3837,7 +4493,7 @@ type MetadataStoreClientConfig_SSLConfig struct { func (x *MetadataStoreClientConfig_SSLConfig) Reset() { *x = MetadataStoreClientConfig_SSLConfig{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[47] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[50] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3850,7 +4506,7 @@ func (x *MetadataStoreClientConfig_SSLConfig) String() string { func (*MetadataStoreClientConfig_SSLConfig) ProtoMessage() {} func (x *MetadataStoreClientConfig_SSLConfig) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[47] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[50] if protoimpl.UnsafeEnabled && x != nil { ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3863,7 +4519,7 @@ func (x *MetadataStoreClientConfig_SSLConfig) ProtoReflect() protoreflect.Messag // Deprecated: Use MetadataStoreClientConfig_SSLConfig.ProtoReflect.Descriptor instead. func (*MetadataStoreClientConfig_SSLConfig) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{28, 0} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{29, 0} } func (x *MetadataStoreClientConfig_SSLConfig) GetClientKey() string { @@ -3905,7 +4561,7 @@ type MetadataStoreServerConfig_SSLConfig struct { func (x *MetadataStoreServerConfig_SSLConfig) Reset() { *x = MetadataStoreServerConfig_SSLConfig{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[48] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[51] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3918,7 +4574,7 @@ func (x *MetadataStoreServerConfig_SSLConfig) String() string { func (*MetadataStoreServerConfig_SSLConfig) ProtoMessage() {} func (x *MetadataStoreServerConfig_SSLConfig) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[48] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[51] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3931,7 +4587,7 @@ func (x *MetadataStoreServerConfig_SSLConfig) ProtoReflect() protoreflect.Messag // Deprecated: Use MetadataStoreServerConfig_SSLConfig.ProtoReflect.Descriptor instead. func (*MetadataStoreServerConfig_SSLConfig) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{29, 0} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{30, 0} } func (x *MetadataStoreServerConfig_SSLConfig) GetServerKey() string { @@ -3982,7 +4638,7 @@ const ( func (x *ListOperationOptions_OrderByField) Reset() { *x = ListOperationOptions_OrderByField{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[49] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[52] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -3995,7 +4651,7 @@ func (x *ListOperationOptions_OrderByField) String() string { func (*ListOperationOptions_OrderByField) ProtoMessage() {} func (x *ListOperationOptions_OrderByField) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[49] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[52] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4008,7 +4664,7 @@ func (x *ListOperationOptions_OrderByField) ProtoReflect() protoreflect.Message // Deprecated: Use ListOperationOptions_OrderByField.ProtoReflect.Descriptor instead. 
func (*ListOperationOptions_OrderByField) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{30, 0} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{31, 0} } func (x *ListOperationOptions_OrderByField) GetField() ListOperationOptions_OrderByField_Field { @@ -4025,35 +4681,109 @@ func (x *ListOperationOptions_OrderByField) GetIsAsc() bool { return Default_ListOperationOptions_OrderByField_IsAsc } -// Boundary conditions to stop the traversal when return the `subgraph`. +// Filtering conditions for retrieving the lineage graph. type LineageGraphQueryOptions_BoundaryConstraint struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The maximum number of hops from the `query_nodes` to traverse. It should - // be non-negative. When zero, only the `query_nodes` are returned. + // The maximum number of hops from the `query_nodes` to traverse. + // A hop is defined as a jump to the next node following the path of + // node -> event -> next_node. + // For example, in the lineage graph a_1 -> e_1 -> a_2: + // a_2 is 2 hops away from a_1, and e_1 is 1 hop away from a_1. + // `max_num_hops` should be non-negative. + // When its value is set to 0, only the `query_nodes` are returned. MaxNumHops *int64 `protobuf:"varint,1,opt,name=max_num_hops,json=maxNumHops" json:"max_num_hops,omitempty"` - // Conditions for the boundary node in the returned the subgraph. - // Please refer to ListOperationOptions.filter_query for the syntax. - // If set, it stops traversing further at the artifacts that do not satisfy - // `boundary_artifacts` and exclude them from the returned subgraph. - // For example, to look for the models related to a DataSet and ignore - // other artifacts derived from the DataSet: - // "type = 'DataSet' OR type = 'TransformGraph' OR type = 'Model'" + // Filtering conditions for retrieving the lineage graph. + // Please refer to `ListOperationOptions.filter_query` for the syntax. + // + // If set, the `boundary_artifacts` defines which artifacts to keep in the + // returned lineage graph during the graph search. + // Artifacts that do not satisfy the `boundary_artifacts` are filtered out, + // and the subgraphs starting at them will be pruned. + // If not set, no artifacts will be filtered out. + // Taking the following lineage graph as example: + // (`a` represents an Artifact, `e` represents an Execution, each arrow + // represents a hop.) + // a_0 a_1 a_3 + // | \ / \ + // \/ \/ \/ \/ + // e_0 e_1 e_3 + // / \ + // \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 + // To query all the upstream and downstream nodes 3 hops away from a_4, + // while excluding the upstream subgraph starting at a_3, then + // `stop_conditions` can be set as: + // { + // max_num_hops: 3 + // boundary_artifacts: 'id != 3' + // } + // With the `stop_conditions`, {a_3, e_1, a_1, a_0, e_0} will be filtered + // out. + // The returned lineage graph looks like: + // e_3 + // / \ + // \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 BoundaryArtifacts *string `protobuf:"bytes,2,opt,name=boundary_artifacts,json=boundaryArtifacts" json:"boundary_artifacts,omitempty"` - // If set, it stops traversing further at the executions that do not satisfy - // `boundary_executions` and exclude them from the returned subgraph. 
- // For example, two trainers may be connected due to the shared datasets, - // to exclude other trainers except a particular one (e.g., id = 5): - // "type != 'Trainer' OR (type = 'Trainer' AND id = 5)" + // If set, the `boundary_executions` defines which executions to keep in the + // returned lineage graph during the graph search. + // Executions that do not satisfy the `boundary_executions` are filtered out + // and the subgraphs starting at them will be pruned. + // If not set, no executions will be filtered out. + // In the example above, to query for all the upstream and downstream nodes + // 3 hops away from a_4, while excluding the upstream subgraph and the + // downstream subgraph starting at e_3, then `stop_conditions` can be set as + // { + // max_num_hops: 3 + // boundary_executions: 'id != 3' + // } + // With the `stop_conditions`, {e_3, a_5, a_3, e_1, a_1, a_0, e_0} will be + // filtered out. + // The returned lineage graph looks like: + // a_2 a_4 + // \ / + // \/ \/ + // e_2 + // However, for the following graph: + // a_0 a_1 a_3 + // | \ / \ + // \/ \/ \/ \/ + // e_0 e_1 e_3 + // \ / \ + // \/ \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 + // With the same `stop_conditions`, only {e_3, a_5, a_0, e_0} will be + // filtered out. + // The returned lineage graph looks like: + // a_1 a_3 + // \ / + // \/ \/ + // e_1 + // \ + // \/ + // a_2 a_4 + // \ / + // \/ \/ + // e_2 BoundaryExecutions *string `protobuf:"bytes,3,opt,name=boundary_executions,json=boundaryExecutions" json:"boundary_executions,omitempty"` } func (x *LineageGraphQueryOptions_BoundaryConstraint) Reset() { *x = LineageGraphQueryOptions_BoundaryConstraint{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[50] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[53] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -4066,7 +4796,7 @@ func (x *LineageGraphQueryOptions_BoundaryConstraint) String() string { func (*LineageGraphQueryOptions_BoundaryConstraint) ProtoMessage() {} func (x *LineageGraphQueryOptions_BoundaryConstraint) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[50] + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[53] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4079,7 +4809,7 @@ func (x *LineageGraphQueryOptions_BoundaryConstraint) ProtoReflect() protoreflec // Deprecated: Use LineageGraphQueryOptions_BoundaryConstraint.ProtoReflect.Descriptor instead. func (*LineageGraphQueryOptions_BoundaryConstraint) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{33, 0} + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{34, 0} } func (x *LineageGraphQueryOptions_BoundaryConstraint) GetMaxNumHops() int64 { @@ -4103,6 +4833,58 @@ func (x *LineageGraphQueryOptions_BoundaryConstraint) GetBoundaryExecutions() st return "" } +// `starting_nodes` is a list of nodes of interest to start graph tracing. +// NOTE: The maximum number of starting nodes is 100 at most. +type LineageSubgraphQueryOptions_StartingNodes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // `filter_query` is a boolean expression in SQL syntax that is used to + // specify the conditions on starting nodes. 
+ // Please refer to ListOperationOptions.filter_query for more details. + FilterQuery *string `protobuf:"bytes,1,opt,name=filter_query,json=filterQuery" json:"filter_query,omitempty"` +} + +func (x *LineageSubgraphQueryOptions_StartingNodes) Reset() { + *x = LineageSubgraphQueryOptions_StartingNodes{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[54] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *LineageSubgraphQueryOptions_StartingNodes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*LineageSubgraphQueryOptions_StartingNodes) ProtoMessage() {} + +func (x *LineageSubgraphQueryOptions_StartingNodes) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_proto_msgTypes[54] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use LineageSubgraphQueryOptions_StartingNodes.ProtoReflect.Descriptor instead. +func (*LineageSubgraphQueryOptions_StartingNodes) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_proto_rawDescGZIP(), []int{35, 0} +} + +func (x *LineageSubgraphQueryOptions_StartingNodes) GetFilterQuery() string { + if x != nil && x.FilterQuery != nil { + return *x.FilterQuery + } + return "" +} + var file_ml_metadata_proto_metadata_store_proto_extTypes = []protoimpl.ExtensionInfo{ { ExtendedType: (*descriptorpb.EnumValueOptions)(nil), @@ -4128,626 +4910,736 @@ var file_ml_metadata_proto_metadata_store_proto_rawDesc = []byte{ 0x0a, 0x26, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0b, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x1c, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x1a, 0x20, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x32, 0x0a, 0x13, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x54, - 0x79, 0x70, 0x65, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1b, 0x0a, 0x09, - 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0xb7, 0x01, 0x0a, 0x05, 0x56, 0x61, - 0x6c, 0x75, 0x65, 0x12, 0x1d, 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x08, 0x69, 0x6e, 0x74, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x01, 0x48, 0x00, 0x52, 0x0b, 0x64, 0x6f, 0x75, 0x62, - 0x6c, 0x65, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, - 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, - 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x3c, 0x0a, 0x0c, - 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x04, 
0x20, 0x01, - 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x00, 0x52, 0x0b, 0x73, - 0x74, 0x72, 0x75, 0x63, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x22, 0xb7, 0x05, 0x0a, 0x08, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x19, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x61, 0x6e, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x1a, 0x1c, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x20, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, + 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x22, 0x32, 0x0a, 0x13, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x45, 0x78, + 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, + 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x91, 0x02, 0x0a, 0x05, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1d, + 0x0a, 0x09, 0x69, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x03, 0x48, 0x00, 0x52, 0x08, 0x69, 0x6e, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x23, 0x0a, + 0x0c, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x01, 0x48, 0x00, 0x52, 0x0b, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x56, 0x61, 0x6c, + 0x75, 0x65, 0x12, 0x23, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, + 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x3c, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x48, 0x00, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x37, 0x0a, 0x0b, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x5f, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x41, 0x6e, 0x79, + 0x48, 0x00, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1f, + 0x0a, 0x0a, 0x62, 0x6f, 0x6f, 0x6c, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x06, 0x20, 0x01, + 0x28, 0x08, 0x48, 0x00, 0x52, 0x09, 0x62, 0x6f, 0x6f, 0x6c, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, + 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0xb5, 0x06, 0x0a, 0x08, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x07, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, + 0x65, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, + 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 
0x74, 0x79, 0x70, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x65, + 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x65, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x45, 0x0a, 0x0a, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x12, 0x58, 0x0a, 0x11, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, + 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x10, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x31, 0x0a, 0x05, 0x73, 0x74, + 0x61, 0x74, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x35, 0x0a, + 0x17, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, + 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x09, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, + 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, + 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3e, 0x0a, 0x1c, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, + 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, + 0x70, 0x6f, 0x63, 0x68, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, 0x52, 0x18, 0x6c, 0x61, 0x73, 0x74, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, + 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3d, 0x0a, 0x0f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x41, 0x6e, 0x79, 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x1a, 0x51, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x57, 0x0a, 0x15, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 
0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0x6f, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, 0x50, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, + 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x4c, 0x49, 0x56, 0x45, 0x10, 0x02, 0x12, 0x17, 0x0a, 0x13, + 0x4d, 0x41, 0x52, 0x4b, 0x45, 0x44, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x44, 0x45, 0x4c, 0x45, 0x54, + 0x49, 0x4f, 0x4e, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x45, 0x44, + 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x41, 0x42, 0x41, 0x4e, 0x44, 0x4f, 0x4e, 0x45, 0x44, 0x10, + 0x05, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x46, 0x45, 0x52, 0x45, 0x4e, 0x43, 0x45, 0x10, 0x06, + 0x22, 0xd2, 0x04, 0x0a, 0x0c, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, + 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, + 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, + 0x49, 0x64, 0x12, 0x49, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x4c, 0x0a, + 0x09, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x2f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x53, 0x79, 0x73, 0x74, + 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x08, 0x62, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x1a, 0x58, 0x0a, 0x0f, 0x50, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, + 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, + 0x12, 0x2f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xcd, 0x01, 0x0a, 0x15, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x26, 0x0a, 0x05, 0x55, 0x4e, 0x53, 0x45, 0x54, 0x10, 0x00, 
0x1a, 0x1b, 0xaa, 0xf1, 0xfd, 0xba, + 0x0b, 0x15, 0x0a, 0x13, 0x75, 0x6e, 0x73, 0x65, 0x74, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x12, 0x21, 0x0a, 0x07, 0x44, 0x41, 0x54, 0x41, 0x53, + 0x45, 0x54, 0x10, 0x01, 0x1a, 0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, 0x6d, 0x6c, + 0x6d, 0x64, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x12, 0x1d, 0x0a, 0x05, 0x4d, 0x4f, + 0x44, 0x45, 0x4c, 0x10, 0x02, 0x1a, 0x12, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0c, 0x0a, 0x0a, 0x6d, + 0x6c, 0x6d, 0x64, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x21, 0x0a, 0x07, 0x4d, 0x45, 0x54, + 0x52, 0x49, 0x43, 0x53, 0x10, 0x03, 0x1a, 0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, + 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x12, 0x27, 0x0a, 0x0a, + 0x53, 0x54, 0x41, 0x54, 0x49, 0x53, 0x54, 0x49, 0x43, 0x53, 0x10, 0x04, 0x1a, 0x17, 0xaa, 0xf1, + 0xfd, 0xba, 0x0b, 0x11, 0x0a, 0x0f, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x69, + 0x73, 0x74, 0x69, 0x63, 0x73, 0x22, 0xaa, 0x04, 0x0a, 0x05, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, + 0x1f, 0x0a, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, + 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x49, 0x64, 0x12, 0x2b, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x50, 0x61, 0x74, 0x68, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, + 0x12, 0x2b, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x17, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x38, 0x0a, + 0x18, 0x6d, 0x69, 0x6c, 0x6c, 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x5f, 0x73, 0x69, + 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x16, 0x6d, 0x69, 0x6c, 0x6c, 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x53, 0x69, 0x6e, + 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3d, 0x0a, 0x0f, 0x73, 0x79, 0x73, 0x74, 0x65, + 0x6d, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x14, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x41, 0x6e, 0x79, 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x77, 0x0a, 0x04, 0x50, 0x61, 0x74, 0x68, 0x12, 0x32, + 0x0a, 0x05, 0x73, 0x74, 0x65, 0x70, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, + 0x74, 0x2e, 0x50, 0x61, 0x74, 0x68, 0x2e, 0x53, 0x74, 0x65, 0x70, 0x52, 0x05, 0x73, 0x74, 0x65, + 0x70, 0x73, 0x1a, 0x3b, 0x0a, 0x04, 0x53, 0x74, 0x65, 0x70, 0x12, 0x16, 0x0a, 0x05, 0x69, 0x6e, + 0x64, 0x65, 0x78, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x05, 0x69, 0x6e, 0x64, + 0x65, 0x78, 0x12, 0x12, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, + 0x00, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 
0x65, 0x22, + 0x90, 0x01, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0f, 0x44, 0x45, 0x43, 0x4c, 0x41, 0x52, 0x45, + 0x44, 0x5f, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, 0x10, 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x44, 0x45, + 0x43, 0x4c, 0x41, 0x52, 0x45, 0x44, 0x5f, 0x49, 0x4e, 0x50, 0x55, 0x54, 0x10, 0x02, 0x12, 0x09, + 0x0a, 0x05, 0x49, 0x4e, 0x50, 0x55, 0x54, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x55, 0x54, + 0x50, 0x55, 0x54, 0x10, 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, + 0x4c, 0x5f, 0x49, 0x4e, 0x50, 0x55, 0x54, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x54, + 0x45, 0x52, 0x4e, 0x41, 0x4c, 0x5f, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, 0x10, 0x06, 0x12, 0x12, + 0x0a, 0x0e, 0x50, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, + 0x10, 0x07, 0x22, 0xaa, 0x06, 0x0a, 0x09, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, - 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, + 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x12, 0x12, 0x0a, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, - 0x65, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x75, 0x72, 0x69, 0x12, 0x45, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, - 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x2e, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, - 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x11, 0x63, 0x75, - 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x2e, 0x43, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x52, 0x10, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x69, 0x65, 0x73, 0x12, 0x31, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x35, 0x0a, 0x17, 0x63, 0x72, 0x65, 0x61, 0x74, - 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, - 0x63, 0x68, 0x18, 0x09, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, - 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3e, - 0x0a, 0x1c, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, - 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x0a, - 0x20, 0x01, 0x28, 
0x03, 0x52, 0x18, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, - 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x1a, 0x51, - 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x1a, 0x57, 0x0a, 0x15, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x51, 0x0a, 0x05, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x0b, 0x0a, 0x07, 0x50, 0x45, 0x4e, 0x44, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x08, 0x0a, - 0x04, 0x4c, 0x49, 0x56, 0x45, 0x10, 0x02, 0x12, 0x17, 0x0a, 0x13, 0x4d, 0x41, 0x52, 0x4b, 0x45, - 0x44, 0x5f, 0x46, 0x4f, 0x52, 0x5f, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x03, - 0x12, 0x0b, 0x0a, 0x07, 0x44, 0x45, 0x4c, 0x45, 0x54, 0x45, 0x44, 0x10, 0x04, 0x22, 0xb1, 0x04, - 0x0a, 0x0c, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0e, - 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, - 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x0b, - 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x49, - 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x4c, 0x0a, 0x09, 0x62, 0x61, 0x73, - 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, - 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x62, - 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x1a, 0x58, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 
0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2f, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, - 0x01, 0x22, 0xcd, 0x01, 0x0a, 0x15, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, - 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x26, 0x0a, 0x05, 0x55, - 0x4e, 0x53, 0x45, 0x54, 0x10, 0x00, 0x1a, 0x1b, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x15, 0x0a, 0x13, - 0x75, 0x6e, 0x73, 0x65, 0x74, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x12, 0x21, 0x0a, 0x07, 0x44, 0x41, 0x54, 0x41, 0x53, 0x45, 0x54, 0x10, 0x01, - 0x1a, 0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x44, - 0x61, 0x74, 0x61, 0x73, 0x65, 0x74, 0x12, 0x1d, 0x0a, 0x05, 0x4d, 0x4f, 0x44, 0x45, 0x4c, 0x10, - 0x02, 0x1a, 0x12, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0c, 0x0a, 0x0a, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x21, 0x0a, 0x07, 0x4d, 0x45, 0x54, 0x52, 0x49, 0x43, 0x53, - 0x10, 0x03, 0x1a, 0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, 0x6d, 0x6c, 0x6d, 0x64, - 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x12, 0x27, 0x0a, 0x0a, 0x53, 0x54, 0x41, 0x54, - 0x49, 0x53, 0x54, 0x49, 0x43, 0x53, 0x10, 0x04, 0x1a, 0x17, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x11, - 0x0a, 0x0f, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x69, 0x73, 0x74, 0x69, 0x63, - 0x73, 0x22, 0xd6, 0x03, 0x0a, 0x05, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x61, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, - 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, - 0x2b, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x2e, 0x50, 0x61, 0x74, 0x68, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, 0x12, 0x2b, 0x0a, 0x04, - 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x17, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x38, 0x0a, 0x18, 0x6d, 0x69, 0x6c, - 0x6c, 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, - 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x16, 0x6d, 0x69, 0x6c, - 0x6c, 0x69, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x73, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, - 0x6f, 0x63, 0x68, 0x1a, 0x77, 0x0a, 0x04, 0x50, 0x61, 0x74, 0x68, 0x12, 0x32, 0x0a, 0x05, 0x73, - 0x74, 0x65, 0x70, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x2e, 0x50, - 0x61, 0x74, 0x68, 0x2e, 0x53, 0x74, 0x65, 0x70, 0x52, 0x05, 0x73, 0x74, 0x65, 0x70, 0x73, 0x1a, - 0x3b, 0x0a, 0x04, 0x53, 0x74, 0x65, 0x70, 0x12, 0x16, 0x0a, 0x05, 0x69, 0x6e, 0x64, 0x65, 0x78, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x05, 0x69, 
0x6e, 0x64, 0x65, 0x78, 0x12, - 0x12, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0x7c, 0x0a, 0x04, - 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, - 0x00, 0x12, 0x13, 0x0a, 0x0f, 0x44, 0x45, 0x43, 0x4c, 0x41, 0x52, 0x45, 0x44, 0x5f, 0x4f, 0x55, - 0x54, 0x50, 0x55, 0x54, 0x10, 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x44, 0x45, 0x43, 0x4c, 0x41, 0x52, - 0x45, 0x44, 0x5f, 0x49, 0x4e, 0x50, 0x55, 0x54, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x49, 0x4e, - 0x50, 0x55, 0x54, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, 0x10, - 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, 0x4c, 0x5f, 0x49, 0x4e, - 0x50, 0x55, 0x54, 0x10, 0x05, 0x12, 0x13, 0x0a, 0x0f, 0x49, 0x4e, 0x54, 0x45, 0x52, 0x4e, 0x41, - 0x4c, 0x5f, 0x4f, 0x55, 0x54, 0x50, 0x55, 0x54, 0x10, 0x06, 0x22, 0xca, 0x05, 0x0a, 0x09, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, 0x07, - 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, - 0x79, 0x70, 0x65, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x07, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x46, 0x0a, 0x10, 0x6c, 0x61, 0x73, - 0x74, 0x5f, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x52, 0x0e, 0x6c, 0x61, 0x73, 0x74, 0x4b, 0x6e, 0x6f, 0x77, 0x6e, 0x53, 0x74, 0x61, 0x74, - 0x65, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x59, 0x0a, 0x11, 0x63, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x43, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x52, 0x10, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x69, 0x65, 0x73, 0x12, 0x35, 0x0a, 0x17, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, - 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, - 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, - 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3e, 0x0a, 0x1c, 0x6c, - 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, - 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x09, 0x20, 0x01, 
0x28, - 0x03, 0x52, 0x18, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, - 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x1a, 0x51, 0x0a, 0x0f, 0x50, - 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, - 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, - 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, - 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x57, - 0x0a, 0x15, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, - 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x5e, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, - 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, - 0x03, 0x4e, 0x45, 0x57, 0x10, 0x01, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, - 0x47, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x45, 0x10, - 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0a, 0x0a, - 0x06, 0x43, 0x41, 0x43, 0x48, 0x45, 0x44, 0x10, 0x05, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x41, 0x4e, - 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x22, 0xd8, 0x05, 0x0a, 0x0d, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, - 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, - 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, - 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x4a, 0x0a, 0x0a, 0x70, 0x72, 0x6f, - 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, + 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, + 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, + 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, + 0x49, 0x64, 0x12, 0x46, 0x0a, 0x10, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x6b, 0x6e, 0x6f, 0x77, 0x6e, + 0x5f, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1c, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x0e, 0x6c, 0x61, 0x73, 0x74, + 0x4b, 0x6e, 0x6f, 0x77, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x46, 0x0a, 0x0a, 0x70, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, + 0x2e, 0x6d, 0x6c, 
0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x12, 0x59, 0x0a, 0x11, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x70, 0x72, 0x6f, + 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x3e, 0x0a, 0x0a, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x69, 0x6e, 0x70, 0x75, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x40, 0x0a, 0x0b, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, - 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, 0x6f, 0x75, 0x74, - 0x70, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x4d, 0x0a, 0x09, 0x62, 0x61, 0x73, 0x65, 0x5f, - 0x74, 0x79, 0x70, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, - 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x62, 0x61, - 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x1a, 0x58, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2f, 0x0a, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, - 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, - 0x22, 0xef, 0x01, 0x0a, 0x15, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, - 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x27, 0x0a, 0x05, 0x55, 0x4e, - 0x53, 0x45, 0x54, 0x10, 0x00, 0x1a, 0x1c, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x16, 0x0a, 0x14, 0x75, - 0x6e, 0x73, 0x65, 0x74, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x12, 0x1d, 0x0a, 0x05, 0x54, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x1a, 0x12, - 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0c, 0x0a, 0x0a, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x54, 0x72, 0x61, - 0x69, 0x6e, 0x12, 0x25, 0x0a, 0x09, 0x54, 0x52, 0x41, 0x4e, 0x53, 0x46, 0x4f, 0x52, 0x4d, 0x10, - 0x02, 0x1a, 0x16, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x10, 0x0a, 0x0e, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, - 0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, 0x6f, 0x72, 0x6d, 0x12, 0x21, 0x0a, 0x07, 0x50, 0x52, 0x4f, - 0x43, 0x45, 0x53, 0x53, 0x10, 0x03, 0x1a, 
0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, - 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x12, 0x23, 0x0a, 0x08, - 0x45, 0x56, 0x41, 0x4c, 0x55, 0x41, 0x54, 0x45, 0x10, 0x04, 0x1a, 0x15, 0xaa, 0xf1, 0xfd, 0xba, - 0x0b, 0x0f, 0x0a, 0x0d, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x45, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, - 0x65, 0x12, 0x1f, 0x0a, 0x06, 0x44, 0x45, 0x50, 0x4c, 0x4f, 0x59, 0x10, 0x05, 0x1a, 0x13, 0xaa, - 0xf1, 0xfd, 0xba, 0x0b, 0x0d, 0x0a, 0x0b, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x44, 0x65, 0x70, 0x6c, - 0x6f, 0x79, 0x22, 0x9e, 0x03, 0x0a, 0x0b, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, - 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x48, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, - 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x4b, 0x0a, 0x09, - 0x62, 0x61, 0x73, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x08, 0x62, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x1a, 0x58, 0x0a, 0x0f, 0x50, 0x72, 0x6f, - 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2f, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, - 0x02, 0x38, 0x01, 0x22, 0x3e, 0x0a, 0x15, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, - 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x25, 0x0a, 0x05, - 0x55, 0x4e, 0x53, 0x45, 0x54, 0x10, 0x00, 0x1a, 0x1a, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x14, 0x0a, - 0x12, 0x75, 0x6e, 0x73, 0x65, 0x74, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x22, 0x9c, 0x04, 0x0a, 0x07, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, - 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, - 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, - 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 
0x64, 0x12, 0x12, 0x0a, 0x04, - 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, - 0x12, 0x44, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x04, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, - 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x57, 0x0a, 0x11, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, - 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x10, 0x63, - 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x35, 0x0a, 0x17, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, - 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x14, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, - 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3e, 0x0a, 0x1c, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, - 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, - 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x18, 0x6c, 0x61, - 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, - 0x65, 0x45, 0x70, 0x6f, 0x63, 0x68, 0x1a, 0x51, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, - 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x57, 0x0a, 0x15, 0x43, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, - 0x38, 0x01, 0x22, 0x4d, 0x0a, 0x0b, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x49, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, - 0x64, 0x22, 0x4f, 0x0a, 0x0b, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 
0x64, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x49, 0x64, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, - 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x49, 0x64, 0x22, 0x47, 0x0a, 0x0d, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, - 0x0a, 0x09, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x08, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x9b, 0x04, 0x0a, 0x0c, - 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x40, 0x0a, 0x0e, - 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x43, - 0x0a, 0x0f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, - 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, - 0x70, 0x65, 0x73, 0x12, 0x3d, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, - 0x65, 0x73, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x73, 0x12, 0x2a, 0x0a, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3c, 0x0a, - 0x0c, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x41, 
0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, - 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x0c, 0x61, - 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x09, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, 0x73, 0x73, - 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x92, 0x04, 0x0a, 0x12, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x33, 0x0a, 0x06, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x06, 0x73, - 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x12, 0x45, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x6f, 0x6e, 0x5f, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x55, 0x6e, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, - 0x00, 0x52, 0x09, 0x75, 0x6e, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x51, 0x0a, 0x0c, - 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, - 0x00, 0x52, 0x0c, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, - 0x39, 0x0a, 0x04, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x48, 0x00, 0x52, 0x04, 0x6c, 0x69, 0x73, 0x74, 0x12, 0x39, 0x0a, 0x04, 0x6e, 0x6f, - 0x6e, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4e, 0x6f, 0x6e, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, - 0x04, 0x6e, 0x6f, 0x6e, 0x65, 0x12, 0x36, 0x0a, 0x03, 0x61, 0x6e, 0x79, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x41, 0x6e, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, - 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x03, 0x61, 0x6e, 0x79, 0x12, 0x3c, 0x0a, - 0x05, 0x74, 0x75, 0x70, 0x6c, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x75, 0x70, 0x6c, 0x65, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x48, 0x00, 0x52, 0x05, 0x74, 0x75, 0x70, 0x6c, 0x65, 0x12, 0x39, 0x0a, 0x04, 0x64, - 0x69, 0x63, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 
0x2e, 0x44, 0x69, 0x63, 0x74, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, - 0x52, 0x04, 0x64, 0x69, 0x63, 0x74, 0x42, 0x06, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x22, 0x5a, - 0x0a, 0x17, 0x55, 0x6e, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, - 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x3f, 0x0a, 0x0a, 0x63, 0x61, 0x6e, - 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, - 0x63, 0x61, 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x1e, 0x49, 0x6e, - 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x41, 0x0a, 0x0b, - 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x73, 0x22, - 0x53, 0x0a, 0x16, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, - 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x39, 0x0a, 0x07, 0x65, 0x6c, 0x65, - 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x07, 0x65, 0x6c, 0x65, - 0x6d, 0x65, 0x6e, 0x74, 0x22, 0x18, 0x0a, 0x16, 0x4e, 0x6f, 0x6e, 0x65, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x22, 0x17, - 0x0a, 0x15, 0x41, 0x6e, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, - 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x22, 0x56, 0x0a, 0x17, 0x54, 0x75, 0x70, 0x6c, 0x65, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, - 0xd7, 0x02, 0x0a, 0x16, 0x44, 0x69, 0x63, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x53, 0x0a, 0x0a, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x44, 0x69, 0x63, - 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, - 0x33, 0x0a, 0x16, 0x6e, 0x6f, 0x6e, 0x65, 0x5f, 0x74, 0x79, 0x70, 
0x65, 0x5f, 0x6e, 0x6f, 0x74, - 0x5f, 0x72, 0x65, 0x71, 0x75, 0x69, 0x72, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, - 0x13, 0x6e, 0x6f, 0x6e, 0x65, 0x54, 0x79, 0x70, 0x65, 0x4e, 0x6f, 0x74, 0x52, 0x65, 0x71, 0x75, - 0x69, 0x72, 0x65, 0x64, 0x12, 0x53, 0x0a, 0x15, 0x65, 0x78, 0x74, 0x72, 0x61, 0x5f, 0x70, 0x72, - 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x10, 0x63, 0x75, 0x73, + 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x35, 0x0a, + 0x17, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, + 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, + 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, + 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3e, 0x0a, 0x1c, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, + 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, + 0x70, 0x6f, 0x63, 0x68, 0x18, 0x09, 0x20, 0x01, 0x28, 0x03, 0x52, 0x18, 0x6c, 0x61, 0x73, 0x74, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, + 0x70, 0x6f, 0x63, 0x68, 0x12, 0x3d, 0x0a, 0x0f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x41, 0x6e, 0x79, 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x1a, 0x51, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x57, 0x0a, 0x15, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0x5e, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x45, 0x57, 0x10, 0x01, 0x12, 0x0b, + 0x0a, 0x07, 0x52, 0x55, 0x4e, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x12, 0x0c, 0x0a, 0x08, 0x43, + 0x4f, 0x4d, 0x50, 0x4c, 0x45, 0x54, 0x45, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x41, 0x49, + 0x4c, 0x45, 0x44, 0x10, 0x04, 0x12, 0x0a, 0x0a, 0x06, 0x43, 0x41, 0x43, 0x48, 0x45, 0x44, 0x10, + 0x05, 0x12, 0x0c, 0x0a, 0x08, 0x43, 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x45, 0x44, 0x10, 0x06, 0x22, + 0xf9, 0x05, 0x0a, 0x0d, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 
0x70, + 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, + 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, + 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x07, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, + 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, + 0x49, 0x64, 0x12, 0x4a, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, + 0x70, 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x3e, + 0x0a, 0x0a, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x40, + 0x0a, 0x0b, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x13, 0x65, 0x78, 0x74, 0x72, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x65, - 0x72, 0x74, 0x69, 0x65, 0x73, 0x54, 0x79, 0x70, 0x65, 0x1a, 0x5e, 0x0a, 0x0f, 0x50, 0x72, 0x6f, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x4d, 0x0a, 0x09, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x08, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x2e, + 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, + 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x62, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x1a, + 0x58, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xef, 0x01, 0x0a, 0x15, 0x53, 0x79, + 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x27, 0x0a, 0x05, 0x55, 0x4e, 0x53, 0x45, 0x54, 0x10, 0x00, 0x1a, 0x1c, + 0xaa, 0xf1, 0xfd, 
0xba, 0x0b, 0x16, 0x0a, 0x14, 0x75, 0x6e, 0x73, 0x65, 0x74, 0x5f, 0x65, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x12, 0x1d, 0x0a, 0x05, + 0x54, 0x52, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x1a, 0x12, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0c, 0x0a, + 0x0a, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x69, 0x6e, 0x12, 0x25, 0x0a, 0x09, 0x54, + 0x52, 0x41, 0x4e, 0x53, 0x46, 0x4f, 0x52, 0x4d, 0x10, 0x02, 0x1a, 0x16, 0xaa, 0xf1, 0xfd, 0xba, + 0x0b, 0x10, 0x0a, 0x0e, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x66, 0x6f, + 0x72, 0x6d, 0x12, 0x21, 0x0a, 0x07, 0x50, 0x52, 0x4f, 0x43, 0x45, 0x53, 0x53, 0x10, 0x03, 0x1a, + 0x14, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0e, 0x0a, 0x0c, 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x50, 0x72, + 0x6f, 0x63, 0x65, 0x73, 0x73, 0x12, 0x23, 0x0a, 0x08, 0x45, 0x56, 0x41, 0x4c, 0x55, 0x41, 0x54, + 0x45, 0x10, 0x04, 0x1a, 0x15, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0f, 0x0a, 0x0d, 0x6d, 0x6c, 0x6d, + 0x64, 0x2e, 0x45, 0x76, 0x61, 0x6c, 0x75, 0x61, 0x74, 0x65, 0x12, 0x1f, 0x0a, 0x06, 0x44, 0x45, + 0x50, 0x4c, 0x4f, 0x59, 0x10, 0x05, 0x1a, 0x13, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x0d, 0x0a, 0x0b, + 0x6d, 0x6c, 0x6d, 0x64, 0x2e, 0x44, 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x22, 0xbf, 0x03, 0x0a, 0x0b, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0e, 0x0a, 0x02, 0x69, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, + 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x0a, 0x0b, 0x64, 0x65, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, + 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x65, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0a, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x48, 0x0a, 0x0a, + 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, + 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x4b, 0x0a, 0x09, 0x62, 0x61, 0x73, 0x65, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, + 0x64, 0x42, 0x61, 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x62, 0x61, 0x73, 0x65, 0x54, + 0x79, 0x70, 0x65, 0x1a, 0x58, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x2f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, + 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 
0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x3e, 0x0a, + 0x15, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x44, 0x65, 0x66, 0x69, 0x6e, 0x65, 0x64, 0x42, 0x61, + 0x73, 0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x25, 0x0a, 0x05, 0x55, 0x4e, 0x53, 0x45, 0x54, 0x10, + 0x00, 0x1a, 0x1a, 0xaa, 0xf1, 0xfd, 0xba, 0x0b, 0x14, 0x0a, 0x12, 0x75, 0x6e, 0x73, 0x65, 0x74, + 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x22, 0xfc, 0x04, + 0x0a, 0x07, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x17, 0x0a, + 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, + 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x78, + 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0a, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x12, 0x44, 0x0a, 0x0a, 0x70, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x12, 0x57, 0x0a, 0x11, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x70, 0x72, 0x6f, 0x70, + 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, + 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x10, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, + 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x35, 0x0a, 0x17, 0x63, 0x72, + 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, + 0x65, 0x70, 0x6f, 0x63, 0x68, 0x18, 0x07, 0x20, 0x01, 0x28, 0x03, 0x52, 0x14, 0x63, 0x72, 0x65, + 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, + 0x68, 0x12, 0x3e, 0x0a, 0x1c, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, + 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x5f, 0x65, 0x70, 0x6f, 0x63, + 0x68, 0x18, 0x08, 0x20, 0x01, 0x28, 0x03, 0x52, 0x18, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, + 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x53, 0x69, 0x6e, 0x63, 0x65, 0x45, 0x70, 0x6f, 0x63, + 0x68, 0x12, 0x3d, 0x0a, 0x0f, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x41, 0x6e, 0x79, + 0x52, 0x0e, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x1a, 0x51, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 
0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x57, 0x0a, 0x15, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x35, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, + 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, + 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x4d, 0x0a, 0x0b, + 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x61, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x12, 0x1d, 0x0a, 0x0a, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x22, 0x4f, 0x0a, 0x0b, 0x41, + 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, 0x1d, 0x0a, + 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x22, 0x47, 0x0a, 0x0d, + 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x19, 0x0a, + 0x08, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x07, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x70, 0x61, 0x72, 0x65, + 0x6e, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x08, 0x70, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x49, 0x64, 0x22, 0x9b, 0x04, 0x0a, 0x0c, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, + 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x12, 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0e, 0x65, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x3d, 0x0a, + 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x03, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 
0x61, + 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x33, 0x0a, 0x09, + 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x73, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x2a, 0x0a, 0x06, 0x65, + 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, + 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x74, 0x74, 0x72, 0x69, + 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x74, 0x74, 0x72, + 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x22, 0x92, 0x04, 0x0a, 0x12, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x33, 0x0a, 0x06, 0x73, 0x69, + 0x6d, 0x70, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x06, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x12, + 0x45, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x55, 0x6e, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x09, 0x75, 0x6e, 0x69, + 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x51, 0x0a, 0x0c, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x73, + 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x49, 0x6e, 0x74, 0x65, 0x72, + 0x73, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, + 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x0c, 0x69, 0x6e, 0x74, + 0x65, 0x72, 0x73, 
0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x39, 0x0a, 0x04, 0x6c, 0x69, 0x73, + 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x04, + 0x6c, 0x69, 0x73, 0x74, 0x12, 0x39, 0x0a, 0x04, 0x6e, 0x6f, 0x6e, 0x65, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x4e, 0x6f, 0x6e, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x04, 0x6e, 0x6f, 0x6e, 0x65, 0x12, + 0x36, 0x0a, 0x03, 0x61, 0x6e, 0x79, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x6e, 0x79, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, + 0x48, 0x00, 0x52, 0x03, 0x61, 0x6e, 0x79, 0x12, 0x3c, 0x0a, 0x05, 0x74, 0x75, 0x70, 0x6c, 0x65, + 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x75, 0x70, 0x6c, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x05, + 0x74, 0x75, 0x70, 0x6c, 0x65, 0x12, 0x39, 0x0a, 0x04, 0x64, 0x69, 0x63, 0x74, 0x18, 0x08, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x44, 0x69, 0x63, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x04, 0x64, 0x69, 0x63, 0x74, + 0x42, 0x06, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x22, 0x5a, 0x0a, 0x17, 0x55, 0x6e, 0x69, 0x6f, + 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x3f, 0x0a, 0x0a, 0x63, 0x61, 0x6e, 0x64, 0x69, 0x64, 0x61, 0x74, 0x65, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0a, 0x63, 0x61, 0x6e, 0x64, 0x69, 0x64, + 0x61, 0x74, 0x65, 0x73, 0x22, 0x63, 0x0a, 0x1e, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x65, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x41, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x72, + 0x61, 0x69, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x73, 0x22, 0x53, 0x0a, 0x16, 0x4c, 0x69, 0x73, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x39, 0x0a, 0x07, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x07, 
0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x22, 0x18, + 0x0a, 0x16, 0x4e, 0x6f, 0x6e, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x22, 0x17, 0x0a, 0x15, 0x41, 0x6e, 0x79, 0x41, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x22, 0x56, 0x0a, 0x17, 0x54, 0x75, 0x70, 0x6c, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x3b, 0x0a, 0x08, + 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, + 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xd7, 0x02, 0x0a, 0x16, 0x44, 0x69, + 0x63, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x53, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x44, 0x69, 0x63, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x50, 0x72, + 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, + 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x12, 0x33, 0x0a, 0x16, 0x6e, 0x6f, 0x6e, + 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x6f, 0x74, 0x5f, 0x72, 0x65, 0x71, 0x75, 0x69, + 0x72, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x13, 0x6e, 0x6f, 0x6e, 0x65, 0x54, + 0x79, 0x70, 0x65, 0x4e, 0x6f, 0x74, 0x52, 0x65, 0x71, 0x75, 0x69, 0x72, 0x65, 0x64, 0x12, 0x53, + 0x0a, 0x15, 0x65, 0x78, 0x74, 0x72, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x14, 0x0a, 0x12, 0x46, 0x61, 0x6b, - 0x65, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x22, - 0xbc, 0x03, 0x0a, 0x13, 0x4d, 0x79, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, - 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x70, - 0x6f, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x12, - 0x1a, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x75, - 0x73, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x73, 0x65, 0x72, 0x12, - 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x73, - 0x6f, 0x63, 0x6b, 0x65, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x6f, 0x63, - 0x6b, 0x65, 0x74, 0x12, 0x4c, 0x0a, 0x0b, 0x73, 0x73, 0x6c, 0x5f, 
0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, 0x79, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, - 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0a, 0x73, 0x73, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x6b, 0x69, 0x70, 0x5f, 0x64, 0x62, 0x5f, 0x63, 0x72, 0x65, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x73, 0x6b, 0x69, - 0x70, 0x44, 0x62, 0x43, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xa0, 0x01, 0x0a, 0x0a, - 0x53, 0x53, 0x4c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, - 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x65, 0x72, 0x74, - 0x12, 0x0e, 0x0a, 0x02, 0x63, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x63, 0x61, - 0x12, 0x16, 0x0a, 0x06, 0x63, 0x61, 0x70, 0x61, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x06, 0x63, 0x61, 0x70, 0x61, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x69, 0x70, 0x68, - 0x65, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x63, 0x69, 0x70, 0x68, 0x65, 0x72, - 0x12, 0x2c, 0x0a, 0x12, 0x76, 0x65, 0x72, 0x69, 0x66, 0x79, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x76, 0x65, - 0x72, 0x69, 0x66, 0x79, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x22, 0xf6, - 0x01, 0x0a, 0x1a, 0x53, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x21, 0x0a, - 0x0c, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x5f, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x55, 0x72, 0x69, - 0x12, 0x5f, 0x0a, 0x0f, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6d, - 0x6f, 0x64, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x53, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x66, - 0x69, 0x67, 0x2e, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, - 0x65, 0x52, 0x0e, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, - 0x65, 0x22, 0x54, 0x0a, 0x0e, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, - 0x6f, 0x64, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, - 0x12, 0x0c, 0x0a, 0x08, 0x52, 0x45, 0x41, 0x44, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x01, 0x12, 0x0d, - 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x02, 0x12, 0x18, 0x0a, - 0x14, 0x52, 0x45, 0x41, 0x44, 0x57, 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4f, 0x50, 0x45, 0x4e, 0x43, - 0x52, 0x45, 0x41, 0x54, 0x45, 0x10, 0x03, 0x22, 0x95, 0x01, 0x0a, 0x10, 0x4d, 0x69, 0x67, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x38, 0x0a, 0x18, - 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x75, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6d, - 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 
0x16, - 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x55, 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x4d, 0x69, 0x67, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, 0x1b, 0x64, 0x6f, 0x77, 0x6e, 0x67, 0x72, - 0x61, 0x64, 0x65, 0x5f, 0x74, 0x6f, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x3a, 0x02, 0x2d, 0x31, 0x52, - 0x18, 0x64, 0x6f, 0x77, 0x6e, 0x67, 0x72, 0x61, 0x64, 0x65, 0x54, 0x6f, 0x53, 0x63, 0x68, 0x65, - 0x6d, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x22, - 0x36, 0x0a, 0x0c, 0x52, 0x65, 0x74, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x26, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x72, 0x65, 0x74, 0x72, 0x69, - 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0d, 0x6d, 0x61, 0x78, 0x4e, 0x75, 0x6d, - 0x52, 0x65, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, 0xa1, 0x02, 0x0a, 0x10, 0x43, 0x6f, 0x6e, 0x6e, - 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x46, 0x0a, 0x0d, - 0x66, 0x61, 0x6b, 0x65, 0x5f, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x46, 0x61, 0x6b, 0x65, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, - 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, 0x0c, 0x66, 0x61, 0x6b, 0x65, 0x44, 0x61, 0x74, 0x61, - 0x62, 0x61, 0x73, 0x65, 0x12, 0x38, 0x0a, 0x05, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x4d, 0x79, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, - 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, 0x05, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x12, 0x41, - 0x0a, 0x06, 0x73, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x53, 0x71, 0x6c, + 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x13, + 0x65, 0x78, 0x74, 0x72, 0x61, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x54, + 0x79, 0x70, 0x65, 0x1a, 0x5e, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x35, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, + 0x72, 0x75, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x22, 0x14, 0x0a, 0x12, 0x46, 0x61, 0x6b, 0x65, 0x44, 0x61, 0x74, 0x61, 0x62, + 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x22, 0xbc, 0x03, 0x0a, 0x13, 0x4d, 0x79, + 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, + 0x67, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x68, 0x6f, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x03, 0x52, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x61, 0x74, + 0x61, 0x62, 0x61, 0x73, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x61, 0x74, + 0x61, 0x62, 0x61, 
0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x73, 0x65, 0x72, 0x18, 0x04, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x75, 0x73, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, + 0x73, 0x77, 0x6f, 0x72, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, + 0x73, 0x77, 0x6f, 0x72, 0x64, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x6f, 0x63, 0x6b, 0x65, 0x74, 0x18, + 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x6f, 0x63, 0x6b, 0x65, 0x74, 0x12, 0x4c, 0x0a, + 0x0b, 0x73, 0x73, 0x6c, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x4d, 0x79, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x0a, 0x73, 0x73, 0x6c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x28, 0x0a, 0x10, 0x73, + 0x6b, 0x69, 0x70, 0x5f, 0x64, 0x62, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, + 0x08, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x73, 0x6b, 0x69, 0x70, 0x44, 0x62, 0x43, 0x72, 0x65, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xa0, 0x01, 0x0a, 0x0a, 0x53, 0x53, 0x4c, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x63, 0x65, 0x72, 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x63, 0x61, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x63, 0x61, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x61, + 0x70, 0x61, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x63, 0x61, 0x70, 0x61, + 0x74, 0x68, 0x12, 0x16, 0x0a, 0x06, 0x63, 0x69, 0x70, 0x68, 0x65, 0x72, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x06, 0x63, 0x69, 0x70, 0x68, 0x65, 0x72, 0x12, 0x2c, 0x0a, 0x12, 0x76, 0x65, + 0x72, 0x69, 0x66, 0x79, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x76, 0x65, 0x72, 0x69, 0x66, 0x79, 0x53, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x22, 0xf6, 0x01, 0x0a, 0x1a, 0x53, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, - 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, 0x06, 0x73, 0x71, 0x6c, 0x69, 0x74, - 0x65, 0x12, 0x3e, 0x0a, 0x0d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x52, 0x65, 0x74, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x52, 0x0c, 0x72, 0x65, 0x74, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x42, 0x08, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x22, 0x88, 0x01, 0x0a, 0x14, - 0x47, 0x72, 0x70, 0x63, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x41, 0x72, 0x67, 0x75, 0x6d, - 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3b, 0x0a, 0x1a, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, 0x63, 0x65, - 0x69, 0x76, 0x65, 0x5f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x65, 0x6e, 0x67, - 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x17, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x63, - 0x65, 0x69, 0x76, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4c, 0x65, 0x6e, 0x67, 0x74, - 0x68, 0x12, 0x33, 0x0a, 0x16, 0x68, 0x74, 0x74, 0x70, 0x32, 0x5f, 0x6d, 0x61, 0x78, 0x5f, 0x70, - 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x74, 0x72, 
0x69, 0x6b, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x13, 0x68, 0x74, 0x74, 0x70, 0x32, 0x4d, 0x61, 0x78, 0x50, 0x69, 0x6e, 0x67, 0x53, - 0x74, 0x72, 0x69, 0x6b, 0x65, 0x73, 0x22, 0xfc, 0x02, 0x0a, 0x19, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, 0x6f, - 0x6e, 0x66, 0x69, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x6f, 0x72, 0x74, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x4f, 0x0a, 0x0a, - 0x73, 0x73, 0x6c, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, + 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x69, 0x6c, 0x65, 0x6e, + 0x61, 0x6d, 0x65, 0x5f, 0x75, 0x72, 0x69, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, + 0x69, 0x6c, 0x65, 0x6e, 0x61, 0x6d, 0x65, 0x55, 0x72, 0x69, 0x12, 0x5f, 0x0a, 0x0f, 0x63, 0x6f, + 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x53, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x43, 0x6f, 0x6e, + 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, 0x65, 0x52, 0x0e, 0x63, 0x6f, 0x6e, + 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, 0x65, 0x22, 0x54, 0x0a, 0x0e, 0x43, + 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x0b, 0x0a, + 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x52, 0x45, + 0x41, 0x44, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x52, 0x45, 0x41, 0x44, + 0x57, 0x52, 0x49, 0x54, 0x45, 0x10, 0x02, 0x12, 0x18, 0x0a, 0x14, 0x52, 0x45, 0x41, 0x44, 0x57, + 0x52, 0x49, 0x54, 0x45, 0x5f, 0x4f, 0x50, 0x45, 0x4e, 0x43, 0x52, 0x45, 0x41, 0x54, 0x45, 0x10, + 0x03, 0x22, 0xdb, 0x03, 0x0a, 0x18, 0x50, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x53, 0x51, 0x4c, + 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x12, + 0x0a, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x68, 0x6f, + 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x61, 0x64, 0x64, 0x72, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x68, 0x6f, 0x73, 0x74, 0x61, 0x64, 0x64, 0x72, 0x12, 0x12, + 0x0a, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x70, 0x6f, + 0x72, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x75, 0x73, 0x65, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x04, 0x75, 0x73, 0x65, 0x72, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, + 0x72, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, + 0x72, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x70, 0x61, 0x73, 0x73, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x70, 0x61, 0x73, 0x73, 0x66, 0x69, 0x6c, 0x65, 0x12, 0x16, + 0x0a, 0x06, 0x64, 0x62, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, + 0x64, 0x62, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x28, 0x0a, 0x10, 0x73, 0x6b, 0x69, 0x70, 0x5f, 0x64, + 0x62, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x08, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0e, 0x73, 0x6b, 0x69, 0x70, 0x44, 0x62, 0x43, 0x72, 0x65, 
0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x12, 0x4e, 0x0a, 0x09, 0x73, 0x73, 0x6c, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x09, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x50, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, + 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x09, 0x73, 0x73, 0x6c, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x1a, 0x9c, 0x01, 0x0a, 0x0a, 0x53, 0x53, 0x4c, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x18, 0x0a, 0x07, 0x73, 0x73, 0x6c, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x07, 0x73, 0x73, 0x6c, 0x6d, 0x6f, 0x64, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x73, 0x6c, + 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x73, 0x73, 0x6c, 0x63, + 0x65, 0x72, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x73, 0x6c, 0x6b, 0x65, 0x79, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x73, 0x6c, 0x6b, 0x65, 0x79, 0x12, 0x20, 0x0a, 0x0b, 0x73, + 0x73, 0x6c, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0b, 0x73, 0x73, 0x6c, 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64, 0x12, 0x20, 0x0a, + 0x0b, 0x73, 0x73, 0x6c, 0x72, 0x6f, 0x6f, 0x74, 0x63, 0x65, 0x72, 0x74, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0b, 0x73, 0x73, 0x6c, 0x72, 0x6f, 0x6f, 0x74, 0x63, 0x65, 0x72, 0x74, 0x22, + 0x95, 0x01, 0x0a, 0x10, 0x4d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x38, 0x0a, 0x18, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x75, + 0x70, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x6d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x16, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x55, 0x70, + 0x67, 0x72, 0x61, 0x64, 0x65, 0x4d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x41, + 0x0a, 0x1b, 0x64, 0x6f, 0x77, 0x6e, 0x67, 0x72, 0x61, 0x64, 0x65, 0x5f, 0x74, 0x6f, 0x5f, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x03, 0x3a, 0x02, 0x2d, 0x31, 0x52, 0x18, 0x64, 0x6f, 0x77, 0x6e, 0x67, 0x72, 0x61, + 0x64, 0x65, 0x54, 0x6f, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x22, 0x36, 0x0a, 0x0c, 0x52, 0x65, 0x74, 0x72, 0x79, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x5f, 0x6e, + 0x75, 0x6d, 0x5f, 0x72, 0x65, 0x74, 0x72, 0x69, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x0d, 0x6d, 0x61, 0x78, 0x4e, 0x75, 0x6d, 0x52, 0x65, 0x74, 0x72, 0x69, 0x65, 0x73, 0x22, + 0xea, 0x02, 0x0a, 0x10, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x12, 0x46, 0x0a, 0x0d, 0x66, 0x61, 0x6b, 0x65, 0x5f, 0x64, 0x61, 0x74, + 0x61, 0x62, 0x61, 0x73, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x46, 0x61, 0x6b, 0x65, 0x44, 0x61, + 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, 0x0c, + 0x66, 0x61, 0x6b, 0x65, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x12, 0x38, 0x0a, 0x05, + 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, 0x79, 0x53, 0x51, 0x4c, 
0x44, + 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, + 0x05, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x12, 0x41, 0x0a, 0x06, 0x73, 0x71, 0x6c, 0x69, 0x74, 0x65, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x53, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, + 0x00, 0x52, 0x06, 0x73, 0x71, 0x6c, 0x69, 0x74, 0x65, 0x12, 0x47, 0x0a, 0x0a, 0x70, 0x6f, 0x73, + 0x74, 0x67, 0x72, 0x65, 0x73, 0x71, 0x6c, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x6f, 0x73, 0x74, + 0x67, 0x72, 0x65, 0x53, 0x51, 0x4c, 0x44, 0x61, 0x74, 0x61, 0x62, 0x61, 0x73, 0x65, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x48, 0x00, 0x52, 0x0a, 0x70, 0x6f, 0x73, 0x74, 0x67, 0x72, 0x65, 0x73, + 0x71, 0x6c, 0x12, 0x3e, 0x0a, 0x0d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x52, 0x65, 0x74, 0x72, 0x79, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0c, 0x72, 0x65, 0x74, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x42, 0x08, 0x0a, 0x06, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x22, 0x88, 0x01, 0x0a, + 0x14, 0x47, 0x72, 0x70, 0x63, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x41, 0x72, 0x67, 0x75, + 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x3b, 0x0a, 0x1a, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, 0x63, + 0x65, 0x69, 0x76, 0x65, 0x5f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x6c, 0x65, 0x6e, + 0x67, 0x74, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x17, 0x6d, 0x61, 0x78, 0x52, 0x65, + 0x63, 0x65, 0x69, 0x76, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4c, 0x65, 0x6e, 0x67, + 0x74, 0x68, 0x12, 0x33, 0x0a, 0x16, 0x68, 0x74, 0x74, 0x70, 0x32, 0x5f, 0x6d, 0x61, 0x78, 0x5f, + 0x70, 0x69, 0x6e, 0x67, 0x5f, 0x73, 0x74, 0x72, 0x69, 0x6b, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x13, 0x68, 0x74, 0x74, 0x70, 0x32, 0x4d, 0x61, 0x78, 0x50, 0x69, 0x6e, 0x67, + 0x53, 0x74, 0x72, 0x69, 0x6b, 0x65, 0x73, 0x22, 0xfc, 0x02, 0x0a, 0x19, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x43, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x43, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x68, 0x6f, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x70, 0x6f, 0x72, + 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x4f, 0x0a, + 0x0a, 0x73, 0x73, 0x6c, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x43, 0x6c, 0x69, + 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x52, 0x09, 0x73, 0x73, 0x6c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x4e, + 0x0a, 0x11, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x5f, 0x61, 0x72, 0x67, 0x75, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x72, 0x70, 0x63, 0x43, 0x68, 0x61, 0x6e, + 0x6e, 0x65, 0x6c, 
0x41, 0x72, 0x67, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x10, 0x63, 0x68, + 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x41, 0x72, 0x67, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x2c, + 0x0a, 0x12, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, + 0x5f, 0x73, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x01, 0x52, 0x10, 0x63, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, 0x63, 0x1a, 0x68, 0x0a, 0x09, + 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x69, + 0x65, 0x6e, 0x74, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, + 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x75, 0x73, + 0x74, 0x6f, 0x6d, 0x5f, 0x63, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x75, + 0x73, 0x74, 0x6f, 0x6d, 0x43, 0x61, 0x22, 0x94, 0x03, 0x0a, 0x19, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x12, 0x4a, 0x0a, 0x11, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, + 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x10, + 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, + 0x12, 0x4a, 0x0a, 0x11, 0x6d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, 0x69, 0x67, 0x72, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x10, 0x6d, 0x69, 0x67, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4f, 0x0a, 0x0a, + 0x73, 0x73, 0x6c, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x43, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, - 0x69, 0x67, 0x52, 0x09, 0x73, 0x73, 0x6c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x4e, 0x0a, - 0x11, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x5f, 0x61, 0x72, 0x67, 0x75, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x72, 0x70, 0x63, 0x43, 0x68, 0x61, 0x6e, 0x6e, - 0x65, 0x6c, 0x41, 0x72, 0x67, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x10, 0x63, 0x68, 0x61, - 0x6e, 0x6e, 0x65, 0x6c, 0x41, 0x72, 0x67, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x2c, 0x0a, - 0x12, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x5f, - 0x73, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x01, 0x52, 0x10, 0x63, 0x6c, 0x69, 0x65, 0x6e, - 0x74, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x53, 0x65, 0x63, 0x1a, 0x68, 0x0a, 0x09, 0x53, - 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 
0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x69, 0x65, - 0x6e, 0x74, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, - 0x69, 0x65, 0x6e, 0x74, 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x75, 0x73, 0x74, - 0x6f, 0x6d, 0x5f, 0x63, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, 0x75, 0x73, - 0x74, 0x6f, 0x6d, 0x43, 0x61, 0x22, 0x94, 0x03, 0x0a, 0x19, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x6e, - 0x66, 0x69, 0x67, 0x12, 0x4a, 0x0a, 0x11, 0x63, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, - 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x10, 0x63, - 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, - 0x4a, 0x0a, 0x11, 0x6d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, 0x69, 0x67, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x10, 0x6d, 0x69, 0x67, 0x72, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4f, 0x0a, 0x0a, 0x73, - 0x73, 0x6c, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x65, - 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, 0x69, - 0x67, 0x52, 0x09, 0x73, 0x73, 0x6c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x1a, 0x8d, 0x01, 0x0a, - 0x09, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, - 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, 0x72, - 0x76, 0x65, 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, - 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x63, 0x75, - 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x63, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x63, - 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x43, 0x61, 0x12, 0x23, 0x0a, 0x0d, 0x63, 0x6c, 0x69, 0x65, 0x6e, - 0x74, 0x5f, 0x76, 0x65, 0x72, 0x69, 0x66, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, - 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x69, 0x66, 0x79, 0x22, 0xb0, 0x03, 0x0a, - 0x14, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, 0x73, - 0x75, 0x6c, 0x74, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x3a, 0x02, - 0x32, 0x30, 0x52, 0x0d, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x53, 0x69, 0x7a, - 0x65, 0x12, 0x54, 0x0a, 0x0e, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x5f, 
0x62, 0x79, 0x5f, 0x66, 0x69, - 0x65, 0x6c, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x72, 0x64, - 0x65, 0x72, 0x42, 0x79, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x52, 0x0c, 0x6f, 0x72, 0x64, 0x65, 0x72, - 0x42, 0x79, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, - 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x12, - 0x21, 0x0a, 0x0c, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x5f, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x51, 0x75, 0x65, - 0x72, 0x79, 0x1a, 0xca, 0x01, 0x0a, 0x0c, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x42, 0x79, 0x46, 0x69, - 0x65, 0x6c, 0x64, 0x12, 0x4e, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x34, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x42, 0x79, 0x46, 0x69, 0x65, - 0x6c, 0x64, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x3a, 0x02, 0x49, 0x44, 0x52, 0x05, 0x66, 0x69, - 0x65, 0x6c, 0x64, 0x12, 0x1b, 0x0a, 0x06, 0x69, 0x73, 0x5f, 0x61, 0x73, 0x63, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x05, 0x69, 0x73, 0x41, 0x73, 0x63, - 0x22, 0x4d, 0x0a, 0x05, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x15, 0x0a, 0x11, 0x46, 0x49, 0x45, - 0x4c, 0x44, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, - 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x52, 0x45, 0x41, 0x54, 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, - 0x01, 0x12, 0x14, 0x0a, 0x10, 0x4c, 0x41, 0x53, 0x54, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, - 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x44, 0x10, 0x03, 0x22, - 0xbf, 0x01, 0x0a, 0x1a, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x4e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x12, 0x1b, - 0x0a, 0x09, 0x69, 0x64, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x08, 0x69, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x66, - 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x0b, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x42, - 0x0a, 0x0b, 0x73, 0x65, 0x74, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0a, 0x73, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x64, 0x5f, 0x69, 0x64, 0x73, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x03, 0x52, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x64, 0x49, 0x64, - 0x73, 0x22, 0x1f, 0x0a, 0x12, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 
0x80, - 0x80, 0x02, 0x22, 0xf7, 0x02, 0x0a, 0x18, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, - 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x50, 0x0a, 0x11, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x5f, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, + 0x69, 0x67, 0x52, 0x09, 0x73, 0x73, 0x6c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x1a, 0x8d, 0x01, + 0x0a, 0x09, 0x53, 0x53, 0x4c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x09, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4b, 0x65, 0x79, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x65, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x43, 0x65, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x63, + 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x5f, 0x63, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x43, 0x61, 0x12, 0x23, 0x0a, 0x0d, 0x63, 0x6c, 0x69, 0x65, + 0x6e, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x69, 0x66, 0x79, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x0c, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x56, 0x65, 0x72, 0x69, 0x66, 0x79, 0x22, 0xb0, 0x03, + 0x0a, 0x14, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x5f, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x3a, + 0x02, 0x32, 0x30, 0x52, 0x0d, 0x6d, 0x61, 0x78, 0x52, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x53, 0x69, + 0x7a, 0x65, 0x12, 0x54, 0x0a, 0x0e, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x5f, 0x62, 0x79, 0x5f, 0x66, + 0x69, 0x65, 0x6c, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x48, 0x00, 0x52, - 0x10, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x12, 0x61, 0x0a, 0x0f, 0x73, 0x74, 0x6f, 0x70, 0x5f, 0x63, 0x6f, 0x6e, 0x64, 0x69, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x38, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, - 0x47, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x2e, 0x42, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x72, - 0x61, 0x69, 0x6e, 0x74, 0x52, 0x0e, 0x73, 0x74, 0x6f, 0x70, 0x43, 0x6f, 0x6e, 0x64, 0x69, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x96, 0x01, 0x0a, 0x12, 0x42, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, - 0x79, 0x43, 0x6f, 0x6e, 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x12, 0x20, 0x0a, 0x0c, 0x6d, - 0x61, 0x78, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x68, 0x6f, 0x70, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x0a, 0x6d, 0x61, 0x78, 0x4e, 0x75, 0x6d, 0x48, 0x6f, 0x70, 0x73, 0x12, 0x2d, 0x0a, - 0x12, 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x73, 
0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x62, 0x6f, 0x75, 0x6e, 0x64, - 0x61, 0x72, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x2f, 0x0a, 0x13, - 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x62, 0x6f, 0x75, 0x6e, 0x64, - 0x61, 0x72, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x0d, 0x0a, - 0x0b, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x2a, 0x48, 0x0a, 0x0c, - 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, - 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x49, 0x4e, 0x54, - 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x02, 0x12, 0x0a, - 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, - 0x52, 0x55, 0x43, 0x54, 0x10, 0x04, 0x3a, 0x7b, 0x0a, 0x15, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, - 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, - 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x95, 0xde, 0xaf, 0xb7, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, - 0x6d, 0x54, 0x79, 0x70, 0x65, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x13, - 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, - 0x69, 0x6f, 0x6e, + 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x72, + 0x64, 0x65, 0x72, 0x42, 0x79, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x52, 0x0c, 0x6f, 0x72, 0x64, 0x65, + 0x72, 0x42, 0x79, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, + 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, + 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x5f, 0x71, 0x75, 0x65, 0x72, 0x79, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x1a, 0xca, 0x01, 0x0a, 0x0c, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x42, 0x79, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x12, 0x4e, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x34, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x72, 0x64, 0x65, 0x72, 0x42, 0x79, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x3a, 0x02, 0x49, 0x44, 0x52, 0x05, 0x66, + 0x69, 0x65, 0x6c, 0x64, 0x12, 0x1b, 0x0a, 0x06, 0x69, 0x73, 0x5f, 0x61, 0x73, 0x63, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x52, 0x05, 0x69, 0x73, 0x41, 0x73, + 0x63, 0x22, 0x4d, 0x0a, 0x05, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x15, 0x0a, 0x11, 0x46, 0x49, + 0x45, 0x4c, 0x44, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, + 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x43, 0x52, 0x45, 0x41, 0x54, 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x45, + 0x10, 0x01, 0x12, 0x14, 
0x0a, 0x10, 0x4c, 0x41, 0x53, 0x54, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, + 0x45, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x02, 0x12, 0x06, 0x0a, 0x02, 0x49, 0x44, 0x10, 0x03, + 0x22, 0xbf, 0x01, 0x0a, 0x1a, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x4e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x12, + 0x1b, 0x0a, 0x09, 0x69, 0x64, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x08, 0x69, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x66, 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x0b, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, + 0x42, 0x0a, 0x0b, 0x73, 0x65, 0x74, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0a, 0x73, 0x65, 0x74, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x64, 0x5f, 0x69, 0x64, + 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x03, 0x52, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x65, 0x64, 0x49, + 0x64, 0x73, 0x22, 0x31, 0x0a, 0x12, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x74, 0x61, 0x67, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x74, 0x61, 0x67, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, + 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x9f, 0x03, 0x0a, 0x18, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, + 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x11, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, + 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x48, 0x00, 0x52, 0x10, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x61, 0x0a, 0x0f, 0x73, 0x74, 0x6f, 0x70, 0x5f, 0x63, 0x6f, 0x6e, + 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x38, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, + 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x42, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6f, 0x6e, + 0x73, 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x52, 0x0e, 0x73, 0x74, 0x6f, 0x70, 0x43, 0x6f, 0x6e, + 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0d, 0x6d, 0x61, 0x78, 0x5f, 0x6e, + 0x6f, 0x64, 0x65, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x3a, 0x02, + 0x32, 0x30, 0x52, 0x0b, 0x6d, 0x61, 0x78, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x1a, + 0x96, 0x01, 0x0a, 0x12, 0x42, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x43, 0x6f, 0x6e, 0x73, + 0x74, 0x72, 0x61, 0x69, 0x6e, 0x74, 0x12, 0x20, 0x0a, 0x0c, 0x6d, 0x61, 0x78, 0x5f, 0x6e, 0x75, + 0x6d, 0x5f, 0x68, 0x6f, 0x70, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x6d, 0x61, + 0x78, 0x4e, 0x75, 0x6d, 0x48, 0x6f, 0x70, 0x73, 
0x12, 0x2d, 0x0a, 0x12, 0x62, 0x6f, 0x75, 0x6e, + 0x64, 0x61, 0x72, 0x79, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x11, 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x2f, 0x0a, 0x13, 0x62, 0x6f, 0x75, 0x6e, 0x64, + 0x61, 0x72, 0x79, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x62, 0x6f, 0x75, 0x6e, 0x64, 0x61, 0x72, 0x79, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x0d, 0x0a, 0x0b, 0x71, 0x75, 0x65, 0x72, + 0x79, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x84, 0x04, 0x0a, 0x1b, 0x4c, 0x69, 0x6e, 0x65, + 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x67, 0x0a, 0x12, 0x73, 0x74, 0x61, 0x72, 0x74, + 0x69, 0x6e, 0x67, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x36, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, + 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x74, + 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x4e, 0x6f, 0x64, 0x65, 0x73, 0x48, 0x00, 0x52, 0x11, 0x73, + 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, + 0x12, 0x69, 0x0a, 0x13, 0x73, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x5f, 0x65, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x36, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, + 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, + 0x4e, 0x6f, 0x64, 0x65, 0x73, 0x48, 0x00, 0x52, 0x12, 0x73, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, + 0x67, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x20, 0x0a, 0x0c, 0x6d, + 0x61, 0x78, 0x5f, 0x6e, 0x75, 0x6d, 0x5f, 0x68, 0x6f, 0x70, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0a, 0x6d, 0x61, 0x78, 0x4e, 0x75, 0x6d, 0x48, 0x6f, 0x70, 0x73, 0x12, 0x50, 0x0a, + 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x32, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, + 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x1a, + 0x32, 0x0a, 0x0d, 0x53, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x4e, 0x6f, 0x64, 0x65, 0x73, + 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x5f, 0x71, 0x75, 0x65, 0x72, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x69, 0x6c, 0x74, 0x65, 0x72, 0x51, 0x75, + 0x65, 0x72, 0x79, 0x22, 0x57, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x12, 0x19, 0x0a, 0x15, 0x44, 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, + 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x55, + 0x50, 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x10, 0x01, 0x12, 0x0e, 0x0a, 
0x0a, 0x44, 0x4f, 0x57, + 0x4e, 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x10, 0x02, 0x12, 0x11, 0x0a, 0x0d, 0x42, 0x49, 0x44, + 0x49, 0x52, 0x45, 0x43, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x4c, 0x10, 0x03, 0x42, 0x10, 0x0a, 0x0e, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x5f, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x2a, 0x60, + 0x0a, 0x0c, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, + 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x07, 0x0a, 0x03, 0x49, + 0x4e, 0x54, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x02, + 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, + 0x53, 0x54, 0x52, 0x55, 0x43, 0x54, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x52, 0x4f, 0x54, + 0x4f, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x42, 0x4f, 0x4f, 0x4c, 0x45, 0x41, 0x4e, 0x10, 0x06, + 0x3a, 0x7b, 0x0a, 0x15, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, + 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x95, 0xde, 0xaf, + 0xb7, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x45, + 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, + 0x54, 0x79, 0x70, 0x65, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, } var ( @@ -4762,8 +5654,8 @@ func file_ml_metadata_proto_metadata_store_proto_rawDescGZIP() []byte { return file_ml_metadata_proto_metadata_store_proto_rawDescData } -var file_ml_metadata_proto_metadata_store_proto_enumTypes = make([]protoimpl.EnumInfo, 9) -var file_ml_metadata_proto_metadata_store_proto_msgTypes = make([]protoimpl.MessageInfo, 51) +var file_ml_metadata_proto_metadata_store_proto_enumTypes = make([]protoimpl.EnumInfo, 10) +var file_ml_metadata_proto_metadata_store_proto_msgTypes = make([]protoimpl.MessageInfo, 55) var file_ml_metadata_proto_metadata_store_proto_goTypes = []interface{}{ (PropertyType)(0), // 0: ml_metadata.PropertyType (Artifact_State)(0), // 1: ml_metadata.Artifact.State @@ -4774,137 +5666,153 @@ var file_ml_metadata_proto_metadata_store_proto_goTypes = []interface{}{ (ContextType_SystemDefinedBaseType)(0), // 6: ml_metadata.ContextType.SystemDefinedBaseType (SqliteMetadataSourceConfig_ConnectionMode)(0), // 7: ml_metadata.SqliteMetadataSourceConfig.ConnectionMode (ListOperationOptions_OrderByField_Field)(0), // 8: ml_metadata.ListOperationOptions.OrderByField.Field - (*SystemTypeExtension)(nil), // 9: ml_metadata.SystemTypeExtension - (*Value)(nil), // 10: ml_metadata.Value - (*Artifact)(nil), // 11: ml_metadata.Artifact - (*ArtifactType)(nil), // 12: ml_metadata.ArtifactType - (*Event)(nil), // 13: ml_metadata.Event - (*Execution)(nil), // 14: ml_metadata.Execution - (*ExecutionType)(nil), // 15: ml_metadata.ExecutionType - (*ContextType)(nil), // 16: ml_metadata.ContextType - (*Context)(nil), // 17: ml_metadata.Context - (*Attribution)(nil), // 18: ml_metadata.Attribution - (*Association)(nil), // 19: ml_metadata.Association - (*ParentContext)(nil), // 20: ml_metadata.ParentContext - (*LineageGraph)(nil), // 21: ml_metadata.LineageGraph - (*ArtifactStructType)(nil), // 22: ml_metadata.ArtifactStructType - 
(*UnionArtifactStructType)(nil), // 23: ml_metadata.UnionArtifactStructType - (*IntersectionArtifactStructType)(nil), // 24: ml_metadata.IntersectionArtifactStructType - (*ListArtifactStructType)(nil), // 25: ml_metadata.ListArtifactStructType - (*NoneArtifactStructType)(nil), // 26: ml_metadata.NoneArtifactStructType - (*AnyArtifactStructType)(nil), // 27: ml_metadata.AnyArtifactStructType - (*TupleArtifactStructType)(nil), // 28: ml_metadata.TupleArtifactStructType - (*DictArtifactStructType)(nil), // 29: ml_metadata.DictArtifactStructType - (*FakeDatabaseConfig)(nil), // 30: ml_metadata.FakeDatabaseConfig - (*MySQLDatabaseConfig)(nil), // 31: ml_metadata.MySQLDatabaseConfig - (*SqliteMetadataSourceConfig)(nil), // 32: ml_metadata.SqliteMetadataSourceConfig - (*MigrationOptions)(nil), // 33: ml_metadata.MigrationOptions - (*RetryOptions)(nil), // 34: ml_metadata.RetryOptions - (*ConnectionConfig)(nil), // 35: ml_metadata.ConnectionConfig - (*GrpcChannelArguments)(nil), // 36: ml_metadata.GrpcChannelArguments - (*MetadataStoreClientConfig)(nil), // 37: ml_metadata.MetadataStoreClientConfig - (*MetadataStoreServerConfig)(nil), // 38: ml_metadata.MetadataStoreServerConfig - (*ListOperationOptions)(nil), // 39: ml_metadata.ListOperationOptions - (*ListOperationNextPageToken)(nil), // 40: ml_metadata.ListOperationNextPageToken - (*TransactionOptions)(nil), // 41: ml_metadata.TransactionOptions - (*LineageGraphQueryOptions)(nil), // 42: ml_metadata.LineageGraphQueryOptions - nil, // 43: ml_metadata.Artifact.PropertiesEntry - nil, // 44: ml_metadata.Artifact.CustomPropertiesEntry - nil, // 45: ml_metadata.ArtifactType.PropertiesEntry - (*Event_Path)(nil), // 46: ml_metadata.Event.Path - (*Event_Path_Step)(nil), // 47: ml_metadata.Event.Path.Step - nil, // 48: ml_metadata.Execution.PropertiesEntry - nil, // 49: ml_metadata.Execution.CustomPropertiesEntry - nil, // 50: ml_metadata.ExecutionType.PropertiesEntry - nil, // 51: ml_metadata.ContextType.PropertiesEntry - nil, // 52: ml_metadata.Context.PropertiesEntry - nil, // 53: ml_metadata.Context.CustomPropertiesEntry - nil, // 54: ml_metadata.DictArtifactStructType.PropertiesEntry - (*MySQLDatabaseConfig_SSLOptions)(nil), // 55: ml_metadata.MySQLDatabaseConfig.SSLOptions - (*MetadataStoreClientConfig_SSLConfig)(nil), // 56: ml_metadata.MetadataStoreClientConfig.SSLConfig - (*MetadataStoreServerConfig_SSLConfig)(nil), // 57: ml_metadata.MetadataStoreServerConfig.SSLConfig - (*ListOperationOptions_OrderByField)(nil), // 58: ml_metadata.ListOperationOptions.OrderByField - (*LineageGraphQueryOptions_BoundaryConstraint)(nil), // 59: ml_metadata.LineageGraphQueryOptions.BoundaryConstraint - (*structpb.Struct)(nil), // 60: google.protobuf.Struct - (*descriptorpb.EnumValueOptions)(nil), // 61: google.protobuf.EnumValueOptions + (LineageSubgraphQueryOptions_Direction)(0), // 9: ml_metadata.LineageSubgraphQueryOptions.Direction + (*SystemTypeExtension)(nil), // 10: ml_metadata.SystemTypeExtension + (*Value)(nil), // 11: ml_metadata.Value + (*Artifact)(nil), // 12: ml_metadata.Artifact + (*ArtifactType)(nil), // 13: ml_metadata.ArtifactType + (*Event)(nil), // 14: ml_metadata.Event + (*Execution)(nil), // 15: ml_metadata.Execution + (*ExecutionType)(nil), // 16: ml_metadata.ExecutionType + (*ContextType)(nil), // 17: ml_metadata.ContextType + (*Context)(nil), // 18: ml_metadata.Context + (*Attribution)(nil), // 19: ml_metadata.Attribution + (*Association)(nil), // 20: ml_metadata.Association + (*ParentContext)(nil), // 21: ml_metadata.ParentContext + 
(*LineageGraph)(nil), // 22: ml_metadata.LineageGraph + (*ArtifactStructType)(nil), // 23: ml_metadata.ArtifactStructType + (*UnionArtifactStructType)(nil), // 24: ml_metadata.UnionArtifactStructType + (*IntersectionArtifactStructType)(nil), // 25: ml_metadata.IntersectionArtifactStructType + (*ListArtifactStructType)(nil), // 26: ml_metadata.ListArtifactStructType + (*NoneArtifactStructType)(nil), // 27: ml_metadata.NoneArtifactStructType + (*AnyArtifactStructType)(nil), // 28: ml_metadata.AnyArtifactStructType + (*TupleArtifactStructType)(nil), // 29: ml_metadata.TupleArtifactStructType + (*DictArtifactStructType)(nil), // 30: ml_metadata.DictArtifactStructType + (*FakeDatabaseConfig)(nil), // 31: ml_metadata.FakeDatabaseConfig + (*MySQLDatabaseConfig)(nil), // 32: ml_metadata.MySQLDatabaseConfig + (*SqliteMetadataSourceConfig)(nil), // 33: ml_metadata.SqliteMetadataSourceConfig + (*PostgreSQLDatabaseConfig)(nil), // 34: ml_metadata.PostgreSQLDatabaseConfig + (*MigrationOptions)(nil), // 35: ml_metadata.MigrationOptions + (*RetryOptions)(nil), // 36: ml_metadata.RetryOptions + (*ConnectionConfig)(nil), // 37: ml_metadata.ConnectionConfig + (*GrpcChannelArguments)(nil), // 38: ml_metadata.GrpcChannelArguments + (*MetadataStoreClientConfig)(nil), // 39: ml_metadata.MetadataStoreClientConfig + (*MetadataStoreServerConfig)(nil), // 40: ml_metadata.MetadataStoreServerConfig + (*ListOperationOptions)(nil), // 41: ml_metadata.ListOperationOptions + (*ListOperationNextPageToken)(nil), // 42: ml_metadata.ListOperationNextPageToken + (*TransactionOptions)(nil), // 43: ml_metadata.TransactionOptions + (*LineageGraphQueryOptions)(nil), // 44: ml_metadata.LineageGraphQueryOptions + (*LineageSubgraphQueryOptions)(nil), // 45: ml_metadata.LineageSubgraphQueryOptions + nil, // 46: ml_metadata.Artifact.PropertiesEntry + nil, // 47: ml_metadata.Artifact.CustomPropertiesEntry + nil, // 48: ml_metadata.ArtifactType.PropertiesEntry + (*Event_Path)(nil), // 49: ml_metadata.Event.Path + (*Event_Path_Step)(nil), // 50: ml_metadata.Event.Path.Step + nil, // 51: ml_metadata.Execution.PropertiesEntry + nil, // 52: ml_metadata.Execution.CustomPropertiesEntry + nil, // 53: ml_metadata.ExecutionType.PropertiesEntry + nil, // 54: ml_metadata.ContextType.PropertiesEntry + nil, // 55: ml_metadata.Context.PropertiesEntry + nil, // 56: ml_metadata.Context.CustomPropertiesEntry + nil, // 57: ml_metadata.DictArtifactStructType.PropertiesEntry + (*MySQLDatabaseConfig_SSLOptions)(nil), // 58: ml_metadata.MySQLDatabaseConfig.SSLOptions + (*PostgreSQLDatabaseConfig_SSLOptions)(nil), // 59: ml_metadata.PostgreSQLDatabaseConfig.SSLOptions + (*MetadataStoreClientConfig_SSLConfig)(nil), // 60: ml_metadata.MetadataStoreClientConfig.SSLConfig + (*MetadataStoreServerConfig_SSLConfig)(nil), // 61: ml_metadata.MetadataStoreServerConfig.SSLConfig + (*ListOperationOptions_OrderByField)(nil), // 62: ml_metadata.ListOperationOptions.OrderByField + (*LineageGraphQueryOptions_BoundaryConstraint)(nil), // 63: ml_metadata.LineageGraphQueryOptions.BoundaryConstraint + (*LineageSubgraphQueryOptions_StartingNodes)(nil), // 64: ml_metadata.LineageSubgraphQueryOptions.StartingNodes + (*structpb.Struct)(nil), // 65: google.protobuf.Struct + (*anypb.Any)(nil), // 66: google.protobuf.Any + (*descriptorpb.EnumValueOptions)(nil), // 67: google.protobuf.EnumValueOptions } var file_ml_metadata_proto_metadata_store_proto_depIdxs = []int32{ - 60, // 0: ml_metadata.Value.struct_value:type_name -> google.protobuf.Struct - 43, // 1: 
ml_metadata.Artifact.properties:type_name -> ml_metadata.Artifact.PropertiesEntry - 44, // 2: ml_metadata.Artifact.custom_properties:type_name -> ml_metadata.Artifact.CustomPropertiesEntry - 1, // 3: ml_metadata.Artifact.state:type_name -> ml_metadata.Artifact.State - 45, // 4: ml_metadata.ArtifactType.properties:type_name -> ml_metadata.ArtifactType.PropertiesEntry - 2, // 5: ml_metadata.ArtifactType.base_type:type_name -> ml_metadata.ArtifactType.SystemDefinedBaseType - 46, // 6: ml_metadata.Event.path:type_name -> ml_metadata.Event.Path - 3, // 7: ml_metadata.Event.type:type_name -> ml_metadata.Event.Type - 4, // 8: ml_metadata.Execution.last_known_state:type_name -> ml_metadata.Execution.State - 48, // 9: ml_metadata.Execution.properties:type_name -> ml_metadata.Execution.PropertiesEntry - 49, // 10: ml_metadata.Execution.custom_properties:type_name -> ml_metadata.Execution.CustomPropertiesEntry - 50, // 11: ml_metadata.ExecutionType.properties:type_name -> ml_metadata.ExecutionType.PropertiesEntry - 22, // 12: ml_metadata.ExecutionType.input_type:type_name -> ml_metadata.ArtifactStructType - 22, // 13: ml_metadata.ExecutionType.output_type:type_name -> ml_metadata.ArtifactStructType - 5, // 14: ml_metadata.ExecutionType.base_type:type_name -> ml_metadata.ExecutionType.SystemDefinedBaseType - 51, // 15: ml_metadata.ContextType.properties:type_name -> ml_metadata.ContextType.PropertiesEntry - 6, // 16: ml_metadata.ContextType.base_type:type_name -> ml_metadata.ContextType.SystemDefinedBaseType - 52, // 17: ml_metadata.Context.properties:type_name -> ml_metadata.Context.PropertiesEntry - 53, // 18: ml_metadata.Context.custom_properties:type_name -> ml_metadata.Context.CustomPropertiesEntry - 12, // 19: ml_metadata.LineageGraph.artifact_types:type_name -> ml_metadata.ArtifactType - 15, // 20: ml_metadata.LineageGraph.execution_types:type_name -> ml_metadata.ExecutionType - 16, // 21: ml_metadata.LineageGraph.context_types:type_name -> ml_metadata.ContextType - 11, // 22: ml_metadata.LineageGraph.artifacts:type_name -> ml_metadata.Artifact - 14, // 23: ml_metadata.LineageGraph.executions:type_name -> ml_metadata.Execution - 17, // 24: ml_metadata.LineageGraph.contexts:type_name -> ml_metadata.Context - 13, // 25: ml_metadata.LineageGraph.events:type_name -> ml_metadata.Event - 18, // 26: ml_metadata.LineageGraph.attributions:type_name -> ml_metadata.Attribution - 19, // 27: ml_metadata.LineageGraph.associations:type_name -> ml_metadata.Association - 12, // 28: ml_metadata.ArtifactStructType.simple:type_name -> ml_metadata.ArtifactType - 23, // 29: ml_metadata.ArtifactStructType.union_type:type_name -> ml_metadata.UnionArtifactStructType - 24, // 30: ml_metadata.ArtifactStructType.intersection:type_name -> ml_metadata.IntersectionArtifactStructType - 25, // 31: ml_metadata.ArtifactStructType.list:type_name -> ml_metadata.ListArtifactStructType - 26, // 32: ml_metadata.ArtifactStructType.none:type_name -> ml_metadata.NoneArtifactStructType - 27, // 33: ml_metadata.ArtifactStructType.any:type_name -> ml_metadata.AnyArtifactStructType - 28, // 34: ml_metadata.ArtifactStructType.tuple:type_name -> ml_metadata.TupleArtifactStructType - 29, // 35: ml_metadata.ArtifactStructType.dict:type_name -> ml_metadata.DictArtifactStructType - 22, // 36: ml_metadata.UnionArtifactStructType.candidates:type_name -> ml_metadata.ArtifactStructType - 22, // 37: ml_metadata.IntersectionArtifactStructType.constraints:type_name -> ml_metadata.ArtifactStructType - 22, // 38: 
ml_metadata.ListArtifactStructType.element:type_name -> ml_metadata.ArtifactStructType - 22, // 39: ml_metadata.TupleArtifactStructType.elements:type_name -> ml_metadata.ArtifactStructType - 54, // 40: ml_metadata.DictArtifactStructType.properties:type_name -> ml_metadata.DictArtifactStructType.PropertiesEntry - 22, // 41: ml_metadata.DictArtifactStructType.extra_properties_type:type_name -> ml_metadata.ArtifactStructType - 55, // 42: ml_metadata.MySQLDatabaseConfig.ssl_options:type_name -> ml_metadata.MySQLDatabaseConfig.SSLOptions - 7, // 43: ml_metadata.SqliteMetadataSourceConfig.connection_mode:type_name -> ml_metadata.SqliteMetadataSourceConfig.ConnectionMode - 30, // 44: ml_metadata.ConnectionConfig.fake_database:type_name -> ml_metadata.FakeDatabaseConfig - 31, // 45: ml_metadata.ConnectionConfig.mysql:type_name -> ml_metadata.MySQLDatabaseConfig - 32, // 46: ml_metadata.ConnectionConfig.sqlite:type_name -> ml_metadata.SqliteMetadataSourceConfig - 34, // 47: ml_metadata.ConnectionConfig.retry_options:type_name -> ml_metadata.RetryOptions - 56, // 48: ml_metadata.MetadataStoreClientConfig.ssl_config:type_name -> ml_metadata.MetadataStoreClientConfig.SSLConfig - 36, // 49: ml_metadata.MetadataStoreClientConfig.channel_arguments:type_name -> ml_metadata.GrpcChannelArguments - 35, // 50: ml_metadata.MetadataStoreServerConfig.connection_config:type_name -> ml_metadata.ConnectionConfig - 33, // 51: ml_metadata.MetadataStoreServerConfig.migration_options:type_name -> ml_metadata.MigrationOptions - 57, // 52: ml_metadata.MetadataStoreServerConfig.ssl_config:type_name -> ml_metadata.MetadataStoreServerConfig.SSLConfig - 58, // 53: ml_metadata.ListOperationOptions.order_by_field:type_name -> ml_metadata.ListOperationOptions.OrderByField - 39, // 54: ml_metadata.ListOperationNextPageToken.set_options:type_name -> ml_metadata.ListOperationOptions - 39, // 55: ml_metadata.LineageGraphQueryOptions.artifacts_options:type_name -> ml_metadata.ListOperationOptions - 59, // 56: ml_metadata.LineageGraphQueryOptions.stop_conditions:type_name -> ml_metadata.LineageGraphQueryOptions.BoundaryConstraint - 10, // 57: ml_metadata.Artifact.PropertiesEntry.value:type_name -> ml_metadata.Value - 10, // 58: ml_metadata.Artifact.CustomPropertiesEntry.value:type_name -> ml_metadata.Value - 0, // 59: ml_metadata.ArtifactType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType - 47, // 60: ml_metadata.Event.Path.steps:type_name -> ml_metadata.Event.Path.Step - 10, // 61: ml_metadata.Execution.PropertiesEntry.value:type_name -> ml_metadata.Value - 10, // 62: ml_metadata.Execution.CustomPropertiesEntry.value:type_name -> ml_metadata.Value - 0, // 63: ml_metadata.ExecutionType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType - 0, // 64: ml_metadata.ContextType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType - 10, // 65: ml_metadata.Context.PropertiesEntry.value:type_name -> ml_metadata.Value - 10, // 66: ml_metadata.Context.CustomPropertiesEntry.value:type_name -> ml_metadata.Value - 22, // 67: ml_metadata.DictArtifactStructType.PropertiesEntry.value:type_name -> ml_metadata.ArtifactStructType - 8, // 68: ml_metadata.ListOperationOptions.OrderByField.field:type_name -> ml_metadata.ListOperationOptions.OrderByField.Field - 61, // 69: ml_metadata.system_type_extension:extendee -> google.protobuf.EnumValueOptions - 9, // 70: ml_metadata.system_type_extension:type_name -> ml_metadata.SystemTypeExtension - 71, // [71:71] is the sub-list for method output_type - 71, // [71:71] is the sub-list 
for method input_type - 70, // [70:71] is the sub-list for extension type_name - 69, // [69:70] is the sub-list for extension extendee - 0, // [0:69] is the sub-list for field type_name + 65, // 0: ml_metadata.Value.struct_value:type_name -> google.protobuf.Struct + 66, // 1: ml_metadata.Value.proto_value:type_name -> google.protobuf.Any + 46, // 2: ml_metadata.Artifact.properties:type_name -> ml_metadata.Artifact.PropertiesEntry + 47, // 3: ml_metadata.Artifact.custom_properties:type_name -> ml_metadata.Artifact.CustomPropertiesEntry + 1, // 4: ml_metadata.Artifact.state:type_name -> ml_metadata.Artifact.State + 66, // 5: ml_metadata.Artifact.system_metadata:type_name -> google.protobuf.Any + 48, // 6: ml_metadata.ArtifactType.properties:type_name -> ml_metadata.ArtifactType.PropertiesEntry + 2, // 7: ml_metadata.ArtifactType.base_type:type_name -> ml_metadata.ArtifactType.SystemDefinedBaseType + 49, // 8: ml_metadata.Event.path:type_name -> ml_metadata.Event.Path + 3, // 9: ml_metadata.Event.type:type_name -> ml_metadata.Event.Type + 66, // 10: ml_metadata.Event.system_metadata:type_name -> google.protobuf.Any + 4, // 11: ml_metadata.Execution.last_known_state:type_name -> ml_metadata.Execution.State + 51, // 12: ml_metadata.Execution.properties:type_name -> ml_metadata.Execution.PropertiesEntry + 52, // 13: ml_metadata.Execution.custom_properties:type_name -> ml_metadata.Execution.CustomPropertiesEntry + 66, // 14: ml_metadata.Execution.system_metadata:type_name -> google.protobuf.Any + 53, // 15: ml_metadata.ExecutionType.properties:type_name -> ml_metadata.ExecutionType.PropertiesEntry + 23, // 16: ml_metadata.ExecutionType.input_type:type_name -> ml_metadata.ArtifactStructType + 23, // 17: ml_metadata.ExecutionType.output_type:type_name -> ml_metadata.ArtifactStructType + 5, // 18: ml_metadata.ExecutionType.base_type:type_name -> ml_metadata.ExecutionType.SystemDefinedBaseType + 54, // 19: ml_metadata.ContextType.properties:type_name -> ml_metadata.ContextType.PropertiesEntry + 6, // 20: ml_metadata.ContextType.base_type:type_name -> ml_metadata.ContextType.SystemDefinedBaseType + 55, // 21: ml_metadata.Context.properties:type_name -> ml_metadata.Context.PropertiesEntry + 56, // 22: ml_metadata.Context.custom_properties:type_name -> ml_metadata.Context.CustomPropertiesEntry + 66, // 23: ml_metadata.Context.system_metadata:type_name -> google.protobuf.Any + 13, // 24: ml_metadata.LineageGraph.artifact_types:type_name -> ml_metadata.ArtifactType + 16, // 25: ml_metadata.LineageGraph.execution_types:type_name -> ml_metadata.ExecutionType + 17, // 26: ml_metadata.LineageGraph.context_types:type_name -> ml_metadata.ContextType + 12, // 27: ml_metadata.LineageGraph.artifacts:type_name -> ml_metadata.Artifact + 15, // 28: ml_metadata.LineageGraph.executions:type_name -> ml_metadata.Execution + 18, // 29: ml_metadata.LineageGraph.contexts:type_name -> ml_metadata.Context + 14, // 30: ml_metadata.LineageGraph.events:type_name -> ml_metadata.Event + 19, // 31: ml_metadata.LineageGraph.attributions:type_name -> ml_metadata.Attribution + 20, // 32: ml_metadata.LineageGraph.associations:type_name -> ml_metadata.Association + 13, // 33: ml_metadata.ArtifactStructType.simple:type_name -> ml_metadata.ArtifactType + 24, // 34: ml_metadata.ArtifactStructType.union_type:type_name -> ml_metadata.UnionArtifactStructType + 25, // 35: ml_metadata.ArtifactStructType.intersection:type_name -> ml_metadata.IntersectionArtifactStructType + 26, // 36: ml_metadata.ArtifactStructType.list:type_name -> 
ml_metadata.ListArtifactStructType + 27, // 37: ml_metadata.ArtifactStructType.none:type_name -> ml_metadata.NoneArtifactStructType + 28, // 38: ml_metadata.ArtifactStructType.any:type_name -> ml_metadata.AnyArtifactStructType + 29, // 39: ml_metadata.ArtifactStructType.tuple:type_name -> ml_metadata.TupleArtifactStructType + 30, // 40: ml_metadata.ArtifactStructType.dict:type_name -> ml_metadata.DictArtifactStructType + 23, // 41: ml_metadata.UnionArtifactStructType.candidates:type_name -> ml_metadata.ArtifactStructType + 23, // 42: ml_metadata.IntersectionArtifactStructType.constraints:type_name -> ml_metadata.ArtifactStructType + 23, // 43: ml_metadata.ListArtifactStructType.element:type_name -> ml_metadata.ArtifactStructType + 23, // 44: ml_metadata.TupleArtifactStructType.elements:type_name -> ml_metadata.ArtifactStructType + 57, // 45: ml_metadata.DictArtifactStructType.properties:type_name -> ml_metadata.DictArtifactStructType.PropertiesEntry + 23, // 46: ml_metadata.DictArtifactStructType.extra_properties_type:type_name -> ml_metadata.ArtifactStructType + 58, // 47: ml_metadata.MySQLDatabaseConfig.ssl_options:type_name -> ml_metadata.MySQLDatabaseConfig.SSLOptions + 7, // 48: ml_metadata.SqliteMetadataSourceConfig.connection_mode:type_name -> ml_metadata.SqliteMetadataSourceConfig.ConnectionMode + 59, // 49: ml_metadata.PostgreSQLDatabaseConfig.ssloption:type_name -> ml_metadata.PostgreSQLDatabaseConfig.SSLOptions + 31, // 50: ml_metadata.ConnectionConfig.fake_database:type_name -> ml_metadata.FakeDatabaseConfig + 32, // 51: ml_metadata.ConnectionConfig.mysql:type_name -> ml_metadata.MySQLDatabaseConfig + 33, // 52: ml_metadata.ConnectionConfig.sqlite:type_name -> ml_metadata.SqliteMetadataSourceConfig + 34, // 53: ml_metadata.ConnectionConfig.postgresql:type_name -> ml_metadata.PostgreSQLDatabaseConfig + 36, // 54: ml_metadata.ConnectionConfig.retry_options:type_name -> ml_metadata.RetryOptions + 60, // 55: ml_metadata.MetadataStoreClientConfig.ssl_config:type_name -> ml_metadata.MetadataStoreClientConfig.SSLConfig + 38, // 56: ml_metadata.MetadataStoreClientConfig.channel_arguments:type_name -> ml_metadata.GrpcChannelArguments + 37, // 57: ml_metadata.MetadataStoreServerConfig.connection_config:type_name -> ml_metadata.ConnectionConfig + 35, // 58: ml_metadata.MetadataStoreServerConfig.migration_options:type_name -> ml_metadata.MigrationOptions + 61, // 59: ml_metadata.MetadataStoreServerConfig.ssl_config:type_name -> ml_metadata.MetadataStoreServerConfig.SSLConfig + 62, // 60: ml_metadata.ListOperationOptions.order_by_field:type_name -> ml_metadata.ListOperationOptions.OrderByField + 41, // 61: ml_metadata.ListOperationNextPageToken.set_options:type_name -> ml_metadata.ListOperationOptions + 41, // 62: ml_metadata.LineageGraphQueryOptions.artifacts_options:type_name -> ml_metadata.ListOperationOptions + 63, // 63: ml_metadata.LineageGraphQueryOptions.stop_conditions:type_name -> ml_metadata.LineageGraphQueryOptions.BoundaryConstraint + 64, // 64: ml_metadata.LineageSubgraphQueryOptions.starting_artifacts:type_name -> ml_metadata.LineageSubgraphQueryOptions.StartingNodes + 64, // 65: ml_metadata.LineageSubgraphQueryOptions.starting_executions:type_name -> ml_metadata.LineageSubgraphQueryOptions.StartingNodes + 9, // 66: ml_metadata.LineageSubgraphQueryOptions.direction:type_name -> ml_metadata.LineageSubgraphQueryOptions.Direction + 11, // 67: ml_metadata.Artifact.PropertiesEntry.value:type_name -> ml_metadata.Value + 11, // 68: 
ml_metadata.Artifact.CustomPropertiesEntry.value:type_name -> ml_metadata.Value + 0, // 69: ml_metadata.ArtifactType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType + 50, // 70: ml_metadata.Event.Path.steps:type_name -> ml_metadata.Event.Path.Step + 11, // 71: ml_metadata.Execution.PropertiesEntry.value:type_name -> ml_metadata.Value + 11, // 72: ml_metadata.Execution.CustomPropertiesEntry.value:type_name -> ml_metadata.Value + 0, // 73: ml_metadata.ExecutionType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType + 0, // 74: ml_metadata.ContextType.PropertiesEntry.value:type_name -> ml_metadata.PropertyType + 11, // 75: ml_metadata.Context.PropertiesEntry.value:type_name -> ml_metadata.Value + 11, // 76: ml_metadata.Context.CustomPropertiesEntry.value:type_name -> ml_metadata.Value + 23, // 77: ml_metadata.DictArtifactStructType.PropertiesEntry.value:type_name -> ml_metadata.ArtifactStructType + 8, // 78: ml_metadata.ListOperationOptions.OrderByField.field:type_name -> ml_metadata.ListOperationOptions.OrderByField.Field + 67, // 79: ml_metadata.system_type_extension:extendee -> google.protobuf.EnumValueOptions + 10, // 80: ml_metadata.system_type_extension:type_name -> ml_metadata.SystemTypeExtension + 81, // [81:81] is the sub-list for method output_type + 81, // [81:81] is the sub-list for method input_type + 80, // [80:81] is the sub-list for extension type_name + 79, // [79:80] is the sub-list for extension extendee + 0, // [0:79] is the sub-list for field type_name } func init() { file_ml_metadata_proto_metadata_store_proto_init() } @@ -5202,7 +6110,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[24].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MigrationOptions); i { + switch v := v.(*PostgreSQLDatabaseConfig); i { case 0: return &v.state case 1: @@ -5214,7 +6122,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[25].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*RetryOptions); i { + switch v := v.(*MigrationOptions); i { case 0: return &v.state case 1: @@ -5226,7 +6134,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ConnectionConfig); i { + switch v := v.(*RetryOptions); i { case 0: return &v.state case 1: @@ -5238,7 +6146,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GrpcChannelArguments); i { + switch v := v.(*ConnectionConfig); i { case 0: return &v.state case 1: @@ -5250,7 +6158,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[28].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MetadataStoreClientConfig); i { + switch v := v.(*GrpcChannelArguments); i { case 0: return &v.state case 1: @@ -5262,7 +6170,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[29].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MetadataStoreServerConfig); i { + switch v := v.(*MetadataStoreClientConfig); i { case 0: return &v.state case 1: @@ -5274,7 +6182,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } 
file_ml_metadata_proto_metadata_store_proto_msgTypes[30].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ListOperationOptions); i { + switch v := v.(*MetadataStoreServerConfig); i { case 0: return &v.state case 1: @@ -5286,7 +6194,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[31].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ListOperationNextPageToken); i { + switch v := v.(*ListOperationOptions); i { case 0: return &v.state case 1: @@ -5298,6 +6206,18 @@ func file_ml_metadata_proto_metadata_store_proto_init() { } } file_ml_metadata_proto_metadata_store_proto_msgTypes[32].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ListOperationNextPageToken); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_proto_msgTypes[33].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*TransactionOptions); i { case 0: return &v.state @@ -5311,7 +6231,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[33].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[34].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*LineageGraphQueryOptions); i { case 0: return &v.state @@ -5323,7 +6243,19 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[37].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[35].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LineageSubgraphQueryOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_proto_msgTypes[39].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*Event_Path); i { case 0: return &v.state @@ -5335,7 +6267,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[38].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[40].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*Event_Path_Step); i { case 0: return &v.state @@ -5347,7 +6279,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[46].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[48].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*MySQLDatabaseConfig_SSLOptions); i { case 0: return &v.state @@ -5359,7 +6291,19 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[47].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[49].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PostgreSQLDatabaseConfig_SSLOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_proto_msgTypes[50].Exporter = func(v interface{}, i int) interface{} { 
switch v := v.(*MetadataStoreClientConfig_SSLConfig); i { case 0: return &v.state @@ -5371,7 +6315,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[48].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[51].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*MetadataStoreServerConfig_SSLConfig); i { case 0: return &v.state @@ -5383,7 +6327,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[49].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[52].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*ListOperationOptions_OrderByField); i { case 0: return &v.state @@ -5395,7 +6339,7 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_proto_msgTypes[50].Exporter = func(v interface{}, i int) interface{} { + file_ml_metadata_proto_metadata_store_proto_msgTypes[53].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*LineageGraphQueryOptions_BoundaryConstraint); i { case 0: return &v.state @@ -5407,12 +6351,26 @@ func file_ml_metadata_proto_metadata_store_proto_init() { return nil } } + file_ml_metadata_proto_metadata_store_proto_msgTypes[54].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*LineageSubgraphQueryOptions_StartingNodes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } file_ml_metadata_proto_metadata_store_proto_msgTypes[1].OneofWrappers = []interface{}{ (*Value_IntValue)(nil), (*Value_DoubleValue)(nil), (*Value_StringValue)(nil), (*Value_StructValue)(nil), + (*Value_ProtoValue)(nil), + (*Value_BoolValue)(nil), } file_ml_metadata_proto_metadata_store_proto_msgTypes[13].OneofWrappers = []interface{}{ (*ArtifactStructType_Simple)(nil), @@ -5424,15 +6382,20 @@ func file_ml_metadata_proto_metadata_store_proto_init() { (*ArtifactStructType_Tuple)(nil), (*ArtifactStructType_Dict)(nil), } - file_ml_metadata_proto_metadata_store_proto_msgTypes[26].OneofWrappers = []interface{}{ + file_ml_metadata_proto_metadata_store_proto_msgTypes[27].OneofWrappers = []interface{}{ (*ConnectionConfig_FakeDatabase)(nil), (*ConnectionConfig_Mysql)(nil), (*ConnectionConfig_Sqlite)(nil), + (*ConnectionConfig_Postgresql)(nil), } - file_ml_metadata_proto_metadata_store_proto_msgTypes[33].OneofWrappers = []interface{}{ + file_ml_metadata_proto_metadata_store_proto_msgTypes[34].OneofWrappers = []interface{}{ (*LineageGraphQueryOptions_ArtifactsOptions)(nil), } - file_ml_metadata_proto_metadata_store_proto_msgTypes[38].OneofWrappers = []interface{}{ + file_ml_metadata_proto_metadata_store_proto_msgTypes[35].OneofWrappers = []interface{}{ + (*LineageSubgraphQueryOptions_StartingArtifacts)(nil), + (*LineageSubgraphQueryOptions_StartingExecutions)(nil), + } + file_ml_metadata_proto_metadata_store_proto_msgTypes[40].OneofWrappers = []interface{}{ (*Event_Path_Step_Index)(nil), (*Event_Path_Step_Key)(nil), } @@ -5441,8 +6404,8 @@ func file_ml_metadata_proto_metadata_store_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_ml_metadata_proto_metadata_store_proto_rawDesc, - NumEnums: 9, - NumMessages: 51, + NumEnums: 10, + NumMessages: 55, NumExtensions: 1, NumServices: 0, }, diff 
--git a/third_party/ml-metadata/go/ml_metadata/metadata_store_service.pb.go b/third_party/ml-metadata/go/ml_metadata/metadata_store_service.pb.go index 35c5beef3f..3bf18dd322 100644 --- a/third_party/ml-metadata/go/ml_metadata/metadata_store_service.pb.go +++ b/third_party/ml-metadata/go/ml_metadata/metadata_store_service.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.26.0 -// protoc v3.17.3 +// protoc v3.15.8 // source: ml_metadata/proto/metadata_store_service.proto package ml_metadata @@ -24,6 +24,7 @@ package ml_metadata import ( protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" + fieldmaskpb "google.golang.org/protobuf/types/known/fieldmaskpb" reflect "reflect" sync "sync" ) @@ -296,6 +297,92 @@ type PutArtifactsRequest struct { Artifacts []*Artifact `protobuf:"bytes,1,rep,name=artifacts" json:"artifacts,omitempty"` // Additional options to change the behavior of the method. Options *PutArtifactsRequest_Options `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + // FieldMask for artifacts in the PUT update + // If `artifact.id` is not specified, it means a new artifact will be created + // and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the artifacts as a whole. + // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Examples that update `properties` / `custom_properties`: + // 1.1 Add a <'key', 'val'> pair into `custom_properties`: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 1.2 Set `custom_properties['key'].bool_value` to true: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // bool_value: true + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 1.3 Delete the complete <'key', 'val'> pair from `custom_properties`: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties {} + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Examples that update fields such as `uri`, `external_id`, etc: + // 2.1 Update `external_id` field: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // external_id: "new_value" + // } + // update_mask { + // paths: "external_id" + // } + // } + // 2.2 Set `uri` field: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // uri: "set_value" + // } + // update_mask { + // paths: "uri" + // } + // } + // If `paths: "properties"` or `paths: "custom_properties"` are added to + // `update_mask`, the key-level updates will be ignored and we only perform + // field-level updates on the all `properties`/`custom_properties`. + // For example: + // If the mask is: {"properties", "properties.key1"}, the field path + // "properties.key1" will be ignored and all `properties` will be updated. + // (Do not suggest) + // If the mask is {"properties", "external_id"}, all + // `properties` and field `external_id` will be updated. 
(Do not suggest) + UpdateMask *fieldmaskpb.FieldMask `protobuf:"bytes,4,opt,name=update_mask,json=updateMask" json:"update_mask,omitempty"` } func (x *PutArtifactsRequest) Reset() { @@ -344,6 +431,20 @@ func (x *PutArtifactsRequest) GetOptions() *PutArtifactsRequest_Options { return nil } +func (x *PutArtifactsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +func (x *PutArtifactsRequest) GetUpdateMask() *fieldmaskpb.FieldMask { + if x != nil { + return x.UpdateMask + } + return nil +} + type PutArtifactsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -417,6 +518,8 @@ type PutArtifactTypeRequest struct { CanDeleteFields *bool `protobuf:"varint,3,opt,name=can_delete_fields,json=canDeleteFields" json:"can_delete_fields,omitempty"` // Deprecated: Do not use. AllFieldsMatch *bool `protobuf:"varint,4,opt,name=all_fields_match,json=allFieldsMatch,def=1" json:"all_fields_match,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,6,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } // Default values for PutArtifactTypeRequest fields. @@ -493,6 +596,13 @@ func (x *PutArtifactTypeRequest) GetAllFieldsMatch() bool { return Default_PutArtifactTypeRequest_AllFieldsMatch } +func (x *PutArtifactTypeRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutArtifactTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -547,6 +657,44 @@ type PutExecutionsRequest struct { unknownFields protoimpl.UnknownFields Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + // FieldMask for executions in the PUT update + // If `execution.id` is not specified, it means a new execution will be + // created and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the executions as a whole. + // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Add a <'key', 'val'> pair into `custom_properties`: + // { + // executions { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Set `last_known_state` field: + // { + // executions { + // id: 1234 + // type_id: 5678 + // last_known_state: CACHED + // } + // update_mask { + // paths: "last_known_state" + // } + // } + // Please refer to `PutArtifactsRequest` for more details. 
+ UpdateMask *fieldmaskpb.FieldMask `protobuf:"bytes,3,opt,name=update_mask,json=updateMask" json:"update_mask,omitempty"` } func (x *PutExecutionsRequest) Reset() { @@ -588,6 +736,20 @@ func (x *PutExecutionsRequest) GetExecutions() []*Execution { return nil } +func (x *PutExecutionsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +func (x *PutExecutionsRequest) GetUpdateMask() *fieldmaskpb.FieldMask { + if x != nil { + return x.UpdateMask + } + return nil +} + type PutExecutionsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -661,6 +823,8 @@ type PutExecutionTypeRequest struct { CanDeleteFields *bool `protobuf:"varint,3,opt,name=can_delete_fields,json=canDeleteFields" json:"can_delete_fields,omitempty"` // Deprecated: Do not use. AllFieldsMatch *bool `protobuf:"varint,4,opt,name=all_fields_match,json=allFieldsMatch,def=1" json:"all_fields_match,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,6,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } // Default values for PutExecutionTypeRequest fields. @@ -737,6 +901,13 @@ func (x *PutExecutionTypeRequest) GetAllFieldsMatch() bool { return Default_PutExecutionTypeRequest_AllFieldsMatch } +func (x *PutExecutionTypeRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutExecutionTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -791,6 +962,8 @@ type PutEventsRequest struct { unknownFields protoimpl.UnknownFields Events []*Event `protobuf:"bytes,1,rep,name=events" json:"events,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } func (x *PutEventsRequest) Reset() { @@ -832,6 +1005,13 @@ func (x *PutEventsRequest) GetEvents() []*Event { return nil } +func (x *PutEventsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutEventsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -888,6 +1068,8 @@ type PutExecutionRequest struct { Contexts []*Context `protobuf:"bytes,3,rep,name=contexts" json:"contexts,omitempty"` // Additional options to change the behavior of the method. Options *PutExecutionRequest_Options `protobuf:"bytes,4,opt,name=options" json:"options,omitempty"` + // Options regarding transactions. 
+ TransactionOptions *TransactionOptions `protobuf:"bytes,5,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } func (x *PutExecutionRequest) Reset() { @@ -950,6 +1132,13 @@ func (x *PutExecutionRequest) GetOptions() *PutExecutionRequest_Options { return nil } +func (x *PutExecutionRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutExecutionResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1018,6 +1207,159 @@ func (x *PutExecutionResponse) GetContextIds() []int64 { return nil } +type PutLineageSubgraphRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` + Artifacts []*Artifact `protobuf:"bytes,2,rep,name=artifacts" json:"artifacts,omitempty"` + Contexts []*Context `protobuf:"bytes,3,rep,name=contexts" json:"contexts,omitempty"` + EventEdges []*PutLineageSubgraphRequest_EventEdge `protobuf:"bytes,4,rep,name=event_edges,json=eventEdges" json:"event_edges,omitempty"` + Options *PutLineageSubgraphRequest_Options `protobuf:"bytes,5,opt,name=options" json:"options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,6,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *PutLineageSubgraphRequest) Reset() { + *x = PutLineageSubgraphRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutLineageSubgraphRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutLineageSubgraphRequest) ProtoMessage() {} + +func (x *PutLineageSubgraphRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutLineageSubgraphRequest.ProtoReflect.Descriptor instead. 
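// Illustrative sketch (not part of the generated code): how a client might fill in the
// PutLineageSubgraphRequest defined above to record one execution, one artifact, and one
// context in a single call. The example function name, the pointer helpers, and every
// literal id/uri/name below are placeholders chosen for illustration; event edges and
// Options are omitted on purpose.
func examplePutLineageSubgraphRequest() *PutLineageSubgraphRequest {
	i64 := func(v int64) *int64 { return &v }
	str := func(v string) *string { return &v }
	return &PutLineageSubgraphRequest{
		Executions:         []*Execution{{TypeId: i64(5678), Name: str("train-run")}},
		Artifacts:          []*Artifact{{TypeId: i64(1234), Uri: str("gs://bucket/model")}},
		Contexts:           []*Context{{TypeId: i64(42), Name: str("experiment-1")}},
		TransactionOptions: &TransactionOptions{},
	}
}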
+func (*PutLineageSubgraphRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{16} +} + +func (x *PutLineageSubgraphRequest) GetExecutions() []*Execution { + if x != nil { + return x.Executions + } + return nil +} + +func (x *PutLineageSubgraphRequest) GetArtifacts() []*Artifact { + if x != nil { + return x.Artifacts + } + return nil +} + +func (x *PutLineageSubgraphRequest) GetContexts() []*Context { + if x != nil { + return x.Contexts + } + return nil +} + +func (x *PutLineageSubgraphRequest) GetEventEdges() []*PutLineageSubgraphRequest_EventEdge { + if x != nil { + return x.EventEdges + } + return nil +} + +func (x *PutLineageSubgraphRequest) GetOptions() *PutLineageSubgraphRequest_Options { + if x != nil { + return x.Options + } + return nil +} + +func (x *PutLineageSubgraphRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type PutLineageSubgraphResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // A list of execution ids index-aligned with `executions` in the request + ExecutionIds []int64 `protobuf:"varint,1,rep,packed,name=execution_ids,json=executionIds" json:"execution_ids,omitempty"` + // A list of artifact ids index-aligned with `artifacts` in the request + ArtifactIds []int64 `protobuf:"varint,2,rep,packed,name=artifact_ids,json=artifactIds" json:"artifact_ids,omitempty"` + // A list of context ids index-aligned with `contexts` in the request + ContextIds []int64 `protobuf:"varint,3,rep,packed,name=context_ids,json=contextIds" json:"context_ids,omitempty"` +} + +func (x *PutLineageSubgraphResponse) Reset() { + *x = PutLineageSubgraphResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutLineageSubgraphResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutLineageSubgraphResponse) ProtoMessage() {} + +func (x *PutLineageSubgraphResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutLineageSubgraphResponse.ProtoReflect.Descriptor instead. +func (*PutLineageSubgraphResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{17} +} + +func (x *PutLineageSubgraphResponse) GetExecutionIds() []int64 { + if x != nil { + return x.ExecutionIds + } + return nil +} + +func (x *PutLineageSubgraphResponse) GetArtifactIds() []int64 { + if x != nil { + return x.ArtifactIds + } + return nil +} + +func (x *PutLineageSubgraphResponse) GetContextIds() []int64 { + if x != nil { + return x.ContextIds + } + return nil +} + type PutTypesRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1041,6 +1383,8 @@ type PutTypesRequest struct { CanDeleteFields *bool `protobuf:"varint,5,opt,name=can_delete_fields,json=canDeleteFields" json:"can_delete_fields,omitempty"` // Deprecated: Do not use. 
AllFieldsMatch *bool `protobuf:"varint,6,opt,name=all_fields_match,json=allFieldsMatch,def=1" json:"all_fields_match,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,8,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } // Default values for PutTypesRequest fields. @@ -1051,7 +1395,7 @@ const ( func (x *PutTypesRequest) Reset() { *x = PutTypesRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1064,7 +1408,7 @@ func (x *PutTypesRequest) String() string { func (*PutTypesRequest) ProtoMessage() {} func (x *PutTypesRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1077,7 +1421,7 @@ func (x *PutTypesRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PutTypesRequest.ProtoReflect.Descriptor instead. func (*PutTypesRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{16} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{18} } func (x *PutTypesRequest) GetArtifactTypes() []*ArtifactType { @@ -1131,6 +1475,13 @@ func (x *PutTypesRequest) GetAllFieldsMatch() bool { return Default_PutTypesRequest_AllFieldsMatch } +func (x *PutTypesRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutTypesResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1147,7 +1498,7 @@ type PutTypesResponse struct { func (x *PutTypesResponse) Reset() { *x = PutTypesResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1160,7 +1511,7 @@ func (x *PutTypesResponse) String() string { func (*PutTypesResponse) ProtoMessage() {} func (x *PutTypesResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1173,7 +1524,7 @@ func (x *PutTypesResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PutTypesResponse.ProtoReflect.Descriptor instead. func (*PutTypesResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{17} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{19} } func (x *PutTypesResponse) GetArtifactTypeIds() []int64 { @@ -1222,6 +1573,8 @@ type PutContextTypeRequest struct { CanDeleteFields *bool `protobuf:"varint,3,opt,name=can_delete_fields,json=canDeleteFields" json:"can_delete_fields,omitempty"` // Deprecated: Do not use. 
AllFieldsMatch *bool `protobuf:"varint,4,opt,name=all_fields_match,json=allFieldsMatch,def=1" json:"all_fields_match,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,6,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } // Default values for PutContextTypeRequest fields. @@ -1232,7 +1585,7 @@ const ( func (x *PutContextTypeRequest) Reset() { *x = PutContextTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1245,7 +1598,7 @@ func (x *PutContextTypeRequest) String() string { func (*PutContextTypeRequest) ProtoMessage() {} func (x *PutContextTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1258,7 +1611,7 @@ func (x *PutContextTypeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PutContextTypeRequest.ProtoReflect.Descriptor instead. func (*PutContextTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{18} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{20} } func (x *PutContextTypeRequest) GetContextType() *ContextType { @@ -1298,6 +1651,13 @@ func (x *PutContextTypeRequest) GetAllFieldsMatch() bool { return Default_PutContextTypeRequest_AllFieldsMatch } +func (x *PutContextTypeRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutContextTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1310,7 +1670,7 @@ type PutContextTypeResponse struct { func (x *PutContextTypeResponse) Reset() { *x = PutContextTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1323,7 +1683,7 @@ func (x *PutContextTypeResponse) String() string { func (*PutContextTypeResponse) ProtoMessage() {} func (x *PutContextTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1336,7 +1696,7 @@ func (x *PutContextTypeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PutContextTypeResponse.ProtoReflect.Descriptor instead. 
func (*PutContextTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{19} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{21} } func (x *PutContextTypeResponse) GetTypeId() int64 { @@ -1352,12 +1712,50 @@ type PutContextsRequest struct { unknownFields protoimpl.UnknownFields Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + // FieldMask for contexts in the PUT update + // If `context.id` is not specified, it means a new context will be + // created and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the contexts as a whole. + // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Add a <'key', 'val'> pair into `custom_properties`: + // { + // contexts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Set `name` field: + // { + // contexts { + // id: 1234 + // type_id: 5678 + // name: "set_name" + // } + // update_mask { + // paths: "name" + // } + // } + // Please refer to `PutArtifactsRequest` for more details. + UpdateMask *fieldmaskpb.FieldMask `protobuf:"bytes,3,opt,name=update_mask,json=updateMask" json:"update_mask,omitempty"` } func (x *PutContextsRequest) Reset() { *x = PutContextsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1370,7 +1768,7 @@ func (x *PutContextsRequest) String() string { func (*PutContextsRequest) ProtoMessage() {} func (x *PutContextsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1383,7 +1781,7 @@ func (x *PutContextsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PutContextsRequest.ProtoReflect.Descriptor instead. 
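// Illustrative sketch (not part of the generated code): a PutContextsRequest mirroring the
// second example in the field comment above, renaming an existing context by listing only
// the `name` path in `update_mask`. The ids and the name are placeholder values; the
// fieldmaskpb package is the one this file already imports for the UpdateMask field.
func examplePutContextsUpdateMask() *PutContextsRequest {
	id, typeID, name := int64(1234), int64(5678), "set_name"
	return &PutContextsRequest{
		Contexts: []*Context{{
			Id:     &id,
			TypeId: &typeID,
			Name:   &name,
		}},
		// Only the paths named here are updated; other context fields are left untouched.
		UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"name"}},
	}
}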
func (*PutContextsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{20} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{22} } func (x *PutContextsRequest) GetContexts() []*Context { @@ -1393,6 +1791,20 @@ func (x *PutContextsRequest) GetContexts() []*Context { return nil } +func (x *PutContextsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +func (x *PutContextsRequest) GetUpdateMask() *fieldmaskpb.FieldMask { + if x != nil { + return x.UpdateMask + } + return nil +} + type PutContextsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1405,7 +1817,7 @@ type PutContextsResponse struct { func (x *PutContextsResponse) Reset() { *x = PutContextsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1418,7 +1830,7 @@ func (x *PutContextsResponse) String() string { func (*PutContextsResponse) ProtoMessage() {} func (x *PutContextsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1431,7 +1843,7 @@ func (x *PutContextsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PutContextsResponse.ProtoReflect.Descriptor instead. func (*PutContextsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{21} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{23} } func (x *PutContextsResponse) GetContextIds() []int64 { @@ -1448,12 +1860,14 @@ type PutAttributionsAndAssociationsRequest struct { Attributions []*Attribution `protobuf:"bytes,1,rep,name=attributions" json:"attributions,omitempty"` Associations []*Association `protobuf:"bytes,2,rep,name=associations" json:"associations,omitempty"` + // Options regarding transactions. 
+ TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } func (x *PutAttributionsAndAssociationsRequest) Reset() { *x = PutAttributionsAndAssociationsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1466,7 +1880,7 @@ func (x *PutAttributionsAndAssociationsRequest) String() string { func (*PutAttributionsAndAssociationsRequest) ProtoMessage() {} func (x *PutAttributionsAndAssociationsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1479,7 +1893,7 @@ func (x *PutAttributionsAndAssociationsRequest) ProtoReflect() protoreflect.Mess // Deprecated: Use PutAttributionsAndAssociationsRequest.ProtoReflect.Descriptor instead. func (*PutAttributionsAndAssociationsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{22} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{24} } func (x *PutAttributionsAndAssociationsRequest) GetAttributions() []*Attribution { @@ -1496,6 +1910,13 @@ func (x *PutAttributionsAndAssociationsRequest) GetAssociations() []*Association return nil } +func (x *PutAttributionsAndAssociationsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutAttributionsAndAssociationsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1505,7 +1926,7 @@ type PutAttributionsAndAssociationsResponse struct { func (x *PutAttributionsAndAssociationsResponse) Reset() { *x = PutAttributionsAndAssociationsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1518,7 +1939,7 @@ func (x *PutAttributionsAndAssociationsResponse) String() string { func (*PutAttributionsAndAssociationsResponse) ProtoMessage() {} func (x *PutAttributionsAndAssociationsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1531,7 +1952,7 @@ func (x *PutAttributionsAndAssociationsResponse) ProtoReflect() protoreflect.Mes // Deprecated: Use PutAttributionsAndAssociationsResponse.ProtoReflect.Descriptor instead. 
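// Illustrative sketch (not part of the generated code): attaching an existing artifact and
// an existing execution to a context with a single PutAttributionsAndAssociationsRequest.
// The numeric ids are placeholders; Attribution and Association carry only id pairs.
func examplePutAttributionsAndAssociations() *PutAttributionsAndAssociationsRequest {
	artifactID, executionID, contextID := int64(11), int64(22), int64(33)
	return &PutAttributionsAndAssociationsRequest{
		Attributions: []*Attribution{{ArtifactId: &artifactID, ContextId: &contextID}},
		Associations: []*Association{{ExecutionId: &executionID, ContextId: &contextID}},
	}
}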
func (*PutAttributionsAndAssociationsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{23} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{25} } type PutParentContextsRequest struct { @@ -1540,12 +1961,14 @@ type PutParentContextsRequest struct { unknownFields protoimpl.UnknownFields ParentContexts []*ParentContext `protobuf:"bytes,1,rep,name=parent_contexts,json=parentContexts" json:"parent_contexts,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } func (x *PutParentContextsRequest) Reset() { *x = PutParentContextsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1558,7 +1981,7 @@ func (x *PutParentContextsRequest) String() string { func (*PutParentContextsRequest) ProtoMessage() {} func (x *PutParentContextsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1571,7 +1994,7 @@ func (x *PutParentContextsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use PutParentContextsRequest.ProtoReflect.Descriptor instead. func (*PutParentContextsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{24} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{26} } func (x *PutParentContextsRequest) GetParentContexts() []*ParentContext { @@ -1581,6 +2004,13 @@ func (x *PutParentContextsRequest) GetParentContexts() []*ParentContext { return nil } +func (x *PutParentContextsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + type PutParentContextsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -1590,7 +2020,7 @@ type PutParentContextsResponse struct { func (x *PutParentContextsResponse) Reset() { *x = PutParentContextsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1603,7 +2033,7 @@ func (x *PutParentContextsResponse) String() string { func (*PutParentContextsResponse) ProtoMessage() {} func (x *PutParentContextsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1616,7 +2046,7 @@ func (x *PutParentContextsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use PutParentContextsResponse.ProtoReflect.Descriptor instead. 
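// Illustrative sketch (not part of the generated code): declaring a parent/child
// relationship between two existing contexts via PutParentContextsRequest. ParentContext
// carries only the two context ids; the ids below are placeholders.
func examplePutParentContexts() *PutParentContextsRequest {
	childID, parentID := int64(101), int64(100)
	return &PutParentContextsRequest{
		ParentContexts: []*ParentContext{{ChildId: &childID, ParentId: &parentID}},
	}
}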
func (*PutParentContextsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{25} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{27} } type GetArtifactsByTypeRequest struct { @@ -1631,6 +2061,12 @@ type GetArtifactsByTypeRequest struct { // Currently supports: // 1. Field to order the results. // 2. Page size. + // If set, the request will + // first fetch all artifacts with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Artifacts of the page size. + // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. Options *ListOperationOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` @@ -1639,7 +2075,7 @@ type GetArtifactsByTypeRequest struct { func (x *GetArtifactsByTypeRequest) Reset() { *x = GetArtifactsByTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1652,7 +2088,7 @@ func (x *GetArtifactsByTypeRequest) String() string { func (*GetArtifactsByTypeRequest) ProtoMessage() {} func (x *GetArtifactsByTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1665,7 +2101,7 @@ func (x *GetArtifactsByTypeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByTypeRequest.ProtoReflect.Descriptor instead. func (*GetArtifactsByTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{26} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{28} } func (x *GetArtifactsByTypeRequest) GetTypeName() string { @@ -1710,7 +2146,7 @@ type GetArtifactsByTypeResponse struct { func (x *GetArtifactsByTypeResponse) Reset() { *x = GetArtifactsByTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1723,7 +2159,7 @@ func (x *GetArtifactsByTypeResponse) String() string { func (*GetArtifactsByTypeResponse) ProtoMessage() {} func (x *GetArtifactsByTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1736,7 +2172,7 @@ func (x *GetArtifactsByTypeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByTypeResponse.ProtoReflect.Descriptor instead. 
func (*GetArtifactsByTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{27} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{29} } func (x *GetArtifactsByTypeResponse) GetArtifacts() []*Artifact { @@ -1770,7 +2206,7 @@ type GetArtifactByTypeAndNameRequest struct { func (x *GetArtifactByTypeAndNameRequest) Reset() { *x = GetArtifactByTypeAndNameRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1783,7 +2219,7 @@ func (x *GetArtifactByTypeAndNameRequest) String() string { func (*GetArtifactByTypeAndNameRequest) ProtoMessage() {} func (x *GetArtifactByTypeAndNameRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1796,7 +2232,7 @@ func (x *GetArtifactByTypeAndNameRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactByTypeAndNameRequest.ProtoReflect.Descriptor instead. func (*GetArtifactByTypeAndNameRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{28} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{30} } func (x *GetArtifactByTypeAndNameRequest) GetTypeName() string { @@ -1838,7 +2274,7 @@ type GetArtifactByTypeAndNameResponse struct { func (x *GetArtifactByTypeAndNameResponse) Reset() { *x = GetArtifactByTypeAndNameResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1851,7 +2287,7 @@ func (x *GetArtifactByTypeAndNameResponse) String() string { func (*GetArtifactByTypeAndNameResponse) ProtoMessage() {} func (x *GetArtifactByTypeAndNameResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1864,7 +2300,7 @@ func (x *GetArtifactByTypeAndNameResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactByTypeAndNameResponse.ProtoReflect.Descriptor instead. func (*GetArtifactByTypeAndNameResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{29} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{31} } func (x *GetArtifactByTypeAndNameResponse) GetArtifact() *Artifact { @@ -1881,14 +2317,31 @@ type GetArtifactsByIDRequest struct { // A list of artifact ids to retrieve. ArtifactIds []int64 `protobuf:"varint,1,rep,name=artifact_ids,json=artifactIds" json:"artifact_ids,omitempty"` + // An option to populate all the ArtifactTypes in the response. + // If true, returns retrieved Artifacts and their artifact types, which can be + // matched by type_ids. 
+ // If false, returns only the retrieved Artifacts. + // Example request proto: + // { + // artifact_ids: 101, + // populate_artifact_types: true, + // } + // The response will contain an artifact with id = 101 and an artifact type + // with id = artifact.type_id(). + PopulateArtifactTypes *bool `protobuf:"varint,3,opt,name=populate_artifact_types,json=populateArtifactTypes,def=0" json:"populate_artifact_types,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } +// Default values for GetArtifactsByIDRequest fields. +const ( + Default_GetArtifactsByIDRequest_PopulateArtifactTypes = bool(false) +) + func (x *GetArtifactsByIDRequest) Reset() { *x = GetArtifactsByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1901,7 +2354,7 @@ func (x *GetArtifactsByIDRequest) String() string { func (*GetArtifactsByIDRequest) ProtoMessage() {} func (x *GetArtifactsByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1914,7 +2367,7 @@ func (x *GetArtifactsByIDRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByIDRequest.ProtoReflect.Descriptor instead. func (*GetArtifactsByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{30} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{32} } func (x *GetArtifactsByIDRequest) GetArtifactIds() []int64 { @@ -1924,6 +2377,13 @@ func (x *GetArtifactsByIDRequest) GetArtifactIds() []int64 { return nil } +func (x *GetArtifactsByIDRequest) GetPopulateArtifactTypes() bool { + if x != nil && x.PopulateArtifactTypes != nil { + return *x.PopulateArtifactTypes + } + return Default_GetArtifactsByIDRequest_PopulateArtifactTypes +} + func (x *GetArtifactsByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions @@ -1937,14 +2397,17 @@ type GetArtifactsByIDResponse struct { unknownFields protoimpl.UnknownFields // Artifacts with matching ids. - // This is not index-aligned: if an id is not found, it is not + // This is not index-aligned: if an id is not found, it is not returned. Artifacts []*Artifact `protobuf:"bytes,1,rep,name=artifacts" json:"artifacts,omitempty"` + // ArtifactTypes populated with matching type_ids owned by `artifacts`. + // This is not index-aligned: if a type_id is not found, it is not returned. 
+ ArtifactTypes []*ArtifactType `protobuf:"bytes,2,rep,name=artifact_types,json=artifactTypes" json:"artifact_types,omitempty"` } func (x *GetArtifactsByIDResponse) Reset() { *x = GetArtifactsByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1957,7 +2420,7 @@ func (x *GetArtifactsByIDResponse) String() string { func (*GetArtifactsByIDResponse) ProtoMessage() {} func (x *GetArtifactsByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1970,7 +2433,7 @@ func (x *GetArtifactsByIDResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByIDResponse.ProtoReflect.Descriptor instead. func (*GetArtifactsByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{31} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{33} } func (x *GetArtifactsByIDResponse) GetArtifacts() []*Artifact { @@ -1980,6 +2443,13 @@ func (x *GetArtifactsByIDResponse) GetArtifacts() []*Artifact { return nil } +func (x *GetArtifactsByIDResponse) GetArtifactTypes() []*ArtifactType { + if x != nil { + return x.ArtifactTypes + } + return nil +} + // Request to retrieve Artifacts using List options. // If option is not specified then all Artifacts are returned. type GetArtifactsRequest struct { @@ -1988,9 +2458,8 @@ type GetArtifactsRequest struct { unknownFields protoimpl.UnknownFields // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. Options *ListOperationOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` @@ -1999,7 +2468,7 @@ type GetArtifactsRequest struct { func (x *GetArtifactsRequest) Reset() { *x = GetArtifactsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2012,7 +2481,7 @@ func (x *GetArtifactsRequest) String() string { func (*GetArtifactsRequest) ProtoMessage() {} func (x *GetArtifactsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2025,7 +2494,7 @@ func (x *GetArtifactsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsRequest.ProtoReflect.Descriptor instead. 
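// Illustrative sketch (not part of the generated code): using the populate_artifact_types
// option documented on GetArtifactsByIDRequest above. With the flag set, the response
// carries both the artifacts and their ArtifactTypes, which callers match via
// Artifact.GetTypeId(). The id 101 echoes the example in the field comment.
func exampleGetArtifactsByID() *GetArtifactsByIDRequest {
	populate := true
	return &GetArtifactsByIDRequest{
		ArtifactIds:           []int64{101},
		PopulateArtifactTypes: &populate,
	}
}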
func (*GetArtifactsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{32} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{34} } func (x *GetArtifactsRequest) GetOptions() *ListOperationOptions { @@ -2057,7 +2526,7 @@ type GetArtifactsResponse struct { func (x *GetArtifactsResponse) Reset() { *x = GetArtifactsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2070,7 +2539,7 @@ func (x *GetArtifactsResponse) String() string { func (*GetArtifactsResponse) ProtoMessage() {} func (x *GetArtifactsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2083,7 +2552,7 @@ func (x *GetArtifactsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsResponse.ProtoReflect.Descriptor instead. func (*GetArtifactsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{33} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{35} } func (x *GetArtifactsResponse) GetArtifacts() []*Artifact { @@ -2114,7 +2583,7 @@ type GetArtifactsByURIRequest struct { func (x *GetArtifactsByURIRequest) Reset() { *x = GetArtifactsByURIRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2127,7 +2596,7 @@ func (x *GetArtifactsByURIRequest) String() string { func (*GetArtifactsByURIRequest) ProtoMessage() {} func (x *GetArtifactsByURIRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2140,7 +2609,7 @@ func (x *GetArtifactsByURIRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByURIRequest.ProtoReflect.Descriptor instead. 
func (*GetArtifactsByURIRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{34} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{36} } func (x *GetArtifactsByURIRequest) GetUris() []string { @@ -2168,7 +2637,7 @@ type GetArtifactsByURIResponse struct { func (x *GetArtifactsByURIResponse) Reset() { *x = GetArtifactsByURIResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2181,7 +2650,7 @@ func (x *GetArtifactsByURIResponse) String() string { func (*GetArtifactsByURIResponse) ProtoMessage() {} func (x *GetArtifactsByURIResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2194,7 +2663,7 @@ func (x *GetArtifactsByURIResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactsByURIResponse.ProtoReflect.Descriptor instead. func (*GetArtifactsByURIResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{35} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{37} } func (x *GetArtifactsByURIResponse) GetArtifacts() []*Artifact { @@ -2212,9 +2681,8 @@ type GetExecutionsRequest struct { unknownFields protoimpl.UnknownFields // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. Options *ListOperationOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` @@ -2223,7 +2691,7 @@ type GetExecutionsRequest struct { func (x *GetExecutionsRequest) Reset() { *x = GetExecutionsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2236,7 +2704,7 @@ func (x *GetExecutionsRequest) String() string { func (*GetExecutionsRequest) ProtoMessage() {} func (x *GetExecutionsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2249,7 +2717,7 @@ func (x *GetExecutionsRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetExecutionsRequest.ProtoReflect.Descriptor instead. 
func (*GetExecutionsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{36} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{38} } func (x *GetExecutionsRequest) GetOptions() *ListOperationOptions { @@ -2281,7 +2749,7 @@ type GetExecutionsResponse struct { func (x *GetExecutionsResponse) Reset() { *x = GetExecutionsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2294,7 +2762,7 @@ func (x *GetExecutionsResponse) String() string { func (*GetExecutionsResponse) ProtoMessage() {} func (x *GetExecutionsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2307,7 +2775,7 @@ func (x *GetExecutionsResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetExecutionsResponse.ProtoReflect.Descriptor instead. func (*GetExecutionsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{37} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{39} } func (x *GetExecutionsResponse) GetExecutions() []*Execution { @@ -2339,7 +2807,7 @@ type GetArtifactTypeRequest struct { func (x *GetArtifactTypeRequest) Reset() { *x = GetArtifactTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2352,7 +2820,7 @@ func (x *GetArtifactTypeRequest) String() string { func (*GetArtifactTypeRequest) ProtoMessage() {} func (x *GetArtifactTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2365,7 +2833,7 @@ func (x *GetArtifactTypeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactTypeRequest.ProtoReflect.Descriptor instead. 
func (*GetArtifactTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{38} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{40} } func (x *GetArtifactTypeRequest) GetTypeName() string { @@ -2401,7 +2869,7 @@ type GetArtifactTypeResponse struct { func (x *GetArtifactTypeResponse) Reset() { *x = GetArtifactTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2414,7 +2882,7 @@ func (x *GetArtifactTypeResponse) String() string { func (*GetArtifactTypeResponse) ProtoMessage() {} func (x *GetArtifactTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2427,7 +2895,7 @@ func (x *GetArtifactTypeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactTypeResponse.ProtoReflect.Descriptor instead. func (*GetArtifactTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{39} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{41} } func (x *GetArtifactTypeResponse) GetArtifactType() *ArtifactType { @@ -2449,7 +2917,7 @@ type GetArtifactTypesRequest struct { func (x *GetArtifactTypesRequest) Reset() { *x = GetArtifactTypesRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2462,7 +2930,7 @@ func (x *GetArtifactTypesRequest) String() string { func (*GetArtifactTypesRequest) ProtoMessage() {} func (x *GetArtifactTypesRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2475,7 +2943,7 @@ func (x *GetArtifactTypesRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactTypesRequest.ProtoReflect.Descriptor instead. 
func (*GetArtifactTypesRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{40} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{42} } func (x *GetArtifactTypesRequest) GetTransactionOptions() *TransactionOptions { @@ -2496,7 +2964,7 @@ type GetArtifactTypesResponse struct { func (x *GetArtifactTypesResponse) Reset() { *x = GetArtifactTypesResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2509,7 +2977,7 @@ func (x *GetArtifactTypesResponse) String() string { func (*GetArtifactTypesResponse) ProtoMessage() {} func (x *GetArtifactTypesResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2522,7 +2990,7 @@ func (x *GetArtifactTypesResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetArtifactTypesResponse.ProtoReflect.Descriptor instead. func (*GetArtifactTypesResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{41} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{43} } func (x *GetArtifactTypesResponse) GetArtifactTypes() []*ArtifactType { @@ -2544,7 +3012,7 @@ type GetExecutionTypesRequest struct { func (x *GetExecutionTypesRequest) Reset() { *x = GetExecutionTypesRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2557,7 +3025,7 @@ func (x *GetExecutionTypesRequest) String() string { func (*GetExecutionTypesRequest) ProtoMessage() {} func (x *GetExecutionTypesRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2570,7 +3038,7 @@ func (x *GetExecutionTypesRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetExecutionTypesRequest.ProtoReflect.Descriptor instead. 
func (*GetExecutionTypesRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{42} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{44} } func (x *GetExecutionTypesRequest) GetTransactionOptions() *TransactionOptions { @@ -2591,7 +3059,7 @@ type GetExecutionTypesResponse struct { func (x *GetExecutionTypesResponse) Reset() { *x = GetExecutionTypesResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2604,7 +3072,7 @@ func (x *GetExecutionTypesResponse) String() string { func (*GetExecutionTypesResponse) ProtoMessage() {} func (x *GetExecutionTypesResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2617,7 +3085,7 @@ func (x *GetExecutionTypesResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetExecutionTypesResponse.ProtoReflect.Descriptor instead. func (*GetExecutionTypesResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{43} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{45} } func (x *GetExecutionTypesResponse) GetExecutionTypes() []*ExecutionType { @@ -2639,7 +3107,7 @@ type GetContextTypesRequest struct { func (x *GetContextTypesRequest) Reset() { *x = GetContextTypesRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2652,7 +3120,7 @@ func (x *GetContextTypesRequest) String() string { func (*GetContextTypesRequest) ProtoMessage() {} func (x *GetContextTypesRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2665,7 +3133,7 @@ func (x *GetContextTypesRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetContextTypesRequest.ProtoReflect.Descriptor instead. 
func (*GetContextTypesRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{44} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{46} } func (x *GetContextTypesRequest) GetTransactionOptions() *TransactionOptions { @@ -2686,7 +3154,7 @@ type GetContextTypesResponse struct { func (x *GetContextTypesResponse) Reset() { *x = GetContextTypesResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -2699,7 +3167,7 @@ func (x *GetContextTypesResponse) String() string { func (*GetContextTypesResponse) ProtoMessage() {} func (x *GetContextTypesResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2712,7 +3180,7 @@ func (x *GetContextTypesResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetContextTypesResponse.ProtoReflect.Descriptor instead. func (*GetContextTypesResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{45} + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{47} } func (x *GetContextTypesResponse) GetContextTypes() []*ContextType { @@ -2722,40 +3190,33 @@ func (x *GetContextTypesResponse) GetContextTypes() []*ContextType { return nil } -type GetExecutionsByTypeRequest struct { +type GetArtifactsByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // If not set, it looks for the type with type_name with default type_version. - TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` - // Specify List options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. - Options *ListOperationOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionsByTypeRequest) Reset() { - *x = GetExecutionsByTypeRequest{} +func (x *GetArtifactsByExternalIdsRequest) Reset() { + *x = GetArtifactsByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByTypeRequest) String() string { +func (x *GetArtifactsByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByTypeRequest) ProtoMessage() {} +func (*GetArtifactsByExternalIdsRequest) ProtoMessage() {} -func (x *GetExecutionsByTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46] +func (x *GetArtifactsByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2766,67 +3227,50 @@ func (x *GetExecutionsByTypeRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByTypeRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionsByTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{46} -} - -func (x *GetExecutionsByTypeRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName - } - return "" -} - -func (x *GetExecutionsByTypeRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion - } - return "" +// Deprecated: Use GetArtifactsByExternalIdsRequest.ProtoReflect.Descriptor instead. +func (*GetArtifactsByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{48} } -func (x *GetExecutionsByTypeRequest) GetOptions() *ListOperationOptions { +func (x *GetArtifactsByExternalIdsRequest) GetExternalIds() []string { if x != nil { - return x.Options + return x.ExternalIds } return nil } -func (x *GetExecutionsByTypeRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetArtifactsByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionsByTypeResponse struct { +type GetArtifactsByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` - // Token to use to retrieve next page of results if list options are used in - // the request. 
- NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` + Artifacts []*Artifact `protobuf:"bytes,1,rep,name=artifacts" json:"artifacts,omitempty"` } -func (x *GetExecutionsByTypeResponse) Reset() { - *x = GetExecutionsByTypeResponse{} +func (x *GetArtifactsByExternalIdsResponse) Reset() { + *x = GetArtifactsByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByTypeResponse) String() string { +func (x *GetArtifactsByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByTypeResponse) ProtoMessage() {} +func (*GetArtifactsByExternalIdsResponse) ProtoMessage() {} -func (x *GetExecutionsByTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47] +func (x *GetArtifactsByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2837,56 +3281,45 @@ func (x *GetExecutionsByTypeResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByTypeResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionsByTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{47} +// Deprecated: Use GetArtifactsByExternalIdsResponse.ProtoReflect.Descriptor instead. +func (*GetArtifactsByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{49} } -func (x *GetExecutionsByTypeResponse) GetExecutions() []*Execution { +func (x *GetArtifactsByExternalIdsResponse) GetArtifacts() []*Artifact { if x != nil { - return x.Executions + return x.Artifacts } return nil } -func (x *GetExecutionsByTypeResponse) GetNextPageToken() string { - if x != nil && x.NextPageToken != nil { - return *x.NextPageToken - } - return "" -} - -type GetExecutionByTypeAndNameRequest struct { +type GetExecutionsByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // If not set, it looks for the type with type_name and execution_name with - // default type_version. - TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` - ExecutionName *string `protobuf:"bytes,2,opt,name=execution_name,json=executionName" json:"execution_name,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionByTypeAndNameRequest) Reset() { - *x = GetExecutionByTypeAndNameRequest{} +func (x *GetExecutionsByExternalIdsRequest) Reset() { + *x = GetExecutionsByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionByTypeAndNameRequest) String() string { +func (x *GetExecutionsByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionByTypeAndNameRequest) ProtoMessage() {} +func (*GetExecutionsByExternalIdsRequest) ProtoMessage() {} -func (x *GetExecutionByTypeAndNameRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48] +func (x *GetExecutionsByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2897,64 +3330,50 @@ func (x *GetExecutionByTypeAndNameRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionByTypeAndNameRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionByTypeAndNameRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{48} +// Deprecated: Use GetExecutionsByExternalIdsRequest.ProtoReflect.Descriptor instead. 
+func (*GetExecutionsByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{50} } -func (x *GetExecutionByTypeAndNameRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName +func (x *GetExecutionsByExternalIdsRequest) GetExternalIds() []string { + if x != nil { + return x.ExternalIds } - return "" + return nil } -func (x *GetExecutionByTypeAndNameRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion - } - return "" -} - -func (x *GetExecutionByTypeAndNameRequest) GetExecutionName() string { - if x != nil && x.ExecutionName != nil { - return *x.ExecutionName - } - return "" -} - -func (x *GetExecutionByTypeAndNameRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionsByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionByTypeAndNameResponse struct { +type GetExecutionsByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Execution *Execution `protobuf:"bytes,1,opt,name=execution" json:"execution,omitempty"` + Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` } -func (x *GetExecutionByTypeAndNameResponse) Reset() { - *x = GetExecutionByTypeAndNameResponse{} +func (x *GetExecutionsByExternalIdsResponse) Reset() { + *x = GetExecutionsByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionByTypeAndNameResponse) String() string { +func (x *GetExecutionsByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionByTypeAndNameResponse) ProtoMessage() {} +func (*GetExecutionsByExternalIdsResponse) ProtoMessage() {} -func (x *GetExecutionByTypeAndNameResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49] +func (x *GetExecutionsByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -2965,46 +3384,45 @@ func (x *GetExecutionByTypeAndNameResponse) ProtoReflect() protoreflect.Message return mi.MessageOf(x) } -// Deprecated: Use GetExecutionByTypeAndNameResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionByTypeAndNameResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{49} +// Deprecated: Use GetExecutionsByExternalIdsResponse.ProtoReflect.Descriptor instead. 
+func (*GetExecutionsByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{51} } -func (x *GetExecutionByTypeAndNameResponse) GetExecution() *Execution { +func (x *GetExecutionsByExternalIdsResponse) GetExecutions() []*Execution { if x != nil { - return x.Execution + return x.Executions } return nil } -type GetExecutionsByIDRequest struct { +type GetContextsByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // A list of execution ids to retrieve. - ExecutionIds []int64 `protobuf:"varint,1,rep,name=execution_ids,json=executionIds" json:"execution_ids,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionsByIDRequest) Reset() { - *x = GetExecutionsByIDRequest{} +func (x *GetContextsByExternalIdsRequest) Reset() { + *x = GetContextsByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByIDRequest) String() string { +func (x *GetContextsByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByIDRequest) ProtoMessage() {} +func (*GetContextsByExternalIdsRequest) ProtoMessage() {} -func (x *GetExecutionsByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50] +func (x *GetContextsByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3015,52 +3433,50 @@ func (x *GetExecutionsByIDRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByIDRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionsByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{50} +// Deprecated: Use GetContextsByExternalIdsRequest.ProtoReflect.Descriptor instead. +func (*GetContextsByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{52} } -func (x *GetExecutionsByIDRequest) GetExecutionIds() []int64 { +func (x *GetContextsByExternalIdsRequest) GetExternalIds() []string { if x != nil { - return x.ExecutionIds + return x.ExternalIds } return nil } -func (x *GetExecutionsByIDRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionsByIDResponse struct { +type GetContextsByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The result is not index-aligned: if an id is not found, it is not - // returned. 
- Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` + Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` } -func (x *GetExecutionsByIDResponse) Reset() { - *x = GetExecutionsByIDResponse{} +func (x *GetContextsByExternalIdsResponse) Reset() { + *x = GetContextsByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByIDResponse) String() string { +func (x *GetContextsByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByIDResponse) ProtoMessage() {} +func (*GetContextsByExternalIdsResponse) ProtoMessage() {} -func (x *GetExecutionsByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51] +func (x *GetContextsByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3071,47 +3487,45 @@ func (x *GetExecutionsByIDResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByIDResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionsByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{51} +// Deprecated: Use GetContextsByExternalIdsResponse.ProtoReflect.Descriptor instead. +func (*GetContextsByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{53} } -func (x *GetExecutionsByIDResponse) GetExecutions() []*Execution { +func (x *GetContextsByExternalIdsResponse) GetContexts() []*Context { if x != nil { - return x.Executions + return x.Contexts } return nil } -type GetExecutionTypeRequest struct { +type GetArtifactTypesByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // If not set, it looks for the type with type_name with default type_version. - TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionTypeRequest) Reset() { - *x = GetExecutionTypeRequest{} +func (x *GetArtifactTypesByExternalIdsRequest) Reset() { + *x = GetArtifactTypesByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionTypeRequest) String() string { +func (x *GetArtifactTypesByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionTypeRequest) ProtoMessage() {} +func (*GetArtifactTypesByExternalIdsRequest) ProtoMessage() {} -func (x *GetExecutionTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52] +func (x *GetArtifactTypesByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3122,58 +3536,50 @@ func (x *GetExecutionTypeRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionTypeRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{52} -} - -func (x *GetExecutionTypeRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName - } - return "" +// Deprecated: Use GetArtifactTypesByExternalIdsRequest.ProtoReflect.Descriptor instead. +func (*GetArtifactTypesByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{54} } -func (x *GetExecutionTypeRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion +func (x *GetArtifactTypesByExternalIdsRequest) GetExternalIds() []string { + if x != nil { + return x.ExternalIds } - return "" + return nil } -func (x *GetExecutionTypeRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetArtifactTypesByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionTypeResponse struct { +type GetArtifactTypesByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Gets an execution type, or clear if it does not exist. 
- ExecutionType *ExecutionType `protobuf:"bytes,1,opt,name=execution_type,json=executionType" json:"execution_type,omitempty"` + ArtifactTypes []*ArtifactType `protobuf:"bytes,1,rep,name=artifact_types,json=artifactTypes" json:"artifact_types,omitempty"` } -func (x *GetExecutionTypeResponse) Reset() { - *x = GetExecutionTypeResponse{} +func (x *GetArtifactTypesByExternalIdsResponse) Reset() { + *x = GetArtifactTypesByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionTypeResponse) String() string { +func (x *GetArtifactTypesByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionTypeResponse) ProtoMessage() {} +func (*GetArtifactTypesByExternalIdsResponse) ProtoMessage() {} -func (x *GetExecutionTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53] +func (x *GetArtifactTypesByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3184,46 +3590,45 @@ func (x *GetExecutionTypeResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionTypeResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{53} +// Deprecated: Use GetArtifactTypesByExternalIdsResponse.ProtoReflect.Descriptor instead. +func (*GetArtifactTypesByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{55} } -func (x *GetExecutionTypeResponse) GetExecutionType() *ExecutionType { +func (x *GetArtifactTypesByExternalIdsResponse) GetArtifactTypes() []*ArtifactType { if x != nil { - return x.ExecutionType + return x.ArtifactTypes } return nil } -// Gets all events with matching execution ids. -type GetEventsByExecutionIDsRequest struct { +type GetExecutionTypesByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ExecutionIds []int64 `protobuf:"varint,1,rep,name=execution_ids,json=executionIds" json:"execution_ids,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetEventsByExecutionIDsRequest) Reset() { - *x = GetEventsByExecutionIDsRequest{} +func (x *GetExecutionTypesByExternalIdsRequest) Reset() { + *x = GetExecutionTypesByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetEventsByExecutionIDsRequest) String() string { +func (x *GetExecutionTypesByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetEventsByExecutionIDsRequest) ProtoMessage() {} +func (*GetExecutionTypesByExternalIdsRequest) ProtoMessage() {} -func (x *GetEventsByExecutionIDsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54] +func (x *GetExecutionTypesByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3234,50 +3639,50 @@ func (x *GetEventsByExecutionIDsRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetEventsByExecutionIDsRequest.ProtoReflect.Descriptor instead. -func (*GetEventsByExecutionIDsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{54} +// Deprecated: Use GetExecutionTypesByExternalIdsRequest.ProtoReflect.Descriptor instead. 
+func (*GetExecutionTypesByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{56} } -func (x *GetEventsByExecutionIDsRequest) GetExecutionIds() []int64 { +func (x *GetExecutionTypesByExternalIdsRequest) GetExternalIds() []string { if x != nil { - return x.ExecutionIds + return x.ExternalIds } return nil } -func (x *GetEventsByExecutionIDsRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionTypesByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetEventsByExecutionIDsResponse struct { +type GetExecutionTypesByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Events []*Event `protobuf:"bytes,1,rep,name=events" json:"events,omitempty"` + ExecutionTypes []*ExecutionType `protobuf:"bytes,1,rep,name=execution_types,json=executionTypes" json:"execution_types,omitempty"` } -func (x *GetEventsByExecutionIDsResponse) Reset() { - *x = GetEventsByExecutionIDsResponse{} +func (x *GetExecutionTypesByExternalIdsResponse) Reset() { + *x = GetExecutionTypesByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetEventsByExecutionIDsResponse) String() string { +func (x *GetExecutionTypesByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetEventsByExecutionIDsResponse) ProtoMessage() {} +func (*GetExecutionTypesByExternalIdsResponse) ProtoMessage() {} -func (x *GetEventsByExecutionIDsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55] +func (x *GetExecutionTypesByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3288,45 +3693,45 @@ func (x *GetEventsByExecutionIDsResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetEventsByExecutionIDsResponse.ProtoReflect.Descriptor instead. -func (*GetEventsByExecutionIDsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{55} +// Deprecated: Use GetExecutionTypesByExternalIdsResponse.ProtoReflect.Descriptor instead. 
+func (*GetExecutionTypesByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{57} } -func (x *GetEventsByExecutionIDsResponse) GetEvents() []*Event { +func (x *GetExecutionTypesByExternalIdsResponse) GetExecutionTypes() []*ExecutionType { if x != nil { - return x.Events + return x.ExecutionTypes } return nil } -type GetEventsByArtifactIDsRequest struct { +type GetContextTypesByExternalIdsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ArtifactIds []int64 `protobuf:"varint,1,rep,name=artifact_ids,json=artifactIds" json:"artifact_ids,omitempty"` + ExternalIds []string `protobuf:"bytes,1,rep,name=external_ids,json=externalIds" json:"external_ids,omitempty"` // Options regarding transactions. TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetEventsByArtifactIDsRequest) Reset() { - *x = GetEventsByArtifactIDsRequest{} +func (x *GetContextTypesByExternalIdsRequest) Reset() { + *x = GetContextTypesByExternalIdsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetEventsByArtifactIDsRequest) String() string { +func (x *GetContextTypesByExternalIdsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetEventsByArtifactIDsRequest) ProtoMessage() {} +func (*GetContextTypesByExternalIdsRequest) ProtoMessage() {} -func (x *GetEventsByArtifactIDsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56] +func (x *GetContextTypesByExternalIdsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3337,50 +3742,50 @@ func (x *GetEventsByArtifactIDsRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetEventsByArtifactIDsRequest.ProtoReflect.Descriptor instead. -func (*GetEventsByArtifactIDsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{56} +// Deprecated: Use GetContextTypesByExternalIdsRequest.ProtoReflect.Descriptor instead. 
+func (*GetContextTypesByExternalIdsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{58} } -func (x *GetEventsByArtifactIDsRequest) GetArtifactIds() []int64 { +func (x *GetContextTypesByExternalIdsRequest) GetExternalIds() []string { if x != nil { - return x.ArtifactIds + return x.ExternalIds } return nil } -func (x *GetEventsByArtifactIDsRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextTypesByExternalIdsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetEventsByArtifactIDsResponse struct { +type GetContextTypesByExternalIdsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Events []*Event `protobuf:"bytes,1,rep,name=events" json:"events,omitempty"` + ContextTypes []*ContextType `protobuf:"bytes,1,rep,name=context_types,json=contextTypes" json:"context_types,omitempty"` } -func (x *GetEventsByArtifactIDsResponse) Reset() { - *x = GetEventsByArtifactIDsResponse{} +func (x *GetContextTypesByExternalIdsResponse) Reset() { + *x = GetContextTypesByExternalIdsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetEventsByArtifactIDsResponse) String() string { +func (x *GetContextTypesByExternalIdsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetEventsByArtifactIDsResponse) ProtoMessage() {} +func (*GetContextTypesByExternalIdsResponse) ProtoMessage() {} -func (x *GetEventsByArtifactIDsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57] +func (x *GetContextTypesByExternalIdsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3391,45 +3796,58 @@ func (x *GetEventsByArtifactIDsResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetEventsByArtifactIDsResponse.ProtoReflect.Descriptor instead. -func (*GetEventsByArtifactIDsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{57} +// Deprecated: Use GetContextTypesByExternalIdsResponse.ProtoReflect.Descriptor instead. +func (*GetContextTypesByExternalIdsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{59} } -func (x *GetEventsByArtifactIDsResponse) GetEvents() []*Event { +func (x *GetContextTypesByExternalIdsResponse) GetContextTypes() []*ContextType { if x != nil { - return x.Events + return x.ContextTypes } return nil } -type GetArtifactTypesByIDRequest struct { +type GetExecutionsByTypeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // If not set, it looks for the type with type_name with default type_version. 
+ TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + // Specify List options. + // Currently supports: + // 1. Field to order the results. + // 2. Page size. + // If set, the request will + // first fetch all executions with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Executions of the page size. + // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. + Options *ListOperationOptions `protobuf:"bytes,3,opt,name=options" json:"options,omitempty"` // Options regarding transactions. - TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetArtifactTypesByIDRequest) Reset() { - *x = GetArtifactTypesByIDRequest{} +func (x *GetExecutionsByTypeRequest) Reset() { + *x = GetExecutionsByTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetArtifactTypesByIDRequest) String() string { +func (x *GetExecutionsByTypeRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetArtifactTypesByIDRequest) ProtoMessage() {} +func (*GetExecutionsByTypeRequest) ProtoMessage() {} -func (x *GetArtifactTypesByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58] +func (x *GetExecutionsByTypeRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3440,52 +3858,67 @@ func (x *GetArtifactTypesByIDRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetArtifactTypesByIDRequest.ProtoReflect.Descriptor instead. -func (*GetArtifactTypesByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{58} +// Deprecated: Use GetExecutionsByTypeRequest.ProtoReflect.Descriptor instead. 
+func (*GetExecutionsByTypeRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{60} } -func (x *GetArtifactTypesByIDRequest) GetTypeIds() []int64 { +func (x *GetExecutionsByTypeRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName + } + return "" +} + +func (x *GetExecutionsByTypeRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion + } + return "" +} + +func (x *GetExecutionsByTypeRequest) GetOptions() *ListOperationOptions { if x != nil { - return x.TypeIds + return x.Options } return nil } -func (x *GetArtifactTypesByIDRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionsByTypeRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetArtifactTypesByIDResponse struct { +type GetExecutionsByTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The result is not index-aligned: if an id is not found, it is not - // returned. - ArtifactTypes []*ArtifactType `protobuf:"bytes,1,rep,name=artifact_types,json=artifactTypes" json:"artifact_types,omitempty"` + Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` + // Token to use to retrieve next page of results if list options are used in + // the request. + NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` } -func (x *GetArtifactTypesByIDResponse) Reset() { - *x = GetArtifactTypesByIDResponse{} +func (x *GetExecutionsByTypeResponse) Reset() { + *x = GetExecutionsByTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetArtifactTypesByIDResponse) String() string { +func (x *GetExecutionsByTypeResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetArtifactTypesByIDResponse) ProtoMessage() {} +func (*GetExecutionsByTypeResponse) ProtoMessage() {} -func (x *GetArtifactTypesByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59] +func (x *GetExecutionsByTypeResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3496,45 +3929,56 @@ func (x *GetArtifactTypesByIDResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetArtifactTypesByIDResponse.ProtoReflect.Descriptor instead. -func (*GetArtifactTypesByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{59} +// Deprecated: Use GetExecutionsByTypeResponse.ProtoReflect.Descriptor instead. 
+func (*GetExecutionsByTypeResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{61} } -func (x *GetArtifactTypesByIDResponse) GetArtifactTypes() []*ArtifactType { +func (x *GetExecutionsByTypeResponse) GetExecutions() []*Execution { if x != nil { - return x.ArtifactTypes + return x.Executions } return nil } -type GetExecutionTypesByIDRequest struct { +func (x *GetExecutionsByTypeResponse) GetNextPageToken() string { + if x != nil && x.NextPageToken != nil { + return *x.NextPageToken + } + return "" +} + +type GetExecutionByTypeAndNameRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // If not set, it looks for the type with type_name and execution_name with + // default type_version. + TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + ExecutionName *string `protobuf:"bytes,2,opt,name=execution_name,json=executionName" json:"execution_name,omitempty"` // Options regarding transactions. - TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionTypesByIDRequest) Reset() { - *x = GetExecutionTypesByIDRequest{} +func (x *GetExecutionByTypeAndNameRequest) Reset() { + *x = GetExecutionByTypeAndNameRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionTypesByIDRequest) String() string { +func (x *GetExecutionByTypeAndNameRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionTypesByIDRequest) ProtoMessage() {} +func (*GetExecutionByTypeAndNameRequest) ProtoMessage() {} -func (x *GetExecutionTypesByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60] +func (x *GetExecutionByTypeAndNameRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3545,52 +3989,64 @@ func (x *GetExecutionTypesByIDRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionTypesByIDRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionTypesByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{60} +// Deprecated: Use GetExecutionByTypeAndNameRequest.ProtoReflect.Descriptor instead. 
+func (*GetExecutionByTypeAndNameRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{62} } -func (x *GetExecutionTypesByIDRequest) GetTypeIds() []int64 { - if x != nil { - return x.TypeIds +func (x *GetExecutionByTypeAndNameRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName } - return nil + return "" } -func (x *GetExecutionTypesByIDRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionByTypeAndNameRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion + } + return "" +} + +func (x *GetExecutionByTypeAndNameRequest) GetExecutionName() string { + if x != nil && x.ExecutionName != nil { + return *x.ExecutionName + } + return "" +} + +func (x *GetExecutionByTypeAndNameRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionTypesByIDResponse struct { +type GetExecutionByTypeAndNameResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The result is not index-aligned: if an id is not found, it is not - // returned. - ExecutionTypes []*ExecutionType `protobuf:"bytes,1,rep,name=execution_types,json=executionTypes" json:"execution_types,omitempty"` + Execution *Execution `protobuf:"bytes,1,opt,name=execution" json:"execution,omitempty"` } -func (x *GetExecutionTypesByIDResponse) Reset() { - *x = GetExecutionTypesByIDResponse{} +func (x *GetExecutionByTypeAndNameResponse) Reset() { + *x = GetExecutionByTypeAndNameResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionTypesByIDResponse) String() string { +func (x *GetExecutionByTypeAndNameResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionTypesByIDResponse) ProtoMessage() {} +func (*GetExecutionByTypeAndNameResponse) ProtoMessage() {} -func (x *GetExecutionTypesByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61] +func (x *GetExecutionByTypeAndNameResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3601,47 +4057,46 @@ func (x *GetExecutionTypesByIDResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionTypesByIDResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionTypesByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{61} +// Deprecated: Use GetExecutionByTypeAndNameResponse.ProtoReflect.Descriptor instead. 
+func (*GetExecutionByTypeAndNameResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{63} } -func (x *GetExecutionTypesByIDResponse) GetExecutionTypes() []*ExecutionType { +func (x *GetExecutionByTypeAndNameResponse) GetExecution() *Execution { if x != nil { - return x.ExecutionTypes + return x.Execution } return nil } -type GetContextTypeRequest struct { +type GetExecutionsByIDRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // If not set, it looks for the type with type_name with default type_version. - TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + // A list of execution ids to retrieve. + ExecutionIds []int64 `protobuf:"varint,1,rep,name=execution_ids,json=executionIds" json:"execution_ids,omitempty"` // Options regarding transactions. - TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextTypeRequest) Reset() { - *x = GetContextTypeRequest{} +func (x *GetExecutionsByIDRequest) Reset() { + *x = GetExecutionsByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextTypeRequest) String() string { +func (x *GetExecutionsByIDRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextTypeRequest) ProtoMessage() {} +func (*GetExecutionsByIDRequest) ProtoMessage() {} -func (x *GetContextTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62] +func (x *GetExecutionsByIDRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3652,58 +4107,52 @@ func (x *GetContextTypeRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextTypeRequest.ProtoReflect.Descriptor instead. -func (*GetContextTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{62} -} - -func (x *GetContextTypeRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName - } - return "" +// Deprecated: Use GetExecutionsByIDRequest.ProtoReflect.Descriptor instead. 
+func (*GetExecutionsByIDRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{64} } -func (x *GetContextTypeRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion +func (x *GetExecutionsByIDRequest) GetExecutionIds() []int64 { + if x != nil { + return x.ExecutionIds } - return "" + return nil } -func (x *GetContextTypeRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionsByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextTypeResponse struct { +type GetExecutionsByIDResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Gets a context type, or clear if it does not exist. - ContextType *ContextType `protobuf:"bytes,1,opt,name=context_type,json=contextType" json:"context_type,omitempty"` + // The result is not index-aligned: if an id is not found, it is not + // returned. + Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` } -func (x *GetContextTypeResponse) Reset() { - *x = GetContextTypeResponse{} +func (x *GetExecutionsByIDResponse) Reset() { + *x = GetExecutionsByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextTypeResponse) String() string { +func (x *GetExecutionsByIDResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextTypeResponse) ProtoMessage() {} +func (*GetExecutionsByIDResponse) ProtoMessage() {} -func (x *GetContextTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63] +func (x *GetExecutionsByIDResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3714,45 +4163,47 @@ func (x *GetContextTypeResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextTypeResponse.ProtoReflect.Descriptor instead. -func (*GetContextTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{63} +// Deprecated: Use GetExecutionsByIDResponse.ProtoReflect.Descriptor instead. +func (*GetExecutionsByIDResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{65} } -func (x *GetContextTypeResponse) GetContextType() *ContextType { +func (x *GetExecutionsByIDResponse) GetExecutions() []*Execution { if x != nil { - return x.ContextType + return x.Executions } return nil } -type GetContextTypesByIDRequest struct { +type GetExecutionTypeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // If not set, it looks for the type with type_name with default type_version. 
+ TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` // Options regarding transactions. - TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextTypesByIDRequest) Reset() { - *x = GetContextTypesByIDRequest{} +func (x *GetExecutionTypeRequest) Reset() { + *x = GetExecutionTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextTypesByIDRequest) String() string { +func (x *GetExecutionTypeRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextTypesByIDRequest) ProtoMessage() {} +func (*GetExecutionTypeRequest) ProtoMessage() {} -func (x *GetContextTypesByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64] +func (x *GetExecutionTypeRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3763,52 +4214,58 @@ func (x *GetContextTypesByIDRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextTypesByIDRequest.ProtoReflect.Descriptor instead. -func (*GetContextTypesByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{64} +// Deprecated: Use GetExecutionTypeRequest.ProtoReflect.Descriptor instead. +func (*GetExecutionTypeRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{66} } -func (x *GetContextTypesByIDRequest) GetTypeIds() []int64 { - if x != nil { - return x.TypeIds +func (x *GetExecutionTypeRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName } - return nil + return "" } -func (x *GetContextTypesByIDRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionTypeRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion + } + return "" +} + +func (x *GetExecutionTypeRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextTypesByIDResponse struct { +type GetExecutionTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The result is not index-aligned: if an id is not found, it is not - // returned. - ContextTypes []*ContextType `protobuf:"bytes,1,rep,name=context_types,json=contextTypes" json:"context_types,omitempty"` + // Gets an execution type, or clear if it does not exist. 
+ ExecutionType *ExecutionType `protobuf:"bytes,1,opt,name=execution_type,json=executionType" json:"execution_type,omitempty"` } -func (x *GetContextTypesByIDResponse) Reset() { - *x = GetContextTypesByIDResponse{} +func (x *GetExecutionTypeResponse) Reset() { + *x = GetExecutionTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextTypesByIDResponse) String() string { +func (x *GetExecutionTypeResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextTypesByIDResponse) ProtoMessage() {} +func (*GetExecutionTypeResponse) ProtoMessage() {} -func (x *GetContextTypesByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65] +func (x *GetExecutionTypeResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3819,51 +4276,46 @@ func (x *GetContextTypesByIDResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextTypesByIDResponse.ProtoReflect.Descriptor instead. -func (*GetContextTypesByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{65} +// Deprecated: Use GetExecutionTypeResponse.ProtoReflect.Descriptor instead. +func (*GetExecutionTypeResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{67} } -func (x *GetContextTypesByIDResponse) GetContextTypes() []*ContextType { +func (x *GetExecutionTypeResponse) GetExecutionType() *ExecutionType { if x != nil { - return x.ContextTypes + return x.ExecutionType } return nil } -// Request to retrieve Contexts using List options. -// If option is not specified then all Contexts are returned. -type GetContextsRequest struct { +// Gets all events with matching execution ids. +type GetEventsByExecutionIDsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. - Options *ListOperationOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` + ExecutionIds []int64 `protobuf:"varint,1,rep,name=execution_ids,json=executionIds" json:"execution_ids,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextsRequest) Reset() { - *x = GetContextsRequest{} +func (x *GetEventsByExecutionIDsRequest) Reset() { + *x = GetEventsByExecutionIDsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsRequest) String() string { +func (x *GetEventsByExecutionIDsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsRequest) ProtoMessage() {} +func (*GetEventsByExecutionIDsRequest) ProtoMessage() {} -func (x *GetContextsRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66] +func (x *GetEventsByExecutionIDsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3874,54 +4326,50 @@ func (x *GetContextsRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsRequest.ProtoReflect.Descriptor instead. -func (*GetContextsRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{66} +// Deprecated: Use GetEventsByExecutionIDsRequest.ProtoReflect.Descriptor instead. +func (*GetEventsByExecutionIDsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{68} } -func (x *GetContextsRequest) GetOptions() *ListOperationOptions { +func (x *GetEventsByExecutionIDsRequest) GetExecutionIds() []int64 { if x != nil { - return x.Options + return x.ExecutionIds } return nil } -func (x *GetContextsRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetEventsByExecutionIDsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextsResponse struct { +type GetEventsByExecutionIDsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Returned contexts. - Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` - // Token to use to retrieve next page of results if list options are used in - // the request. 
- NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` + Events []*Event `protobuf:"bytes,1,rep,name=events" json:"events,omitempty"` } -func (x *GetContextsResponse) Reset() { - *x = GetContextsResponse{} +func (x *GetEventsByExecutionIDsResponse) Reset() { + *x = GetEventsByExecutionIDsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsResponse) String() string { +func (x *GetEventsByExecutionIDsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsResponse) ProtoMessage() {} +func (*GetEventsByExecutionIDsResponse) ProtoMessage() {} -func (x *GetContextsResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67] +func (x *GetEventsByExecutionIDsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3932,60 +4380,45 @@ func (x *GetContextsResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsResponse.ProtoReflect.Descriptor instead. -func (*GetContextsResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{67} +// Deprecated: Use GetEventsByExecutionIDsResponse.ProtoReflect.Descriptor instead. +func (*GetEventsByExecutionIDsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{69} } -func (x *GetContextsResponse) GetContexts() []*Context { +func (x *GetEventsByExecutionIDsResponse) GetEvents() []*Event { if x != nil { - return x.Contexts + return x.Events } return nil } -func (x *GetContextsResponse) GetNextPageToken() string { - if x != nil && x.NextPageToken != nil { - return *x.NextPageToken - } - return "" -} - -type GetContextsByTypeRequest struct { +type GetEventsByArtifactIDsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. - Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` - // If not set, it looks for the type with type_name and options with default - // type_version. - TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + ArtifactIds []int64 `protobuf:"varint,1,rep,name=artifact_ids,json=artifactIds" json:"artifact_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextsByTypeRequest) Reset() { - *x = GetContextsByTypeRequest{} +func (x *GetEventsByArtifactIDsRequest) Reset() { + *x = GetEventsByArtifactIDsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByTypeRequest) String() string { +func (x *GetEventsByArtifactIDsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByTypeRequest) ProtoMessage() {} +func (*GetEventsByArtifactIDsRequest) ProtoMessage() {} -func (x *GetContextsByTypeRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68] +func (x *GetEventsByArtifactIDsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -3996,67 +4429,50 @@ func (x *GetContextsByTypeRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByTypeRequest.ProtoReflect.Descriptor instead. -func (*GetContextsByTypeRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{68} -} - -func (x *GetContextsByTypeRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName - } - return "" +// Deprecated: Use GetEventsByArtifactIDsRequest.ProtoReflect.Descriptor instead. +func (*GetEventsByArtifactIDsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{70} } -func (x *GetContextsByTypeRequest) GetOptions() *ListOperationOptions { +func (x *GetEventsByArtifactIDsRequest) GetArtifactIds() []int64 { if x != nil { - return x.Options + return x.ArtifactIds } return nil } -func (x *GetContextsByTypeRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion - } - return "" -} - -func (x *GetContextsByTypeRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetEventsByArtifactIDsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextsByTypeResponse struct { +type GetEventsByArtifactIDsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` - // Token to use to retrieve next page of results if list options are used in - // the request. 
- NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` + Events []*Event `protobuf:"bytes,1,rep,name=events" json:"events,omitempty"` } -func (x *GetContextsByTypeResponse) Reset() { - *x = GetContextsByTypeResponse{} +func (x *GetEventsByArtifactIDsResponse) Reset() { + *x = GetEventsByArtifactIDsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByTypeResponse) String() string { +func (x *GetEventsByArtifactIDsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByTypeResponse) ProtoMessage() {} +func (*GetEventsByArtifactIDsResponse) ProtoMessage() {} -func (x *GetContextsByTypeResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69] +func (x *GetEventsByArtifactIDsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4067,56 +4483,45 @@ func (x *GetContextsByTypeResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByTypeResponse.ProtoReflect.Descriptor instead. -func (*GetContextsByTypeResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{69} +// Deprecated: Use GetEventsByArtifactIDsResponse.ProtoReflect.Descriptor instead. +func (*GetEventsByArtifactIDsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{71} } -func (x *GetContextsByTypeResponse) GetContexts() []*Context { +func (x *GetEventsByArtifactIDsResponse) GetEvents() []*Event { if x != nil { - return x.Contexts + return x.Events } return nil } -func (x *GetContextsByTypeResponse) GetNextPageToken() string { - if x != nil && x.NextPageToken != nil { - return *x.NextPageToken - } - return "" -} - -type GetContextByTypeAndNameRequest struct { +type GetArtifactTypesByIDRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` - // If not set, it looks for the type with type_name and context_name with - // default type_version. - TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` - ContextName *string `protobuf:"bytes,2,opt,name=context_name,json=contextName" json:"context_name,omitempty"` + TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextByTypeAndNameRequest) Reset() { - *x = GetContextByTypeAndNameRequest{} +func (x *GetArtifactTypesByIDRequest) Reset() { + *x = GetArtifactTypesByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextByTypeAndNameRequest) String() string { +func (x *GetArtifactTypesByIDRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextByTypeAndNameRequest) ProtoMessage() {} +func (*GetArtifactTypesByIDRequest) ProtoMessage() {} -func (x *GetContextByTypeAndNameRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70] +func (x *GetArtifactTypesByIDRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4127,64 +4532,52 @@ func (x *GetContextByTypeAndNameRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextByTypeAndNameRequest.ProtoReflect.Descriptor instead. -func (*GetContextByTypeAndNameRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{70} -} - -func (x *GetContextByTypeAndNameRequest) GetTypeName() string { - if x != nil && x.TypeName != nil { - return *x.TypeName - } - return "" -} - -func (x *GetContextByTypeAndNameRequest) GetTypeVersion() string { - if x != nil && x.TypeVersion != nil { - return *x.TypeVersion - } - return "" +// Deprecated: Use GetArtifactTypesByIDRequest.ProtoReflect.Descriptor instead. +func (*GetArtifactTypesByIDRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{72} } -func (x *GetContextByTypeAndNameRequest) GetContextName() string { - if x != nil && x.ContextName != nil { - return *x.ContextName +func (x *GetArtifactTypesByIDRequest) GetTypeIds() []int64 { + if x != nil { + return x.TypeIds } - return "" + return nil } -func (x *GetContextByTypeAndNameRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetArtifactTypesByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextByTypeAndNameResponse struct { +type GetArtifactTypesByIDResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Context *Context `protobuf:"bytes,1,opt,name=context" json:"context,omitempty"` + // The result is not index-aligned: if an id is not found, it is not + // returned. 
+ ArtifactTypes []*ArtifactType `protobuf:"bytes,1,rep,name=artifact_types,json=artifactTypes" json:"artifact_types,omitempty"` } -func (x *GetContextByTypeAndNameResponse) Reset() { - *x = GetContextByTypeAndNameResponse{} +func (x *GetArtifactTypesByIDResponse) Reset() { + *x = GetArtifactTypesByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextByTypeAndNameResponse) String() string { +func (x *GetArtifactTypesByIDResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextByTypeAndNameResponse) ProtoMessage() {} +func (*GetArtifactTypesByIDResponse) ProtoMessage() {} -func (x *GetContextByTypeAndNameResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71] +func (x *GetArtifactTypesByIDResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4195,46 +4588,45 @@ func (x *GetContextByTypeAndNameResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextByTypeAndNameResponse.ProtoReflect.Descriptor instead. -func (*GetContextByTypeAndNameResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{71} +// Deprecated: Use GetArtifactTypesByIDResponse.ProtoReflect.Descriptor instead. +func (*GetArtifactTypesByIDResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{73} } -func (x *GetContextByTypeAndNameResponse) GetContext() *Context { +func (x *GetArtifactTypesByIDResponse) GetArtifactTypes() []*ArtifactType { if x != nil { - return x.Context + return x.ArtifactTypes } return nil } -type GetContextsByIDRequest struct { +type GetExecutionTypesByIDRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // A list of context ids to retrieve. - ContextIds []int64 `protobuf:"varint,1,rep,name=context_ids,json=contextIds" json:"context_ids,omitempty"` + TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextsByIDRequest) Reset() { - *x = GetContextsByIDRequest{} +func (x *GetExecutionTypesByIDRequest) Reset() { + *x = GetExecutionTypesByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByIDRequest) String() string { +func (x *GetExecutionTypesByIDRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByIDRequest) ProtoMessage() {} +func (*GetExecutionTypesByIDRequest) ProtoMessage() {} -func (x *GetContextsByIDRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72] +func (x *GetExecutionTypesByIDRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4245,52 +4637,52 @@ func (x *GetContextsByIDRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByIDRequest.ProtoReflect.Descriptor instead. -func (*GetContextsByIDRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{72} +// Deprecated: Use GetExecutionTypesByIDRequest.ProtoReflect.Descriptor instead. +func (*GetExecutionTypesByIDRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{74} } -func (x *GetContextsByIDRequest) GetContextIds() []int64 { +func (x *GetExecutionTypesByIDRequest) GetTypeIds() []int64 { if x != nil { - return x.ContextIds + return x.TypeIds } return nil } -func (x *GetContextsByIDRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetExecutionTypesByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextsByIDResponse struct { +type GetExecutionTypesByIDResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields // The result is not index-aligned: if an id is not found, it is not // returned. 
- Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + ExecutionTypes []*ExecutionType `protobuf:"bytes,1,rep,name=execution_types,json=executionTypes" json:"execution_types,omitempty"` } -func (x *GetContextsByIDResponse) Reset() { - *x = GetContextsByIDResponse{} +func (x *GetExecutionTypesByIDResponse) Reset() { + *x = GetExecutionTypesByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByIDResponse) String() string { +func (x *GetExecutionTypesByIDResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByIDResponse) ProtoMessage() {} +func (*GetExecutionTypesByIDResponse) ProtoMessage() {} -func (x *GetContextsByIDResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73] +func (x *GetExecutionTypesByIDResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4301,45 +4693,47 @@ func (x *GetContextsByIDResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByIDResponse.ProtoReflect.Descriptor instead. -func (*GetContextsByIDResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{73} +// Deprecated: Use GetExecutionTypesByIDResponse.ProtoReflect.Descriptor instead. +func (*GetExecutionTypesByIDResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{75} } -func (x *GetContextsByIDResponse) GetContexts() []*Context { +func (x *GetExecutionTypesByIDResponse) GetExecutionTypes() []*ExecutionType { if x != nil { - return x.Contexts + return x.ExecutionTypes } return nil } -type GetContextsByArtifactRequest struct { +type GetContextTypeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ArtifactId *int64 `protobuf:"varint,1,opt,name=artifact_id,json=artifactId" json:"artifact_id,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // If not set, it looks for the type with type_name with default type_version. + TypeVersion *string `protobuf:"bytes,2,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextsByArtifactRequest) Reset() { - *x = GetContextsByArtifactRequest{} +func (x *GetContextTypeRequest) Reset() { + *x = GetContextTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByArtifactRequest) String() string { +func (x *GetContextTypeRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByArtifactRequest) ProtoMessage() {} +func (*GetContextTypeRequest) ProtoMessage() {} -func (x *GetContextsByArtifactRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74] +func (x *GetContextTypeRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4350,50 +4744,58 @@ func (x *GetContextsByArtifactRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByArtifactRequest.ProtoReflect.Descriptor instead. -func (*GetContextsByArtifactRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{74} +// Deprecated: Use GetContextTypeRequest.ProtoReflect.Descriptor instead. +func (*GetContextTypeRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{76} } -func (x *GetContextsByArtifactRequest) GetArtifactId() int64 { - if x != nil && x.ArtifactId != nil { - return *x.ArtifactId +func (x *GetContextTypeRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName } - return 0 + return "" } -func (x *GetContextsByArtifactRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextTypeRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion + } + return "" +} + +func (x *GetContextTypeRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextsByArtifactResponse struct { +type GetContextTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + // Gets a context type, or clear if it does not exist. 
+ ContextType *ContextType `protobuf:"bytes,1,opt,name=context_type,json=contextType" json:"context_type,omitempty"` } -func (x *GetContextsByArtifactResponse) Reset() { - *x = GetContextsByArtifactResponse{} +func (x *GetContextTypeResponse) Reset() { + *x = GetContextTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByArtifactResponse) String() string { +func (x *GetContextTypeResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByArtifactResponse) ProtoMessage() {} +func (*GetContextTypeResponse) ProtoMessage() {} -func (x *GetContextsByArtifactResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75] +func (x *GetContextTypeResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4404,45 +4806,45 @@ func (x *GetContextsByArtifactResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByArtifactResponse.ProtoReflect.Descriptor instead. -func (*GetContextsByArtifactResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{75} +// Deprecated: Use GetContextTypeResponse.ProtoReflect.Descriptor instead. +func (*GetContextTypeResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{77} } -func (x *GetContextsByArtifactResponse) GetContexts() []*Context { +func (x *GetContextTypeResponse) GetContextType() *ContextType { if x != nil { - return x.Contexts + return x.ContextType } return nil } -type GetContextsByExecutionRequest struct { +type GetContextTypesByIDRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ExecutionId *int64 `protobuf:"varint,1,opt,name=execution_id,json=executionId" json:"execution_id,omitempty"` + TypeIds []int64 `protobuf:"varint,1,rep,name=type_ids,json=typeIds" json:"type_ids,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetContextsByExecutionRequest) Reset() { - *x = GetContextsByExecutionRequest{} +func (x *GetContextTypesByIDRequest) Reset() { + *x = GetContextTypesByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByExecutionRequest) String() string { +func (x *GetContextTypesByIDRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByExecutionRequest) ProtoMessage() {} +func (*GetContextTypesByIDRequest) ProtoMessage() {} -func (x *GetContextsByExecutionRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76] +func (x *GetContextTypesByIDRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4453,50 +4855,52 @@ func (x *GetContextsByExecutionRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByExecutionRequest.ProtoReflect.Descriptor instead. -func (*GetContextsByExecutionRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{76} +// Deprecated: Use GetContextTypesByIDRequest.ProtoReflect.Descriptor instead. +func (*GetContextTypesByIDRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{78} } -func (x *GetContextsByExecutionRequest) GetExecutionId() int64 { - if x != nil && x.ExecutionId != nil { - return *x.ExecutionId +func (x *GetContextTypesByIDRequest) GetTypeIds() []int64 { + if x != nil { + return x.TypeIds } - return 0 + return nil } -func (x *GetContextsByExecutionRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextTypesByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetContextsByExecutionResponse struct { +type GetContextTypesByIDResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + // The result is not index-aligned: if an id is not found, it is not + // returned. 
+ ContextTypes []*ContextType `protobuf:"bytes,1,rep,name=context_types,json=contextTypes" json:"context_types,omitempty"` } -func (x *GetContextsByExecutionResponse) Reset() { - *x = GetContextsByExecutionResponse{} +func (x *GetContextTypesByIDResponse) Reset() { + *x = GetContextTypesByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetContextsByExecutionResponse) String() string { +func (x *GetContextTypesByIDResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetContextsByExecutionResponse) ProtoMessage() {} +func (*GetContextTypesByIDResponse) ProtoMessage() {} -func (x *GetContextsByExecutionResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77] +func (x *GetContextTypesByIDResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4507,45 +4911,50 @@ func (x *GetContextsByExecutionResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetContextsByExecutionResponse.ProtoReflect.Descriptor instead. -func (*GetContextsByExecutionResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{77} +// Deprecated: Use GetContextTypesByIDResponse.ProtoReflect.Descriptor instead. +func (*GetContextTypesByIDResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{79} } -func (x *GetContextsByExecutionResponse) GetContexts() []*Context { +func (x *GetContextTypesByIDResponse) GetContextTypes() []*ContextType { if x != nil { - return x.Contexts + return x.ContextTypes } return nil } -type GetParentContextsByContextRequest struct { +// Request to retrieve Contexts using List options. +// If option is not specified then all Contexts are returned. +type GetContextsRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + // Specify options. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. + Options *ListOperationOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetParentContextsByContextRequest) Reset() { - *x = GetParentContextsByContextRequest{} +func (x *GetContextsRequest) Reset() { + *x = GetContextsRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetParentContextsByContextRequest) String() string { +func (x *GetContextsRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetParentContextsByContextRequest) ProtoMessage() {} +func (*GetContextsRequest) ProtoMessage() {} -func (x *GetParentContextsByContextRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78] +func (x *GetContextsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4556,50 +4965,54 @@ func (x *GetParentContextsByContextRequest) ProtoReflect() protoreflect.Message return mi.MessageOf(x) } -// Deprecated: Use GetParentContextsByContextRequest.ProtoReflect.Descriptor instead. -func (*GetParentContextsByContextRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{78} +// Deprecated: Use GetContextsRequest.ProtoReflect.Descriptor instead. +func (*GetContextsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{80} } -func (x *GetParentContextsByContextRequest) GetContextId() int64 { - if x != nil && x.ContextId != nil { - return *x.ContextId +func (x *GetContextsRequest) GetOptions() *ListOperationOptions { + if x != nil { + return x.Options } - return 0 + return nil } -func (x *GetParentContextsByContextRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetParentContextsByContextResponse struct { +type GetContextsResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields + // Returned contexts. Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + // Token to use to retrieve next page of results if list options are used in + // the request. 
+ NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` } -func (x *GetParentContextsByContextResponse) Reset() { - *x = GetParentContextsByContextResponse{} +func (x *GetContextsResponse) Reset() { + *x = GetContextsResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetParentContextsByContextResponse) String() string { +func (x *GetContextsResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetParentContextsByContextResponse) ProtoMessage() {} +func (*GetContextsResponse) ProtoMessage() {} -func (x *GetParentContextsByContextResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79] +func (x *GetContextsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4610,45 +5023,66 @@ func (x *GetParentContextsByContextResponse) ProtoReflect() protoreflect.Message return mi.MessageOf(x) } -// Deprecated: Use GetParentContextsByContextResponse.ProtoReflect.Descriptor instead. -func (*GetParentContextsByContextResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{79} +// Deprecated: Use GetContextsResponse.ProtoReflect.Descriptor instead. +func (*GetContextsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{81} } -func (x *GetParentContextsByContextResponse) GetContexts() []*Context { +func (x *GetContextsResponse) GetContexts() []*Context { if x != nil { return x.Contexts } return nil } -type GetChildrenContextsByContextRequest struct { +func (x *GetContextsResponse) GetNextPageToken() string { + if x != nil && x.NextPageToken != nil { + return *x.NextPageToken + } + return "" +} + +type GetContextsByTypeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // Specify options. + // Currently supports: + // 1. Field to order the results. + // 2. Page size. + // If set, the request will + // first fetch all contexts with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Contexts of the page size. + // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. + Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + // If not set, it looks for the type with type_name and options with default + // type_version. + TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetChildrenContextsByContextRequest) Reset() { - *x = GetChildrenContextsByContextRequest{} +func (x *GetContextsByTypeRequest) Reset() { + *x = GetContextsByTypeRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetChildrenContextsByContextRequest) String() string { +func (x *GetContextsByTypeRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetChildrenContextsByContextRequest) ProtoMessage() {} +func (*GetContextsByTypeRequest) ProtoMessage() {} -func (x *GetChildrenContextsByContextRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80] +func (x *GetContextsByTypeRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4659,50 +5093,67 @@ func (x *GetChildrenContextsByContextRequest) ProtoReflect() protoreflect.Messag return mi.MessageOf(x) } -// Deprecated: Use GetChildrenContextsByContextRequest.ProtoReflect.Descriptor instead. -func (*GetChildrenContextsByContextRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{80} +// Deprecated: Use GetContextsByTypeRequest.ProtoReflect.Descriptor instead. +func (*GetContextsByTypeRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{82} } -func (x *GetChildrenContextsByContextRequest) GetContextId() int64 { - if x != nil && x.ContextId != nil { - return *x.ContextId +func (x *GetContextsByTypeRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName } - return 0 + return "" } -func (x *GetChildrenContextsByContextRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsByTypeRequest) GetOptions() *ListOperationOptions { + if x != nil { + return x.Options + } + return nil +} + +func (x *GetContextsByTypeRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion + } + return "" +} + +func (x *GetContextsByTypeRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetChildrenContextsByContextResponse struct { +type GetContextsByTypeResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` + // Token to use to retrieve next page of results if list options are used in + // the request. 
+ NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` } -func (x *GetChildrenContextsByContextResponse) Reset() { - *x = GetChildrenContextsByContextResponse{} +func (x *GetContextsByTypeResponse) Reset() { + *x = GetContextsByTypeResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetChildrenContextsByContextResponse) String() string { +func (x *GetContextsByTypeResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetChildrenContextsByContextResponse) ProtoMessage() {} +func (*GetContextsByTypeResponse) ProtoMessage() {} -func (x *GetChildrenContextsByContextResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81] +func (x *GetContextsByTypeResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4713,50 +5164,56 @@ func (x *GetChildrenContextsByContextResponse) ProtoReflect() protoreflect.Messa return mi.MessageOf(x) } -// Deprecated: Use GetChildrenContextsByContextResponse.ProtoReflect.Descriptor instead. -func (*GetChildrenContextsByContextResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{81} +// Deprecated: Use GetContextsByTypeResponse.ProtoReflect.Descriptor instead. +func (*GetContextsByTypeResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{83} } -func (x *GetChildrenContextsByContextResponse) GetContexts() []*Context { +func (x *GetContextsByTypeResponse) GetContexts() []*Context { if x != nil { return x.Contexts } return nil } -type GetArtifactsByContextRequest struct { +func (x *GetContextsByTypeResponse) GetNextPageToken() string { + if x != nil && x.NextPageToken != nil { + return *x.NextPageToken + } + return "" +} + +type GetContextByTypeAndNameRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` - // Specify List options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. - Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + TypeName *string `protobuf:"bytes,1,opt,name=type_name,json=typeName" json:"type_name,omitempty"` + // If not set, it looks for the type with type_name and context_name with + // default type_version. + TypeVersion *string `protobuf:"bytes,3,opt,name=type_version,json=typeVersion" json:"type_version,omitempty"` + ContextName *string `protobuf:"bytes,2,opt,name=context_name,json=contextName" json:"context_name,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,4,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetArtifactsByContextRequest) Reset() { - *x = GetArtifactsByContextRequest{} +func (x *GetContextByTypeAndNameRequest) Reset() { + *x = GetContextByTypeAndNameRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetArtifactsByContextRequest) String() string { +func (x *GetContextByTypeAndNameRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetArtifactsByContextRequest) ProtoMessage() {} +func (*GetContextByTypeAndNameRequest) ProtoMessage() {} -func (x *GetArtifactsByContextRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82] +func (x *GetContextByTypeAndNameRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4767,60 +5224,64 @@ func (x *GetArtifactsByContextRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetArtifactsByContextRequest.ProtoReflect.Descriptor instead. -func (*GetArtifactsByContextRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{82} +// Deprecated: Use GetContextByTypeAndNameRequest.ProtoReflect.Descriptor instead. +func (*GetContextByTypeAndNameRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{84} } -func (x *GetArtifactsByContextRequest) GetContextId() int64 { - if x != nil && x.ContextId != nil { - return *x.ContextId +func (x *GetContextByTypeAndNameRequest) GetTypeName() string { + if x != nil && x.TypeName != nil { + return *x.TypeName } - return 0 + return "" } -func (x *GetArtifactsByContextRequest) GetOptions() *ListOperationOptions { - if x != nil { - return x.Options +func (x *GetContextByTypeAndNameRequest) GetTypeVersion() string { + if x != nil && x.TypeVersion != nil { + return *x.TypeVersion } - return nil + return "" } -func (x *GetArtifactsByContextRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextByTypeAndNameRequest) GetContextName() string { + if x != nil && x.ContextName != nil { + return *x.ContextName + } + return "" +} + +func (x *GetContextByTypeAndNameRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetArtifactsByContextResponse struct { +type GetContextByTypeAndNameResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Artifacts []*Artifact `protobuf:"bytes,1,rep,name=artifacts" json:"artifacts,omitempty"` - // Token to use to retrieve next page of results if list options are used in - // the request. 
- NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` + Context *Context `protobuf:"bytes,1,opt,name=context" json:"context,omitempty"` } -func (x *GetArtifactsByContextResponse) Reset() { - *x = GetArtifactsByContextResponse{} +func (x *GetContextByTypeAndNameResponse) Reset() { + *x = GetContextByTypeAndNameResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetArtifactsByContextResponse) String() string { +func (x *GetContextByTypeAndNameResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetArtifactsByContextResponse) ProtoMessage() {} +func (*GetContextByTypeAndNameResponse) ProtoMessage() {} -func (x *GetArtifactsByContextResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83] +func (x *GetContextByTypeAndNameResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4831,57 +5292,46 @@ func (x *GetArtifactsByContextResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetArtifactsByContextResponse.ProtoReflect.Descriptor instead. -func (*GetArtifactsByContextResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{83} +// Deprecated: Use GetContextByTypeAndNameResponse.ProtoReflect.Descriptor instead. +func (*GetContextByTypeAndNameResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{85} } -func (x *GetArtifactsByContextResponse) GetArtifacts() []*Artifact { +func (x *GetContextByTypeAndNameResponse) GetContext() *Context { if x != nil { - return x.Artifacts + return x.Context } return nil } -func (x *GetArtifactsByContextResponse) GetNextPageToken() string { - if x != nil && x.NextPageToken != nil { - return *x.NextPageToken - } - return "" -} - -type GetExecutionsByContextRequest struct { +type GetContextsByIDRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` - // Specify List options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. - Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + // A list of context ids to retrieve. + ContextIds []int64 `protobuf:"varint,1,rep,name=context_ids,json=contextIds" json:"context_ids,omitempty"` // Options regarding transactions. 
- TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetExecutionsByContextRequest) Reset() { - *x = GetExecutionsByContextRequest{} +func (x *GetContextsByIDRequest) Reset() { + *x = GetContextsByIDRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByContextRequest) String() string { +func (x *GetContextsByIDRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByContextRequest) ProtoMessage() {} +func (*GetContextsByIDRequest) ProtoMessage() {} -func (x *GetExecutionsByContextRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84] +func (x *GetContextsByIDRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4892,62 +5342,52 @@ func (x *GetExecutionsByContextRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByContextRequest.ProtoReflect.Descriptor instead. -func (*GetExecutionsByContextRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{84} +// Deprecated: Use GetContextsByIDRequest.ProtoReflect.Descriptor instead. +func (*GetContextsByIDRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{86} } -func (x *GetExecutionsByContextRequest) GetContextId() int64 { - if x != nil && x.ContextId != nil { - return *x.ContextId +func (x *GetContextsByIDRequest) GetContextIds() []int64 { + if x != nil { + return x.ContextIds } - return 0 + return nil } -func (x *GetExecutionsByContextRequest) GetOptions() *ListOperationOptions { - if x != nil { - return x.Options - } - return nil -} - -func (x *GetExecutionsByContextRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsByIDRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -type GetExecutionsByContextResponse struct { +type GetContextsByIDResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` - // Token to use to retrieve next page of results if list options are used in - // the request. - NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` - // Options regarding transactions. - TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` + // The result is not index-aligned: if an id is not found, it is not + // returned. 
+ Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` } -func (x *GetExecutionsByContextResponse) Reset() { - *x = GetExecutionsByContextResponse{} +func (x *GetContextsByIDResponse) Reset() { + *x = GetContextsByIDResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetExecutionsByContextResponse) String() string { +func (x *GetContextsByIDResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetExecutionsByContextResponse) ProtoMessage() {} +func (*GetContextsByIDResponse) ProtoMessage() {} -func (x *GetExecutionsByContextResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85] +func (x *GetContextsByIDResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -4958,61 +5398,45 @@ func (x *GetExecutionsByContextResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetExecutionsByContextResponse.ProtoReflect.Descriptor instead. -func (*GetExecutionsByContextResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{85} -} - -func (x *GetExecutionsByContextResponse) GetExecutions() []*Execution { - if x != nil { - return x.Executions - } - return nil -} - -func (x *GetExecutionsByContextResponse) GetNextPageToken() string { - if x != nil && x.NextPageToken != nil { - return *x.NextPageToken - } - return "" +// Deprecated: Use GetContextsByIDResponse.ProtoReflect.Descriptor instead. +func (*GetContextsByIDResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{87} } -func (x *GetExecutionsByContextResponse) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsByIDResponse) GetContexts() []*Context { if x != nil { - return x.TransactionOptions + return x.Contexts } return nil } -// A lineage query request to specify the query nodes of interests and the -// boundary conditions for the returned graph. -type GetLineageGraphRequest struct { +type GetContextsByArtifactRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Options *LineageGraphQueryOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` + ArtifactId *int64 `protobuf:"varint,1,opt,name=artifact_id,json=artifactId" json:"artifact_id,omitempty"` // Options regarding transactions. 
TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *GetLineageGraphRequest) Reset() { - *x = GetLineageGraphRequest{} +func (x *GetContextsByArtifactRequest) Reset() { + *x = GetContextsByArtifactRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[88] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetLineageGraphRequest) String() string { +func (x *GetContextsByArtifactRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetLineageGraphRequest) ProtoMessage() {} +func (*GetContextsByArtifactRequest) ProtoMessage() {} -func (x *GetLineageGraphRequest) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86] +func (x *GetContextsByArtifactRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[88] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -5023,52 +5447,50 @@ func (x *GetLineageGraphRequest) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetLineageGraphRequest.ProtoReflect.Descriptor instead. -func (*GetLineageGraphRequest) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{86} +// Deprecated: Use GetContextsByArtifactRequest.ProtoReflect.Descriptor instead. +func (*GetContextsByArtifactRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{88} } -func (x *GetLineageGraphRequest) GetOptions() *LineageGraphQueryOptions { - if x != nil { - return x.Options +func (x *GetContextsByArtifactRequest) GetArtifactId() int64 { + if x != nil && x.ArtifactId != nil { + return *x.ArtifactId } - return nil + return 0 } -func (x *GetLineageGraphRequest) GetTransactionOptions() *TransactionOptions { +func (x *GetContextsByArtifactRequest) GetTransactionOptions() *TransactionOptions { if x != nil { return x.TransactionOptions } return nil } -// A connected lineage `subgraph` about the MLMD nodes derived from -// LineageGraphRequest.query_conditions. 
-type GetLineageGraphResponse struct { +type GetContextsByArtifactResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Subgraph *LineageGraph `protobuf:"bytes,1,opt,name=subgraph" json:"subgraph,omitempty"` + Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` } -func (x *GetLineageGraphResponse) Reset() { - *x = GetLineageGraphResponse{} +func (x *GetContextsByArtifactResponse) Reset() { + *x = GetContextsByArtifactResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *GetLineageGraphResponse) String() string { +func (x *GetContextsByArtifactResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetLineageGraphResponse) ProtoMessage() {} +func (*GetContextsByArtifactResponse) ProtoMessage() {} -func (x *GetLineageGraphResponse) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87] +func (x *GetContextsByArtifactResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -5079,54 +5501,45 @@ func (x *GetLineageGraphResponse) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GetLineageGraphResponse.ProtoReflect.Descriptor instead. -func (*GetLineageGraphResponse) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{87} +// Deprecated: Use GetContextsByArtifactResponse.ProtoReflect.Descriptor instead. +func (*GetContextsByArtifactResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{89} } -func (x *GetLineageGraphResponse) GetSubgraph() *LineageGraph { +func (x *GetContextsByArtifactResponse) GetContexts() []*Context { if x != nil { - return x.Subgraph + return x.Contexts } return nil } -type PutArtifactsRequest_Options struct { +type GetContextsByExecutionRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // When there are multiple writers to update an existing node to - // different states, there may be a race and the end result of the - // concurrent update is nondeterministic. If the field is set, then an - // optimistic concurrency control (OCC) scheme is used during update: - // it compares the `artifact`.`last_update_time_since_epoch` in the request - // with the stored `last_update_time_since_epoch` having the same - // `artifact`.`id`. If they are different, the request fails, and the user - // can read the stored node and retry node update. - // When the option is set, the timestamp after update is guaranteed to be - // increased and different from the input artifact. - // When set the option, the caller should set it for all concurrent writers. 
- AbortIfLatestUpdatedTimeChanged *bool `protobuf:"varint,1,opt,name=abort_if_latest_updated_time_changed,json=abortIfLatestUpdatedTimeChanged" json:"abort_if_latest_updated_time_changed,omitempty"` + ExecutionId *int64 `protobuf:"varint,1,opt,name=execution_id,json=executionId" json:"execution_id,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *PutArtifactsRequest_Options) Reset() { - *x = PutArtifactsRequest_Options{} +func (x *GetContextsByExecutionRequest) Reset() { + *x = GetContextsByExecutionRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *PutArtifactsRequest_Options) String() string { +func (x *GetContextsByExecutionRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*PutArtifactsRequest_Options) ProtoMessage() {} +func (*GetContextsByExecutionRequest) ProtoMessage() {} -func (x *PutArtifactsRequest_Options) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89] +func (x *GetContextsByExecutionRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -5137,53 +5550,50 @@ func (x *PutArtifactsRequest_Options) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use PutArtifactsRequest_Options.ProtoReflect.Descriptor instead. -func (*PutArtifactsRequest_Options) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{4, 0} +// Deprecated: Use GetContextsByExecutionRequest.ProtoReflect.Descriptor instead. +func (*GetContextsByExecutionRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{90} } -func (x *PutArtifactsRequest_Options) GetAbortIfLatestUpdatedTimeChanged() bool { - if x != nil && x.AbortIfLatestUpdatedTimeChanged != nil { - return *x.AbortIfLatestUpdatedTimeChanged +func (x *GetContextsByExecutionRequest) GetExecutionId() int64 { + if x != nil && x.ExecutionId != nil { + return *x.ExecutionId } - return false + return 0 } -// A pair of an artifact and an event used or generated by an execution, e.g., -// during the execution run, it uses none or many artifacts as input, and -// generate none or many artifacts as output. -type PutExecutionRequest_ArtifactAndEvent struct { +func (x *GetContextsByExecutionRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetContextsByExecutionResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // The pair may have an artifact. If present and no artifact.id is given, - // then it inserts the artifact, otherwise it updates the artifact. - Artifact *Artifact `protobuf:"bytes,1,opt,name=artifact" json:"artifact,omitempty"` - // The pair may have an event. Providing event.artifact_id or - // event.execution_id is optional. 
If the ids are given, it must align with - // the `artifact`.id / `execution`.id respectively. If artifact is not - // given and event.artifact_id is set, it must exist in the backend. - Event *Event `protobuf:"bytes,2,opt,name=event" json:"event,omitempty"` + Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` } -func (x *PutExecutionRequest_ArtifactAndEvent) Reset() { - *x = PutExecutionRequest_ArtifactAndEvent{} +func (x *GetContextsByExecutionResponse) Reset() { + *x = GetContextsByExecutionResponse{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *PutExecutionRequest_ArtifactAndEvent) String() string { +func (x *GetContextsByExecutionResponse) String() string { return protoimpl.X.MessageStringOf(x) } -func (*PutExecutionRequest_ArtifactAndEvent) ProtoMessage() {} +func (*GetContextsByExecutionResponse) ProtoMessage() {} -func (x *PutExecutionRequest_ArtifactAndEvent) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90] +func (x *GetContextsByExecutionResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -5194,54 +5604,45 @@ func (x *PutExecutionRequest_ArtifactAndEvent) ProtoReflect() protoreflect.Messa return mi.MessageOf(x) } -// Deprecated: Use PutExecutionRequest_ArtifactAndEvent.ProtoReflect.Descriptor instead. -func (*PutExecutionRequest_ArtifactAndEvent) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{14, 0} -} - -func (x *PutExecutionRequest_ArtifactAndEvent) GetArtifact() *Artifact { - if x != nil { - return x.Artifact - } - return nil +// Deprecated: Use GetContextsByExecutionResponse.ProtoReflect.Descriptor instead. +func (*GetContextsByExecutionResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{91} } -func (x *PutExecutionRequest_ArtifactAndEvent) GetEvent() *Event { +func (x *GetContextsByExecutionResponse) GetContexts() []*Context { if x != nil { - return x.Event + return x.Contexts } return nil } -type PutExecutionRequest_Options struct { +type GetParentContextsByContextRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // When there's a race to publish executions with a new context with the - // same context.name, by default there'll be one writer succeeds and - // the rest of the writers returning AlreadyExists errors. If set the field, - // the failed writer will reuse the stored context in the transaction. - ReuseContextIfAlreadyExist *bool `protobuf:"varint,1,opt,name=reuse_context_if_already_exist,json=reuseContextIfAlreadyExist" json:"reuse_context_if_already_exist,omitempty"` + ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + // Options regarding transactions. 
+ TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` } -func (x *PutExecutionRequest_Options) Reset() { - *x = PutExecutionRequest_Options{} +func (x *GetParentContextsByContextRequest) Reset() { + *x = GetParentContextsByContextRequest{} if protoimpl.UnsafeEnabled { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91] + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[92] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *PutExecutionRequest_Options) String() string { +func (x *GetParentContextsByContextRequest) String() string { return protoimpl.X.MessageStringOf(x) } -func (*PutExecutionRequest_Options) ProtoMessage() {} +func (*GetParentContextsByContextRequest) ProtoMessage() {} -func (x *PutExecutionRequest_Options) ProtoReflect() protoreflect.Message { - mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91] +func (x *GetParentContextsByContextRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[92] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -5252,314 +5653,1736 @@ func (x *PutExecutionRequest_Options) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use PutExecutionRequest_Options.ProtoReflect.Descriptor instead. -func (*PutExecutionRequest_Options) Descriptor() ([]byte, []int) { - return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{14, 1} +// Deprecated: Use GetParentContextsByContextRequest.ProtoReflect.Descriptor instead. +func (*GetParentContextsByContextRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{92} } -func (x *PutExecutionRequest_Options) GetReuseContextIfAlreadyExist() bool { - if x != nil && x.ReuseContextIfAlreadyExist != nil { - return *x.ReuseContextIfAlreadyExist +func (x *GetParentContextsByContextRequest) GetContextId() int64 { + if x != nil && x.ContextId != nil { + return *x.ContextId } - return false + return 0 } -var File_ml_metadata_proto_metadata_store_service_proto protoreflect.FileDescriptor +func (x *GetParentContextsByContextRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} -var file_ml_metadata_proto_metadata_store_service_proto_rawDesc = []byte{ - 0x0a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x73, 0x74, 0x6f, - 0x72, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x12, 0x0b, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x26, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x73, 0x0a, 0x0f, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x41, 0x6e, 0x64, 0x54, 0x79, 0x70, 0x65, 0x12, 0x31, 0x0a, 0x08, 0x61, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 
0x74, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x2d, 0x0a, 0x04, 0x74, - 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0xbf, 0x01, 0x0a, 0x11, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4d, 0x61, 0x70, - 0x12, 0x4e, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, - 0x74, 0x4d, 0x61, 0x70, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, - 0x1a, 0x5a, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, - 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x4d, 0x0a, 0x12, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4c, 0x69, - 0x73, 0x74, 0x12, 0x37, 0x0a, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, - 0x74, 0x52, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xc0, 0x01, 0x0a, 0x0e, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x12, 0x3a, - 0x0a, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, - 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x32, 0x0a, 0x03, 0x6d, 0x61, - 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, - 0x72, 0x75, 0x63, 0x74, 0x4d, 0x61, 0x70, 0x48, 0x00, 0x52, 0x03, 0x6d, 0x61, 0x70, 0x12, 0x35, - 0x0a, 0x04, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, +type GetParentContextsByContextResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` +} + +func (x *GetParentContextsByContextResponse) Reset() { + *x = GetParentContextsByContextResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[93] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetParentContextsByContextResponse) String() string { + return 
protoimpl.X.MessageStringOf(x) +} + +func (*GetParentContextsByContextResponse) ProtoMessage() {} + +func (x *GetParentContextsByContextResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[93] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetParentContextsByContextResponse.ProtoReflect.Descriptor instead. +func (*GetParentContextsByContextResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{93} +} + +func (x *GetParentContextsByContextResponse) GetContexts() []*Context { + if x != nil { + return x.Contexts + } + return nil +} + +type GetChildrenContextsByContextRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetChildrenContextsByContextRequest) Reset() { + *x = GetChildrenContextsByContextRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[94] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetChildrenContextsByContextRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetChildrenContextsByContextRequest) ProtoMessage() {} + +func (x *GetChildrenContextsByContextRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[94] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetChildrenContextsByContextRequest.ProtoReflect.Descriptor instead. 
+func (*GetChildrenContextsByContextRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{94} +} + +func (x *GetChildrenContextsByContextRequest) GetContextId() int64 { + if x != nil && x.ContextId != nil { + return *x.ContextId + } + return 0 +} + +func (x *GetChildrenContextsByContextRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetChildrenContextsByContextResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Contexts []*Context `protobuf:"bytes,1,rep,name=contexts" json:"contexts,omitempty"` +} + +func (x *GetChildrenContextsByContextResponse) Reset() { + *x = GetChildrenContextsByContextResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[95] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetChildrenContextsByContextResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetChildrenContextsByContextResponse) ProtoMessage() {} + +func (x *GetChildrenContextsByContextResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[95] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetChildrenContextsByContextResponse.ProtoReflect.Descriptor instead. +func (*GetChildrenContextsByContextResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{95} +} + +func (x *GetChildrenContextsByContextResponse) GetContexts() []*Context { + if x != nil { + return x.Contexts + } + return nil +} + +type GetParentContextsByContextsRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ContextIds []int64 `protobuf:"varint,1,rep,packed,name=context_ids,json=contextIds" json:"context_ids,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetParentContextsByContextsRequest) Reset() { + *x = GetParentContextsByContextsRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[96] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetParentContextsByContextsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetParentContextsByContextsRequest) ProtoMessage() {} + +func (x *GetParentContextsByContextsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[96] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetParentContextsByContextsRequest.ProtoReflect.Descriptor instead. 
+func (*GetParentContextsByContextsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{96} +} + +func (x *GetParentContextsByContextsRequest) GetContextIds() []int64 { + if x != nil { + return x.ContextIds + } + return nil +} + +func (x *GetParentContextsByContextsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetParentContextsByContextsResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Contexts map[int64]*GetParentContextsByContextsResponse_ParentContextsPerChild `protobuf:"bytes,2,rep,name=contexts" json:"contexts,omitempty" protobuf_key:"varint,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` +} + +func (x *GetParentContextsByContextsResponse) Reset() { + *x = GetParentContextsByContextsResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[97] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetParentContextsByContextsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetParentContextsByContextsResponse) ProtoMessage() {} + +func (x *GetParentContextsByContextsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[97] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetParentContextsByContextsResponse.ProtoReflect.Descriptor instead. +func (*GetParentContextsByContextsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{97} +} + +func (x *GetParentContextsByContextsResponse) GetContexts() map[int64]*GetParentContextsByContextsResponse_ParentContextsPerChild { + if x != nil { + return x.Contexts + } + return nil +} + +type GetChildrenContextsByContextsRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ContextIds []int64 `protobuf:"varint,1,rep,packed,name=context_ids,json=contextIds" json:"context_ids,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetChildrenContextsByContextsRequest) Reset() { + *x = GetChildrenContextsByContextsRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[98] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetChildrenContextsByContextsRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetChildrenContextsByContextsRequest) ProtoMessage() {} + +func (x *GetChildrenContextsByContextsRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[98] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetChildrenContextsByContextsRequest.ProtoReflect.Descriptor instead. 
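// Illustrative sketch only (not part of the generated file): reading the batched
// parent-context lookup defined above. The MetadataStoreServiceClient interface
// and its GetParentContextsByContexts method are assumed from the generated gRPC
// stubs, which are not shown here; "context" is the standard library package.
func parentContextsByChild(ctx context.Context, c MetadataStoreServiceClient, childIDs []int64) (map[int64][]*Context, error) {
	resp, err := c.GetParentContextsByContexts(ctx, &GetParentContextsByContextsRequest{
		ContextIds: childIDs, // the child contexts whose parents we want
	})
	if err != nil {
		return nil, err
	}
	out := make(map[int64][]*Context, len(resp.GetContexts()))
	for childID, parents := range resp.GetContexts() { // map is keyed by child context id
		out[childID] = parents.GetParentContexts()
	}
	return out, nil
}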
+func (*GetChildrenContextsByContextsRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{98} +} + +func (x *GetChildrenContextsByContextsRequest) GetContextIds() []int64 { + if x != nil { + return x.ContextIds + } + return nil +} + +func (x *GetChildrenContextsByContextsRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetChildrenContextsByContextsResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Contexts map[int64]*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent `protobuf:"bytes,2,rep,name=contexts" json:"contexts,omitempty" protobuf_key:"varint,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` +} + +func (x *GetChildrenContextsByContextsResponse) Reset() { + *x = GetChildrenContextsByContextsResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[99] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetChildrenContextsByContextsResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetChildrenContextsByContextsResponse) ProtoMessage() {} + +func (x *GetChildrenContextsByContextsResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[99] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetChildrenContextsByContextsResponse.ProtoReflect.Descriptor instead. +func (*GetChildrenContextsByContextsResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{99} +} + +func (x *GetChildrenContextsByContextsResponse) GetContexts() map[int64]*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent { + if x != nil { + return x.Contexts + } + return nil +} + +type GetArtifactsByContextRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + // Specify List options. + // Currently supports: + // 1. Field to order the results. + // 2. Page size. + Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + // Options regarding transactions. 
+ TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetArtifactsByContextRequest) Reset() { + *x = GetArtifactsByContextRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[100] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetArtifactsByContextRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetArtifactsByContextRequest) ProtoMessage() {} + +func (x *GetArtifactsByContextRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[100] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetArtifactsByContextRequest.ProtoReflect.Descriptor instead. +func (*GetArtifactsByContextRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{100} +} + +func (x *GetArtifactsByContextRequest) GetContextId() int64 { + if x != nil && x.ContextId != nil { + return *x.ContextId + } + return 0 +} + +func (x *GetArtifactsByContextRequest) GetOptions() *ListOperationOptions { + if x != nil { + return x.Options + } + return nil +} + +func (x *GetArtifactsByContextRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetArtifactsByContextResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Artifacts []*Artifact `protobuf:"bytes,1,rep,name=artifacts" json:"artifacts,omitempty"` + // Token to use to retrieve next page of results if list options are used in + // the request. + NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` +} + +func (x *GetArtifactsByContextResponse) Reset() { + *x = GetArtifactsByContextResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[101] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetArtifactsByContextResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetArtifactsByContextResponse) ProtoMessage() {} + +func (x *GetArtifactsByContextResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[101] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetArtifactsByContextResponse.ProtoReflect.Descriptor instead. 
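// Illustrative sketch only: fetching the artifacts attributed to one context via
// GetArtifactsByContextRequest/Response above. The gRPC client method and the
// proto.Int64 helper (google.golang.org/protobuf/proto) are assumptions; page
// size, ordering, and token handling live on ListOperationOptions, which is
// defined elsewhere and left unset in this sketch.
func artifactsOfContext(ctx context.Context, c MetadataStoreServiceClient, contextID int64) ([]*Artifact, error) {
	resp, err := c.GetArtifactsByContext(ctx, &GetArtifactsByContextRequest{
		ContextId: proto.Int64(contextID),
	})
	if err != nil {
		return nil, err
	}
	// When list options are used, resp.GetNextPageToken() stays non-empty
	// until the final page has been returned.
	return resp.GetArtifacts(), nil
}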
+func (*GetArtifactsByContextResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{101} +} + +func (x *GetArtifactsByContextResponse) GetArtifacts() []*Artifact { + if x != nil { + return x.Artifacts + } + return nil +} + +func (x *GetArtifactsByContextResponse) GetNextPageToken() string { + if x != nil && x.NextPageToken != nil { + return *x.NextPageToken + } + return "" +} + +type GetExecutionsByContextRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ContextId *int64 `protobuf:"varint,1,opt,name=context_id,json=contextId" json:"context_id,omitempty"` + // Specify List options. + // Currently supports: + // 1. Field to order the results. + // 2. Page size. + Options *ListOperationOptions `protobuf:"bytes,2,opt,name=options" json:"options,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetExecutionsByContextRequest) Reset() { + *x = GetExecutionsByContextRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[102] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetExecutionsByContextRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetExecutionsByContextRequest) ProtoMessage() {} + +func (x *GetExecutionsByContextRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[102] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetExecutionsByContextRequest.ProtoReflect.Descriptor instead. +func (*GetExecutionsByContextRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{102} +} + +func (x *GetExecutionsByContextRequest) GetContextId() int64 { + if x != nil && x.ContextId != nil { + return *x.ContextId + } + return 0 +} + +func (x *GetExecutionsByContextRequest) GetOptions() *ListOperationOptions { + if x != nil { + return x.Options + } + return nil +} + +func (x *GetExecutionsByContextRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetExecutionsByContextResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Executions []*Execution `protobuf:"bytes,1,rep,name=executions" json:"executions,omitempty"` + // Token to use to retrieve next page of results if list options are used in + // the request. + NextPageToken *string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken" json:"next_page_token,omitempty"` + // Options regarding transactions. 
+ TransactionOptions *TransactionOptions `protobuf:"bytes,3,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetExecutionsByContextResponse) Reset() { + *x = GetExecutionsByContextResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[103] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetExecutionsByContextResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetExecutionsByContextResponse) ProtoMessage() {} + +func (x *GetExecutionsByContextResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[103] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetExecutionsByContextResponse.ProtoReflect.Descriptor instead. +func (*GetExecutionsByContextResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{103} +} + +func (x *GetExecutionsByContextResponse) GetExecutions() []*Execution { + if x != nil { + return x.Executions + } + return nil +} + +func (x *GetExecutionsByContextResponse) GetNextPageToken() string { + if x != nil && x.NextPageToken != nil { + return *x.NextPageToken + } + return "" +} + +func (x *GetExecutionsByContextResponse) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +// TODO(b/283852485): Deprecate GetLineageGraph API after migration to +// GetLineageSubgraph API. +// A lineage query request to specify the query nodes of interest and the +// boundary conditions for pruning the returned graph. +type GetLineageGraphRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Options *LineageGraphQueryOptions `protobuf:"bytes,1,opt,name=options" json:"options,omitempty"` + // Options regarding transactions. + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetLineageGraphRequest) Reset() { + *x = GetLineageGraphRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[104] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetLineageGraphRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetLineageGraphRequest) ProtoMessage() {} + +func (x *GetLineageGraphRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[104] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetLineageGraphRequest.ProtoReflect.Descriptor instead. 
+func (*GetLineageGraphRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{104} +} + +func (x *GetLineageGraphRequest) GetOptions() *LineageGraphQueryOptions { + if x != nil { + return x.Options + } + return nil +} + +func (x *GetLineageGraphRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +// A connected lineage `subgraph` about the MLMD nodes derived from +// LineageGraphRequest.query_conditions. +type GetLineageGraphResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Subgraph *LineageGraph `protobuf:"bytes,1,opt,name=subgraph" json:"subgraph,omitempty"` +} + +func (x *GetLineageGraphResponse) Reset() { + *x = GetLineageGraphResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[105] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetLineageGraphResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetLineageGraphResponse) ProtoMessage() {} + +func (x *GetLineageGraphResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[105] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetLineageGraphResponse.ProtoReflect.Descriptor instead. +func (*GetLineageGraphResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{105} +} + +func (x *GetLineageGraphResponse) GetSubgraph() *LineageGraph { + if x != nil { + return x.Subgraph + } + return nil +} + +type GetLineageSubgraphRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Query options for lineage graph tracing from a list of interested + // nodes. + // A lineage subgraph without node details (e.g., external_id, properties) + // will be returned. Please refer to `LineageSubgraphQueryOptions` for more + // details. + LineageSubgraphQueryOptions *LineageSubgraphQueryOptions `protobuf:"bytes,1,opt,name=lineage_subgraph_query_options,json=lineageSubgraphQueryOptions" json:"lineage_subgraph_query_options,omitempty"` + // `read_mask` contains user specified paths of fields that should be included + // in the returned lineage subgraph. + // Supported field paths are: 'artifacts', 'executions', 'contexts', + // 'artifact_types', 'execution_types', 'context_types', and 'events'. + // TODO(b/283852485): Include 'associations' or 'attributions' in the + // returned graph. + // If 'artifacts', 'executions', or 'contexts' is specified in `read_mask`, + // the dehydrated nodes will be included. + // Note: A dehydrated node means a node containing only its id and no + // other information. User should call GetNodesByID or other APIs to get + // node details later on. + // If 'artifact_types', 'execution_types', or 'context_types' is specified + // in `read_mask`, all the node types will be included. + // If 'events' is specified in `read_mask`, the events will be included. + // If `read_mask` is not set, the API will return all the fields in + // the returned graph. 
+ // Note: Only paths of fields in LineageGraph message are supported. Paths + // of fields in the submessage, such as "artifacts.id", "contexts.name" are + // not acknowledged. + ReadMask *fieldmaskpb.FieldMask `protobuf:"bytes,3,opt,name=read_mask,json=readMask" json:"read_mask,omitempty"` + TransactionOptions *TransactionOptions `protobuf:"bytes,2,opt,name=transaction_options,json=transactionOptions" json:"transaction_options,omitempty"` +} + +func (x *GetLineageSubgraphRequest) Reset() { + *x = GetLineageSubgraphRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[106] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetLineageSubgraphRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetLineageSubgraphRequest) ProtoMessage() {} + +func (x *GetLineageSubgraphRequest) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[106] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetLineageSubgraphRequest.ProtoReflect.Descriptor instead. +func (*GetLineageSubgraphRequest) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{106} +} + +func (x *GetLineageSubgraphRequest) GetLineageSubgraphQueryOptions() *LineageSubgraphQueryOptions { + if x != nil { + return x.LineageSubgraphQueryOptions + } + return nil +} + +func (x *GetLineageSubgraphRequest) GetReadMask() *fieldmaskpb.FieldMask { + if x != nil { + return x.ReadMask + } + return nil +} + +func (x *GetLineageSubgraphRequest) GetTransactionOptions() *TransactionOptions { + if x != nil { + return x.TransactionOptions + } + return nil +} + +type GetLineageSubgraphResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // A lineage subgraph of MLMD nodes and relations retrieved from lineage + // graph tracing. + LineageSubgraph *LineageGraph `protobuf:"bytes,1,opt,name=lineage_subgraph,json=lineageSubgraph" json:"lineage_subgraph,omitempty"` +} + +func (x *GetLineageSubgraphResponse) Reset() { + *x = GetLineageSubgraphResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[107] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetLineageSubgraphResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetLineageSubgraphResponse) ProtoMessage() {} + +func (x *GetLineageSubgraphResponse) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[107] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetLineageSubgraphResponse.ProtoReflect.Descriptor instead. 
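// Illustrative sketch only: requesting a pruned lineage subgraph with a read_mask
// restricted to the field paths listed in the comment above. The contents of
// LineageSubgraphQueryOptions and the gRPC client method are assumptions; only
// the request/response shapes come from this file. fieldmaskpb is assumed to be
// google.golang.org/protobuf/types/known/fieldmaskpb, as referenced above.
func traceLineage(ctx context.Context, c MetadataStoreServiceClient) (*LineageGraph, error) {
	req := &GetLineageSubgraphRequest{
		// Starting nodes and traversal bounds go here; the message is defined
		// in metadata_store.proto and left empty in this sketch.
		LineageSubgraphQueryOptions: &LineageSubgraphQueryOptions{},
		// Return only dehydrated artifacts/executions plus the events that
		// connect them, per the supported paths listed above.
		ReadMask: &fieldmaskpb.FieldMask{Paths: []string{"artifacts", "executions", "events"}},
	}
	resp, err := c.GetLineageSubgraph(ctx, req)
	if err != nil {
		return nil, err
	}
	return resp.GetLineageSubgraph(), nil
}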
+func (*GetLineageSubgraphResponse) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{107} +} + +func (x *GetLineageSubgraphResponse) GetLineageSubgraph() *LineageGraph { + if x != nil { + return x.LineageSubgraph + } + return nil +} + +type PutArtifactsRequest_Options struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // When there are multiple writers to update an existing node to + // different states, there may be a race and the end result of the + // concurrent update is nondeterministic. If the field is set, then an + // optimistic concurrency control (OCC) scheme is used during update: + // it compares the `artifact`.`last_update_time_since_epoch` in the request + // with the stored `last_update_time_since_epoch` having the same + // `artifact`.`id`. If they are different, the request fails, and the user + // can read the stored node and retry node update. + // When the option is set, the timestamp after update is guaranteed to be + // increased and different from the input artifact. + // When set the option, the caller should set it for all concurrent writers. + AbortIfLatestUpdatedTimeChanged *bool `protobuf:"varint,1,opt,name=abort_if_latest_updated_time_changed,json=abortIfLatestUpdatedTimeChanged" json:"abort_if_latest_updated_time_changed,omitempty"` +} + +func (x *PutArtifactsRequest_Options) Reset() { + *x = PutArtifactsRequest_Options{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[109] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutArtifactsRequest_Options) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutArtifactsRequest_Options) ProtoMessage() {} + +func (x *PutArtifactsRequest_Options) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[109] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutArtifactsRequest_Options.ProtoReflect.Descriptor instead. +func (*PutArtifactsRequest_Options) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{4, 0} +} + +func (x *PutArtifactsRequest_Options) GetAbortIfLatestUpdatedTimeChanged() bool { + if x != nil && x.AbortIfLatestUpdatedTimeChanged != nil { + return *x.AbortIfLatestUpdatedTimeChanged + } + return false +} + +// A pair of an artifact and an event used or generated by an execution, e.g., +// during the execution run, it uses none or many artifacts as input, and +// generate none or many artifacts as output. +type PutExecutionRequest_ArtifactAndEvent struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The pair may have an artifact. If present and no artifact.id is given, + // then it inserts the artifact, otherwise it updates the artifact. + Artifact *Artifact `protobuf:"bytes,1,opt,name=artifact" json:"artifact,omitempty"` + // The pair may have an event. Providing event.artifact_id or + // event.execution_id is optional. If the ids are given, it must align with + // the `artifact`.id / `execution`.id respectively. If artifact is not + // given and event.artifact_id is set, it must exist in the backend. 
+ Event *Event `protobuf:"bytes,2,opt,name=event" json:"event,omitempty"` +} + +func (x *PutExecutionRequest_ArtifactAndEvent) Reset() { + *x = PutExecutionRequest_ArtifactAndEvent{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[110] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutExecutionRequest_ArtifactAndEvent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutExecutionRequest_ArtifactAndEvent) ProtoMessage() {} + +func (x *PutExecutionRequest_ArtifactAndEvent) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[110] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutExecutionRequest_ArtifactAndEvent.ProtoReflect.Descriptor instead. +func (*PutExecutionRequest_ArtifactAndEvent) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{14, 0} +} + +func (x *PutExecutionRequest_ArtifactAndEvent) GetArtifact() *Artifact { + if x != nil { + return x.Artifact + } + return nil +} + +func (x *PutExecutionRequest_ArtifactAndEvent) GetEvent() *Event { + if x != nil { + return x.Event + } + return nil +} + +type PutExecutionRequest_Options struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // When there's a race to publish executions with a new context with the + // same context.name, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. If set to true, + // the API will reuse the stored context in the transaction and perform + // an update. + ReuseContextIfAlreadyExist *bool `protobuf:"varint,1,opt,name=reuse_context_if_already_exist,json=reuseContextIfAlreadyExist" json:"reuse_context_if_already_exist,omitempty"` + // When there's a race to publish executions with a new artifact with the + // same artifact.external_id, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. + // If set to true and an Artifact has non-empty external_id, + // the API will reuse the stored artifact in the transaction and + // perform an update. Otherwise, it will fall back to relying on `id` field + // to decide if it's update (if `id` exists) or insert (if `id` is empty). 
+ ReuseArtifactIfAlreadyExistByExternalId *bool `protobuf:"varint,2,opt,name=reuse_artifact_if_already_exist_by_external_id,json=reuseArtifactIfAlreadyExistByExternalId" json:"reuse_artifact_if_already_exist_by_external_id,omitempty"` +} + +func (x *PutExecutionRequest_Options) Reset() { + *x = PutExecutionRequest_Options{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[111] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutExecutionRequest_Options) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutExecutionRequest_Options) ProtoMessage() {} + +func (x *PutExecutionRequest_Options) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[111] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutExecutionRequest_Options.ProtoReflect.Descriptor instead. +func (*PutExecutionRequest_Options) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{14, 1} +} + +func (x *PutExecutionRequest_Options) GetReuseContextIfAlreadyExist() bool { + if x != nil && x.ReuseContextIfAlreadyExist != nil { + return *x.ReuseContextIfAlreadyExist + } + return false +} + +func (x *PutExecutionRequest_Options) GetReuseArtifactIfAlreadyExistByExternalId() bool { + if x != nil && x.ReuseArtifactIfAlreadyExistByExternalId != nil { + return *x.ReuseArtifactIfAlreadyExistByExternalId + } + return false +} + +type PutLineageSubgraphRequest_EventEdge struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Index in the array of executions. + ExecutionIndex *int32 `protobuf:"varint,1,opt,name=execution_index,json=executionIndex" json:"execution_index,omitempty"` + // Index in the array of artifacts. + ArtifactIndex *int32 `protobuf:"varint,2,opt,name=artifact_index,json=artifactIndex" json:"artifact_index,omitempty"` + Event *Event `protobuf:"bytes,3,opt,name=event" json:"event,omitempty"` +} + +func (x *PutLineageSubgraphRequest_EventEdge) Reset() { + *x = PutLineageSubgraphRequest_EventEdge{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[112] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutLineageSubgraphRequest_EventEdge) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutLineageSubgraphRequest_EventEdge) ProtoMessage() {} + +func (x *PutLineageSubgraphRequest_EventEdge) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[112] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutLineageSubgraphRequest_EventEdge.ProtoReflect.Descriptor instead. 
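// Illustrative sketch only: the idempotent-publish options and an event edge as
// used by PutExecution / PutLineageSubgraph requests above. proto.Bool and
// proto.Int32 come from the assumed google.golang.org/protobuf/proto import; the
// enclosing request wiring is omitted and the Event payload is left empty.
func examplePublishOptions() (*PutExecutionRequest_Options, *PutLineageSubgraphRequest_EventEdge) {
	opts := &PutExecutionRequest_Options{
		// Reuse an already-stored context instead of failing with AlreadyExists
		// when several writers race on the same context.name.
		ReuseContextIfAlreadyExist: proto.Bool(true),
		// Likewise reuse an artifact that already exists with the same
		// non-empty external_id.
		ReuseArtifactIfAlreadyExistByExternalId: proto.Bool(true),
	}
	edge := &PutLineageSubgraphRequest_EventEdge{
		ExecutionIndex: proto.Int32(0), // first execution in the request's array
		ArtifactIndex:  proto.Int32(0), // first artifact in the request's array
		Event:          &Event{},       // event type/path omitted in this sketch
	}
	return opts, edge
}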
+func (*PutLineageSubgraphRequest_EventEdge) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{16, 0} +} + +func (x *PutLineageSubgraphRequest_EventEdge) GetExecutionIndex() int32 { + if x != nil && x.ExecutionIndex != nil { + return *x.ExecutionIndex + } + return 0 +} + +func (x *PutLineageSubgraphRequest_EventEdge) GetArtifactIndex() int32 { + if x != nil && x.ArtifactIndex != nil { + return *x.ArtifactIndex + } + return 0 +} + +func (x *PutLineageSubgraphRequest_EventEdge) GetEvent() *Event { + if x != nil { + return x.Event + } + return nil +} + +type PutLineageSubgraphRequest_Options struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // When there's a race to publish executions with a new context with the + // same context.name, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. If set to true, + // the API will reuse the stored context in the transaction and perform + // an update. + ReuseContextIfAlreadyExist *bool `protobuf:"varint,1,opt,name=reuse_context_if_already_exist,json=reuseContextIfAlreadyExist" json:"reuse_context_if_already_exist,omitempty"` + // When there's a race to publish executions with a new artifact with the + // same artifact.external_id, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. + // If set to true and an Artifact has non-empty external_id, + // the API will reuse the stored artifact in the transaction and + // perform an update. Otherwise, it will fall back to relying on `id` field + // to decide if it's update (if `id` exists) or insert (if `id` is empty). + ReuseArtifactIfAlreadyExistByExternalId *bool `protobuf:"varint,2,opt,name=reuse_artifact_if_already_exist_by_external_id,json=reuseArtifactIfAlreadyExistByExternalId" json:"reuse_artifact_if_already_exist_by_external_id,omitempty"` +} + +func (x *PutLineageSubgraphRequest_Options) Reset() { + *x = PutLineageSubgraphRequest_Options{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[113] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutLineageSubgraphRequest_Options) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutLineageSubgraphRequest_Options) ProtoMessage() {} + +func (x *PutLineageSubgraphRequest_Options) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[113] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutLineageSubgraphRequest_Options.ProtoReflect.Descriptor instead. 
+func (*PutLineageSubgraphRequest_Options) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{16, 1} +} + +func (x *PutLineageSubgraphRequest_Options) GetReuseContextIfAlreadyExist() bool { + if x != nil && x.ReuseContextIfAlreadyExist != nil { + return *x.ReuseContextIfAlreadyExist + } + return false +} + +func (x *PutLineageSubgraphRequest_Options) GetReuseArtifactIfAlreadyExistByExternalId() bool { + if x != nil && x.ReuseArtifactIfAlreadyExistByExternalId != nil { + return *x.ReuseArtifactIfAlreadyExistByExternalId + } + return false +} + +type GetParentContextsByContextsResponse_ParentContextsPerChild struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ParentContexts []*Context `protobuf:"bytes,1,rep,name=parent_contexts,json=parentContexts" json:"parent_contexts,omitempty"` +} + +func (x *GetParentContextsByContextsResponse_ParentContextsPerChild) Reset() { + *x = GetParentContextsByContextsResponse_ParentContextsPerChild{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[114] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetParentContextsByContextsResponse_ParentContextsPerChild) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetParentContextsByContextsResponse_ParentContextsPerChild) ProtoMessage() {} + +func (x *GetParentContextsByContextsResponse_ParentContextsPerChild) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[114] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetParentContextsByContextsResponse_ParentContextsPerChild.ProtoReflect.Descriptor instead. 
+func (*GetParentContextsByContextsResponse_ParentContextsPerChild) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{97, 0} +} + +func (x *GetParentContextsByContextsResponse_ParentContextsPerChild) GetParentContexts() []*Context { + if x != nil { + return x.ParentContexts + } + return nil +} + +type GetChildrenContextsByContextsResponse_ChildrenContextsPerParent struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ChildrenContexts []*Context `protobuf:"bytes,1,rep,name=children_contexts,json=childrenContexts" json:"children_contexts,omitempty"` +} + +func (x *GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) Reset() { + *x = GetChildrenContextsByContextsResponse_ChildrenContextsPerParent{} + if protoimpl.UnsafeEnabled { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[116] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) ProtoMessage() {} + +func (x *GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) ProtoReflect() protoreflect.Message { + mi := &file_ml_metadata_proto_metadata_store_service_proto_msgTypes[116] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetChildrenContextsByContextsResponse_ChildrenContextsPerParent.ProtoReflect.Descriptor instead. 
+func (*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) Descriptor() ([]byte, []int) { + return file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP(), []int{99, 0} +} + +func (x *GetChildrenContextsByContextsResponse_ChildrenContextsPerParent) GetChildrenContexts() []*Context { + if x != nil { + return x.ChildrenContexts + } + return nil +} + +var File_ml_metadata_proto_metadata_store_service_proto protoreflect.FileDescriptor + +var file_ml_metadata_proto_metadata_store_service_proto_rawDesc = []byte{ + 0x0a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x73, 0x74, 0x6f, + 0x72, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x0b, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x20, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x66, + 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, + 0x26, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x73, 0x74, 0x6f, 0x72, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x73, 0x0a, 0x0f, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, 0x54, 0x79, 0x70, 0x65, 0x12, 0x31, 0x0a, 0x08, 0x61, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x48, 0x00, 0x52, - 0x04, 0x6c, 0x69, 0x73, 0x74, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22, 0xe8, - 0x01, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x61, 0x63, 0x74, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x2d, 0x0a, + 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0xbf, 0x01, 0x0a, + 0x11, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4d, + 0x61, 0x70, 0x12, 0x4e, 0x0a, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x4d, 0x61, 0x70, 0x2e, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, + 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x70, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, + 0x65, 0x73, 0x1a, 0x5a, 0x0a, 0x0f, 0x50, 0x72, 0x6f, 0x70, 0x65, 0x72, 0x74, 0x69, 0x65, 0x73, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 
0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x4d, + 0x0a, 0x12, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x4c, 0x69, 0x73, 0x74, 0x12, 0x37, 0x0a, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, + 0x75, 0x63, 0x74, 0x52, 0x08, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xc0, 0x01, + 0x0a, 0x0e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, + 0x12, 0x3a, 0x0a, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, 0x54, 0x79, 0x70, 0x65, + 0x48, 0x00, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x32, 0x0a, 0x03, + 0x6d, 0x61, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x42, 0x0a, 0x07, 0x6f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, - 0x58, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4d, 0x0a, 0x24, 0x61, 0x62, - 0x6f, 0x72, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x75, 0x70, - 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x63, 0x68, 0x61, 0x6e, 0x67, - 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x1f, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x49, - 0x66, 0x4c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x54, 0x69, - 0x6d, 0x65, 0x43, 0x68, 0x61, 0x6e, 0x67, 0x65, 0x64, 0x22, 0x39, 0x0a, 0x14, 0x50, 0x75, 0x74, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, - 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x49, 0x64, 0x73, 0x22, 0x8a, 0x02, 0x0a, 0x16, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, - 0x3e, 0x0a, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, - 0x65, 0x52, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, - 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, 0x64, 0x64, 0x46, - 0x69, 
0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, 0x61, 0x6e, 0x5f, 0x6f, 0x6d, 0x69, - 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, - 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x2e, 0x0a, - 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0f, 0x63, 0x61, - 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x32, 0x0a, - 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x5f, 0x6d, 0x61, 0x74, 0x63, - 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x42, 0x02, 0x18, - 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x4d, 0x61, 0x74, 0x63, - 0x68, 0x22, 0x32, 0x0a, 0x17, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x17, 0x0a, 0x07, - 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, - 0x79, 0x70, 0x65, 0x49, 0x64, 0x22, 0x4e, 0x0a, 0x14, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x36, 0x0a, - 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x3c, 0x0a, 0x15, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x23, - 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x73, 0x18, - 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x49, 0x64, 0x73, 0x22, 0x8e, 0x02, 0x0a, 0x17, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, - 0x41, 0x0a, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, - 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, 0x5f, 0x66, 0x69, - 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, - 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, 0x61, 0x6e, 0x5f, - 0x6f, 0x6d, 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, - 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, - 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, - 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, - 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x5f, 0x6d, - 0x61, 0x74, 0x63, 0x68, 0x18, 
0x04, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, - 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x4d, - 0x61, 0x74, 0x63, 0x68, 0x22, 0x33, 0x0a, 0x18, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x22, 0x3e, 0x0a, 0x10, 0x50, 0x75, 0x74, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2a, 0x0a, - 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x13, 0x0a, 0x11, 0x50, 0x75, 0x74, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0xe6, - 0x03, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x34, 0x0a, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x52, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x63, 0x0a, 0x14, - 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x70, - 0x61, 0x69, 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x12, 0x61, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x50, 0x61, 0x69, 0x72, - 0x73, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x03, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x12, 0x42, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, - 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x6f, 0x0a, 0x10, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x31, 0x0a, 0x08, 0x61, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x28, - 0x0a, 0x05, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x52, 0x05, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x1a, 
0x4d, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 0x42, 0x0a, 0x1e, 0x72, 0x65, 0x75, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x5f, - 0x65, 0x78, 0x69, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x1a, 0x72, 0x65, 0x75, - 0x73, 0x65, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x66, 0x41, 0x6c, 0x72, 0x65, 0x61, - 0x64, 0x79, 0x45, 0x78, 0x69, 0x73, 0x74, 0x22, 0x7d, 0x0a, 0x14, 0x50, 0x75, 0x74, 0x45, 0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x49, 0x64, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, - 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x49, 0x64, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x5f, 0x69, 0x64, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, 0x22, 0x89, 0x03, 0x0a, 0x0f, 0x50, 0x75, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x40, 0x0a, 0x0e, 0x61, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x61, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x43, 0x0a, 0x0f, - 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, - 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, - 0x65, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, - 0x73, 0x12, 0x3d, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, - 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, + 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4d, 0x61, 0x70, 0x48, 0x00, 0x52, 0x03, 0x6d, 0x61, 0x70, + 0x12, 0x35, 0x0a, 0x04, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x48, + 0x00, 0x52, 0x04, 0x6c, 0x69, 0x73, 0x74, 0x42, 0x07, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, + 0x22, 0xf7, 0x02, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 
0x12, 0x42, 0x0a, + 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x3b, 0x0a, 0x0b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x61, + 0x73, 0x6b, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, + 0x4d, 0x61, 0x73, 0x6b, 0x52, 0x0a, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x4d, 0x61, 0x73, 0x6b, + 0x1a, 0x58, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4d, 0x0a, 0x24, 0x61, + 0x62, 0x6f, 0x72, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x75, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x63, 0x68, 0x61, 0x6e, + 0x67, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x1f, 0x61, 0x62, 0x6f, 0x72, 0x74, + 0x49, 0x66, 0x4c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x54, + 0x69, 0x6d, 0x65, 0x43, 0x68, 0x61, 0x6e, 0x67, 0x65, 0x64, 0x22, 0x39, 0x0a, 0x14, 0x50, 0x75, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, + 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x49, 0x64, 0x73, 0x22, 0xdc, 0x02, 0x0a, 0x16, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x3e, 0x0a, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x52, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, 0x5f, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, 0x64, 0x64, + 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, 0x61, 0x6e, 0x5f, 0x6f, 0x6d, - 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, 0x69, 0x65, - 0x6c, 0x64, 
0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0f, 0x63, + 0x6c, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x5f, 0x6d, 0x61, 0x74, - 0x63, 0x68, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x42, 0x02, + 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x4d, 0x61, 0x74, - 0x63, 0x68, 0x22, 0x96, 0x01, 0x0a, 0x10, 0x50, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2a, 0x0a, 0x11, 0x61, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, - 0x28, 0x03, 0x52, 0x0f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x49, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x03, 0x52, - 0x10, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x49, 0x64, - 0x73, 0x12, 0x28, 0x0a, 0x10, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, - 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0e, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x22, 0x86, 0x02, 0x0a, 0x15, - 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, + 0x63, 0x68, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x32, 0x0a, 0x17, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x22, 0xdd, 0x01, 0x0a, 0x14, 0x50, 0x75, 0x74, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, + 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 
0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3b, 0x0a, 0x0b, 0x75, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x73, 0x6b, 0x52, 0x0a, 0x75, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x4d, 0x61, 0x73, 0x6b, 0x22, 0x3c, 0x0a, 0x15, 0x50, 0x75, 0x74, 0x45, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, + 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x73, 0x22, 0xe0, 0x02, 0x0a, 0x17, 0x50, 0x75, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x41, 0x0a, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, + 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, + 0x61, 0x6e, 0x41, 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, + 0x61, 0x6e, 0x5f, 0x6f, 0x6d, 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x73, 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, + 0x18, 0x01, 0x52, 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x73, 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, + 0x73, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, + 0x72, 0x75, 0x65, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, + 0x64, 0x73, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x33, 0x0a, 0x18, 0x50, 0x75, 0x74, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x22, 0x90, + 0x01, 0x0a, 0x10, 0x50, 0x75, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 
0x73, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x2a, 0x0a, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x12, + 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x22, 0x13, 0x0a, 0x11, 0x50, 0x75, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x9a, 0x05, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x34, + 0x0a, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x63, 0x0a, 0x14, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x5f, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x70, 0x61, 0x69, 0x72, 0x73, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x12, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x45, + 0x76, 0x65, 0x6e, 0x74, 0x50, 0x61, 0x69, 0x72, 0x73, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, 0x5f, 0x66, 0x69, - 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, - 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, 0x61, 0x6e, 0x5f, - 0x6f, 0x6d, 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, - 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, - 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, - 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, - 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x5f, 0x6d, - 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, - 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x4d, - 0x61, 0x74, 0x63, 0x68, 0x22, 0x31, 0x0a, 0x16, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 
0x65, - 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x17, - 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, - 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x22, 0x46, 0x0a, 0x12, 0x50, 0x75, 0x74, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x30, 0x0a, - 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x42, 0x0a, 0x07, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x1a, 0x6f, 0x0a, 0x10, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x41, 0x6e, 0x64, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x31, 0x0a, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x08, + 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x28, 0x0a, 0x05, 0x65, 0x76, 0x65, 0x6e, + 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x05, 0x65, 0x76, 0x65, + 0x6e, 0x74, 0x1a, 0xae, 0x01, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x42, + 0x0a, 0x1e, 0x72, 0x65, 0x75, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, + 0x69, 0x66, 0x5f, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x5f, 0x65, 0x78, 0x69, 0x73, 0x74, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x1a, 0x72, 0x65, 0x75, 0x73, 0x65, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x49, 0x66, 0x41, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x45, 0x78, 0x69, + 0x73, 0x74, 0x12, 0x5f, 0x0a, 0x2e, 0x72, 0x65, 0x75, 0x73, 0x65, 0x5f, 0x61, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x5f, + 0x65, 0x78, 0x69, 0x73, 0x74, 0x5f, 0x62, 0x79, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x27, 0x72, 0x65, 0x75, 0x73, + 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x66, 0x41, 0x6c, 0x72, 0x65, 0x61, + 0x64, 0x79, 0x45, 0x78, 0x69, 0x73, 0x74, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x49, 0x64, 0x22, 0x7d, 0x0a, 0x14, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x65, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0b, 
0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, 0x21, + 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, + 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x73, + 0x18, 0x03, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, + 0x64, 0x73, 0x22, 0xe2, 0x05, 0x0a, 0x19, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, + 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x30, 0x0a, + 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, - 0x36, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x25, 0x50, 0x75, 0x74, 0x41, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, + 0x51, 0x0a, 0x0b, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x65, 0x64, 0x67, 0x65, 0x73, 0x18, 0x04, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, + 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x45, 0x64, 0x67, 0x65, 0x52, 0x0a, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x45, 0x64, 0x67, + 0x65, 0x73, 0x12, 0x48, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, + 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, + 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 
0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x85, + 0x01, 0x0a, 0x09, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x45, 0x64, 0x67, 0x65, 0x12, 0x27, 0x0a, 0x0f, + 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x25, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0d, 0x61, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x28, 0x0a, 0x05, + 0x65, 0x76, 0x65, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, + 0x05, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x1a, 0xae, 0x01, 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x42, 0x0a, 0x1e, 0x72, 0x65, 0x75, 0x73, 0x65, 0x5f, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x61, 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x5f, 0x65, + 0x78, 0x69, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x1a, 0x72, 0x65, 0x75, 0x73, + 0x65, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x66, 0x41, 0x6c, 0x72, 0x65, 0x61, 0x64, + 0x79, 0x45, 0x78, 0x69, 0x73, 0x74, 0x12, 0x5f, 0x0a, 0x2e, 0x72, 0x65, 0x75, 0x73, 0x65, 0x5f, + 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x66, 0x5f, 0x61, 0x6c, 0x72, 0x65, + 0x61, 0x64, 0x79, 0x5f, 0x65, 0x78, 0x69, 0x73, 0x74, 0x5f, 0x62, 0x79, 0x5f, 0x65, 0x78, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x27, + 0x72, 0x65, 0x75, 0x73, 0x65, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x66, 0x41, + 0x6c, 0x72, 0x65, 0x61, 0x64, 0x79, 0x45, 0x78, 0x69, 0x73, 0x74, 0x42, 0x79, 0x45, 0x78, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x22, 0x91, 0x01, 0x0a, 0x1a, 0x50, 0x75, 0x74, 0x4c, + 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x27, 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x42, 0x02, 0x10, + 0x01, 0x52, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x73, 0x12, + 0x25, 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, + 0x02, 0x20, 0x03, 0x28, 0x03, 0x42, 0x02, 0x10, 0x01, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x49, 0x64, 0x73, 0x12, 0x23, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x03, 0x42, 0x02, 0x10, 0x01, 0x52, + 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, 0x22, 0xdb, 0x03, 0x0a, 0x0f, + 0x50, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, + 0x73, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 
0x1a, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x3d, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, 0x5f, 0x61, 0x64, 0x64, + 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x63, + 0x61, 0x6e, 0x41, 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x63, + 0x61, 0x6e, 0x5f, 0x6f, 0x6d, 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x07, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, 0x74, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x73, 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, + 0x18, 0x01, 0x52, 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x73, 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, + 0x73, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x04, 0x74, + 0x72, 0x75, 0x65, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, 0x46, 0x69, 0x65, 0x6c, + 0x64, 0x73, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x96, 0x01, 0x0a, 0x10, 0x50, 0x75, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2a, + 0x0a, 0x11, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, + 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0f, 0x61, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x65, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x03, 0x52, 0x10, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x28, 0x0a, 0x10, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x03, 0x20, 0x03, + 0x28, 0x03, 0x52, 0x0e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x49, + 0x64, 0x73, 0x22, 0xd8, 0x02, 0x0a, 0x15, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x0c, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 
0x61, + 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x0a, 0x0e, 0x63, 0x61, 0x6e, + 0x5f, 0x61, 0x64, 0x64, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x0c, 0x63, 0x61, 0x6e, 0x41, 0x64, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, + 0x26, 0x0a, 0x0f, 0x63, 0x61, 0x6e, 0x5f, 0x6f, 0x6d, 0x69, 0x74, 0x5f, 0x66, 0x69, 0x65, 0x6c, + 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0d, 0x63, 0x61, 0x6e, 0x4f, 0x6d, 0x69, + 0x74, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x2e, 0x0a, 0x11, 0x63, 0x61, 0x6e, 0x5f, 0x64, + 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0f, 0x63, 0x61, 0x6e, 0x44, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x32, 0x0a, 0x10, 0x61, 0x6c, 0x6c, 0x5f, 0x66, + 0x69, 0x65, 0x6c, 0x64, 0x73, 0x5f, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x08, 0x3a, 0x04, 0x74, 0x72, 0x75, 0x65, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0e, 0x61, 0x6c, 0x6c, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x50, 0x0a, 0x13, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x31, 0x0a, + 0x16, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x17, 0x0a, 0x07, 0x74, 0x79, 0x70, 0x65, 0x5f, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, + 0x22, 0xd5, 0x01, 0x0a, 0x12, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, + 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, + 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3b, 0x0a, 0x0b, 0x75, + 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x73, 0x6b, 0x52, 0x0a, 0x75, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x4d, 0x61, 0x73, 0x6b, 0x22, 0x36, 0x0a, 0x13, 0x50, 0x75, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x1f, 0x0a, 0x0b, 
0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, + 0x22, 0xf5, 0x01, 0x0a, 0x25, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x74, + 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, + 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, 0x74, 0x74, 0x72, + 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x73, 0x73, 0x6f, + 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x73, 0x73, + 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x61, 0x73, 0x73, 0x6f, 0x63, 0x69, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x28, 0x0a, 0x26, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, - 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x52, 0x0c, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, - 0x3c, 0x0a, 0x0c, 0x61, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, - 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, - 0x0c, 0x61, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x28, 0x0a, - 0x26, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x5f, 0x0a, 0x18, 0x50, 0x75, 0x74, 0x50, 0x61, - 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x43, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x61, 0x72, 0x65, 0x6e, - 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x0e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, - 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 
0x73, 0x22, 0x1b, 0x0a, 0x19, 0x50, 0x75, 0x74, 0x50, - 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0xea, 0x01, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, - 0x69, 0x6f, 0x6e, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x22, 0x79, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, - 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, - 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xd8, 0x01, - 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, - 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0xb1, 0x01, 0x0a, 0x18, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x43, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x52, 0x0e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 
0x64, 0x61, 0x74, 0x61, 0x2e, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x1b, 0x0a, 0x19, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0xea, 0x01, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, - 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, + 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x12, 0x23, 0x0a, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x55, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, - 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x31, 0x0a, 0x08, - 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x22, - 0x8e, 0x01, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, - 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x61, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x73, 0x12, 0x50, + 0x6e, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 
0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x22, 0x4f, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, - 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x09, - 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x22, 0x79, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, + 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, + 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, 0x67, 0x65, + 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6e, 0x65, + 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xd8, 0x01, 0x0a, 0x1f, + 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, + 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, + 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, + 0x23, 0x0a, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x55, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, + 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x31, 0x0a, 0x08, 0x61, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x52, 0x08, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x22, 0xcd, 0x01, + 0x0a, 0x17, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, + 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 
0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, + 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x73, 0x12, 0x3d, 0x0a, 0x17, + 0x70, 0x6f, 0x70, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x5f, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x52, 0x15, 0x70, 0x6f, 0x70, 0x75, 0x6c, 0x61, 0x74, 0x65, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x91, 0x01, + 0x0a, 0x18, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, + 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, + 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, + 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0xa4, 0x01, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, @@ -5664,638 +7487,903 @@ var file_ml_metadata_proto_metadata_store_service_proto_rawDesc = []byte{ 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, - 0x65, 0x73, 0x22, 0xeb, 0x01, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, - 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 
0x6f, 0x6e, 0x73, 0x12, 0x50, - 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, - 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x22, 0x7d, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, - 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, - 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, - 0xdb, 0x01, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, - 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, - 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x65, 0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x74, - 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, - 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, - 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x59, 0x0a, - 0x21, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, - 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x12, 0x34, 0x0a, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x65, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x91, 0x01, 0x0a, 0x18, 0x47, 0x65, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0c, 0x65, 
0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, + 0x65, 0x73, 0x22, 0x97, 0x01, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x65, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x53, 0x0a, 0x19, - 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, - 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, - 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x22, 0xab, 0x01, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, - 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, - 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, - 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, - 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, - 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, - 0x5d, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x41, 0x0a, 0x0e, 0x65, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x22, 0x97, - 0x01, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, - 0x64, 0x73, 0x18, 0x01, 
0x20, 0x03, 0x28, 0x03, 0x52, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x58, 0x0a, 0x21, + 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, + 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x22, 0x98, 0x01, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x09, 0x52, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, + 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x22, 0x5c, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, + 0x96, 0x01, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, + 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, + 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x4d, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2a, 0x0a, 0x06, 0x65, - 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 
0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, - 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x94, 0x01, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, - 0x44, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, - 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, - 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x4c, - 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x2a, 0x0a, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x8a, 0x01, 0x0a, - 0x1b, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x07, - 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, - 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, - 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x60, 0x0a, 0x1c, 0x47, 0x65, 0x74, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, - 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0x8b, 0x01, 0x0a, 0x1c, - 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, - 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x07, - 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, - 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 
0x73, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, - 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x64, 0x0a, 0x1d, 0x47, 0x65, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, - 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, - 0xa9, 0x01, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, - 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, - 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, - 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, - 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, - 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x55, 0x0a, 0x16, 0x47, - 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3b, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x22, 0x89, 0x01, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x03, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, - 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x5c, - 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x73, 
0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, - 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0xa3, 0x01, 0x0a, - 0x12, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x54, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x9b, + 0x01, 0x0a, 0x24, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0b, 0x65, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x69, 0x0a, 0x25, + 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, + 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0x9c, 0x01, 0x0a, 0x25, 0x47, 0x65, 0x74, 0x45, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, + 0x73, 0x18, 0x01, 0x20, 0x03, 
0x28, 0x09, 0x52, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x6d, 0x0a, 0x26, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0x9a, 0x01, 0x0a, 0x23, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, + 0x0c, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x0b, 0x65, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x22, 0x6f, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, - 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, - 0x6b, 0x65, 0x6e, 0x22, 0xe9, 0x01, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x3b, 0x0a, - 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, - 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 
0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, - 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, - 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, - 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, - 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, - 0x75, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x26, + 0x6e, 0x73, 0x22, 0x65, 0x0a, 0x24, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, + 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x0d, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0xeb, 0x01, 0x0a, 0x1a, 0x47, 0x65, + 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, + 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, + 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, + 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x7d, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 
0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, - 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xd5, 0x01, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, - 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, - 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, - 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, - 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, - 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x51, - 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, - 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x2e, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x22, 0x8b, 0x01, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, + 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xdb, 0x01, 0x0a, 0x20, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, + 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, + 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, + 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, + 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, + 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x65, + 0x78, 0x65, 0x63, 
0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x4e, 0x61, + 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x59, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, + 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x34, 0x0a, 0x09, 0x65, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, + 0x91, 0x01, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, + 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x03, 0x52, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, + 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, + 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x22, 0x53, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xab, 0x01, 0x0a, 0x17, 0x47, 0x65, 0x74, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, + 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 
0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x5d, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x41, 0x0a, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0d, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x22, 0x97, 0x01, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0d, 0x65, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, + 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, - 0x4b, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, - 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x91, 0x01, 0x0a, - 0x1c, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, - 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x03, 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x64, 0x12, 0x50, + 0x4d, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x2a, 0x0a, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x94, + 0x01, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x21, 0x0a, 0x0c, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x69, 0x64, 0x73, + 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 
0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x4c, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, + 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2a, 0x0a, 0x06, 0x65, 0x76, 0x65, 0x6e, 0x74, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x06, 0x65, 0x76, 0x65, + 0x6e, 0x74, 0x73, 0x22, 0x8a, 0x01, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x50, + 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x22, 0x60, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x40, 0x0a, 0x0e, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x5f, 0x74, 0x79, 0x70, + 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x0d, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x73, 0x22, 0x8b, 0x01, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x69, 0x64, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x22, 0x51, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, - 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 
0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x22, 0x94, 0x01, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, - 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, - 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x52, 0x0a, 0x1e, 0x47, 0x65, - 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x94, - 0x01, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, - 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x22, 0x64, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x43, 0x0a, 0x0f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0e, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x22, 0xa9, 0x01, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, + 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 
0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, + 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x22, 0x55, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3b, 0x0a, 0x0c, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0b, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x22, 0x89, 0x01, 0x0a, 0x1a, 0x47, 0x65, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, + 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, + 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, + 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x56, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, - 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x96, 0x01, - 0x0a, 0x23, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x5c, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x3d, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, + 0x74, 0x79, 0x70, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x73, 0x22, 0xa3, 0x01, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 
0x32, 0x21, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, + 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x6f, 0x0a, 0x13, 0x47, 0x65, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, + 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, + 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xe9, 0x01, 0x0a, 0x18, 0x47, + 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, + 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x75, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, + 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 
0x0d, + 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xd5, 0x01, + 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, + 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, + 0x0c, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x74, 0x79, 0x70, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x4e, + 0x61, 0x6d, 0x65, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x51, 0x0a, 0x1f, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2e, 0x0a, 0x07, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, + 0x07, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x22, 0x8b, 0x01, 0x0a, 0x16, 0x47, 0x65, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, + 0x64, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x58, 0x0a, 0x24, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, - 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, - 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, - 0x22, 0xcc, 0x01, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x12, 0x1d, 0x0a, 
0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, - 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x0b, 0x32, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x4c, 0x69, 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x4b, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x22, 0x91, 0x01, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x61, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x51, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x94, 0x01, 0x0a, 0x1d, 0x47, + 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x21, 0x0a, 0x0c, + 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x0b, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x64, 0x12, + 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x22, 0x52, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, + 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 
0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x94, 0x01, 0x0a, 0x21, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, 0x63, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x56, 0x0a, 0x22, + 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x22, 0x96, 0x01, 0x0a, 0x23, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, + 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x12, 0x50, 0x0a, 0x13, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x58, 0x0a, + 0x24, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x30, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x08, 0x63, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x22, 0x9b, 0x01, 0x0a, 0x22, 0x47, 0x65, 0x74, 0x50, + 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 
0x73, 0x74, 0x12, 0x23, + 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x03, 0x42, 0x02, 0x10, 0x01, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, + 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xe1, 0x02, 0x0a, 0x23, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x5a, 0x0a, + 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x3e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, + 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, + 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x1a, 0x57, 0x0a, 0x16, 0x50, 0x61, 0x72, + 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x50, 0x65, 0x72, 0x43, 0x68, + 0x69, 0x6c, 0x64, 0x12, 0x3d, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x5f, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x52, 0x0e, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x73, 0x1a, 0x84, 0x01, 0x0a, 0x0d, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x5d, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x47, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x50, 0x65, 0x72, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x9d, 0x01, 0x0a, 0x24, 0x47, 0x65, + 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x23, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x03, 0x42, 0x02, 0x10, 0x01, 0x52, 0x0a, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, + 
0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, + 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xf1, 0x02, 0x0a, 0x25, 0x47, 0x65, + 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x5c, 0x0a, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, + 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x40, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x1a, 0x5e, 0x0a, 0x19, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x50, 0x65, 0x72, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x41, + 0x0a, 0x11, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, + 0x10, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x1a, 0x89, 0x01, 0x0a, 0x0d, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x62, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x4c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, + 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x50, 0x65, 0x72, 0x50, 0x61, 0x72, 0x65, + 0x6e, 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xcc, 0x01, + 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, + 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x12, 0x3b, 0x0a, + 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, + 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x18, 0x03, 0x20, 
0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x7c, 0x0a, 0x1d, + 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x33, 0x0a, + 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x41, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, + 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, + 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xcd, 0x01, 0x0a, 0x1d, 0x47, + 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, 0x0a, 0x0a, + 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x12, 0x3b, 0x0a, 0x07, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x73, 0x74, 0x4f, + 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, + 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, + 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xd2, 0x01, 0x0a, 0x1e, 0x47, + 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x36, 0x0a, + 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, + 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, + 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 
0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, - 0x7c, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, - 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x33, 0x0a, 0x09, 0x61, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x09, 0x61, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x70, 0x61, - 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, - 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x22, 0xcd, 0x01, - 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, - 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, - 0x1d, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x49, 0x64, 0x12, 0x3b, - 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, - 0x73, 0x74, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0xab, 0x01, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, + 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3f, 0x0a, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, + 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, - 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xd2, 0x01, - 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, - 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x36, 0x0a, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x65, 0x78, - 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x26, 0x0a, 0x0f, 0x6e, 0x65, 0x78, 0x74, - 0x5f, 0x70, 0x61, 0x67, 0x65, 0x5f, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0d, 0x6e, 0x65, 0x78, 0x74, 0x50, 0x61, 0x67, 0x65, 0x54, 0x6f, 0x6b, 0x65, 
0x6e, - 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, - 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x22, 0xab, 0x01, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, - 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x3f, 0x0a, - 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, - 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x50, - 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, - 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, - 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x22, 0x50, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, - 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x35, 0x0a, 0x08, 0x73, - 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, - 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x08, 0x73, 0x75, 0x62, 0x67, 0x72, 0x61, - 0x70, 0x68, 0x32, 0x99, 0x22, 0x0a, 0x14, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, - 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x5e, 0x0a, 0x0f, 0x50, - 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x23, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, - 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x50, - 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, - 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, - 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5b, - 0x0a, 0x0e, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x22, 0x2e, 
0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, - 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, - 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x49, 0x0a, 0x08, 0x50, - 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x55, 0x0a, 0x0c, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x20, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x58, 0x0a, - 0x0d, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x21, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x4c, 0x0a, 0x09, 0x50, 0x75, 0x74, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x73, 0x12, 0x1d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x55, 0x0a, 0x0c, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x20, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x52, 0x0a, 0x0b, - 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, - 0x12, 0x8b, 0x01, 0x0a, 0x1e, 0x50, 0x75, 
0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x33, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, - 0x0a, 0x11, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, - 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x50, 0x0a, + 0x17, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x35, 0x0a, 0x08, 0x73, 0x75, 0x62, 0x67, + 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, + 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x08, 0x73, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x22, + 0x95, 0x02, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, + 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x6d, 0x0a, + 0x1e, 0x6c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, + 0x68, 0x5f, 0x71, 0x75, 0x65, 0x72, 0x79, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, + 0x61, 0x70, 0x68, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x1b, 0x6c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, + 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x09, + 0x72, 0x65, 0x61, 0x64, 0x5f, 0x6d, 0x61, 0x73, 0x6b, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x73, 0x6b, 0x52, 0x08, 0x72, 0x65, 0x61, + 0x64, 0x4d, 0x61, 0x73, 0x6b, 0x12, 0x50, 0x0a, 0x13, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 
0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x12, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x62, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x4c, 0x69, + 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x44, 0x0a, 0x10, 0x6c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, + 0x5f, 0x73, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x19, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x4c, 0x69, + 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x0f, 0x6c, 0x69, 0x6e, 0x65, + 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x32, 0x96, 0x2c, 0x0a, 0x14, + 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x12, 0x5e, 0x0a, 0x0f, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0x00, 0x12, 0x6d, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x28, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, - 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 
0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, - 0x49, 0x44, 0x12, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5b, 0x0a, 0x0e, 0x50, 0x75, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x49, 0x0a, 0x08, 0x50, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, + 0x12, 0x1c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, + 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1d, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, + 0x55, 0x0a, 0x0c, 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, + 0x20, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x50, 0x75, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x58, 0x0a, 0x0d, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x4c, 0x0a, 0x09, 0x50, 0x75, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x1d, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, + 0x76, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1e, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x45, 0x76, + 0x65, 0x6e, 0x74, 0x73, 
0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x55, + 0x0a, 0x0c, 0x50, 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x20, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, + 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, + 0x75, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x67, 0x0a, 0x12, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, 0x65, + 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x12, 0x26, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, + 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x50, 0x75, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, + 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x52, + 0x0a, 0x0b, 0x50, 0x75, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x1f, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, + 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x00, 0x12, 0x8b, 0x01, 0x0a, 0x1e, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x32, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x33, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x41, 0x74, 0x74, 0x72, 0x69, + 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x41, 0x6e, 0x64, 0x41, 0x73, 0x73, 0x6f, 0x63, 0x69, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x64, 0x0a, 0x11, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x50, 0x75, 0x74, 0x50, 0x61, + 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 
0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x6d, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x28, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, + 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x45, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x12, 0x24, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, - 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, - 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, 0x11, 0x47, + 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, - 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x5b, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 
0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x6a, - 0x0a, 0x13, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, - 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x42, 0x79, 0x49, 0x44, 0x12, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, + 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, + 0x11, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, + 0x65, 0x73, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, + 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x5b, 0x0a, 0x0e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, + 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x6a, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, + 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, + 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, + 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, + 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, + 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x55, 0x0a, 0x0c, + 0x47, 
0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x20, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, + 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, - 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x49, 0x44, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x58, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x52, 0x0a, + 0x0b, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, + 0x00, 0x12, 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, + 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, + 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, 0x47, 0x65, - 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x23, 0x2e, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 
0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x55, 0x0a, 0x0c, 0x47, 0x65, - 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x12, 0x20, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x21, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x58, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x12, 0x21, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x52, 0x0a, 0x0b, 0x47, - 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x6d, 0x6c, 0x5f, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x20, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, - 0x61, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, - 0x79, 0x49, 0x44, 0x12, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, - 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x22, 0x00, 0x12, 0x64, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x43, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x12, 0x23, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x67, 
0x0a, 0x12, 0x47, 0x65, 0x74, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x26, - 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, - 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x6a, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, - 0x74, 0x1a, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, - 0x11, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, - 0x70, 0x65, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x22, 0x00, 0x12, 0x79, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, - 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x12, - 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, - 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, - 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2d, 0x2e, - 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, - 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, - 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x7c, - 0x0a, 0x19, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, - 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2d, 0x2e, 0x6d, 0x6c, - 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, - 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, - 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, + 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x49, 0x44, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x67, 0x0a, 0x12, 0x47, 0x65, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 
0x2e, 0x47, + 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, + 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x6a, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, - 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x76, 0x0a, 0x17, - 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, - 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, - 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, 0x52, 0x49, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, - 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, 0x52, 0x49, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, 0x52, 0x49, - 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x76, 0x0a, 0x17, 0x47, 0x65, - 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, - 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x12, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, - 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, - 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x12, 0x2a, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, - 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, - 0x42, 0x79, 
0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, - 0x12, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, - 0x66, 0x61, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x6d, 0x6c, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x28, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, + 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, + 0x64, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, - 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, - 0x69, 0x6f, 0x6e, 0x12, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, - 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, - 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, - 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x7f, - 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x2e, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, - 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2f, 0x2e, 0x6d, - 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, - 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, - 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, - 0x85, 0x01, 0x0a, 0x1c, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, - 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x12, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, - 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 
0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x1a, 0x31, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x41, 0x72, - 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, - 0x12, 0x29, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, - 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x6d, 0x6c, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x79, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, + 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, + 0x65, 0x12, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, + 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x2d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, + 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x12, 0x7c, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2d, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, + 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, + 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2e, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, + 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x76, + 0x0a, 0x17, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, + 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x42, 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x42, + 0x79, 0x54, 0x79, 0x70, 0x65, 0x41, 0x6e, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x64, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, 0x52, 0x49, 0x12, 0x25, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, - 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 
0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, - 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, - 0x65, 0x78, 0x74, 0x12, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, + 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, 0x52, 0x49, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x55, + 0x52, 0x49, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x76, 0x0a, 0x17, + 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, + 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x12, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, + 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x44, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, + 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x12, 0x2a, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x49, 0x44, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x76, 0x65, 0x6e, + 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x49, 0x44, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x7c, 0x0a, 0x19, 0x47, 0x65, 0x74, + 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, + 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x2d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, + 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, + 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x7f, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 
0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x79, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x49, 0x64, 0x73, 0x12, 0x2c, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, + 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x2d, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, + 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x12, 0x88, 0x01, 0x0a, 0x1d, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x31, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x32, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, + 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x8b, + 0x01, 0x0a, 0x1e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, + 0x73, 0x12, 0x32, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x79, 0x70, 0x65, + 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x33, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, + 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x85, 0x01, 0x0a, + 0x1c, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, + 0x42, 0x79, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x12, 0x30, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, 0x78, 0x74, + 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, + 0x31, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x54, 0x79, 0x70, 0x65, 0x73, 0x42, 0x79, 0x45, + 0x78, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x49, 0x64, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 
0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x12, 0x29, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, + 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x73, 0x42, 0x79, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, + 0x12, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, + 0x65, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, + 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, + 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x7f, 0x0a, 0x1a, 0x47, + 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, + 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x2e, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, + 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, + 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x85, 0x01, 0x0a, + 0x1c, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x30, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, + 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x2b, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, - 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, - 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, - 0x0a, 0x0f, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, - 0x68, 0x12, 0x23, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, - 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, - 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 
0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, + 0x31, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, + 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x82, 0x01, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, + 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x12, 0x2f, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x74, + 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x30, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, + 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x88, 0x01, 0x0a, 0x1d, 0x47, 0x65, + 0x74, 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x12, 0x31, 0x2e, 0x6d, 0x6c, + 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x43, 0x68, 0x69, + 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x42, 0x79, 0x43, + 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x32, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, + 0x43, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, + 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, 0x12, 0x70, 0x0a, 0x15, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, + 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x12, 0x29, 0x2e, + 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, + 0x72, 0x74, 0x69, 0x66, 0x61, 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x41, 0x72, 0x74, 0x69, 0x66, 0x61, + 0x63, 0x74, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x73, 0x0a, 0x16, 0x47, 0x65, 0x74, 0x45, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, + 0x12, 0x2a, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, + 0x65, 0x74, 0x45, 0x78, 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, + 0x6e, 0x74, 0x65, 0x78, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2b, 0x2e, 0x6d, + 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x45, 0x78, + 0x65, 0x63, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x79, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x5e, 0x0a, 0x0f, 0x47, + 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 
0x70, 0x68, 0x12, 0x23, + 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, + 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, + 0x65, 0x73, 0x74, 0x1a, 0x24, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x47, 0x72, 0x61, 0x70, + 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x67, 0x0a, 0x12, 0x47, + 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, + 0x68, 0x12, 0x26, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, + 0x70, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x27, 0x2e, 0x6d, 0x6c, 0x5f, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x47, 0x65, 0x74, 0x4c, 0x69, 0x6e, 0x65, 0x61, + 0x67, 0x65, 0x53, 0x75, 0x62, 0x67, 0x72, 0x61, 0x70, 0x68, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x22, 0x00, } var ( @@ -6310,305 +8398,399 @@ func file_ml_metadata_proto_metadata_store_service_proto_rawDescGZIP() []byte { return file_ml_metadata_proto_metadata_store_service_proto_rawDescData } -var file_ml_metadata_proto_metadata_store_service_proto_msgTypes = make([]protoimpl.MessageInfo, 92) +var file_ml_metadata_proto_metadata_store_service_proto_msgTypes = make([]protoimpl.MessageInfo, 118) var file_ml_metadata_proto_metadata_store_service_proto_goTypes = []interface{}{ - (*ArtifactAndType)(nil), // 0: ml_metadata.ArtifactAndType - (*ArtifactStructMap)(nil), // 1: ml_metadata.ArtifactStructMap - (*ArtifactStructList)(nil), // 2: ml_metadata.ArtifactStructList - (*ArtifactStruct)(nil), // 3: ml_metadata.ArtifactStruct - (*PutArtifactsRequest)(nil), // 4: ml_metadata.PutArtifactsRequest - (*PutArtifactsResponse)(nil), // 5: ml_metadata.PutArtifactsResponse - (*PutArtifactTypeRequest)(nil), // 6: ml_metadata.PutArtifactTypeRequest - (*PutArtifactTypeResponse)(nil), // 7: ml_metadata.PutArtifactTypeResponse - (*PutExecutionsRequest)(nil), // 8: ml_metadata.PutExecutionsRequest - (*PutExecutionsResponse)(nil), // 9: ml_metadata.PutExecutionsResponse - (*PutExecutionTypeRequest)(nil), // 10: ml_metadata.PutExecutionTypeRequest - (*PutExecutionTypeResponse)(nil), // 11: ml_metadata.PutExecutionTypeResponse - (*PutEventsRequest)(nil), // 12: ml_metadata.PutEventsRequest - (*PutEventsResponse)(nil), // 13: ml_metadata.PutEventsResponse - (*PutExecutionRequest)(nil), // 14: ml_metadata.PutExecutionRequest - (*PutExecutionResponse)(nil), // 15: ml_metadata.PutExecutionResponse - (*PutTypesRequest)(nil), // 16: ml_metadata.PutTypesRequest - (*PutTypesResponse)(nil), // 17: ml_metadata.PutTypesResponse - (*PutContextTypeRequest)(nil), // 18: ml_metadata.PutContextTypeRequest - (*PutContextTypeResponse)(nil), // 19: ml_metadata.PutContextTypeResponse - (*PutContextsRequest)(nil), // 20: ml_metadata.PutContextsRequest - (*PutContextsResponse)(nil), // 21: ml_metadata.PutContextsResponse - (*PutAttributionsAndAssociationsRequest)(nil), // 22: ml_metadata.PutAttributionsAndAssociationsRequest - (*PutAttributionsAndAssociationsResponse)(nil), // 23: ml_metadata.PutAttributionsAndAssociationsResponse - (*PutParentContextsRequest)(nil), // 24: ml_metadata.PutParentContextsRequest - (*PutParentContextsResponse)(nil), // 25: ml_metadata.PutParentContextsResponse - (*GetArtifactsByTypeRequest)(nil), // 26: 
ml_metadata.GetArtifactsByTypeRequest - (*GetArtifactsByTypeResponse)(nil), // 27: ml_metadata.GetArtifactsByTypeResponse - (*GetArtifactByTypeAndNameRequest)(nil), // 28: ml_metadata.GetArtifactByTypeAndNameRequest - (*GetArtifactByTypeAndNameResponse)(nil), // 29: ml_metadata.GetArtifactByTypeAndNameResponse - (*GetArtifactsByIDRequest)(nil), // 30: ml_metadata.GetArtifactsByIDRequest - (*GetArtifactsByIDResponse)(nil), // 31: ml_metadata.GetArtifactsByIDResponse - (*GetArtifactsRequest)(nil), // 32: ml_metadata.GetArtifactsRequest - (*GetArtifactsResponse)(nil), // 33: ml_metadata.GetArtifactsResponse - (*GetArtifactsByURIRequest)(nil), // 34: ml_metadata.GetArtifactsByURIRequest - (*GetArtifactsByURIResponse)(nil), // 35: ml_metadata.GetArtifactsByURIResponse - (*GetExecutionsRequest)(nil), // 36: ml_metadata.GetExecutionsRequest - (*GetExecutionsResponse)(nil), // 37: ml_metadata.GetExecutionsResponse - (*GetArtifactTypeRequest)(nil), // 38: ml_metadata.GetArtifactTypeRequest - (*GetArtifactTypeResponse)(nil), // 39: ml_metadata.GetArtifactTypeResponse - (*GetArtifactTypesRequest)(nil), // 40: ml_metadata.GetArtifactTypesRequest - (*GetArtifactTypesResponse)(nil), // 41: ml_metadata.GetArtifactTypesResponse - (*GetExecutionTypesRequest)(nil), // 42: ml_metadata.GetExecutionTypesRequest - (*GetExecutionTypesResponse)(nil), // 43: ml_metadata.GetExecutionTypesResponse - (*GetContextTypesRequest)(nil), // 44: ml_metadata.GetContextTypesRequest - (*GetContextTypesResponse)(nil), // 45: ml_metadata.GetContextTypesResponse - (*GetExecutionsByTypeRequest)(nil), // 46: ml_metadata.GetExecutionsByTypeRequest - (*GetExecutionsByTypeResponse)(nil), // 47: ml_metadata.GetExecutionsByTypeResponse - (*GetExecutionByTypeAndNameRequest)(nil), // 48: ml_metadata.GetExecutionByTypeAndNameRequest - (*GetExecutionByTypeAndNameResponse)(nil), // 49: ml_metadata.GetExecutionByTypeAndNameResponse - (*GetExecutionsByIDRequest)(nil), // 50: ml_metadata.GetExecutionsByIDRequest - (*GetExecutionsByIDResponse)(nil), // 51: ml_metadata.GetExecutionsByIDResponse - (*GetExecutionTypeRequest)(nil), // 52: ml_metadata.GetExecutionTypeRequest - (*GetExecutionTypeResponse)(nil), // 53: ml_metadata.GetExecutionTypeResponse - (*GetEventsByExecutionIDsRequest)(nil), // 54: ml_metadata.GetEventsByExecutionIDsRequest - (*GetEventsByExecutionIDsResponse)(nil), // 55: ml_metadata.GetEventsByExecutionIDsResponse - (*GetEventsByArtifactIDsRequest)(nil), // 56: ml_metadata.GetEventsByArtifactIDsRequest - (*GetEventsByArtifactIDsResponse)(nil), // 57: ml_metadata.GetEventsByArtifactIDsResponse - (*GetArtifactTypesByIDRequest)(nil), // 58: ml_metadata.GetArtifactTypesByIDRequest - (*GetArtifactTypesByIDResponse)(nil), // 59: ml_metadata.GetArtifactTypesByIDResponse - (*GetExecutionTypesByIDRequest)(nil), // 60: ml_metadata.GetExecutionTypesByIDRequest - (*GetExecutionTypesByIDResponse)(nil), // 61: ml_metadata.GetExecutionTypesByIDResponse - (*GetContextTypeRequest)(nil), // 62: ml_metadata.GetContextTypeRequest - (*GetContextTypeResponse)(nil), // 63: ml_metadata.GetContextTypeResponse - (*GetContextTypesByIDRequest)(nil), // 64: ml_metadata.GetContextTypesByIDRequest - (*GetContextTypesByIDResponse)(nil), // 65: ml_metadata.GetContextTypesByIDResponse - (*GetContextsRequest)(nil), // 66: ml_metadata.GetContextsRequest - (*GetContextsResponse)(nil), // 67: ml_metadata.GetContextsResponse - (*GetContextsByTypeRequest)(nil), // 68: ml_metadata.GetContextsByTypeRequest - (*GetContextsByTypeResponse)(nil), // 69: 
ml_metadata.GetContextsByTypeResponse - (*GetContextByTypeAndNameRequest)(nil), // 70: ml_metadata.GetContextByTypeAndNameRequest - (*GetContextByTypeAndNameResponse)(nil), // 71: ml_metadata.GetContextByTypeAndNameResponse - (*GetContextsByIDRequest)(nil), // 72: ml_metadata.GetContextsByIDRequest - (*GetContextsByIDResponse)(nil), // 73: ml_metadata.GetContextsByIDResponse - (*GetContextsByArtifactRequest)(nil), // 74: ml_metadata.GetContextsByArtifactRequest - (*GetContextsByArtifactResponse)(nil), // 75: ml_metadata.GetContextsByArtifactResponse - (*GetContextsByExecutionRequest)(nil), // 76: ml_metadata.GetContextsByExecutionRequest - (*GetContextsByExecutionResponse)(nil), // 77: ml_metadata.GetContextsByExecutionResponse - (*GetParentContextsByContextRequest)(nil), // 78: ml_metadata.GetParentContextsByContextRequest - (*GetParentContextsByContextResponse)(nil), // 79: ml_metadata.GetParentContextsByContextResponse - (*GetChildrenContextsByContextRequest)(nil), // 80: ml_metadata.GetChildrenContextsByContextRequest - (*GetChildrenContextsByContextResponse)(nil), // 81: ml_metadata.GetChildrenContextsByContextResponse - (*GetArtifactsByContextRequest)(nil), // 82: ml_metadata.GetArtifactsByContextRequest - (*GetArtifactsByContextResponse)(nil), // 83: ml_metadata.GetArtifactsByContextResponse - (*GetExecutionsByContextRequest)(nil), // 84: ml_metadata.GetExecutionsByContextRequest - (*GetExecutionsByContextResponse)(nil), // 85: ml_metadata.GetExecutionsByContextResponse - (*GetLineageGraphRequest)(nil), // 86: ml_metadata.GetLineageGraphRequest - (*GetLineageGraphResponse)(nil), // 87: ml_metadata.GetLineageGraphResponse - nil, // 88: ml_metadata.ArtifactStructMap.PropertiesEntry - (*PutArtifactsRequest_Options)(nil), // 89: ml_metadata.PutArtifactsRequest.Options - (*PutExecutionRequest_ArtifactAndEvent)(nil), // 90: ml_metadata.PutExecutionRequest.ArtifactAndEvent - (*PutExecutionRequest_Options)(nil), // 91: ml_metadata.PutExecutionRequest.Options - (*Artifact)(nil), // 92: ml_metadata.Artifact - (*ArtifactType)(nil), // 93: ml_metadata.ArtifactType - (*Execution)(nil), // 94: ml_metadata.Execution - (*ExecutionType)(nil), // 95: ml_metadata.ExecutionType - (*Event)(nil), // 96: ml_metadata.Event - (*Context)(nil), // 97: ml_metadata.Context - (*ContextType)(nil), // 98: ml_metadata.ContextType - (*Attribution)(nil), // 99: ml_metadata.Attribution - (*Association)(nil), // 100: ml_metadata.Association - (*ParentContext)(nil), // 101: ml_metadata.ParentContext - (*ListOperationOptions)(nil), // 102: ml_metadata.ListOperationOptions - (*TransactionOptions)(nil), // 103: ml_metadata.TransactionOptions - (*LineageGraphQueryOptions)(nil), // 104: ml_metadata.LineageGraphQueryOptions - (*LineageGraph)(nil), // 105: ml_metadata.LineageGraph + (*ArtifactAndType)(nil), // 0: ml_metadata.ArtifactAndType + (*ArtifactStructMap)(nil), // 1: ml_metadata.ArtifactStructMap + (*ArtifactStructList)(nil), // 2: ml_metadata.ArtifactStructList + (*ArtifactStruct)(nil), // 3: ml_metadata.ArtifactStruct + (*PutArtifactsRequest)(nil), // 4: ml_metadata.PutArtifactsRequest + (*PutArtifactsResponse)(nil), // 5: ml_metadata.PutArtifactsResponse + (*PutArtifactTypeRequest)(nil), // 6: ml_metadata.PutArtifactTypeRequest + (*PutArtifactTypeResponse)(nil), // 7: ml_metadata.PutArtifactTypeResponse + (*PutExecutionsRequest)(nil), // 8: ml_metadata.PutExecutionsRequest + (*PutExecutionsResponse)(nil), // 9: ml_metadata.PutExecutionsResponse + (*PutExecutionTypeRequest)(nil), // 10: 
ml_metadata.PutExecutionTypeRequest + (*PutExecutionTypeResponse)(nil), // 11: ml_metadata.PutExecutionTypeResponse + (*PutEventsRequest)(nil), // 12: ml_metadata.PutEventsRequest + (*PutEventsResponse)(nil), // 13: ml_metadata.PutEventsResponse + (*PutExecutionRequest)(nil), // 14: ml_metadata.PutExecutionRequest + (*PutExecutionResponse)(nil), // 15: ml_metadata.PutExecutionResponse + (*PutLineageSubgraphRequest)(nil), // 16: ml_metadata.PutLineageSubgraphRequest + (*PutLineageSubgraphResponse)(nil), // 17: ml_metadata.PutLineageSubgraphResponse + (*PutTypesRequest)(nil), // 18: ml_metadata.PutTypesRequest + (*PutTypesResponse)(nil), // 19: ml_metadata.PutTypesResponse + (*PutContextTypeRequest)(nil), // 20: ml_metadata.PutContextTypeRequest + (*PutContextTypeResponse)(nil), // 21: ml_metadata.PutContextTypeResponse + (*PutContextsRequest)(nil), // 22: ml_metadata.PutContextsRequest + (*PutContextsResponse)(nil), // 23: ml_metadata.PutContextsResponse + (*PutAttributionsAndAssociationsRequest)(nil), // 24: ml_metadata.PutAttributionsAndAssociationsRequest + (*PutAttributionsAndAssociationsResponse)(nil), // 25: ml_metadata.PutAttributionsAndAssociationsResponse + (*PutParentContextsRequest)(nil), // 26: ml_metadata.PutParentContextsRequest + (*PutParentContextsResponse)(nil), // 27: ml_metadata.PutParentContextsResponse + (*GetArtifactsByTypeRequest)(nil), // 28: ml_metadata.GetArtifactsByTypeRequest + (*GetArtifactsByTypeResponse)(nil), // 29: ml_metadata.GetArtifactsByTypeResponse + (*GetArtifactByTypeAndNameRequest)(nil), // 30: ml_metadata.GetArtifactByTypeAndNameRequest + (*GetArtifactByTypeAndNameResponse)(nil), // 31: ml_metadata.GetArtifactByTypeAndNameResponse + (*GetArtifactsByIDRequest)(nil), // 32: ml_metadata.GetArtifactsByIDRequest + (*GetArtifactsByIDResponse)(nil), // 33: ml_metadata.GetArtifactsByIDResponse + (*GetArtifactsRequest)(nil), // 34: ml_metadata.GetArtifactsRequest + (*GetArtifactsResponse)(nil), // 35: ml_metadata.GetArtifactsResponse + (*GetArtifactsByURIRequest)(nil), // 36: ml_metadata.GetArtifactsByURIRequest + (*GetArtifactsByURIResponse)(nil), // 37: ml_metadata.GetArtifactsByURIResponse + (*GetExecutionsRequest)(nil), // 38: ml_metadata.GetExecutionsRequest + (*GetExecutionsResponse)(nil), // 39: ml_metadata.GetExecutionsResponse + (*GetArtifactTypeRequest)(nil), // 40: ml_metadata.GetArtifactTypeRequest + (*GetArtifactTypeResponse)(nil), // 41: ml_metadata.GetArtifactTypeResponse + (*GetArtifactTypesRequest)(nil), // 42: ml_metadata.GetArtifactTypesRequest + (*GetArtifactTypesResponse)(nil), // 43: ml_metadata.GetArtifactTypesResponse + (*GetExecutionTypesRequest)(nil), // 44: ml_metadata.GetExecutionTypesRequest + (*GetExecutionTypesResponse)(nil), // 45: ml_metadata.GetExecutionTypesResponse + (*GetContextTypesRequest)(nil), // 46: ml_metadata.GetContextTypesRequest + (*GetContextTypesResponse)(nil), // 47: ml_metadata.GetContextTypesResponse + (*GetArtifactsByExternalIdsRequest)(nil), // 48: ml_metadata.GetArtifactsByExternalIdsRequest + (*GetArtifactsByExternalIdsResponse)(nil), // 49: ml_metadata.GetArtifactsByExternalIdsResponse + (*GetExecutionsByExternalIdsRequest)(nil), // 50: ml_metadata.GetExecutionsByExternalIdsRequest + (*GetExecutionsByExternalIdsResponse)(nil), // 51: ml_metadata.GetExecutionsByExternalIdsResponse + (*GetContextsByExternalIdsRequest)(nil), // 52: ml_metadata.GetContextsByExternalIdsRequest + (*GetContextsByExternalIdsResponse)(nil), // 53: ml_metadata.GetContextsByExternalIdsResponse + 
(*GetArtifactTypesByExternalIdsRequest)(nil), // 54: ml_metadata.GetArtifactTypesByExternalIdsRequest + (*GetArtifactTypesByExternalIdsResponse)(nil), // 55: ml_metadata.GetArtifactTypesByExternalIdsResponse + (*GetExecutionTypesByExternalIdsRequest)(nil), // 56: ml_metadata.GetExecutionTypesByExternalIdsRequest + (*GetExecutionTypesByExternalIdsResponse)(nil), // 57: ml_metadata.GetExecutionTypesByExternalIdsResponse + (*GetContextTypesByExternalIdsRequest)(nil), // 58: ml_metadata.GetContextTypesByExternalIdsRequest + (*GetContextTypesByExternalIdsResponse)(nil), // 59: ml_metadata.GetContextTypesByExternalIdsResponse + (*GetExecutionsByTypeRequest)(nil), // 60: ml_metadata.GetExecutionsByTypeRequest + (*GetExecutionsByTypeResponse)(nil), // 61: ml_metadata.GetExecutionsByTypeResponse + (*GetExecutionByTypeAndNameRequest)(nil), // 62: ml_metadata.GetExecutionByTypeAndNameRequest + (*GetExecutionByTypeAndNameResponse)(nil), // 63: ml_metadata.GetExecutionByTypeAndNameResponse + (*GetExecutionsByIDRequest)(nil), // 64: ml_metadata.GetExecutionsByIDRequest + (*GetExecutionsByIDResponse)(nil), // 65: ml_metadata.GetExecutionsByIDResponse + (*GetExecutionTypeRequest)(nil), // 66: ml_metadata.GetExecutionTypeRequest + (*GetExecutionTypeResponse)(nil), // 67: ml_metadata.GetExecutionTypeResponse + (*GetEventsByExecutionIDsRequest)(nil), // 68: ml_metadata.GetEventsByExecutionIDsRequest + (*GetEventsByExecutionIDsResponse)(nil), // 69: ml_metadata.GetEventsByExecutionIDsResponse + (*GetEventsByArtifactIDsRequest)(nil), // 70: ml_metadata.GetEventsByArtifactIDsRequest + (*GetEventsByArtifactIDsResponse)(nil), // 71: ml_metadata.GetEventsByArtifactIDsResponse + (*GetArtifactTypesByIDRequest)(nil), // 72: ml_metadata.GetArtifactTypesByIDRequest + (*GetArtifactTypesByIDResponse)(nil), // 73: ml_metadata.GetArtifactTypesByIDResponse + (*GetExecutionTypesByIDRequest)(nil), // 74: ml_metadata.GetExecutionTypesByIDRequest + (*GetExecutionTypesByIDResponse)(nil), // 75: ml_metadata.GetExecutionTypesByIDResponse + (*GetContextTypeRequest)(nil), // 76: ml_metadata.GetContextTypeRequest + (*GetContextTypeResponse)(nil), // 77: ml_metadata.GetContextTypeResponse + (*GetContextTypesByIDRequest)(nil), // 78: ml_metadata.GetContextTypesByIDRequest + (*GetContextTypesByIDResponse)(nil), // 79: ml_metadata.GetContextTypesByIDResponse + (*GetContextsRequest)(nil), // 80: ml_metadata.GetContextsRequest + (*GetContextsResponse)(nil), // 81: ml_metadata.GetContextsResponse + (*GetContextsByTypeRequest)(nil), // 82: ml_metadata.GetContextsByTypeRequest + (*GetContextsByTypeResponse)(nil), // 83: ml_metadata.GetContextsByTypeResponse + (*GetContextByTypeAndNameRequest)(nil), // 84: ml_metadata.GetContextByTypeAndNameRequest + (*GetContextByTypeAndNameResponse)(nil), // 85: ml_metadata.GetContextByTypeAndNameResponse + (*GetContextsByIDRequest)(nil), // 86: ml_metadata.GetContextsByIDRequest + (*GetContextsByIDResponse)(nil), // 87: ml_metadata.GetContextsByIDResponse + (*GetContextsByArtifactRequest)(nil), // 88: ml_metadata.GetContextsByArtifactRequest + (*GetContextsByArtifactResponse)(nil), // 89: ml_metadata.GetContextsByArtifactResponse + (*GetContextsByExecutionRequest)(nil), // 90: ml_metadata.GetContextsByExecutionRequest + (*GetContextsByExecutionResponse)(nil), // 91: ml_metadata.GetContextsByExecutionResponse + (*GetParentContextsByContextRequest)(nil), // 92: ml_metadata.GetParentContextsByContextRequest + (*GetParentContextsByContextResponse)(nil), // 93: ml_metadata.GetParentContextsByContextResponse + 
(*GetChildrenContextsByContextRequest)(nil), // 94: ml_metadata.GetChildrenContextsByContextRequest + (*GetChildrenContextsByContextResponse)(nil), // 95: ml_metadata.GetChildrenContextsByContextResponse + (*GetParentContextsByContextsRequest)(nil), // 96: ml_metadata.GetParentContextsByContextsRequest + (*GetParentContextsByContextsResponse)(nil), // 97: ml_metadata.GetParentContextsByContextsResponse + (*GetChildrenContextsByContextsRequest)(nil), // 98: ml_metadata.GetChildrenContextsByContextsRequest + (*GetChildrenContextsByContextsResponse)(nil), // 99: ml_metadata.GetChildrenContextsByContextsResponse + (*GetArtifactsByContextRequest)(nil), // 100: ml_metadata.GetArtifactsByContextRequest + (*GetArtifactsByContextResponse)(nil), // 101: ml_metadata.GetArtifactsByContextResponse + (*GetExecutionsByContextRequest)(nil), // 102: ml_metadata.GetExecutionsByContextRequest + (*GetExecutionsByContextResponse)(nil), // 103: ml_metadata.GetExecutionsByContextResponse + (*GetLineageGraphRequest)(nil), // 104: ml_metadata.GetLineageGraphRequest + (*GetLineageGraphResponse)(nil), // 105: ml_metadata.GetLineageGraphResponse + (*GetLineageSubgraphRequest)(nil), // 106: ml_metadata.GetLineageSubgraphRequest + (*GetLineageSubgraphResponse)(nil), // 107: ml_metadata.GetLineageSubgraphResponse + nil, // 108: ml_metadata.ArtifactStructMap.PropertiesEntry + (*PutArtifactsRequest_Options)(nil), // 109: ml_metadata.PutArtifactsRequest.Options + (*PutExecutionRequest_ArtifactAndEvent)(nil), // 110: ml_metadata.PutExecutionRequest.ArtifactAndEvent + (*PutExecutionRequest_Options)(nil), // 111: ml_metadata.PutExecutionRequest.Options + (*PutLineageSubgraphRequest_EventEdge)(nil), // 112: ml_metadata.PutLineageSubgraphRequest.EventEdge + (*PutLineageSubgraphRequest_Options)(nil), // 113: ml_metadata.PutLineageSubgraphRequest.Options + (*GetParentContextsByContextsResponse_ParentContextsPerChild)(nil), // 114: ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild + nil, // 115: ml_metadata.GetParentContextsByContextsResponse.ContextsEntry + (*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent)(nil), // 116: ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent + nil, // 117: ml_metadata.GetChildrenContextsByContextsResponse.ContextsEntry + (*Artifact)(nil), // 118: ml_metadata.Artifact + (*ArtifactType)(nil), // 119: ml_metadata.ArtifactType + (*TransactionOptions)(nil), // 120: ml_metadata.TransactionOptions + (*fieldmaskpb.FieldMask)(nil), // 121: google.protobuf.FieldMask + (*Execution)(nil), // 122: ml_metadata.Execution + (*ExecutionType)(nil), // 123: ml_metadata.ExecutionType + (*Event)(nil), // 124: ml_metadata.Event + (*Context)(nil), // 125: ml_metadata.Context + (*ContextType)(nil), // 126: ml_metadata.ContextType + (*Attribution)(nil), // 127: ml_metadata.Attribution + (*Association)(nil), // 128: ml_metadata.Association + (*ParentContext)(nil), // 129: ml_metadata.ParentContext + (*ListOperationOptions)(nil), // 130: ml_metadata.ListOperationOptions + (*LineageGraphQueryOptions)(nil), // 131: ml_metadata.LineageGraphQueryOptions + (*LineageGraph)(nil), // 132: ml_metadata.LineageGraph + (*LineageSubgraphQueryOptions)(nil), // 133: ml_metadata.LineageSubgraphQueryOptions } var file_ml_metadata_proto_metadata_store_service_proto_depIdxs = []int32{ - 92, // 0: ml_metadata.ArtifactAndType.artifact:type_name -> ml_metadata.Artifact - 93, // 1: ml_metadata.ArtifactAndType.type:type_name -> ml_metadata.ArtifactType - 88, // 2: 
ml_metadata.ArtifactStructMap.properties:type_name -> ml_metadata.ArtifactStructMap.PropertiesEntry + 118, // 0: ml_metadata.ArtifactAndType.artifact:type_name -> ml_metadata.Artifact + 119, // 1: ml_metadata.ArtifactAndType.type:type_name -> ml_metadata.ArtifactType + 108, // 2: ml_metadata.ArtifactStructMap.properties:type_name -> ml_metadata.ArtifactStructMap.PropertiesEntry 3, // 3: ml_metadata.ArtifactStructList.elements:type_name -> ml_metadata.ArtifactStruct 0, // 4: ml_metadata.ArtifactStruct.artifact:type_name -> ml_metadata.ArtifactAndType 1, // 5: ml_metadata.ArtifactStruct.map:type_name -> ml_metadata.ArtifactStructMap 2, // 6: ml_metadata.ArtifactStruct.list:type_name -> ml_metadata.ArtifactStructList - 92, // 7: ml_metadata.PutArtifactsRequest.artifacts:type_name -> ml_metadata.Artifact - 89, // 8: ml_metadata.PutArtifactsRequest.options:type_name -> ml_metadata.PutArtifactsRequest.Options - 93, // 9: ml_metadata.PutArtifactTypeRequest.artifact_type:type_name -> ml_metadata.ArtifactType - 94, // 10: ml_metadata.PutExecutionsRequest.executions:type_name -> ml_metadata.Execution - 95, // 11: ml_metadata.PutExecutionTypeRequest.execution_type:type_name -> ml_metadata.ExecutionType - 96, // 12: ml_metadata.PutEventsRequest.events:type_name -> ml_metadata.Event - 94, // 13: ml_metadata.PutExecutionRequest.execution:type_name -> ml_metadata.Execution - 90, // 14: ml_metadata.PutExecutionRequest.artifact_event_pairs:type_name -> ml_metadata.PutExecutionRequest.ArtifactAndEvent - 97, // 15: ml_metadata.PutExecutionRequest.contexts:type_name -> ml_metadata.Context - 91, // 16: ml_metadata.PutExecutionRequest.options:type_name -> ml_metadata.PutExecutionRequest.Options - 93, // 17: ml_metadata.PutTypesRequest.artifact_types:type_name -> ml_metadata.ArtifactType - 95, // 18: ml_metadata.PutTypesRequest.execution_types:type_name -> ml_metadata.ExecutionType - 98, // 19: ml_metadata.PutTypesRequest.context_types:type_name -> ml_metadata.ContextType - 98, // 20: ml_metadata.PutContextTypeRequest.context_type:type_name -> ml_metadata.ContextType - 97, // 21: ml_metadata.PutContextsRequest.contexts:type_name -> ml_metadata.Context - 99, // 22: ml_metadata.PutAttributionsAndAssociationsRequest.attributions:type_name -> ml_metadata.Attribution - 100, // 23: ml_metadata.PutAttributionsAndAssociationsRequest.associations:type_name -> ml_metadata.Association - 101, // 24: ml_metadata.PutParentContextsRequest.parent_contexts:type_name -> ml_metadata.ParentContext - 102, // 25: ml_metadata.GetArtifactsByTypeRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 26: ml_metadata.GetArtifactsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 27: ml_metadata.GetArtifactsByTypeResponse.artifacts:type_name -> ml_metadata.Artifact - 103, // 28: ml_metadata.GetArtifactByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 29: ml_metadata.GetArtifactByTypeAndNameResponse.artifact:type_name -> ml_metadata.Artifact - 103, // 30: ml_metadata.GetArtifactsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 31: ml_metadata.GetArtifactsByIDResponse.artifacts:type_name -> ml_metadata.Artifact - 102, // 32: ml_metadata.GetArtifactsRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 33: ml_metadata.GetArtifactsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 34: ml_metadata.GetArtifactsResponse.artifacts:type_name -> ml_metadata.Artifact - 
103, // 35: ml_metadata.GetArtifactsByURIRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 36: ml_metadata.GetArtifactsByURIResponse.artifacts:type_name -> ml_metadata.Artifact - 102, // 37: ml_metadata.GetExecutionsRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 38: ml_metadata.GetExecutionsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 94, // 39: ml_metadata.GetExecutionsResponse.executions:type_name -> ml_metadata.Execution - 103, // 40: ml_metadata.GetArtifactTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 93, // 41: ml_metadata.GetArtifactTypeResponse.artifact_type:type_name -> ml_metadata.ArtifactType - 103, // 42: ml_metadata.GetArtifactTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 93, // 43: ml_metadata.GetArtifactTypesResponse.artifact_types:type_name -> ml_metadata.ArtifactType - 103, // 44: ml_metadata.GetExecutionTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 95, // 45: ml_metadata.GetExecutionTypesResponse.execution_types:type_name -> ml_metadata.ExecutionType - 103, // 46: ml_metadata.GetContextTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 98, // 47: ml_metadata.GetContextTypesResponse.context_types:type_name -> ml_metadata.ContextType - 102, // 48: ml_metadata.GetExecutionsByTypeRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 49: ml_metadata.GetExecutionsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 94, // 50: ml_metadata.GetExecutionsByTypeResponse.executions:type_name -> ml_metadata.Execution - 103, // 51: ml_metadata.GetExecutionByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 94, // 52: ml_metadata.GetExecutionByTypeAndNameResponse.execution:type_name -> ml_metadata.Execution - 103, // 53: ml_metadata.GetExecutionsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 94, // 54: ml_metadata.GetExecutionsByIDResponse.executions:type_name -> ml_metadata.Execution - 103, // 55: ml_metadata.GetExecutionTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 95, // 56: ml_metadata.GetExecutionTypeResponse.execution_type:type_name -> ml_metadata.ExecutionType - 103, // 57: ml_metadata.GetEventsByExecutionIDsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 96, // 58: ml_metadata.GetEventsByExecutionIDsResponse.events:type_name -> ml_metadata.Event - 103, // 59: ml_metadata.GetEventsByArtifactIDsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 96, // 60: ml_metadata.GetEventsByArtifactIDsResponse.events:type_name -> ml_metadata.Event - 103, // 61: ml_metadata.GetArtifactTypesByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 93, // 62: ml_metadata.GetArtifactTypesByIDResponse.artifact_types:type_name -> ml_metadata.ArtifactType - 103, // 63: ml_metadata.GetExecutionTypesByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 95, // 64: ml_metadata.GetExecutionTypesByIDResponse.execution_types:type_name -> ml_metadata.ExecutionType - 103, // 65: ml_metadata.GetContextTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 98, // 66: ml_metadata.GetContextTypeResponse.context_type:type_name -> ml_metadata.ContextType - 103, // 67: ml_metadata.GetContextTypesByIDRequest.transaction_options:type_name -> 
ml_metadata.TransactionOptions - 98, // 68: ml_metadata.GetContextTypesByIDResponse.context_types:type_name -> ml_metadata.ContextType - 102, // 69: ml_metadata.GetContextsRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 70: ml_metadata.GetContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 71: ml_metadata.GetContextsResponse.contexts:type_name -> ml_metadata.Context - 102, // 72: ml_metadata.GetContextsByTypeRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 73: ml_metadata.GetContextsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 74: ml_metadata.GetContextsByTypeResponse.contexts:type_name -> ml_metadata.Context - 103, // 75: ml_metadata.GetContextByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 76: ml_metadata.GetContextByTypeAndNameResponse.context:type_name -> ml_metadata.Context - 103, // 77: ml_metadata.GetContextsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 78: ml_metadata.GetContextsByIDResponse.contexts:type_name -> ml_metadata.Context - 103, // 79: ml_metadata.GetContextsByArtifactRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 80: ml_metadata.GetContextsByArtifactResponse.contexts:type_name -> ml_metadata.Context - 103, // 81: ml_metadata.GetContextsByExecutionRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 82: ml_metadata.GetContextsByExecutionResponse.contexts:type_name -> ml_metadata.Context - 103, // 83: ml_metadata.GetParentContextsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 84: ml_metadata.GetParentContextsByContextResponse.contexts:type_name -> ml_metadata.Context - 103, // 85: ml_metadata.GetChildrenContextsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 97, // 86: ml_metadata.GetChildrenContextsByContextResponse.contexts:type_name -> ml_metadata.Context - 102, // 87: ml_metadata.GetArtifactsByContextRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 88: ml_metadata.GetArtifactsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 92, // 89: ml_metadata.GetArtifactsByContextResponse.artifacts:type_name -> ml_metadata.Artifact - 102, // 90: ml_metadata.GetExecutionsByContextRequest.options:type_name -> ml_metadata.ListOperationOptions - 103, // 91: ml_metadata.GetExecutionsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 94, // 92: ml_metadata.GetExecutionsByContextResponse.executions:type_name -> ml_metadata.Execution - 103, // 93: ml_metadata.GetExecutionsByContextResponse.transaction_options:type_name -> ml_metadata.TransactionOptions - 104, // 94: ml_metadata.GetLineageGraphRequest.options:type_name -> ml_metadata.LineageGraphQueryOptions - 103, // 95: ml_metadata.GetLineageGraphRequest.transaction_options:type_name -> ml_metadata.TransactionOptions - 105, // 96: ml_metadata.GetLineageGraphResponse.subgraph:type_name -> ml_metadata.LineageGraph - 3, // 97: ml_metadata.ArtifactStructMap.PropertiesEntry.value:type_name -> ml_metadata.ArtifactStruct - 92, // 98: ml_metadata.PutExecutionRequest.ArtifactAndEvent.artifact:type_name -> ml_metadata.Artifact - 96, // 99: ml_metadata.PutExecutionRequest.ArtifactAndEvent.event:type_name -> ml_metadata.Event - 6, // 100: ml_metadata.MetadataStoreService.PutArtifactType:input_type -> 
ml_metadata.PutArtifactTypeRequest - 10, // 101: ml_metadata.MetadataStoreService.PutExecutionType:input_type -> ml_metadata.PutExecutionTypeRequest - 18, // 102: ml_metadata.MetadataStoreService.PutContextType:input_type -> ml_metadata.PutContextTypeRequest - 16, // 103: ml_metadata.MetadataStoreService.PutTypes:input_type -> ml_metadata.PutTypesRequest - 4, // 104: ml_metadata.MetadataStoreService.PutArtifacts:input_type -> ml_metadata.PutArtifactsRequest - 8, // 105: ml_metadata.MetadataStoreService.PutExecutions:input_type -> ml_metadata.PutExecutionsRequest - 12, // 106: ml_metadata.MetadataStoreService.PutEvents:input_type -> ml_metadata.PutEventsRequest - 14, // 107: ml_metadata.MetadataStoreService.PutExecution:input_type -> ml_metadata.PutExecutionRequest - 20, // 108: ml_metadata.MetadataStoreService.PutContexts:input_type -> ml_metadata.PutContextsRequest - 22, // 109: ml_metadata.MetadataStoreService.PutAttributionsAndAssociations:input_type -> ml_metadata.PutAttributionsAndAssociationsRequest - 24, // 110: ml_metadata.MetadataStoreService.PutParentContexts:input_type -> ml_metadata.PutParentContextsRequest - 38, // 111: ml_metadata.MetadataStoreService.GetArtifactType:input_type -> ml_metadata.GetArtifactTypeRequest - 58, // 112: ml_metadata.MetadataStoreService.GetArtifactTypesByID:input_type -> ml_metadata.GetArtifactTypesByIDRequest - 40, // 113: ml_metadata.MetadataStoreService.GetArtifactTypes:input_type -> ml_metadata.GetArtifactTypesRequest - 52, // 114: ml_metadata.MetadataStoreService.GetExecutionType:input_type -> ml_metadata.GetExecutionTypeRequest - 60, // 115: ml_metadata.MetadataStoreService.GetExecutionTypesByID:input_type -> ml_metadata.GetExecutionTypesByIDRequest - 42, // 116: ml_metadata.MetadataStoreService.GetExecutionTypes:input_type -> ml_metadata.GetExecutionTypesRequest - 62, // 117: ml_metadata.MetadataStoreService.GetContextType:input_type -> ml_metadata.GetContextTypeRequest - 64, // 118: ml_metadata.MetadataStoreService.GetContextTypesByID:input_type -> ml_metadata.GetContextTypesByIDRequest - 44, // 119: ml_metadata.MetadataStoreService.GetContextTypes:input_type -> ml_metadata.GetContextTypesRequest - 32, // 120: ml_metadata.MetadataStoreService.GetArtifacts:input_type -> ml_metadata.GetArtifactsRequest - 36, // 121: ml_metadata.MetadataStoreService.GetExecutions:input_type -> ml_metadata.GetExecutionsRequest - 66, // 122: ml_metadata.MetadataStoreService.GetContexts:input_type -> ml_metadata.GetContextsRequest - 30, // 123: ml_metadata.MetadataStoreService.GetArtifactsByID:input_type -> ml_metadata.GetArtifactsByIDRequest - 50, // 124: ml_metadata.MetadataStoreService.GetExecutionsByID:input_type -> ml_metadata.GetExecutionsByIDRequest - 72, // 125: ml_metadata.MetadataStoreService.GetContextsByID:input_type -> ml_metadata.GetContextsByIDRequest - 26, // 126: ml_metadata.MetadataStoreService.GetArtifactsByType:input_type -> ml_metadata.GetArtifactsByTypeRequest - 46, // 127: ml_metadata.MetadataStoreService.GetExecutionsByType:input_type -> ml_metadata.GetExecutionsByTypeRequest - 68, // 128: ml_metadata.MetadataStoreService.GetContextsByType:input_type -> ml_metadata.GetContextsByTypeRequest - 28, // 129: ml_metadata.MetadataStoreService.GetArtifactByTypeAndName:input_type -> ml_metadata.GetArtifactByTypeAndNameRequest - 48, // 130: ml_metadata.MetadataStoreService.GetExecutionByTypeAndName:input_type -> ml_metadata.GetExecutionByTypeAndNameRequest - 70, // 131: ml_metadata.MetadataStoreService.GetContextByTypeAndName:input_type -> 
ml_metadata.GetContextByTypeAndNameRequest - 34, // 132: ml_metadata.MetadataStoreService.GetArtifactsByURI:input_type -> ml_metadata.GetArtifactsByURIRequest - 54, // 133: ml_metadata.MetadataStoreService.GetEventsByExecutionIDs:input_type -> ml_metadata.GetEventsByExecutionIDsRequest - 56, // 134: ml_metadata.MetadataStoreService.GetEventsByArtifactIDs:input_type -> ml_metadata.GetEventsByArtifactIDsRequest - 74, // 135: ml_metadata.MetadataStoreService.GetContextsByArtifact:input_type -> ml_metadata.GetContextsByArtifactRequest - 76, // 136: ml_metadata.MetadataStoreService.GetContextsByExecution:input_type -> ml_metadata.GetContextsByExecutionRequest - 78, // 137: ml_metadata.MetadataStoreService.GetParentContextsByContext:input_type -> ml_metadata.GetParentContextsByContextRequest - 80, // 138: ml_metadata.MetadataStoreService.GetChildrenContextsByContext:input_type -> ml_metadata.GetChildrenContextsByContextRequest - 82, // 139: ml_metadata.MetadataStoreService.GetArtifactsByContext:input_type -> ml_metadata.GetArtifactsByContextRequest - 84, // 140: ml_metadata.MetadataStoreService.GetExecutionsByContext:input_type -> ml_metadata.GetExecutionsByContextRequest - 86, // 141: ml_metadata.MetadataStoreService.GetLineageGraph:input_type -> ml_metadata.GetLineageGraphRequest - 7, // 142: ml_metadata.MetadataStoreService.PutArtifactType:output_type -> ml_metadata.PutArtifactTypeResponse - 11, // 143: ml_metadata.MetadataStoreService.PutExecutionType:output_type -> ml_metadata.PutExecutionTypeResponse - 19, // 144: ml_metadata.MetadataStoreService.PutContextType:output_type -> ml_metadata.PutContextTypeResponse - 17, // 145: ml_metadata.MetadataStoreService.PutTypes:output_type -> ml_metadata.PutTypesResponse - 5, // 146: ml_metadata.MetadataStoreService.PutArtifacts:output_type -> ml_metadata.PutArtifactsResponse - 9, // 147: ml_metadata.MetadataStoreService.PutExecutions:output_type -> ml_metadata.PutExecutionsResponse - 13, // 148: ml_metadata.MetadataStoreService.PutEvents:output_type -> ml_metadata.PutEventsResponse - 15, // 149: ml_metadata.MetadataStoreService.PutExecution:output_type -> ml_metadata.PutExecutionResponse - 21, // 150: ml_metadata.MetadataStoreService.PutContexts:output_type -> ml_metadata.PutContextsResponse - 23, // 151: ml_metadata.MetadataStoreService.PutAttributionsAndAssociations:output_type -> ml_metadata.PutAttributionsAndAssociationsResponse - 25, // 152: ml_metadata.MetadataStoreService.PutParentContexts:output_type -> ml_metadata.PutParentContextsResponse - 39, // 153: ml_metadata.MetadataStoreService.GetArtifactType:output_type -> ml_metadata.GetArtifactTypeResponse - 59, // 154: ml_metadata.MetadataStoreService.GetArtifactTypesByID:output_type -> ml_metadata.GetArtifactTypesByIDResponse - 41, // 155: ml_metadata.MetadataStoreService.GetArtifactTypes:output_type -> ml_metadata.GetArtifactTypesResponse - 53, // 156: ml_metadata.MetadataStoreService.GetExecutionType:output_type -> ml_metadata.GetExecutionTypeResponse - 61, // 157: ml_metadata.MetadataStoreService.GetExecutionTypesByID:output_type -> ml_metadata.GetExecutionTypesByIDResponse - 43, // 158: ml_metadata.MetadataStoreService.GetExecutionTypes:output_type -> ml_metadata.GetExecutionTypesResponse - 63, // 159: ml_metadata.MetadataStoreService.GetContextType:output_type -> ml_metadata.GetContextTypeResponse - 65, // 160: ml_metadata.MetadataStoreService.GetContextTypesByID:output_type -> ml_metadata.GetContextTypesByIDResponse - 45, // 161: ml_metadata.MetadataStoreService.GetContextTypes:output_type 
-> ml_metadata.GetContextTypesResponse - 33, // 162: ml_metadata.MetadataStoreService.GetArtifacts:output_type -> ml_metadata.GetArtifactsResponse - 37, // 163: ml_metadata.MetadataStoreService.GetExecutions:output_type -> ml_metadata.GetExecutionsResponse - 67, // 164: ml_metadata.MetadataStoreService.GetContexts:output_type -> ml_metadata.GetContextsResponse - 31, // 165: ml_metadata.MetadataStoreService.GetArtifactsByID:output_type -> ml_metadata.GetArtifactsByIDResponse - 51, // 166: ml_metadata.MetadataStoreService.GetExecutionsByID:output_type -> ml_metadata.GetExecutionsByIDResponse - 73, // 167: ml_metadata.MetadataStoreService.GetContextsByID:output_type -> ml_metadata.GetContextsByIDResponse - 27, // 168: ml_metadata.MetadataStoreService.GetArtifactsByType:output_type -> ml_metadata.GetArtifactsByTypeResponse - 47, // 169: ml_metadata.MetadataStoreService.GetExecutionsByType:output_type -> ml_metadata.GetExecutionsByTypeResponse - 69, // 170: ml_metadata.MetadataStoreService.GetContextsByType:output_type -> ml_metadata.GetContextsByTypeResponse - 29, // 171: ml_metadata.MetadataStoreService.GetArtifactByTypeAndName:output_type -> ml_metadata.GetArtifactByTypeAndNameResponse - 49, // 172: ml_metadata.MetadataStoreService.GetExecutionByTypeAndName:output_type -> ml_metadata.GetExecutionByTypeAndNameResponse - 71, // 173: ml_metadata.MetadataStoreService.GetContextByTypeAndName:output_type -> ml_metadata.GetContextByTypeAndNameResponse - 35, // 174: ml_metadata.MetadataStoreService.GetArtifactsByURI:output_type -> ml_metadata.GetArtifactsByURIResponse - 55, // 175: ml_metadata.MetadataStoreService.GetEventsByExecutionIDs:output_type -> ml_metadata.GetEventsByExecutionIDsResponse - 57, // 176: ml_metadata.MetadataStoreService.GetEventsByArtifactIDs:output_type -> ml_metadata.GetEventsByArtifactIDsResponse - 75, // 177: ml_metadata.MetadataStoreService.GetContextsByArtifact:output_type -> ml_metadata.GetContextsByArtifactResponse - 77, // 178: ml_metadata.MetadataStoreService.GetContextsByExecution:output_type -> ml_metadata.GetContextsByExecutionResponse - 79, // 179: ml_metadata.MetadataStoreService.GetParentContextsByContext:output_type -> ml_metadata.GetParentContextsByContextResponse - 81, // 180: ml_metadata.MetadataStoreService.GetChildrenContextsByContext:output_type -> ml_metadata.GetChildrenContextsByContextResponse - 83, // 181: ml_metadata.MetadataStoreService.GetArtifactsByContext:output_type -> ml_metadata.GetArtifactsByContextResponse - 85, // 182: ml_metadata.MetadataStoreService.GetExecutionsByContext:output_type -> ml_metadata.GetExecutionsByContextResponse - 87, // 183: ml_metadata.MetadataStoreService.GetLineageGraph:output_type -> ml_metadata.GetLineageGraphResponse - 142, // [142:184] is the sub-list for method output_type - 100, // [100:142] is the sub-list for method input_type - 100, // [100:100] is the sub-list for extension type_name - 100, // [100:100] is the sub-list for extension extendee - 0, // [0:100] is the sub-list for field type_name + 118, // 7: ml_metadata.PutArtifactsRequest.artifacts:type_name -> ml_metadata.Artifact + 109, // 8: ml_metadata.PutArtifactsRequest.options:type_name -> ml_metadata.PutArtifactsRequest.Options + 120, // 9: ml_metadata.PutArtifactsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 121, // 10: ml_metadata.PutArtifactsRequest.update_mask:type_name -> google.protobuf.FieldMask + 119, // 11: ml_metadata.PutArtifactTypeRequest.artifact_type:type_name -> ml_metadata.ArtifactType + 120, // 12: 
ml_metadata.PutArtifactTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 13: ml_metadata.PutExecutionsRequest.executions:type_name -> ml_metadata.Execution + 120, // 14: ml_metadata.PutExecutionsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 121, // 15: ml_metadata.PutExecutionsRequest.update_mask:type_name -> google.protobuf.FieldMask + 123, // 16: ml_metadata.PutExecutionTypeRequest.execution_type:type_name -> ml_metadata.ExecutionType + 120, // 17: ml_metadata.PutExecutionTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 124, // 18: ml_metadata.PutEventsRequest.events:type_name -> ml_metadata.Event + 120, // 19: ml_metadata.PutEventsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 20: ml_metadata.PutExecutionRequest.execution:type_name -> ml_metadata.Execution + 110, // 21: ml_metadata.PutExecutionRequest.artifact_event_pairs:type_name -> ml_metadata.PutExecutionRequest.ArtifactAndEvent + 125, // 22: ml_metadata.PutExecutionRequest.contexts:type_name -> ml_metadata.Context + 111, // 23: ml_metadata.PutExecutionRequest.options:type_name -> ml_metadata.PutExecutionRequest.Options + 120, // 24: ml_metadata.PutExecutionRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 25: ml_metadata.PutLineageSubgraphRequest.executions:type_name -> ml_metadata.Execution + 118, // 26: ml_metadata.PutLineageSubgraphRequest.artifacts:type_name -> ml_metadata.Artifact + 125, // 27: ml_metadata.PutLineageSubgraphRequest.contexts:type_name -> ml_metadata.Context + 112, // 28: ml_metadata.PutLineageSubgraphRequest.event_edges:type_name -> ml_metadata.PutLineageSubgraphRequest.EventEdge + 113, // 29: ml_metadata.PutLineageSubgraphRequest.options:type_name -> ml_metadata.PutLineageSubgraphRequest.Options + 120, // 30: ml_metadata.PutLineageSubgraphRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 119, // 31: ml_metadata.PutTypesRequest.artifact_types:type_name -> ml_metadata.ArtifactType + 123, // 32: ml_metadata.PutTypesRequest.execution_types:type_name -> ml_metadata.ExecutionType + 126, // 33: ml_metadata.PutTypesRequest.context_types:type_name -> ml_metadata.ContextType + 120, // 34: ml_metadata.PutTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 126, // 35: ml_metadata.PutContextTypeRequest.context_type:type_name -> ml_metadata.ContextType + 120, // 36: ml_metadata.PutContextTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 37: ml_metadata.PutContextsRequest.contexts:type_name -> ml_metadata.Context + 120, // 38: ml_metadata.PutContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 121, // 39: ml_metadata.PutContextsRequest.update_mask:type_name -> google.protobuf.FieldMask + 127, // 40: ml_metadata.PutAttributionsAndAssociationsRequest.attributions:type_name -> ml_metadata.Attribution + 128, // 41: ml_metadata.PutAttributionsAndAssociationsRequest.associations:type_name -> ml_metadata.Association + 120, // 42: ml_metadata.PutAttributionsAndAssociationsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 129, // 43: ml_metadata.PutParentContextsRequest.parent_contexts:type_name -> ml_metadata.ParentContext + 120, // 44: ml_metadata.PutParentContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 130, // 45: ml_metadata.GetArtifactsByTypeRequest.options:type_name -> 
ml_metadata.ListOperationOptions + 120, // 46: ml_metadata.GetArtifactsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 47: ml_metadata.GetArtifactsByTypeResponse.artifacts:type_name -> ml_metadata.Artifact + 120, // 48: ml_metadata.GetArtifactByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 49: ml_metadata.GetArtifactByTypeAndNameResponse.artifact:type_name -> ml_metadata.Artifact + 120, // 50: ml_metadata.GetArtifactsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 51: ml_metadata.GetArtifactsByIDResponse.artifacts:type_name -> ml_metadata.Artifact + 119, // 52: ml_metadata.GetArtifactsByIDResponse.artifact_types:type_name -> ml_metadata.ArtifactType + 130, // 53: ml_metadata.GetArtifactsRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 54: ml_metadata.GetArtifactsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 55: ml_metadata.GetArtifactsResponse.artifacts:type_name -> ml_metadata.Artifact + 120, // 56: ml_metadata.GetArtifactsByURIRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 57: ml_metadata.GetArtifactsByURIResponse.artifacts:type_name -> ml_metadata.Artifact + 130, // 58: ml_metadata.GetExecutionsRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 59: ml_metadata.GetExecutionsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 60: ml_metadata.GetExecutionsResponse.executions:type_name -> ml_metadata.Execution + 120, // 61: ml_metadata.GetArtifactTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 119, // 62: ml_metadata.GetArtifactTypeResponse.artifact_type:type_name -> ml_metadata.ArtifactType + 120, // 63: ml_metadata.GetArtifactTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 119, // 64: ml_metadata.GetArtifactTypesResponse.artifact_types:type_name -> ml_metadata.ArtifactType + 120, // 65: ml_metadata.GetExecutionTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 123, // 66: ml_metadata.GetExecutionTypesResponse.execution_types:type_name -> ml_metadata.ExecutionType + 120, // 67: ml_metadata.GetContextTypesRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 126, // 68: ml_metadata.GetContextTypesResponse.context_types:type_name -> ml_metadata.ContextType + 120, // 69: ml_metadata.GetArtifactsByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 70: ml_metadata.GetArtifactsByExternalIdsResponse.artifacts:type_name -> ml_metadata.Artifact + 120, // 71: ml_metadata.GetExecutionsByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 72: ml_metadata.GetExecutionsByExternalIdsResponse.executions:type_name -> ml_metadata.Execution + 120, // 73: ml_metadata.GetContextsByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 74: ml_metadata.GetContextsByExternalIdsResponse.contexts:type_name -> ml_metadata.Context + 120, // 75: ml_metadata.GetArtifactTypesByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 119, // 76: ml_metadata.GetArtifactTypesByExternalIdsResponse.artifact_types:type_name -> ml_metadata.ArtifactType + 120, // 77: ml_metadata.GetExecutionTypesByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 123, // 78: 
ml_metadata.GetExecutionTypesByExternalIdsResponse.execution_types:type_name -> ml_metadata.ExecutionType + 120, // 79: ml_metadata.GetContextTypesByExternalIdsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 126, // 80: ml_metadata.GetContextTypesByExternalIdsResponse.context_types:type_name -> ml_metadata.ContextType + 130, // 81: ml_metadata.GetExecutionsByTypeRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 82: ml_metadata.GetExecutionsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 83: ml_metadata.GetExecutionsByTypeResponse.executions:type_name -> ml_metadata.Execution + 120, // 84: ml_metadata.GetExecutionByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 85: ml_metadata.GetExecutionByTypeAndNameResponse.execution:type_name -> ml_metadata.Execution + 120, // 86: ml_metadata.GetExecutionsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 87: ml_metadata.GetExecutionsByIDResponse.executions:type_name -> ml_metadata.Execution + 120, // 88: ml_metadata.GetExecutionTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 123, // 89: ml_metadata.GetExecutionTypeResponse.execution_type:type_name -> ml_metadata.ExecutionType + 120, // 90: ml_metadata.GetEventsByExecutionIDsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 124, // 91: ml_metadata.GetEventsByExecutionIDsResponse.events:type_name -> ml_metadata.Event + 120, // 92: ml_metadata.GetEventsByArtifactIDsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 124, // 93: ml_metadata.GetEventsByArtifactIDsResponse.events:type_name -> ml_metadata.Event + 120, // 94: ml_metadata.GetArtifactTypesByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 119, // 95: ml_metadata.GetArtifactTypesByIDResponse.artifact_types:type_name -> ml_metadata.ArtifactType + 120, // 96: ml_metadata.GetExecutionTypesByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 123, // 97: ml_metadata.GetExecutionTypesByIDResponse.execution_types:type_name -> ml_metadata.ExecutionType + 120, // 98: ml_metadata.GetContextTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 126, // 99: ml_metadata.GetContextTypeResponse.context_type:type_name -> ml_metadata.ContextType + 120, // 100: ml_metadata.GetContextTypesByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 126, // 101: ml_metadata.GetContextTypesByIDResponse.context_types:type_name -> ml_metadata.ContextType + 130, // 102: ml_metadata.GetContextsRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 103: ml_metadata.GetContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 104: ml_metadata.GetContextsResponse.contexts:type_name -> ml_metadata.Context + 130, // 105: ml_metadata.GetContextsByTypeRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 106: ml_metadata.GetContextsByTypeRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 107: ml_metadata.GetContextsByTypeResponse.contexts:type_name -> ml_metadata.Context + 120, // 108: ml_metadata.GetContextByTypeAndNameRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 109: ml_metadata.GetContextByTypeAndNameResponse.context:type_name -> ml_metadata.Context + 120, // 110: 
ml_metadata.GetContextsByIDRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 111: ml_metadata.GetContextsByIDResponse.contexts:type_name -> ml_metadata.Context + 120, // 112: ml_metadata.GetContextsByArtifactRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 113: ml_metadata.GetContextsByArtifactResponse.contexts:type_name -> ml_metadata.Context + 120, // 114: ml_metadata.GetContextsByExecutionRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 115: ml_metadata.GetContextsByExecutionResponse.contexts:type_name -> ml_metadata.Context + 120, // 116: ml_metadata.GetParentContextsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 117: ml_metadata.GetParentContextsByContextResponse.contexts:type_name -> ml_metadata.Context + 120, // 118: ml_metadata.GetChildrenContextsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 125, // 119: ml_metadata.GetChildrenContextsByContextResponse.contexts:type_name -> ml_metadata.Context + 120, // 120: ml_metadata.GetParentContextsByContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 115, // 121: ml_metadata.GetParentContextsByContextsResponse.contexts:type_name -> ml_metadata.GetParentContextsByContextsResponse.ContextsEntry + 120, // 122: ml_metadata.GetChildrenContextsByContextsRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 117, // 123: ml_metadata.GetChildrenContextsByContextsResponse.contexts:type_name -> ml_metadata.GetChildrenContextsByContextsResponse.ContextsEntry + 130, // 124: ml_metadata.GetArtifactsByContextRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 125: ml_metadata.GetArtifactsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 118, // 126: ml_metadata.GetArtifactsByContextResponse.artifacts:type_name -> ml_metadata.Artifact + 130, // 127: ml_metadata.GetExecutionsByContextRequest.options:type_name -> ml_metadata.ListOperationOptions + 120, // 128: ml_metadata.GetExecutionsByContextRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 122, // 129: ml_metadata.GetExecutionsByContextResponse.executions:type_name -> ml_metadata.Execution + 120, // 130: ml_metadata.GetExecutionsByContextResponse.transaction_options:type_name -> ml_metadata.TransactionOptions + 131, // 131: ml_metadata.GetLineageGraphRequest.options:type_name -> ml_metadata.LineageGraphQueryOptions + 120, // 132: ml_metadata.GetLineageGraphRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 132, // 133: ml_metadata.GetLineageGraphResponse.subgraph:type_name -> ml_metadata.LineageGraph + 133, // 134: ml_metadata.GetLineageSubgraphRequest.lineage_subgraph_query_options:type_name -> ml_metadata.LineageSubgraphQueryOptions + 121, // 135: ml_metadata.GetLineageSubgraphRequest.read_mask:type_name -> google.protobuf.FieldMask + 120, // 136: ml_metadata.GetLineageSubgraphRequest.transaction_options:type_name -> ml_metadata.TransactionOptions + 132, // 137: ml_metadata.GetLineageSubgraphResponse.lineage_subgraph:type_name -> ml_metadata.LineageGraph + 3, // 138: ml_metadata.ArtifactStructMap.PropertiesEntry.value:type_name -> ml_metadata.ArtifactStruct + 118, // 139: ml_metadata.PutExecutionRequest.ArtifactAndEvent.artifact:type_name -> ml_metadata.Artifact + 124, // 140: ml_metadata.PutExecutionRequest.ArtifactAndEvent.event:type_name -> ml_metadata.Event + 124, // 141: 
ml_metadata.PutLineageSubgraphRequest.EventEdge.event:type_name -> ml_metadata.Event + 125, // 142: ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild.parent_contexts:type_name -> ml_metadata.Context + 114, // 143: ml_metadata.GetParentContextsByContextsResponse.ContextsEntry.value:type_name -> ml_metadata.GetParentContextsByContextsResponse.ParentContextsPerChild + 125, // 144: ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent.children_contexts:type_name -> ml_metadata.Context + 116, // 145: ml_metadata.GetChildrenContextsByContextsResponse.ContextsEntry.value:type_name -> ml_metadata.GetChildrenContextsByContextsResponse.ChildrenContextsPerParent + 6, // 146: ml_metadata.MetadataStoreService.PutArtifactType:input_type -> ml_metadata.PutArtifactTypeRequest + 10, // 147: ml_metadata.MetadataStoreService.PutExecutionType:input_type -> ml_metadata.PutExecutionTypeRequest + 20, // 148: ml_metadata.MetadataStoreService.PutContextType:input_type -> ml_metadata.PutContextTypeRequest + 18, // 149: ml_metadata.MetadataStoreService.PutTypes:input_type -> ml_metadata.PutTypesRequest + 4, // 150: ml_metadata.MetadataStoreService.PutArtifacts:input_type -> ml_metadata.PutArtifactsRequest + 8, // 151: ml_metadata.MetadataStoreService.PutExecutions:input_type -> ml_metadata.PutExecutionsRequest + 12, // 152: ml_metadata.MetadataStoreService.PutEvents:input_type -> ml_metadata.PutEventsRequest + 14, // 153: ml_metadata.MetadataStoreService.PutExecution:input_type -> ml_metadata.PutExecutionRequest + 16, // 154: ml_metadata.MetadataStoreService.PutLineageSubgraph:input_type -> ml_metadata.PutLineageSubgraphRequest + 22, // 155: ml_metadata.MetadataStoreService.PutContexts:input_type -> ml_metadata.PutContextsRequest + 24, // 156: ml_metadata.MetadataStoreService.PutAttributionsAndAssociations:input_type -> ml_metadata.PutAttributionsAndAssociationsRequest + 26, // 157: ml_metadata.MetadataStoreService.PutParentContexts:input_type -> ml_metadata.PutParentContextsRequest + 40, // 158: ml_metadata.MetadataStoreService.GetArtifactType:input_type -> ml_metadata.GetArtifactTypeRequest + 72, // 159: ml_metadata.MetadataStoreService.GetArtifactTypesByID:input_type -> ml_metadata.GetArtifactTypesByIDRequest + 42, // 160: ml_metadata.MetadataStoreService.GetArtifactTypes:input_type -> ml_metadata.GetArtifactTypesRequest + 66, // 161: ml_metadata.MetadataStoreService.GetExecutionType:input_type -> ml_metadata.GetExecutionTypeRequest + 74, // 162: ml_metadata.MetadataStoreService.GetExecutionTypesByID:input_type -> ml_metadata.GetExecutionTypesByIDRequest + 44, // 163: ml_metadata.MetadataStoreService.GetExecutionTypes:input_type -> ml_metadata.GetExecutionTypesRequest + 76, // 164: ml_metadata.MetadataStoreService.GetContextType:input_type -> ml_metadata.GetContextTypeRequest + 78, // 165: ml_metadata.MetadataStoreService.GetContextTypesByID:input_type -> ml_metadata.GetContextTypesByIDRequest + 46, // 166: ml_metadata.MetadataStoreService.GetContextTypes:input_type -> ml_metadata.GetContextTypesRequest + 34, // 167: ml_metadata.MetadataStoreService.GetArtifacts:input_type -> ml_metadata.GetArtifactsRequest + 38, // 168: ml_metadata.MetadataStoreService.GetExecutions:input_type -> ml_metadata.GetExecutionsRequest + 80, // 169: ml_metadata.MetadataStoreService.GetContexts:input_type -> ml_metadata.GetContextsRequest + 32, // 170: ml_metadata.MetadataStoreService.GetArtifactsByID:input_type -> ml_metadata.GetArtifactsByIDRequest + 64, // 171: 
ml_metadata.MetadataStoreService.GetExecutionsByID:input_type -> ml_metadata.GetExecutionsByIDRequest + 86, // 172: ml_metadata.MetadataStoreService.GetContextsByID:input_type -> ml_metadata.GetContextsByIDRequest + 28, // 173: ml_metadata.MetadataStoreService.GetArtifactsByType:input_type -> ml_metadata.GetArtifactsByTypeRequest + 60, // 174: ml_metadata.MetadataStoreService.GetExecutionsByType:input_type -> ml_metadata.GetExecutionsByTypeRequest + 82, // 175: ml_metadata.MetadataStoreService.GetContextsByType:input_type -> ml_metadata.GetContextsByTypeRequest + 30, // 176: ml_metadata.MetadataStoreService.GetArtifactByTypeAndName:input_type -> ml_metadata.GetArtifactByTypeAndNameRequest + 62, // 177: ml_metadata.MetadataStoreService.GetExecutionByTypeAndName:input_type -> ml_metadata.GetExecutionByTypeAndNameRequest + 84, // 178: ml_metadata.MetadataStoreService.GetContextByTypeAndName:input_type -> ml_metadata.GetContextByTypeAndNameRequest + 36, // 179: ml_metadata.MetadataStoreService.GetArtifactsByURI:input_type -> ml_metadata.GetArtifactsByURIRequest + 68, // 180: ml_metadata.MetadataStoreService.GetEventsByExecutionIDs:input_type -> ml_metadata.GetEventsByExecutionIDsRequest + 70, // 181: ml_metadata.MetadataStoreService.GetEventsByArtifactIDs:input_type -> ml_metadata.GetEventsByArtifactIDsRequest + 48, // 182: ml_metadata.MetadataStoreService.GetArtifactsByExternalIds:input_type -> ml_metadata.GetArtifactsByExternalIdsRequest + 50, // 183: ml_metadata.MetadataStoreService.GetExecutionsByExternalIds:input_type -> ml_metadata.GetExecutionsByExternalIdsRequest + 52, // 184: ml_metadata.MetadataStoreService.GetContextsByExternalIds:input_type -> ml_metadata.GetContextsByExternalIdsRequest + 54, // 185: ml_metadata.MetadataStoreService.GetArtifactTypesByExternalIds:input_type -> ml_metadata.GetArtifactTypesByExternalIdsRequest + 56, // 186: ml_metadata.MetadataStoreService.GetExecutionTypesByExternalIds:input_type -> ml_metadata.GetExecutionTypesByExternalIdsRequest + 58, // 187: ml_metadata.MetadataStoreService.GetContextTypesByExternalIds:input_type -> ml_metadata.GetContextTypesByExternalIdsRequest + 88, // 188: ml_metadata.MetadataStoreService.GetContextsByArtifact:input_type -> ml_metadata.GetContextsByArtifactRequest + 90, // 189: ml_metadata.MetadataStoreService.GetContextsByExecution:input_type -> ml_metadata.GetContextsByExecutionRequest + 92, // 190: ml_metadata.MetadataStoreService.GetParentContextsByContext:input_type -> ml_metadata.GetParentContextsByContextRequest + 94, // 191: ml_metadata.MetadataStoreService.GetChildrenContextsByContext:input_type -> ml_metadata.GetChildrenContextsByContextRequest + 96, // 192: ml_metadata.MetadataStoreService.GetParentContextsByContexts:input_type -> ml_metadata.GetParentContextsByContextsRequest + 98, // 193: ml_metadata.MetadataStoreService.GetChildrenContextsByContexts:input_type -> ml_metadata.GetChildrenContextsByContextsRequest + 100, // 194: ml_metadata.MetadataStoreService.GetArtifactsByContext:input_type -> ml_metadata.GetArtifactsByContextRequest + 102, // 195: ml_metadata.MetadataStoreService.GetExecutionsByContext:input_type -> ml_metadata.GetExecutionsByContextRequest + 104, // 196: ml_metadata.MetadataStoreService.GetLineageGraph:input_type -> ml_metadata.GetLineageGraphRequest + 106, // 197: ml_metadata.MetadataStoreService.GetLineageSubgraph:input_type -> ml_metadata.GetLineageSubgraphRequest + 7, // 198: ml_metadata.MetadataStoreService.PutArtifactType:output_type -> ml_metadata.PutArtifactTypeResponse + 11, // 199: 
ml_metadata.MetadataStoreService.PutExecutionType:output_type -> ml_metadata.PutExecutionTypeResponse + 21, // 200: ml_metadata.MetadataStoreService.PutContextType:output_type -> ml_metadata.PutContextTypeResponse + 19, // 201: ml_metadata.MetadataStoreService.PutTypes:output_type -> ml_metadata.PutTypesResponse + 5, // 202: ml_metadata.MetadataStoreService.PutArtifacts:output_type -> ml_metadata.PutArtifactsResponse + 9, // 203: ml_metadata.MetadataStoreService.PutExecutions:output_type -> ml_metadata.PutExecutionsResponse + 13, // 204: ml_metadata.MetadataStoreService.PutEvents:output_type -> ml_metadata.PutEventsResponse + 15, // 205: ml_metadata.MetadataStoreService.PutExecution:output_type -> ml_metadata.PutExecutionResponse + 17, // 206: ml_metadata.MetadataStoreService.PutLineageSubgraph:output_type -> ml_metadata.PutLineageSubgraphResponse + 23, // 207: ml_metadata.MetadataStoreService.PutContexts:output_type -> ml_metadata.PutContextsResponse + 25, // 208: ml_metadata.MetadataStoreService.PutAttributionsAndAssociations:output_type -> ml_metadata.PutAttributionsAndAssociationsResponse + 27, // 209: ml_metadata.MetadataStoreService.PutParentContexts:output_type -> ml_metadata.PutParentContextsResponse + 41, // 210: ml_metadata.MetadataStoreService.GetArtifactType:output_type -> ml_metadata.GetArtifactTypeResponse + 73, // 211: ml_metadata.MetadataStoreService.GetArtifactTypesByID:output_type -> ml_metadata.GetArtifactTypesByIDResponse + 43, // 212: ml_metadata.MetadataStoreService.GetArtifactTypes:output_type -> ml_metadata.GetArtifactTypesResponse + 67, // 213: ml_metadata.MetadataStoreService.GetExecutionType:output_type -> ml_metadata.GetExecutionTypeResponse + 75, // 214: ml_metadata.MetadataStoreService.GetExecutionTypesByID:output_type -> ml_metadata.GetExecutionTypesByIDResponse + 45, // 215: ml_metadata.MetadataStoreService.GetExecutionTypes:output_type -> ml_metadata.GetExecutionTypesResponse + 77, // 216: ml_metadata.MetadataStoreService.GetContextType:output_type -> ml_metadata.GetContextTypeResponse + 79, // 217: ml_metadata.MetadataStoreService.GetContextTypesByID:output_type -> ml_metadata.GetContextTypesByIDResponse + 47, // 218: ml_metadata.MetadataStoreService.GetContextTypes:output_type -> ml_metadata.GetContextTypesResponse + 35, // 219: ml_metadata.MetadataStoreService.GetArtifacts:output_type -> ml_metadata.GetArtifactsResponse + 39, // 220: ml_metadata.MetadataStoreService.GetExecutions:output_type -> ml_metadata.GetExecutionsResponse + 81, // 221: ml_metadata.MetadataStoreService.GetContexts:output_type -> ml_metadata.GetContextsResponse + 33, // 222: ml_metadata.MetadataStoreService.GetArtifactsByID:output_type -> ml_metadata.GetArtifactsByIDResponse + 65, // 223: ml_metadata.MetadataStoreService.GetExecutionsByID:output_type -> ml_metadata.GetExecutionsByIDResponse + 87, // 224: ml_metadata.MetadataStoreService.GetContextsByID:output_type -> ml_metadata.GetContextsByIDResponse + 29, // 225: ml_metadata.MetadataStoreService.GetArtifactsByType:output_type -> ml_metadata.GetArtifactsByTypeResponse + 61, // 226: ml_metadata.MetadataStoreService.GetExecutionsByType:output_type -> ml_metadata.GetExecutionsByTypeResponse + 83, // 227: ml_metadata.MetadataStoreService.GetContextsByType:output_type -> ml_metadata.GetContextsByTypeResponse + 31, // 228: ml_metadata.MetadataStoreService.GetArtifactByTypeAndName:output_type -> ml_metadata.GetArtifactByTypeAndNameResponse + 63, // 229: ml_metadata.MetadataStoreService.GetExecutionByTypeAndName:output_type -> 
ml_metadata.GetExecutionByTypeAndNameResponse + 85, // 230: ml_metadata.MetadataStoreService.GetContextByTypeAndName:output_type -> ml_metadata.GetContextByTypeAndNameResponse + 37, // 231: ml_metadata.MetadataStoreService.GetArtifactsByURI:output_type -> ml_metadata.GetArtifactsByURIResponse + 69, // 232: ml_metadata.MetadataStoreService.GetEventsByExecutionIDs:output_type -> ml_metadata.GetEventsByExecutionIDsResponse + 71, // 233: ml_metadata.MetadataStoreService.GetEventsByArtifactIDs:output_type -> ml_metadata.GetEventsByArtifactIDsResponse + 49, // 234: ml_metadata.MetadataStoreService.GetArtifactsByExternalIds:output_type -> ml_metadata.GetArtifactsByExternalIdsResponse + 51, // 235: ml_metadata.MetadataStoreService.GetExecutionsByExternalIds:output_type -> ml_metadata.GetExecutionsByExternalIdsResponse + 53, // 236: ml_metadata.MetadataStoreService.GetContextsByExternalIds:output_type -> ml_metadata.GetContextsByExternalIdsResponse + 55, // 237: ml_metadata.MetadataStoreService.GetArtifactTypesByExternalIds:output_type -> ml_metadata.GetArtifactTypesByExternalIdsResponse + 57, // 238: ml_metadata.MetadataStoreService.GetExecutionTypesByExternalIds:output_type -> ml_metadata.GetExecutionTypesByExternalIdsResponse + 59, // 239: ml_metadata.MetadataStoreService.GetContextTypesByExternalIds:output_type -> ml_metadata.GetContextTypesByExternalIdsResponse + 89, // 240: ml_metadata.MetadataStoreService.GetContextsByArtifact:output_type -> ml_metadata.GetContextsByArtifactResponse + 91, // 241: ml_metadata.MetadataStoreService.GetContextsByExecution:output_type -> ml_metadata.GetContextsByExecutionResponse + 93, // 242: ml_metadata.MetadataStoreService.GetParentContextsByContext:output_type -> ml_metadata.GetParentContextsByContextResponse + 95, // 243: ml_metadata.MetadataStoreService.GetChildrenContextsByContext:output_type -> ml_metadata.GetChildrenContextsByContextResponse + 97, // 244: ml_metadata.MetadataStoreService.GetParentContextsByContexts:output_type -> ml_metadata.GetParentContextsByContextsResponse + 99, // 245: ml_metadata.MetadataStoreService.GetChildrenContextsByContexts:output_type -> ml_metadata.GetChildrenContextsByContextsResponse + 101, // 246: ml_metadata.MetadataStoreService.GetArtifactsByContext:output_type -> ml_metadata.GetArtifactsByContextResponse + 103, // 247: ml_metadata.MetadataStoreService.GetExecutionsByContext:output_type -> ml_metadata.GetExecutionsByContextResponse + 105, // 248: ml_metadata.MetadataStoreService.GetLineageGraph:output_type -> ml_metadata.GetLineageGraphResponse + 107, // 249: ml_metadata.MetadataStoreService.GetLineageSubgraph:output_type -> ml_metadata.GetLineageSubgraphResponse + 198, // [198:250] is the sub-list for method output_type + 146, // [146:198] is the sub-list for method input_type + 146, // [146:146] is the sub-list for extension type_name + 146, // [146:146] is the sub-list for extension extendee + 0, // [0:146] is the sub-list for field type_name } func init() { file_ml_metadata_proto_metadata_store_service_proto_init() } @@ -6630,8 +8812,104 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ArtifactStructMap); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ArtifactStructMap); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return 
&v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ArtifactStructList); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ArtifactStruct); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutArtifactsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutArtifactsResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutArtifactTypeRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutArtifactTypeResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionsResponse); i { case 0: return &v.state case 1: @@ -6642,8 +8920,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ArtifactStructList); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionTypeRequest); i { case 0: return &v.state case 1: @@ -6654,8 +8932,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ArtifactStruct); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionTypeResponse); i { case 0: return &v.state case 1: @@ -6666,8 +8944,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutArtifactsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[12].Exporter = func(v 
interface{}, i int) interface{} { + switch v := v.(*PutEventsRequest); i { case 0: return &v.state case 1: @@ -6678,8 +8956,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutArtifactsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutEventsResponse); i { case 0: return &v.state case 1: @@ -6690,8 +8968,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutArtifactTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionRequest); i { case 0: return &v.state case 1: @@ -6702,8 +8980,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutArtifactTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionResponse); i { case 0: return &v.state case 1: @@ -6714,8 +8992,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutLineageSubgraphRequest); i { case 0: return &v.state case 1: @@ -6726,8 +9004,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutLineageSubgraphResponse); i { case 0: return &v.state case 1: @@ -6738,8 +9016,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutTypesRequest); i { case 0: return &v.state case 1: @@ -6750,8 +9028,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutTypesResponse); i { case 0: return &v.state case 1: @@ -6762,8 +9040,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[12].Exporter = func(v interface{}, 
i int) interface{} { - switch v := v.(*PutEventsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutContextTypeRequest); i { case 0: return &v.state case 1: @@ -6774,8 +9052,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutEventsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutContextTypeResponse); i { case 0: return &v.state case 1: @@ -6786,8 +9064,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutContextsRequest); i { case 0: return &v.state case 1: @@ -6798,8 +9076,200 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutContextsResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutAttributionsAndAssociationsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutAttributionsAndAssociationsResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutParentContextsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutParentContextsResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByTypeRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByTypeResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + 
file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactByTypeAndNameRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactByTypeAndNameResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByIDRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByIDResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByURIRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByURIResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsResponse); i { case 0: return &v.state case 1: @@ -6810,8 +9280,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutTypesRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypeRequest); i { case 0: return &v.state case 1: @@ -6822,8 +9292,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { - switch v 
:= v.(*PutTypesResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypeResponse); i { case 0: return &v.state case 1: @@ -6834,8 +9304,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutContextTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesRequest); i { case 0: return &v.state case 1: @@ -6846,8 +9316,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutContextTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesResponse); i { case 0: return &v.state case 1: @@ -6858,8 +9328,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutContextsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesRequest); i { case 0: return &v.state case 1: @@ -6870,8 +9340,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutContextsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesResponse); i { case 0: return &v.state case 1: @@ -6882,8 +9352,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutAttributionsAndAssociationsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesRequest); i { case 0: return &v.state case 1: @@ -6894,8 +9364,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[23].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutAttributionsAndAssociationsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesResponse); i { case 0: return &v.state case 1: @@ -6906,8 +9376,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[24].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutParentContextsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -6918,8 
+9388,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[25].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutParentContextsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -6930,8 +9400,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -6942,8 +9412,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -6954,8 +9424,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[28].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactByTypeAndNameRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -6966,8 +9436,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[29].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactByTypeAndNameResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -6978,8 +9448,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[30].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -6990,8 +9460,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[31].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -7002,8 +9472,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[32].Exporter = 
func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -7014,8 +9484,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[33].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -7026,8 +9496,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[34].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByURIRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesByExternalIdsRequest); i { case 0: return &v.state case 1: @@ -7038,8 +9508,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[35].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByURIResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesByExternalIdsResponse); i { case 0: return &v.state case 1: @@ -7050,8 +9520,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[36].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByTypeRequest); i { case 0: return &v.state case 1: @@ -7062,8 +9532,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[37].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByTypeResponse); i { case 0: return &v.state case 1: @@ -7074,8 +9544,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[38].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionByTypeAndNameRequest); i { case 0: return &v.state case 1: @@ -7086,8 +9556,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[39].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63].Exporter = func(v interface{}, i int) interface{} { + 
switch v := v.(*GetExecutionByTypeAndNameResponse); i { case 0: return &v.state case 1: @@ -7098,8 +9568,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[40].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypesRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByIDRequest); i { case 0: return &v.state case 1: @@ -7110,8 +9580,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[41].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypesResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByIDResponse); i { case 0: return &v.state case 1: @@ -7122,8 +9592,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[42].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypesRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypeRequest); i { case 0: return &v.state case 1: @@ -7134,8 +9604,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[43].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypesResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypeResponse); i { case 0: return &v.state case 1: @@ -7146,8 +9616,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[44].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypesRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetEventsByExecutionIDsRequest); i { case 0: return &v.state case 1: @@ -7158,8 +9628,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[45].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypesResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetEventsByExecutionIDsResponse); i { case 0: return &v.state case 1: @@ -7170,8 +9640,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[46].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetEventsByArtifactIDsRequest); i { case 0: return &v.state case 1: @@ -7182,8 +9652,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - 
file_ml_metadata_proto_metadata_store_service_proto_msgTypes[47].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetEventsByArtifactIDsResponse); i { case 0: return &v.state case 1: @@ -7194,8 +9664,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[48].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionByTypeAndNameRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesByIDRequest); i { case 0: return &v.state case 1: @@ -7206,8 +9676,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[49].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionByTypeAndNameResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactTypesByIDResponse); i { case 0: return &v.state case 1: @@ -7218,8 +9688,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[50].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesByIDRequest); i { case 0: return &v.state case 1: @@ -7230,8 +9700,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[51].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionTypesByIDResponse); i { case 0: return &v.state case 1: @@ -7242,8 +9712,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[52].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypeRequest); i { case 0: return &v.state case 1: @@ -7254,8 +9724,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[53].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypeResponse); i { case 0: return &v.state case 1: @@ -7266,8 +9736,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[54].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetEventsByExecutionIDsRequest); i { + 
file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesByIDRequest); i { case 0: return &v.state case 1: @@ -7278,8 +9748,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[55].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetEventsByExecutionIDsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextTypesByIDResponse); i { case 0: return &v.state case 1: @@ -7290,8 +9760,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[56].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetEventsByArtifactIDsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsRequest); i { case 0: return &v.state case 1: @@ -7302,8 +9772,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[57].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetEventsByArtifactIDsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsResponse); i { case 0: return &v.state case 1: @@ -7314,8 +9784,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[58].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypesByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByTypeRequest); i { case 0: return &v.state case 1: @@ -7326,8 +9796,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[59].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactTypesByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByTypeResponse); i { case 0: return &v.state case 1: @@ -7338,8 +9808,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[60].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypesByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextByTypeAndNameRequest); i { case 0: return &v.state case 1: @@ -7350,8 +9820,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[61].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionTypesByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextByTypeAndNameResponse); i { case 0: return &v.state case 1: @@ -7362,8 
+9832,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[62].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByIDRequest); i { case 0: return &v.state case 1: @@ -7374,8 +9844,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[63].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByIDResponse); i { case 0: return &v.state case 1: @@ -7386,8 +9856,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[64].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypesByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[88].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByArtifactRequest); i { case 0: return &v.state case 1: @@ -7398,8 +9868,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[65].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextTypesByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByArtifactResponse); i { case 0: return &v.state case 1: @@ -7410,8 +9880,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[66].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByExecutionRequest); i { case 0: return &v.state case 1: @@ -7422,8 +9892,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[67].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetContextsByExecutionResponse); i { case 0: return &v.state case 1: @@ -7434,8 +9904,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[68].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByTypeRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[92].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetParentContextsByContextRequest); i { case 0: return &v.state case 1: @@ -7446,8 +9916,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[69].Exporter = func(v interface{}, i int) interface{} { - switch v := 
v.(*GetContextsByTypeResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[93].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetParentContextsByContextResponse); i { case 0: return &v.state case 1: @@ -7458,8 +9928,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[70].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextByTypeAndNameRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[94].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetChildrenContextsByContextRequest); i { case 0: return &v.state case 1: @@ -7470,8 +9940,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[71].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextByTypeAndNameResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[95].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetChildrenContextsByContextResponse); i { case 0: return &v.state case 1: @@ -7482,8 +9952,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[72].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByIDRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[96].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetParentContextsByContextsRequest); i { case 0: return &v.state case 1: @@ -7494,8 +9964,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[73].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByIDResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[97].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetParentContextsByContextsResponse); i { case 0: return &v.state case 1: @@ -7506,8 +9976,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[74].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByArtifactRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[98].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetChildrenContextsByContextsRequest); i { case 0: return &v.state case 1: @@ -7518,8 +9988,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[75].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByArtifactResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[99].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetChildrenContextsByContextsResponse); i { case 0: return &v.state case 1: @@ -7530,8 +10000,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[76].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByExecutionRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[100].Exporter = func(v interface{}, i int) interface{} 
{ + switch v := v.(*GetArtifactsByContextRequest); i { case 0: return &v.state case 1: @@ -7542,8 +10012,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[77].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetContextsByExecutionResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[101].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetArtifactsByContextResponse); i { case 0: return &v.state case 1: @@ -7554,8 +10024,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[78].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetParentContextsByContextRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[102].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByContextRequest); i { case 0: return &v.state case 1: @@ -7566,8 +10036,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[79].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetParentContextsByContextResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[103].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetExecutionsByContextResponse); i { case 0: return &v.state case 1: @@ -7578,8 +10048,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[80].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetChildrenContextsByContextRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[104].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetLineageGraphRequest); i { case 0: return &v.state case 1: @@ -7590,8 +10060,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[81].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetChildrenContextsByContextResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[105].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetLineageGraphResponse); i { case 0: return &v.state case 1: @@ -7602,8 +10072,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[82].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByContextRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[106].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetLineageSubgraphRequest); i { case 0: return &v.state case 1: @@ -7614,8 +10084,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[83].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetArtifactsByContextResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[107].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetLineageSubgraphResponse); i { case 0: return &v.state case 1: @@ -7626,8 +10096,8 @@ func 
file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[84].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByContextRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[109].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutArtifactsRequest_Options); i { case 0: return &v.state case 1: @@ -7638,8 +10108,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[85].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetExecutionsByContextResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[110].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionRequest_ArtifactAndEvent); i { case 0: return &v.state case 1: @@ -7650,8 +10120,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[86].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetLineageGraphRequest); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[111].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutExecutionRequest_Options); i { case 0: return &v.state case 1: @@ -7662,8 +10132,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[87].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetLineageGraphResponse); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[112].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutLineageSubgraphRequest_EventEdge); i { case 0: return &v.state case 1: @@ -7674,8 +10144,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[89].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutArtifactsRequest_Options); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[113].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutLineageSubgraphRequest_Options); i { case 0: return &v.state case 1: @@ -7686,8 +10156,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[90].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionRequest_ArtifactAndEvent); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[114].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetParentContextsByContextsResponse_ParentContextsPerChild); i { case 0: return &v.state case 1: @@ -7698,8 +10168,8 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { return nil } } - file_ml_metadata_proto_metadata_store_service_proto_msgTypes[91].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PutExecutionRequest_Options); i { + file_ml_metadata_proto_metadata_store_service_proto_msgTypes[116].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetChildrenContextsByContextsResponse_ChildrenContextsPerParent); i { case 0: return &v.state case 1: @@ -7722,7 +10192,7 @@ func file_ml_metadata_proto_metadata_store_service_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), 
RawDescriptor: file_ml_metadata_proto_metadata_store_service_proto_rawDesc, NumEnums: 0, - NumMessages: 92, + NumMessages: 118, NumExtensions: 0, NumServices: 1, }, diff --git a/third_party/ml-metadata/go/ml_metadata/metadata_store_service_grpc.pb.go b/third_party/ml-metadata/go/ml_metadata/metadata_store_service_grpc.pb.go index a9426e0102..9417ff46a2 100644 --- a/third_party/ml-metadata/go/ml_metadata/metadata_store_service_grpc.pb.go +++ b/third_party/ml-metadata/go/ml_metadata/metadata_store_service_grpc.pb.go @@ -83,6 +83,10 @@ type MetadataStoreServiceClient interface { // For new artifacts, type must be specified. // For old artifacts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated artifacts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // artifacts: A list of artifacts to insert or update. // @@ -96,6 +100,10 @@ type MetadataStoreServiceClient interface { // For new executions, type must be specified. // For old executions, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated executions will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // executions: A list of executions to insert or update. // @@ -107,6 +115,10 @@ type MetadataStoreServiceClient interface { // // The execution_id and artifact_id must already exist. // Once created, events cannot be modified. + // AlreadyExists error will be raised if duplicated events are found. + // + // It is not guaranteed that the created or updated events will share the + // same `milliseconds_since_epoch` timestamps. // // Args: // events: A list of events to insert or update. @@ -117,9 +129,16 @@ type MetadataStoreServiceClient interface { // input/output Event. The `contexts` describe the associations of the // execution and the attributions of the artifacts. // - // If an execution_id, artifact_id or context_id is specified, it is an - // update, otherwise it does an insertion. For insertion, type must be - // specified. + // If an execution_id is specified, it is an update on the corresponding + // execution, otherwise it does an insertion. + // For insertion, type must be specified. Same rule applies to artifacts + // and contexts in the request. Corresponding errors may raised. For example: + // AlreadyExists error will be raised if duplicated executions, artifacts + // or events are found. + // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. // // Args: // execution: An execution to insert or update. @@ -130,6 +149,36 @@ type MetadataStoreServiceClient interface { // An execution id and a list of artifacts and contexts ids index-aligned // with the input. PutExecution(ctx context.Context, in *PutExecutionRequest, opts ...grpc.CallOption) (*PutExecutionResponse, error) + // Inserts or updates a lineage subgraph (i.e. a collection of event edges + // and its executions, artifacts, and related contexts) atomically. The + // `event_edges` include an Event and the indices of the corresponding + // execution and artifact from the input list of executions and artifacts. The + // `contexts` describe the associations of the Execution and the attributions + // of the Artifact. 
+ // + // If an execution_id is specified, it is an update on the corresponding + // Execution, otherwise it does an insertion. For insertion, type must be + // specified. These rules apply to Artifacts and Contexts as well. + // Corresponding errors may be raised. For example: AlreadyExists error will + // be raised if duplicated executions, artifacts, or events are found. + // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. + // + // Args: + // executions: A list of executions to insert or update. + // artifacts: A list of artifacts to insert or update. + // contexts: A list of contexts to insert and/or create associations and + // attributions with. + // event_edges: A list of events to insert with the indices of the + // corresponding execution and artifact from the input lists of + // executions and artifacts. + // + // Returns: + // Lists of execution, artifact, and context ids index-aligned with the + // inputs. + PutLineageSubgraph(ctx context.Context, in *PutLineageSubgraphRequest, opts ...grpc.CallOption) (*PutLineageSubgraphResponse, error) // Inserts or updates contexts in database and returns a list of context ids. // // If an context_id is specified for a context, it is an update. @@ -137,6 +186,10 @@ type MetadataStoreServiceClient interface { // For new contexts, type must be specified. // For old contexts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated contexts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // contexts: A list of contexts to insert or update. // @@ -229,6 +282,18 @@ type MetadataStoreServiceClient interface { GetEventsByExecutionIDs(ctx context.Context, in *GetEventsByExecutionIDsRequest, opts ...grpc.CallOption) (*GetEventsByExecutionIDsResponse, error) // Gets all events with matching artifact ids. GetEventsByArtifactIDs(ctx context.Context, in *GetEventsByArtifactIDsRequest, opts ...grpc.CallOption) (*GetEventsByArtifactIDsResponse, error) + // Gets all the artifacts with matching external ids. + GetArtifactsByExternalIds(ctx context.Context, in *GetArtifactsByExternalIdsRequest, opts ...grpc.CallOption) (*GetArtifactsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetExecutionsByExternalIds(ctx context.Context, in *GetExecutionsByExternalIdsRequest, opts ...grpc.CallOption) (*GetExecutionsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetContextsByExternalIds(ctx context.Context, in *GetContextsByExternalIdsRequest, opts ...grpc.CallOption) (*GetContextsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetArtifactTypesByExternalIds(ctx context.Context, in *GetArtifactTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetArtifactTypesByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetExecutionTypesByExternalIds(ctx context.Context, in *GetExecutionTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetExecutionTypesByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. 
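A minimal, illustrative sketch (not part of the generated patch) of how a caller might exercise the new PutLineageSubgraph RPC declared above. It assumes the vendored import path github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata, the standard generated constructor NewMetadataStoreServiceClient, and Go field names (Executions, Artifacts, Contexts, EventEdges, ExecutionIndex, ArtifactIndex, plus the response id getters) inferred from the proto comments rather than shown in this hunk; the endpoint and type ids are placeholders.

package main

import (
	"context"
	"log"

	pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" // vendored import path, assumed
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/protobuf/proto"
)

func main() {
	// Placeholder endpoint; type ids would normally come from earlier Put*Type calls (not shown).
	conn, err := grpc.Dial("metadata-grpc-service:8080",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("dial MLMD: %v", err)
	}
	defer conn.Close()
	client := pb.NewMetadataStoreServiceClient(conn) // generated constructor, assumed

	var trainTypeID, modelTypeID, runTypeID int64 = 1, 2, 3 // placeholder type ids

	// One execution, one output artifact, one context, and the event edge that
	// links execution[0] to artifact[0] -- written atomically by the server.
	req := &pb.PutLineageSubgraphRequest{
		Executions: []*pb.Execution{{TypeId: proto.Int64(trainTypeID)}},
		Artifacts:  []*pb.Artifact{{TypeId: proto.Int64(modelTypeID), Uri: proto.String("s3://bucket/model")}},
		Contexts:   []*pb.Context{{TypeId: proto.Int64(runTypeID), Name: proto.String("demo-run")}},
		EventEdges: []*pb.PutLineageSubgraphRequest_EventEdge{{
			ExecutionIndex: proto.Int32(0), // indices into the lists above (field names assumed)
			ArtifactIndex:  proto.Int32(0),
			Event:          &pb.Event{Type: pb.Event_OUTPUT.Enum()},
		}},
	}
	resp, err := client.PutLineageSubgraph(context.Background(), req)
	if err != nil {
		log.Fatalf("PutLineageSubgraph: %v", err)
	}
	// Response id lists are index-aligned with the request (getter names assumed).
	log.Printf("executions=%v artifacts=%v contexts=%v",
		resp.GetExecutionIds(), resp.GetArtifactIds(), resp.GetContextIds())
}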
+ GetContextTypesByExternalIds(ctx context.Context, in *GetContextTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetContextTypesByExternalIdsResponse, error) // Gets all context that an artifact is attributed to. GetContextsByArtifact(ctx context.Context, in *GetContextsByArtifactRequest, opts ...grpc.CallOption) (*GetContextsByArtifactResponse, error) // Gets all context that an execution is associated with. @@ -237,14 +302,26 @@ type MetadataStoreServiceClient interface { GetParentContextsByContext(ctx context.Context, in *GetParentContextsByContextRequest, opts ...grpc.CallOption) (*GetParentContextsByContextResponse, error) // Gets all children contexts that a context is related. GetChildrenContextsByContext(ctx context.Context, in *GetChildrenContextsByContextRequest, opts ...grpc.CallOption) (*GetChildrenContextsByContextResponse, error) + // Batch getting all the parent contexts that a list of contexts are related. + GetParentContextsByContexts(ctx context.Context, in *GetParentContextsByContextsRequest, opts ...grpc.CallOption) (*GetParentContextsByContextsResponse, error) + // Batch getting all the children contexts that a list of contexts are + // related. + GetChildrenContextsByContexts(ctx context.Context, in *GetChildrenContextsByContextsRequest, opts ...grpc.CallOption) (*GetChildrenContextsByContextsResponse, error) // Gets all direct artifacts that a context attributes to. GetArtifactsByContext(ctx context.Context, in *GetArtifactsByContextRequest, opts ...grpc.CallOption) (*GetArtifactsByContextResponse, error) // Gets all direct executions that a context associates with. GetExecutionsByContext(ctx context.Context, in *GetExecutionsByContextRequest, opts ...grpc.CallOption) (*GetExecutionsByContextResponse, error) + // TODO(b/283852485): Deprecate GetLineageGraph API after migration to + // GetLineageSubgraph API. // The transaction performs a constrained transitive closure and returns a // lineage subgraph satisfying the conditions and constraints specified in // the GetLineageGraphRequest. GetLineageGraph(ctx context.Context, in *GetLineageGraphRequest, opts ...grpc.CallOption) (*GetLineageGraphResponse, error) + // Gets a lineage subgraph by performing graph traversal from a list of + // interested nodes. + // A lineage subgraph without node details (e.g., external_id, properties) + // will be returned. + GetLineageSubgraph(ctx context.Context, in *GetLineageSubgraphRequest, opts ...grpc.CallOption) (*GetLineageSubgraphResponse, error) } type metadataStoreServiceClient struct { @@ -327,6 +404,15 @@ func (c *metadataStoreServiceClient) PutExecution(ctx context.Context, in *PutEx return out, nil } +func (c *metadataStoreServiceClient) PutLineageSubgraph(ctx context.Context, in *PutLineageSubgraphRequest, opts ...grpc.CallOption) (*PutLineageSubgraphResponse, error) { + out := new(PutLineageSubgraphResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/PutLineageSubgraph", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *metadataStoreServiceClient) PutContexts(ctx context.Context, in *PutContextsRequest, opts ...grpc.CallOption) (*PutContextsResponse, error) { out := new(PutContextsResponse) err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/PutContexts", in, out, opts...) 
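Likewise, a hedged sketch of reading lineage back through the new GetLineageSubgraph RPC. The request's traversal query options (starting nodes, direction, hop limit) and the exact shape of the returned subgraph are defined in metadata_store_service.proto and only summarized in this hunk, so their field names are left as comments rather than guessed.

package main

import (
	"context"
	"log"

	pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" // vendored import path, assumed
)

// lineageAround issues a GetLineageSubgraph call with a caller-prepared client.
func lineageAround(ctx context.Context, client pb.MetadataStoreServiceClient) error {
	req := &pb.GetLineageSubgraphRequest{
		// Populate the traversal query options here (starting nodes, direction,
		// max hops); the exact field names are not reproduced in this diff hunk.
	}
	resp, err := client.GetLineageSubgraph(ctx, req)
	if err != nil {
		return err
	}
	// Per the doc comment, the returned subgraph omits node details such as
	// external_id and properties; fetch full nodes by id afterwards if needed.
	log.Printf("lineage subgraph: %v", resp)
	return nil
}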
@@ -570,6 +656,60 @@ func (c *metadataStoreServiceClient) GetEventsByArtifactIDs(ctx context.Context, return out, nil } +func (c *metadataStoreServiceClient) GetArtifactsByExternalIds(ctx context.Context, in *GetArtifactsByExternalIdsRequest, opts ...grpc.CallOption) (*GetArtifactsByExternalIdsResponse, error) { + out := new(GetArtifactsByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetArtifactsByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetExecutionsByExternalIds(ctx context.Context, in *GetExecutionsByExternalIdsRequest, opts ...grpc.CallOption) (*GetExecutionsByExternalIdsResponse, error) { + out := new(GetExecutionsByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetExecutionsByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetContextsByExternalIds(ctx context.Context, in *GetContextsByExternalIdsRequest, opts ...grpc.CallOption) (*GetContextsByExternalIdsResponse, error) { + out := new(GetContextsByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetContextsByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetArtifactTypesByExternalIds(ctx context.Context, in *GetArtifactTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetArtifactTypesByExternalIdsResponse, error) { + out := new(GetArtifactTypesByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetArtifactTypesByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetExecutionTypesByExternalIds(ctx context.Context, in *GetExecutionTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetExecutionTypesByExternalIdsResponse, error) { + out := new(GetExecutionTypesByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetExecutionTypesByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetContextTypesByExternalIds(ctx context.Context, in *GetContextTypesByExternalIdsRequest, opts ...grpc.CallOption) (*GetContextTypesByExternalIdsResponse, error) { + out := new(GetContextTypesByExternalIdsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetContextTypesByExternalIds", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *metadataStoreServiceClient) GetContextsByArtifact(ctx context.Context, in *GetContextsByArtifactRequest, opts ...grpc.CallOption) (*GetContextsByArtifactResponse, error) { out := new(GetContextsByArtifactResponse) err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetContextsByArtifact", in, out, opts...) @@ -606,6 +746,24 @@ func (c *metadataStoreServiceClient) GetChildrenContextsByContext(ctx context.Co return out, nil } +func (c *metadataStoreServiceClient) GetParentContextsByContexts(ctx context.Context, in *GetParentContextsByContextsRequest, opts ...grpc.CallOption) (*GetParentContextsByContextsResponse, error) { + out := new(GetParentContextsByContextsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetParentContextsByContexts", in, out, opts...) 
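The six *ByExternalIds client methods above share the same request/response shape; a hedged sketch for the artifact variant, assuming the repeated external_ids proto field surfaces in Go as ExternalIds and the response exposes a GetArtifacts() accessor (neither is spelled out in this hunk).

package main

import (
	"context"
	"log"

	pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" // vendored import path, assumed
)

// artifactsByExternalIds resolves MLMD artifacts from externally assigned ids.
func artifactsByExternalIds(ctx context.Context, client pb.MetadataStoreServiceClient, ids []string) error {
	resp, err := client.GetArtifactsByExternalIds(ctx, &pb.GetArtifactsByExternalIdsRequest{
		ExternalIds: ids, // assumed Go name for the repeated external_ids field
	})
	if err != nil {
		return err
	}
	for _, a := range resp.GetArtifacts() {
		log.Printf("artifact id=%d uri=%s", a.GetId(), a.GetUri())
	}
	return nil
}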
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *metadataStoreServiceClient) GetChildrenContextsByContexts(ctx context.Context, in *GetChildrenContextsByContextsRequest, opts ...grpc.CallOption) (*GetChildrenContextsByContextsResponse, error) { + out := new(GetChildrenContextsByContextsResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetChildrenContextsByContexts", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *metadataStoreServiceClient) GetArtifactsByContext(ctx context.Context, in *GetArtifactsByContextRequest, opts ...grpc.CallOption) (*GetArtifactsByContextResponse, error) { out := new(GetArtifactsByContextResponse) err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetArtifactsByContext", in, out, opts...) @@ -633,6 +791,15 @@ func (c *metadataStoreServiceClient) GetLineageGraph(ctx context.Context, in *Ge return out, nil } +func (c *metadataStoreServiceClient) GetLineageSubgraph(ctx context.Context, in *GetLineageSubgraphRequest, opts ...grpc.CallOption) (*GetLineageSubgraphResponse, error) { + out := new(GetLineageSubgraphResponse) + err := c.cc.Invoke(ctx, "/ml_metadata.MetadataStoreService/GetLineageSubgraph", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // MetadataStoreServiceServer is the server API for MetadataStoreService service. // All implementations must embed UnimplementedMetadataStoreServiceServer // for forward compatibility @@ -702,6 +869,10 @@ type MetadataStoreServiceServer interface { // For new artifacts, type must be specified. // For old artifacts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated artifacts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // artifacts: A list of artifacts to insert or update. // @@ -715,6 +886,10 @@ type MetadataStoreServiceServer interface { // For new executions, type must be specified. // For old executions, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated executions will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // executions: A list of executions to insert or update. // @@ -726,6 +901,10 @@ type MetadataStoreServiceServer interface { // // The execution_id and artifact_id must already exist. // Once created, events cannot be modified. + // AlreadyExists error will be raised if duplicated events are found. + // + // It is not guaranteed that the created or updated events will share the + // same `milliseconds_since_epoch` timestamps. // // Args: // events: A list of events to insert or update. @@ -736,9 +915,16 @@ type MetadataStoreServiceServer interface { // input/output Event. The `contexts` describe the associations of the // execution and the attributions of the artifacts. // - // If an execution_id, artifact_id or context_id is specified, it is an - // update, otherwise it does an insertion. For insertion, type must be - // specified. + // If an execution_id is specified, it is an update on the corresponding + // execution, otherwise it does an insertion. + // For insertion, type must be specified. Same rule applies to artifacts + // and contexts in the request. Corresponding errors may raised. For example: + // AlreadyExists error will be raised if duplicated executions, artifacts + // or events are found. 
+ // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. // // Args: // execution: An execution to insert or update. @@ -749,6 +935,36 @@ type MetadataStoreServiceServer interface { // An execution id and a list of artifacts and contexts ids index-aligned // with the input. PutExecution(context.Context, *PutExecutionRequest) (*PutExecutionResponse, error) + // Inserts or updates a lineage subgraph (i.e. a collection of event edges + // and its executions, artifacts, and related contexts) atomically. The + // `event_edges` include an Event and the indices of the corresponding + // execution and artifact from the input list of executions and artifacts. The + // `contexts` describe the associations of the Execution and the attributions + // of the Artifact. + // + // If an execution_id is specified, it is an update on the corresponding + // Execution, otherwise it does an insertion. For insertion, type must be + // specified. These rules apply to Artifacts and Contexts as well. + // Corresponding errors may be raised. For example: AlreadyExists error will + // be raised if duplicated executions, artifacts, or events are found. + // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. + // + // Args: + // executions: A list of executions to insert or update. + // artifacts: A list of artifacts to insert or update. + // contexts: A list of contexts to insert and/or create associations and + // attributions with. + // event_edges: A list of events to insert with the indices of the + // corresponding execution and artifact from the input lists of + // executions and artifacts. + // + // Returns: + // Lists of execution, artifact, and context ids index-aligned with the + // inputs. + PutLineageSubgraph(context.Context, *PutLineageSubgraphRequest) (*PutLineageSubgraphResponse, error) // Inserts or updates contexts in database and returns a list of context ids. // // If an context_id is specified for a context, it is an update. @@ -756,6 +972,10 @@ type MetadataStoreServiceServer interface { // For new contexts, type must be specified. // For old contexts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated contexts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // contexts: A list of contexts to insert or update. // @@ -848,6 +1068,18 @@ type MetadataStoreServiceServer interface { GetEventsByExecutionIDs(context.Context, *GetEventsByExecutionIDsRequest) (*GetEventsByExecutionIDsResponse, error) // Gets all events with matching artifact ids. GetEventsByArtifactIDs(context.Context, *GetEventsByArtifactIDsRequest) (*GetEventsByArtifactIDsResponse, error) + // Gets all the artifacts with matching external ids. + GetArtifactsByExternalIds(context.Context, *GetArtifactsByExternalIdsRequest) (*GetArtifactsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetExecutionsByExternalIds(context.Context, *GetExecutionsByExternalIdsRequest) (*GetExecutionsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. 
+ GetContextsByExternalIds(context.Context, *GetContextsByExternalIdsRequest) (*GetContextsByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetArtifactTypesByExternalIds(context.Context, *GetArtifactTypesByExternalIdsRequest) (*GetArtifactTypesByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetExecutionTypesByExternalIds(context.Context, *GetExecutionTypesByExternalIdsRequest) (*GetExecutionTypesByExternalIdsResponse, error) + // Gets all the artifacts with matching external ids. + GetContextTypesByExternalIds(context.Context, *GetContextTypesByExternalIdsRequest) (*GetContextTypesByExternalIdsResponse, error) // Gets all context that an artifact is attributed to. GetContextsByArtifact(context.Context, *GetContextsByArtifactRequest) (*GetContextsByArtifactResponse, error) // Gets all context that an execution is associated with. @@ -856,14 +1088,26 @@ type MetadataStoreServiceServer interface { GetParentContextsByContext(context.Context, *GetParentContextsByContextRequest) (*GetParentContextsByContextResponse, error) // Gets all children contexts that a context is related. GetChildrenContextsByContext(context.Context, *GetChildrenContextsByContextRequest) (*GetChildrenContextsByContextResponse, error) + // Batch getting all the parent contexts that a list of contexts are related. + GetParentContextsByContexts(context.Context, *GetParentContextsByContextsRequest) (*GetParentContextsByContextsResponse, error) + // Batch getting all the children contexts that a list of contexts are + // related. + GetChildrenContextsByContexts(context.Context, *GetChildrenContextsByContextsRequest) (*GetChildrenContextsByContextsResponse, error) // Gets all direct artifacts that a context attributes to. GetArtifactsByContext(context.Context, *GetArtifactsByContextRequest) (*GetArtifactsByContextResponse, error) // Gets all direct executions that a context associates with. GetExecutionsByContext(context.Context, *GetExecutionsByContextRequest) (*GetExecutionsByContextResponse, error) + // TODO(b/283852485): Deprecate GetLineageGraph API after migration to + // GetLineageSubgraph API. // The transaction performs a constrained transitive closure and returns a // lineage subgraph satisfying the conditions and constraints specified in // the GetLineageGraphRequest. GetLineageGraph(context.Context, *GetLineageGraphRequest) (*GetLineageGraphResponse, error) + // Gets a lineage subgraph by performing graph traversal from a list of + // interested nodes. + // A lineage subgraph without node details (e.g., external_id, properties) + // will be returned. 
+ GetLineageSubgraph(context.Context, *GetLineageSubgraphRequest) (*GetLineageSubgraphResponse, error) mustEmbedUnimplementedMetadataStoreServiceServer() } @@ -895,6 +1139,9 @@ func (UnimplementedMetadataStoreServiceServer) PutEvents(context.Context, *PutEv func (UnimplementedMetadataStoreServiceServer) PutExecution(context.Context, *PutExecutionRequest) (*PutExecutionResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method PutExecution not implemented") } +func (UnimplementedMetadataStoreServiceServer) PutLineageSubgraph(context.Context, *PutLineageSubgraphRequest) (*PutLineageSubgraphResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method PutLineageSubgraph not implemented") +} func (UnimplementedMetadataStoreServiceServer) PutContexts(context.Context, *PutContextsRequest) (*PutContextsResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method PutContexts not implemented") } @@ -976,6 +1223,24 @@ func (UnimplementedMetadataStoreServiceServer) GetEventsByExecutionIDs(context.C func (UnimplementedMetadataStoreServiceServer) GetEventsByArtifactIDs(context.Context, *GetEventsByArtifactIDsRequest) (*GetEventsByArtifactIDsResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetEventsByArtifactIDs not implemented") } +func (UnimplementedMetadataStoreServiceServer) GetArtifactsByExternalIds(context.Context, *GetArtifactsByExternalIdsRequest) (*GetArtifactsByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetArtifactsByExternalIds not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetExecutionsByExternalIds(context.Context, *GetExecutionsByExternalIdsRequest) (*GetExecutionsByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetExecutionsByExternalIds not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetContextsByExternalIds(context.Context, *GetContextsByExternalIdsRequest) (*GetContextsByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetContextsByExternalIds not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetArtifactTypesByExternalIds(context.Context, *GetArtifactTypesByExternalIdsRequest) (*GetArtifactTypesByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetArtifactTypesByExternalIds not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetExecutionTypesByExternalIds(context.Context, *GetExecutionTypesByExternalIdsRequest) (*GetExecutionTypesByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetExecutionTypesByExternalIds not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetContextTypesByExternalIds(context.Context, *GetContextTypesByExternalIdsRequest) (*GetContextTypesByExternalIdsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetContextTypesByExternalIds not implemented") +} func (UnimplementedMetadataStoreServiceServer) GetContextsByArtifact(context.Context, *GetContextsByArtifactRequest) (*GetContextsByArtifactResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetContextsByArtifact not implemented") } @@ -988,6 +1253,12 @@ func (UnimplementedMetadataStoreServiceServer) GetParentContextsByContext(contex func (UnimplementedMetadataStoreServiceServer) GetChildrenContextsByContext(context.Context, *GetChildrenContextsByContextRequest) (*GetChildrenContextsByContextResponse, error) { return 
nil, status.Errorf(codes.Unimplemented, "method GetChildrenContextsByContext not implemented") } +func (UnimplementedMetadataStoreServiceServer) GetParentContextsByContexts(context.Context, *GetParentContextsByContextsRequest) (*GetParentContextsByContextsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetParentContextsByContexts not implemented") +} +func (UnimplementedMetadataStoreServiceServer) GetChildrenContextsByContexts(context.Context, *GetChildrenContextsByContextsRequest) (*GetChildrenContextsByContextsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetChildrenContextsByContexts not implemented") +} func (UnimplementedMetadataStoreServiceServer) GetArtifactsByContext(context.Context, *GetArtifactsByContextRequest) (*GetArtifactsByContextResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetArtifactsByContext not implemented") } @@ -997,6 +1268,9 @@ func (UnimplementedMetadataStoreServiceServer) GetExecutionsByContext(context.Co func (UnimplementedMetadataStoreServiceServer) GetLineageGraph(context.Context, *GetLineageGraphRequest) (*GetLineageGraphResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetLineageGraph not implemented") } +func (UnimplementedMetadataStoreServiceServer) GetLineageSubgraph(context.Context, *GetLineageSubgraphRequest) (*GetLineageSubgraphResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetLineageSubgraph not implemented") +} func (UnimplementedMetadataStoreServiceServer) mustEmbedUnimplementedMetadataStoreServiceServer() {} // UnsafeMetadataStoreServiceServer may be embedded to opt out of forward compatibility for this service. @@ -1154,6 +1428,24 @@ func _MetadataStoreService_PutExecution_Handler(srv interface{}, ctx context.Con return interceptor(ctx, in, info, handler) } +func _MetadataStoreService_PutLineageSubgraph_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(PutLineageSubgraphRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).PutLineageSubgraph(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/PutLineageSubgraph", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).PutLineageSubgraph(ctx, req.(*PutLineageSubgraphRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _MetadataStoreService_PutContexts_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(PutContextsRequest) if err := dec(in); err != nil { @@ -1640,6 +1932,114 @@ func _MetadataStoreService_GetEventsByArtifactIDs_Handler(srv interface{}, ctx c return interceptor(ctx, in, info, handler) } +func _MetadataStoreService_GetArtifactsByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetArtifactsByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetArtifactsByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetArtifactsByExternalIds", + } + handler := func(ctx 
context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetArtifactsByExternalIds(ctx, req.(*GetArtifactsByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetExecutionsByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetExecutionsByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetExecutionsByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetExecutionsByExternalIds", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetExecutionsByExternalIds(ctx, req.(*GetExecutionsByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetContextsByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetContextsByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetContextsByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetContextsByExternalIds", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetContextsByExternalIds(ctx, req.(*GetContextsByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetArtifactTypesByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetArtifactTypesByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetArtifactTypesByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetArtifactTypesByExternalIds", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetArtifactTypesByExternalIds(ctx, req.(*GetArtifactTypesByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetExecutionTypesByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetExecutionTypesByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetExecutionTypesByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetExecutionTypesByExternalIds", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetExecutionTypesByExternalIds(ctx, req.(*GetExecutionTypesByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetContextTypesByExternalIds_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor 
grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetContextTypesByExternalIdsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetContextTypesByExternalIds(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetContextTypesByExternalIds", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetContextTypesByExternalIds(ctx, req.(*GetContextTypesByExternalIdsRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _MetadataStoreService_GetContextsByArtifact_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(GetContextsByArtifactRequest) if err := dec(in); err != nil { @@ -1712,6 +2112,42 @@ func _MetadataStoreService_GetChildrenContextsByContext_Handler(srv interface{}, return interceptor(ctx, in, info, handler) } +func _MetadataStoreService_GetParentContextsByContexts_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetParentContextsByContextsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetParentContextsByContexts(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetParentContextsByContexts", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetParentContextsByContexts(ctx, req.(*GetParentContextsByContextsRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _MetadataStoreService_GetChildrenContextsByContexts_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetChildrenContextsByContextsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetChildrenContextsByContexts(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetChildrenContextsByContexts", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetChildrenContextsByContexts(ctx, req.(*GetChildrenContextsByContextsRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _MetadataStoreService_GetArtifactsByContext_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(GetArtifactsByContextRequest) if err := dec(in); err != nil { @@ -1766,6 +2202,24 @@ func _MetadataStoreService_GetLineageGraph_Handler(srv interface{}, ctx context. 
return interceptor(ctx, in, info, handler) } +func _MetadataStoreService_GetLineageSubgraph_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetLineageSubgraphRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(MetadataStoreServiceServer).GetLineageSubgraph(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/ml_metadata.MetadataStoreService/GetLineageSubgraph", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(MetadataStoreServiceServer).GetLineageSubgraph(ctx, req.(*GetLineageSubgraphRequest)) + } + return interceptor(ctx, in, info, handler) +} + // MetadataStoreService_ServiceDesc is the grpc.ServiceDesc for MetadataStoreService service. // It's only intended for direct use with grpc.RegisterService, // and not to be introspected or modified (even as a copy) @@ -1805,6 +2259,10 @@ var MetadataStoreService_ServiceDesc = grpc.ServiceDesc{ MethodName: "PutExecution", Handler: _MetadataStoreService_PutExecution_Handler, }, + { + MethodName: "PutLineageSubgraph", + Handler: _MetadataStoreService_PutLineageSubgraph_Handler, + }, { MethodName: "PutContexts", Handler: _MetadataStoreService_PutContexts_Handler, @@ -1913,6 +2371,30 @@ var MetadataStoreService_ServiceDesc = grpc.ServiceDesc{ MethodName: "GetEventsByArtifactIDs", Handler: _MetadataStoreService_GetEventsByArtifactIDs_Handler, }, + { + MethodName: "GetArtifactsByExternalIds", + Handler: _MetadataStoreService_GetArtifactsByExternalIds_Handler, + }, + { + MethodName: "GetExecutionsByExternalIds", + Handler: _MetadataStoreService_GetExecutionsByExternalIds_Handler, + }, + { + MethodName: "GetContextsByExternalIds", + Handler: _MetadataStoreService_GetContextsByExternalIds_Handler, + }, + { + MethodName: "GetArtifactTypesByExternalIds", + Handler: _MetadataStoreService_GetArtifactTypesByExternalIds_Handler, + }, + { + MethodName: "GetExecutionTypesByExternalIds", + Handler: _MetadataStoreService_GetExecutionTypesByExternalIds_Handler, + }, + { + MethodName: "GetContextTypesByExternalIds", + Handler: _MetadataStoreService_GetContextTypesByExternalIds_Handler, + }, { MethodName: "GetContextsByArtifact", Handler: _MetadataStoreService_GetContextsByArtifact_Handler, @@ -1929,6 +2411,14 @@ var MetadataStoreService_ServiceDesc = grpc.ServiceDesc{ MethodName: "GetChildrenContextsByContext", Handler: _MetadataStoreService_GetChildrenContextsByContext_Handler, }, + { + MethodName: "GetParentContextsByContexts", + Handler: _MetadataStoreService_GetParentContextsByContexts_Handler, + }, + { + MethodName: "GetChildrenContextsByContexts", + Handler: _MetadataStoreService_GetChildrenContextsByContexts_Handler, + }, { MethodName: "GetArtifactsByContext", Handler: _MetadataStoreService_GetArtifactsByContext_Handler, @@ -1941,6 +2431,10 @@ var MetadataStoreService_ServiceDesc = grpc.ServiceDesc{ MethodName: "GetLineageGraph", Handler: _MetadataStoreService_GetLineageGraph_Handler, }, + { + MethodName: "GetLineageSubgraph", + Handler: _MetadataStoreService_GetLineageSubgraph_Handler, + }, }, Streams: []grpc.StreamDesc{}, Metadata: "ml_metadata/proto/metadata_store_service.proto", diff --git a/third_party/ml-metadata/ml_metadata/proto/metadata_store.proto b/third_party/ml-metadata/ml_metadata/proto/metadata_store.proto index b015dd22b1..3bdeae8791 100644 --- a/third_party/ml-metadata/ml_metadata/proto/metadata_store.proto +++ 
b/third_party/ml-metadata/ml_metadata/proto/metadata_store.proto @@ -16,6 +16,7 @@ syntax = "proto2"; package ml_metadata; +import "google/protobuf/any.proto"; import "google/protobuf/struct.proto"; import "google/protobuf/descriptor.proto"; @@ -36,11 +37,13 @@ message Value { double double_value = 2; string string_value = 3; google.protobuf.Struct struct_value = 4; + google.protobuf.Any proto_value = 5; + bool bool_value = 6; } } message Artifact { - // Output only. The globally unique server generated id of the artifact. + // Output only. The unique server generated id of the artifact. optional int64 id = 1; // The client provided name of the artifact. This field is optional. If set, // it must be unique among all the artifacts of the same artifact type within @@ -54,6 +57,9 @@ message Artifact { // The uniform resource identifier of the physical artifact. // May be empty if there is no physical artifact. optional string uri = 3; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all artifacts within a database instance. + optional string external_id = 11; // Properties of the artifact. // Properties must be specified in the ArtifactType. map properties = 4; @@ -71,6 +77,17 @@ message Artifact { MARKED_FOR_DELETION = 3; // A state indicating that the artifact has been deleted. DELETED = 4; + // A state indicating that the artifact has been abandoned, which may be + // due to a failed or cancelled execution. + ABANDONED = 5; + // A state indicating that the artifact is a reference artifact. At + // execution start time, the orchestrator produces an output artifact for + // each output key with state PENDING. However, for an intermediate + // artifact, this first artifact's state will be REFERENCE. Intermediate + // artifacts emitted during a component's execution will copy the REFERENCE + // artifact's attributes. At the end of an execution, the artifact state + // should remain REFERENCE instead of being changed to LIVE. + REFERENCE = 6; } // The state of the artifact known to the system. @@ -80,6 +97,9 @@ message Artifact { // Output only. Last update time of the artifact since epoch in millisecond // since epoch. optional int64 last_update_time_since_epoch = 10; + + // Output only. + optional google.protobuf.Any system_metadata = 12; } // The list of supported property value types. @@ -88,7 +108,11 @@ enum PropertyType { INT = 1; DOUBLE = 2; STRING = 3; + // Prefer to use `PROTO` to store structed data since this option has + // inefficient database storage usage. STRUCT = 4; + PROTO = 5; + BOOLEAN = 6; } message ArtifactType { @@ -101,6 +125,10 @@ message ArtifactType { optional string version = 4; // An optional description about the type. optional string description = 5; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all artifact types within a database + // instance. + optional string external_id = 7; // The schema of the type. // Properties are always optional in the artifact. // Properties of an artifact type can be expanded but not contracted (i.e., @@ -156,40 +184,57 @@ message ArtifactType { // type:DECLARED_OUTPUT, // path:{step:[{"key":"my_result"}]} // } -// Other event types include INPUT/OUTPUT and INTERNAL_INPUT/_OUTPUT. +// +// Other event types include INPUT/OUTPUT, INTERNAL_INPUT/_OUTPUT and +// PENDING_OUTPUT: +// // * The INPUT/OUTPUT is an event that actually reads/writes an artifact by an // execution. 
The input/output artifacts may not declared in the signature, // For example, the trainer may output multiple caches of the parameters // (as an OUTPUT), then finally write the SavedModel as a DECLARED_OUTPUT. +// // * The INTERNAL_INPUT/_OUTPUT are event types which are only meaningful to // an orchestration system to keep track of the details for later debugging. // For example, a fork happened conditioning on an artifact, then an execution -// is triggered, such fork implementating may need to log the read and write +// is triggered, such fork implementing may need to log the read and write // of artifacts and may not be worth displaying to the users. // -// For instance, in the above example, +// For instance, in the above example, // -// my_result = my_execution({"data":[3,7],"schema":8}) +// my_result = my_execution({"data":[3,7],"schema":8}) // -// there is another execution (id: 15), which represents a `garbage_collection` -// step in an orchestration system +// there is another execution (id: 15), which represents a +// `garbage_collection` step in an orchestration system // -// gc_result = garbage_collection(my_result) +// gc_result = garbage_collection(my_result) // -// that cleans `my_result` if needed. The details should be invisible to the -// end users and lineage tracking. The orchestrator can emit following events: +// that cleans `my_result` if needed. The details should be invisible to the +// end users and lineage tracking. The orchestrator can emit following events: // -// { -// artifact_id: 15, -// execution_id: 15, -// type:INTERNAL_INPUT, -// } -// { -// artifact_id:16, // New artifact containing the GC job result. -// execution_id: 15, -// type:INTERNAL_OUTPUT, -// path:{step:[{"key":"gc_result"}]} -// } +// { +// artifact_id: 15, +// execution_id: 15, +// type:INTERNAL_INPUT, +// } +// { +// artifact_id:16, // New artifact containing the GC job result. +// execution_id: 15, +// type:INTERNAL_OUTPUT, +// path:{step:[{"key":"gc_result"}]} +// } +// +// * The PENDING_OUTPUT event is used to indicate that an artifact is +// tentatively associated with an active execution which has not yet been +// finalized. For example, an orchestration system can register output +// artifacts of a running execution with PENDING_OUTPUT events to indicate +// the output artifacts the execution is expected to produce. When the +// execution is finished, the final set of output artifacts can be associated +// with the exeution using OUTPUT events, and any unused artifacts which were +// previously registered with PENDING_OUTPUT events can be updated to set +// their Artifact.State to ABANDONED. +// +// Events are unique of the same +// (artifact_id, execution_id, type) combination within a metadata store. message Event { // A simple path (e.g. {step{key:"foo"}}) can name an artifact in the context // of an execution. @@ -216,6 +261,7 @@ message Event { OUTPUT = 4; // An output of the execution. INTERNAL_INPUT = 5; // An internal input of the execution. INTERNAL_OUTPUT = 6; // An internal output of the execution. + PENDING_OUTPUT = 7; // A pending output of the execution. } // The artifact id is required for an event, and should refer to an // existing artifact. @@ -230,10 +276,13 @@ message Event { // Time the event occurred // Epoch is Jan 1, 1970, UTC optional int64 milliseconds_since_epoch = 5; + + // Output only. + optional google.protobuf.Any system_metadata = 6; } message Execution { - // Output only. The globally unique server generated id of the execution. + // Output only. 
The unique server generated id of the execution. optional int64 id = 1; // The client provided name of the execution. This field is optional. If set, // it must be unique among all the executions of the same execution type @@ -245,6 +294,9 @@ message Execution { optional int64 type_id = 2; // Output only. The name of an ExecutionType. optional string type = 7; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all executions within a database instance. + optional string external_id = 10; // The state of the Execution. The state transitions are // NEW -> RUNNING -> COMPLETE | CACHED | FAILED | CANCELED // CACHED means the execution is skipped due to cached results. @@ -272,6 +324,9 @@ message Execution { optional int64 create_time_since_epoch = 8; // Output only. Last update time of the execution in millisecond since epoch. optional int64 last_update_time_since_epoch = 9; + + // Output only. + optional google.protobuf.Any system_metadata = 11; } message ExecutionType { @@ -284,6 +339,10 @@ message ExecutionType { optional string version = 6; // An optional description about the type. optional string description = 7; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all execution types within a database + // instance. + optional string external_id = 9; // The schema of the type. // Properties are always optional in the execution. map properties = 3; @@ -335,6 +394,10 @@ message ContextType { optional string version = 4; // An optional description about the type. optional string description = 5; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all context types within a database + // instance. + optional string external_id = 7; // The schema of the type, e.g., name: string, owner: string // Properties are always optional in the context. // Properties of an context type can be expanded but not contracted (i.e., @@ -354,7 +417,7 @@ message ContextType { } message Context { - // Output Only. The globally unique server generated id of the context. + // Output Only. The unique server generated id of the context. optional int64 id = 1; // The client provided name of the context. It must be unique within a // database instance. @@ -364,6 +427,9 @@ message Context { optional int64 type_id = 2; // Output only. The name of a ContextType. optional string type = 6; + // The external id that come from the clients’ system. This field is optional. + // If set, it must be unique among all contexts within a virtual database. + optional string external_id = 9; // Values of the properties, which must be specified in the ContextType. map properties = 4; // User provided custom properties which are not defined by its type. @@ -372,6 +438,9 @@ message Context { optional int64 create_time_since_epoch = 7; // Output only. Last update time of the context in millisecond since epoch. optional int64 last_update_time_since_epoch = 8; + + // Output only system metadata. + optional google.protobuf.Any system_metadata = 10; } // the Attribution edges between Context and Artifact instances. @@ -534,7 +603,7 @@ message MySQLDatabaseConfig { optional string host = 1; // The TCP Port number that the MYSQL server accepts connections on. // If unspecified, the default MYSQL port (3306) is used. - optional uint32 port = 2; + optional int64 port = 2; // The database to connect to. Must be specified. 
// After connecting to the MYSQL server, this database is created if not // already present unless skip_db_creation is set. @@ -609,6 +678,71 @@ message SqliteMetadataSourceConfig { optional ConnectionMode connection_mode = 2; } +// A config contains the parameters when using with PostgreSQLMetadatSource. +// Next index: 10 +message PostgreSQLDatabaseConfig { + // Name of host to connect to. If the host name starts with /, it is taken as + // a Unix-domain socket in the abstract namespace. + optional string host = 1; + + // Numeric IP address of host to connect to. If this field is provided, `host` + // field is ignored. + optional string hostaddr = 2; + + // Port number to connect to at the server host, or socket file name extension + // for Unix-domain connections. + optional string port = 3; + + // PostgreSQL user name to connect as. Defaults to be the same as the + // operating system name of the user running the application. + optional string user = 4; + + // Password to be used if the server demands password authentication. + optional string password = 5; + + // Specifies the name of the file used to store passwords. + optional string passfile = 6; + + // The database name. Defaults to be the same as the user name. + optional string dbname = 7; + + // A config to skip the database creation if not exist when connecting the + // db instance. It is useful when the db creation is handled by an admin + // process, while the lib user should not issue db creation clauses. + optional bool skip_db_creation = 8; + + message SSLOptions { + // disable, allow, verify-ca, verify-full, etc. Reference: + // https://www.postgresql.org/docs/current/libpq-connect.html + optional string sslmode = 1; + + // This parameter specifies the file name of the client SSL certificate, + // replacing the default ~/.postgresql/postgresql.crt. This parameter is + // ignored if an SSL connection is not made. + optional string sslcert = 2; + + // This parameter specifies the location for the secret key used for the + // client certificate. It can either specify a file name that will be used + // instead of the default ~/.postgresql/postgresql.key, this parameter is + // ignored if an SSL connection is not made. + optional string sslkey = 3; + + // This parameter specifies the password for the secret key specified in + // sslkey, allowing client certificate private keys to be stored in + // encrypted form on disk even when interactive passphrase input is not + // practical. + optional string sslpassword = 4; + + // This parameter specifies the name of a file containing SSL certificate + // authority (CA) certificate(s). If the file exists, the server's + // certificate will be verified to be signed by one of these authorities. + // The default is ~/.postgresql/root.crt. + optional string sslrootcert = 5; + } + + optional SSLOptions ssloption = 9; +} + message MigrationOptions { // If not set, by default the upgrade migration is disabled. MLMD only @@ -647,6 +781,10 @@ message ConnectionConfig { FakeDatabaseConfig fake_database = 1; MySQLDatabaseConfig mysql = 2; SqliteMetadataSourceConfig sqlite = 3; + + // PostgreSQL database connection config. + PostgreSQLDatabaseConfig postgresql = 5; + } // Options for overwriting the default retry setting when MLMD transactions @@ -672,7 +810,7 @@ message MetadataStoreClientConfig { optional string host = 1; // The TCP Port number that the gRPC server accepts connections on. // Must be specified. 
- optional uint32 port = 2; + optional int64 port = 2; message SSLConfig { // The PEM-encoded private key as a byte string, or Empty if no private key @@ -757,12 +895,14 @@ message ListOperationOptions { optional string next_page_token = 3; // A boolean expression in SQL syntax that is used to specify the conditions - // on nodes' attributes and 1-hop neighborhood. + // on node attributes and directly connected assets. // // In the current implementation, filtering Artifact/Execution/Context with // the following attributes and neighborhood is supported: // - // Attributes: id:int64, type_id:int64, type:string, uri:string, name: string, + // Attributes: + // id:int64, type_id:int64, type:string, + // uri:string, name: string, external_id: string, // create_time_since_epoch:int64, last_update_time_since_epoch:int64 // state:ENUM (Artifact only) last_known_state:ENUM (Execution only) // @@ -772,6 +912,7 @@ message ListOperationOptions { // custom_properties.$name ($name is the custom property name) // attributes: the following attributes can be used // int_value: int64, double_value: double, string_value: string + // bool_value: bool // // - Context (for Artifact and Execution): // syntax: contexts_$alias ($alias can be [0-9A-Za-z_]) @@ -799,6 +940,7 @@ message ListOperationOptions { // - type = 'my_type_name' // - name = 'foo' // - type = 'bar' AND name LIKE 'foo%' + // - external_id = 'my_external_id' // - NOT(create_time_since_epoch < 1 OR last_update_time_since_epoch < 1) // // b) to filter artifacts' uri @@ -809,10 +951,12 @@ message ListOperationOptions { // c) to filter artifact's state or execution's last_known_state // - state = LIVE // - state IS NULL + // - state IN (PENDING, LIVE) // - last_known_state = RUNNING // - last_known_state != RUNNING + // - last_known_state NOT IN (FAILED, CANCELED) // - // d) to filter nodes having a specific context + // d) to filter nodes having a specific context, artifact, or execution // - contexts_a.id = 5 // - contexts_a.type = 'RunContext' // - contexts_a.name = 'my_run' @@ -820,6 +964,29 @@ message ListOperationOptions { // - contexts_a.last_update_time_since_epoch = 1626761453 // To filter nodes with conditions on multiple contexts: // - contexts_a.name = 'my_run' AND contexts_b.name = 'my_pipeline' + // To filter context with artifacts: + // - artifacts_a.id = 5 + // - artifacts_a.type = 'Dataset' + // - artifacts_a.name = 'my_dataset' + // - artifacts_a.uri = 'exact_path_string' + // - artifacts_a.state = LIVE + // - artifacts_a.state IN (PENDING, LIVE) + // - artifacts_a.external_id = "my_external_id" + // - artifacts_a.create_time_since_epoch = 1626761453 + // - artifacts_a.last_update_time_since_epoch = 1626761453 + // To filter contexts with conditions on multiple artifacts: + // - artifacts_a.name = 'my_run' AND artifacts_b.name = 'my_pipeline' + // To filter context with executions: + // - executions_a.id = 5 + // - executions_a.type = 'Dataset' + // - executions_a.name = 'my_dataset' + // - executions_a.last_known_state = RUNNING + //. 
- executions_a.last_known_state IN (NEW, RUNNING) + // - executions_a.external_id = "my_external_id" + // - executions_a.create_time_since_epoch = 1626761453 + // - executions_a.last_update_time_since_epoch = 1626761453 + // To filter contexts with conditions on multiple executions: + // - executions_a.name = 'my_run' AND executions_b.name = 'my_pipeline' // // e) to filter nodes condition on their properties // - properties.accuracy.double_value > 0.95 @@ -828,7 +995,7 @@ message ListOperationOptions { // other than [0-9A-Za-z_], then the name need to be backquoted, // e.g., // - properties.`my property`.int_value > 0 - // - custom_properties.`my:custom.property`.string_value = 'foo' + // - custom_properties.`my:custom.property`.bool_value = true // // f) complex query to filter both node attributes and neighborhood // - type = 'DataSet' AND @@ -846,7 +1013,7 @@ message ListOperationOptions { // - events_0.milliseconds_since_epoch = 1 // to filter Executions on Events // - events_0.artifact_id = 1 - // - events_0.type = INPUT + // - events_0.type IN (INPUT, INTERNAL_INPUT) // - events_0.milliseconds_since_epoch = 1 // TODO(b/145945460) Support filtering on event step fields. optional string filter_query = 4; @@ -877,48 +1044,190 @@ message ListOperationNextPageToken { // the same order_by field value. // This field is currently only set whe order_by field is LAST_UPDATE_TIME. repeated int64 listed_ids = 4; + } // Options for transactions. // Note: This is under development. Clients should not use it. message TransactionOptions { extensions 1000 to max; + + // Transaction tag for debug use only. + optional string tag = 1; } -// The query options for list lineage graph operation. It allows specifying the -// `query_nodes` of interests and the `stop_conditions` when querying a -// lineage graph. The query option is used for exporting provenance information -// from a source MLMD instance. +// TODO(b/283852485): Deprecate GetLineageGraph API after migration to +// GetLineageSubgraph API. +// The query options for `get_lineage_graph` operation. +// `query_nodes` is a list of nodes of interest. +// Currently only artifacts are supported as `query_nodes`. +// `stop_conditions` defines the filtering rules when querying a lineage graph. +// `max_node_size` defines the total number of artifacts and executions returned +// in the subgraph. message LineageGraphQueryOptions { - // A query to specify the nodes of interests. + // A query to specify the nodes of interest. + // `ListOperationOptions.max_result_size` sets the maximum number of nodes to + // begin with the graph search. // TODO(b/178491112) Support query_nodes for Executions. oneof query_nodes { ListOperationOptions artifacts_options = 1; } - // Boundary conditions to stop the traversal when return the `subgraph`. + // Filtering conditions for retrieving the lineage graph. message BoundaryConstraint { - // The maximum number of hops from the `query_nodes` to traverse. It should - // be non-negative. When zero, only the `query_nodes` are returned. + // The maximum number of hops from the `query_nodes` to traverse. + // A hop is defined as a jump to the next node following the path of + // node -> event -> next_node. + // For example, in the lineage graph a_1 -> e_1 -> a_2: + // a_2 is 2 hops away from a_1, and e_1 is 1 hop away from a_1. + // `max_num_hops` should be non-negative. + // When its value is set to 0, only the `query_nodes` are returned. 
optional int64 max_num_hops = 1; - // Conditions for the boundary node in the returned the subgraph. - // Please refer to ListOperationOptions.filter_query for the syntax. - // If set, it stops traversing further at the artifacts that do not satisfy - // `boundary_artifacts` and exclude them from the returned subgraph. - // For example, to look for the models related to a DataSet and ignore - // other artifacts derived from the DataSet: - // "type = 'DataSet' OR type = 'TransformGraph' OR type = 'Model'" + // Filtering conditions for retrieving the lineage graph. + // Please refer to `ListOperationOptions.filter_query` for the syntax. + // + // If set, the `boundary_artifacts` defines which artifacts to keep in the + // returned lineage graph during the graph search. + // Artifacts that do not satisfy the `boundary_artifacts` are filtered out, + // and the subgraphs starting at them will be pruned. + // If not set, no artifacts will be filtered out. + // Taking the following lineage graph as example: + // (`a` represents an Artifact, `e` represents an Execution, each arrow + // represents a hop.) + // a_0 a_1 a_3 + // | \ / \ + // \/ \/ \/ \/ + // e_0 e_1 e_3 + // / \ + // \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 + // To query all the upstream and downstream nodes 3 hops away from a_4, + // while excluding the upstream subgraph starting at a_3, then + // `stop_conditions` can be set as: + // { + // max_num_hops: 3 + // boundary_artifacts: 'id != 3' + // } + // With the `stop_conditions`, {a_3, e_1, a_1, a_0, e_0} will be filtered + // out. + // The returned lineage graph looks like: + // e_3 + // / \ + // \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 optional string boundary_artifacts = 2; - // If set, it stops traversing further at the executions that do not satisfy - // `boundary_executions` and exclude them from the returned subgraph. - // For example, two trainers may be connected due to the shared datasets, - // to exclude other trainers except a particular one (e.g., id = 5): - // "type != 'Trainer' OR (type = 'Trainer' AND id = 5)" + // If set, the `boundary_executions` defines which executions to keep in the + // returned lineage graph during the graph search. + // Executions that do not satisfy the `boundary_executions` are filtered out + // and the subgraphs starting at them will be pruned. + // If not set, no executions will be filtered out. + // In the example above, to query for all the upstream and downstream nodes + // 3 hops away from a_4, while excluding the upstream subgraph and the + // downstream subgraph starting at e_3, then `stop_conditions` can be set as + // { + // max_num_hops: 3 + // boundary_executions: 'id != 3' + // } + // With the `stop_conditions`, {e_3, a_5, a_3, e_1, a_1, a_0, e_0} will be + // filtered out. + // The returned lineage graph looks like: + // a_2 a_4 + // \ / + // \/ \/ + // e_2 + // However, for the following graph: + // a_0 a_1 a_3 + // | \ / \ + // \/ \/ \/ \/ + // e_0 e_1 e_3 + // \ / \ + // \/ \/ \/ + // a_2 a_4 a_5 + // \ / + // \/ \/ + // e_2 + // With the same `stop_conditions`, only {e_3, a_5, a_0, e_0} will be + // filtered out. + // The returned lineage graph looks like: + // a_1 a_3 + // \ / + // \/ \/ + // e_1 + // \ + // \/ + // a_2 a_4 + // \ / + // \/ \/ + // e_2 optional string boundary_executions = 3; } - // A constraint option to define the boundary of the returned subgraph. + // A constraint option to define the filtering rules when querying a lineage + // graph. 
optional BoundaryConstraint stop_conditions = 2; + + // Maximum total number of artifacts and executions in the whole returned + // lineage graph. + // If set to 0 or below, all related nodes will be returned without any + // number limitation. + // The number counts toward Artifacts and Executions. Nothing else considered. + // + // NOTE: There is no pagination supported. + optional int64 max_node_size = 3 [default = 20]; +} + +// The query options for lineage graph tracing from a list of interested nodes. +message LineageSubgraphQueryOptions { + // `starting_nodes` is a list of nodes of interest to start graph tracing. + // NOTE: The maximum number of starting nodes is 100 at most. + message StartingNodes { + // `filter_query` is a boolean expression in SQL syntax that is used to + // specify the conditions on starting nodes. + // Please refer to ListOperationOptions.filter_query for more details. + optional string filter_query = 1; + } + + oneof starting_nodes { + StartingNodes starting_artifacts = 1; + StartingNodes starting_executions = 2; + } + + // The maximum number of hops from the `starting_nodes` to traverse. + // A hop is defined as a jump to the next node following the path of + // node -> event -> next_node. + // For example, in the lineage graph a_1 -> e_1 -> a_2: + // a_2 is 2 hops away from a_1, and e_1 is 1 hop away from a_1. + // `max_num_hops` should be non-negative. + // When its value is set to 0, only the `starting_nodes` are returned. + optional int64 max_num_hops = 3; + + enum Direction { + // Direction is by defult DIRECTION_UNSPECIFIED, which is equivalent to + // BIDIRECTIONAL. + DIRECTION_UNSPECIFIED = 0; + // Indicates tracing the lineage graph by hops in upstream direction. + UPSTREAM = 1; + // Indicates tracing the lineage graph by hops in downstream direction. + DOWNSTREAM = 2; + // Indicates tracing the lineage graph in both directions. + BIDIRECTIONAL = 3; + } + // The direction of lineage graph tracing, which means the direction of all + // hops in the tracing. + // An UPSTREAM hop means an expansion following the path of + // execution -> output_event -> artifact or + // artifact -> input_event -> execution + // A DOWNSTREAM hop means an expansion following the path of + // execution -> input_event -> artifact or + // artifact -> output_event -> execution + // Please refer to `Direction` for more details. + optional Direction direction = 4; } diff --git a/third_party/ml-metadata/ml_metadata/proto/metadata_store_service.proto b/third_party/ml-metadata/ml_metadata/proto/metadata_store_service.proto index a1155eed78..773a968ab9 100644 --- a/third_party/ml-metadata/ml_metadata/proto/metadata_store_service.proto +++ b/third_party/ml-metadata/ml_metadata/proto/metadata_store_service.proto @@ -17,6 +17,7 @@ syntax = "proto2"; package ml_metadata; +import "google/protobuf/field_mask.proto"; import "ml_metadata/proto/metadata_store.proto"; // An artifact and type pair. Part of an artifact struct. @@ -69,6 +70,94 @@ message PutArtifactsRequest { // Additional options to change the behavior of the method. optional Options options = 2; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 3; + + // FieldMask for artifacts in the PUT update + // If `artifact.id` is not specified, it means a new artifact will be created + // and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the artifacts as a whole. 
+ // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Examples that update `properties` / `custom_properties`: + // 1.1 Add a <'key', 'val'> pair into `custom_properties`: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 1.2 Set `custom_properties['key'].bool_value` to true: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // bool_value: true + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 1.3 Delete the complete <'key', 'val'> pair from `custom_properties`: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // custom_properties {} + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Examples that update fields such as `uri`, `external_id`, etc: + // 2.1 Update `external_id` field: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // external_id: "new_value" + // } + // update_mask { + // paths: "external_id" + // } + // } + // 2.2 Set `uri` field: + // { + // artifacts { + // id: 1234 + // type_id: 5678 + // uri: "set_value" + // } + // update_mask { + // paths: "uri" + // } + // } + // If `paths: "properties"` or `paths: "custom_properties"` are added to + // `update_mask`, the key-level updates will be ignored and we only perform + // field-level updates on the all `properties`/`custom_properties`. + // For example: + // If the mask is: {"properties", "properties.key1"}, the field path + // "properties.key1" will be ignored and all `properties` will be updated. + // (Do not suggest) + // If the mask is {"properties", "external_id"}, all + // `properties` and field `external_id` will be updated. (Do not suggest) + optional google.protobuf.FieldMask update_mask = 4; } message PutArtifactsResponse { @@ -97,6 +186,9 @@ message PutArtifactTypeRequest { // Deprecated fields. optional bool can_delete_fields = 3 [deprecated = true]; optional bool all_fields_match = 4 [default = true, deprecated = true]; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 6; } message PutArtifactTypeResponse { @@ -106,6 +198,46 @@ message PutArtifactTypeResponse { message PutExecutionsRequest { repeated Execution executions = 1; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; + + // FieldMask for executions in the PUT update + // If `execution.id` is not specified, it means a new execution will be + // created and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the executions as a whole. + // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Add a <'key', 'val'> pair into `custom_properties`: + // { + // executions { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Set `last_known_state` field: + // { + // executions { + // id: 1234 + // type_id: 5678 + // last_known_state: CACHED + // } + // update_mask { + // paths: "last_known_state" + // } + // } + // Please refer to `PutArtifactsRequest` for more details. 
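As a hedged illustration of the update_mask semantics documented above, the sketch below updates a single custom property on an existing artifact through PutArtifacts. The import path, helper name, and the "label" property key are hypothetical, and field names assume standard protoc-gen-go output for this proto2 schema:

package mlmdexamples

import (
	"context"

	pb "github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata" // assumed vendored package path
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/fieldmaskpb"
)

// setArtifactLabel updates only custom_properties["label"] on an existing
// artifact by sending a FieldMask with the key-level path
// "custom_properties.label", mirroring example 1.1 in the comments above.
func setArtifactLabel(ctx context.Context, client pb.MetadataStoreServiceClient,
	artifactID, typeID int64, label string) error {
	req := &pb.PutArtifactsRequest{
		Artifacts: []*pb.Artifact{{
			Id:     proto.Int64(artifactID),
			TypeId: proto.Int64(typeID),
			CustomProperties: map[string]*pb.Value{
				"label": {Value: &pb.Value_StringValue{StringValue: label}},
			},
		}},
		UpdateMask: &fieldmaskpb.FieldMask{Paths: []string{"custom_properties.label"}},
	}
	_, err := client.PutArtifacts(ctx, req)
	return err
}

Because the mask carries only the key-level path, other properties and fields on the stored artifact are left untouched; per the documentation above, an empty mask would instead update the artifact as a whole.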
+ optional google.protobuf.FieldMask update_mask = 3; } message PutExecutionsResponse { @@ -134,6 +266,9 @@ message PutExecutionTypeRequest { // Deprecated fields. optional bool can_delete_fields = 3 [deprecated = true]; optional bool all_fields_match = 4 [default = true, deprecated = true]; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 6; } message PutExecutionTypeResponse { @@ -143,6 +278,9 @@ message PutExecutionTypeResponse { message PutEventsRequest { repeated Event events = 1; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; } message PutEventsResponse {} @@ -165,9 +303,19 @@ message PutExecutionRequest { message Options { // When there's a race to publish executions with a new context with the // same context.name, by default there'll be one writer succeeds and - // the rest of the writers returning AlreadyExists errors. If set the field, - // the failed writer will reuse the stored context in the transaction. + // the rest of the writers returning AlreadyExists errors. If set to true, + // the API will reuse the stored context in the transaction and perform + // an update. optional bool reuse_context_if_already_exist = 1; + + // When there's a race to publish executions with a new artifact with the + // same artifact.external_id, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. + // If set to true and an Artifact has non-empty external_id, + // the API will reuse the stored artifact in the transaction and + // perform an update. Otherwise, it will fall back to relying on `id` field + // to decide if it's update (if `id` exists) or insert (if `id` is empty). + optional bool reuse_artifact_if_already_exist_by_external_id = 2; } // The execution that produces many artifact and event pairs. optional Execution execution = 1; @@ -182,6 +330,9 @@ message PutExecutionRequest { repeated Context contexts = 3; // Additional options to change the behavior of the method. optional Options options = 4; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 5; } message PutExecutionResponse { @@ -195,6 +346,51 @@ message PutExecutionResponse { repeated int64 context_ids = 3; } +message PutLineageSubgraphRequest { + repeated Execution executions = 1; + repeated Artifact artifacts = 2; + repeated Context contexts = 3; + + message EventEdge { + // Index in the array of executions. + optional int32 execution_index = 1; + // Index in the array of artifacts. + optional int32 artifact_index = 2; + optional Event event = 3; + } + repeated EventEdge event_edges = 4; + + message Options { + // When there's a race to publish executions with a new context with the + // same context.name, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. If set to true, + // the API will reuse the stored context in the transaction and perform + // an update. + optional bool reuse_context_if_already_exist = 1; + + // When there's a race to publish executions with a new artifact with the + // same artifact.external_id, by default there'll be one writer succeeds and + // the rest of the writers returning AlreadyExists errors. + // If set to true and an Artifact has non-empty external_id, + // the API will reuse the stored artifact in the transaction and + // perform an update. 
Otherwise, it will fall back to relying on `id` field + // to decide if it's update (if `id` exists) or insert (if `id` is empty). + optional bool reuse_artifact_if_already_exist_by_external_id = 2; + } + optional Options options = 5; + + optional TransactionOptions transaction_options = 6; +} + +message PutLineageSubgraphResponse { + // A list of execution ids index-aligned with `executions` in the request + repeated int64 execution_ids = 1 [packed = true]; + // A list of artifact ids index-aligned with `artifacts` in the request + repeated int64 artifact_ids = 2 [packed = true]; + // A list of context ids index-aligned with `contexts` in the request + repeated int64 context_ids = 3 [packed = true]; +} + message PutTypesRequest { repeated ArtifactType artifact_types = 1; repeated ExecutionType execution_types = 2; @@ -214,6 +410,9 @@ message PutTypesRequest { // Deprecated fields. optional bool can_delete_fields = 5 [deprecated = true]; optional bool all_fields_match = 6 [default = true, deprecated = true]; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 8; } message PutTypesResponse { @@ -246,6 +445,9 @@ message PutContextTypeRequest { // Deprecated fields. optional bool can_delete_fields = 3 [deprecated = true]; optional bool all_fields_match = 4 [default = true, deprecated = true]; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 6; } message PutContextTypeResponse { @@ -255,6 +457,46 @@ message PutContextTypeResponse { message PutContextsRequest { repeated Context contexts = 1; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; + + // FieldMask for contexts in the PUT update + // If `context.id` is not specified, it means a new context will be + // created and `update_mask` will not be applied to the creation. + // If `update_mask` is empty, update the contexts as a whole. + // If `update_mask` is not empty, only update fields or properties specified + // in `update_mask`. + // Example request protos: + // 1. Add a <'key', 'val'> pair into `custom_properties`: + // { + // contexts { + // id: 1234 + // type_id: 5678 + // custom_properties { + // key: "key" + // value: { + // string_value: "val" + // } + // } + // } + // update_mask { + // paths: "custom_properties.key" + // } + // } + // 2. Set `name` field: + // { + // contexts { + // id: 1234 + // type_id: 5678 + // name: "set_name" + // } + // update_mask { + // paths: "name" + // } + // } + // Please refer to `PutArtifactsRequest` for more details. + optional google.protobuf.FieldMask update_mask = 3; } message PutContextsResponse { @@ -265,12 +507,18 @@ message PutContextsResponse { message PutAttributionsAndAssociationsRequest { repeated Attribution attributions = 1; repeated Association associations = 2; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 3; } message PutAttributionsAndAssociationsResponse {} message PutParentContextsRequest { repeated ParentContext parent_contexts = 1; + + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; } message PutParentContextsResponse {} @@ -283,6 +531,12 @@ message GetArtifactsByTypeRequest { // Currently supports: // 1. Field to order the results. // 2. Page size. + // If set, the request will + // first fetch all artifacts with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Artifacts of the page size. 
+ // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. optional ListOperationOptions options = 3; // Options regarding transactions. optional TransactionOptions transaction_options = 4; @@ -312,23 +566,37 @@ message GetArtifactByTypeAndNameResponse { message GetArtifactsByIDRequest { // A list of artifact ids to retrieve. repeated int64 artifact_ids = 1; + // An option to populate all the ArtifactTypes in the response. + // If true, returns retrieved Artifacts and their artifact types, which can be + // matched by type_ids. + // If false, returns only the retrieved Artifacts. + // Example request proto: + // { + // artifact_ids: 101, + // populate_artifact_types: true, + // } + // The response will contain an artifact with id = 101 and an artifact type + // with id = artifact.type_id(). + optional bool populate_artifact_types = 3 [default = false]; // Options regarding transactions. optional TransactionOptions transaction_options = 2; } message GetArtifactsByIDResponse { // Artifacts with matching ids. - // This is not index-aligned: if an id is not found, it is not + // This is not index-aligned: if an id is not found, it is not returned. repeated Artifact artifacts = 1; + // ArtifactTypes populated with matching type_ids owned by `artifacts`. + // This is not index-aligned: if a type_id is not found, it is not returned. + repeated ArtifactType artifact_types = 2; } // Request to retrieve Artifacts using List options. // If option is not specified then all Artifacts are returned. message GetArtifactsRequest { // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. optional ListOperationOptions options = 1; // Options regarding transactions. optional TransactionOptions transaction_options = 2; @@ -360,9 +628,8 @@ message GetArtifactsByURIResponse { // If option is not specified then all Executions are returned. message GetExecutionsRequest { // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. optional ListOperationOptions options = 1; // Options regarding transactions. optional TransactionOptions transaction_options = 2; @@ -417,6 +684,66 @@ message GetContextTypesResponse { repeated ContextType context_types = 1; } +message GetArtifactsByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetArtifactsByExternalIdsResponse { + repeated Artifact artifacts = 1; +} + +message GetExecutionsByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetExecutionsByExternalIdsResponse { + repeated Execution executions = 1; +} + +message GetContextsByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetContextsByExternalIdsResponse { + repeated Context contexts = 1; +} + +message GetArtifactTypesByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. 
+ optional TransactionOptions transaction_options = 2; +} + +message GetArtifactTypesByExternalIdsResponse { + repeated ArtifactType artifact_types = 1; +} + +message GetExecutionTypesByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetExecutionTypesByExternalIdsResponse { + repeated ExecutionType execution_types = 1; +} + +message GetContextTypesByExternalIdsRequest { + repeated string external_ids = 1; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetContextTypesByExternalIdsResponse { + repeated ContextType context_types = 1; +} + message GetExecutionsByTypeRequest { optional string type_name = 1; // If not set, it looks for the type with type_name with default type_version. @@ -425,6 +752,12 @@ message GetExecutionsByTypeRequest { // Currently supports: // 1. Field to order the results. // 2. Page size. + // If set, the request will + // first fetch all executions with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Executions of the page size. + // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. optional ListOperationOptions options = 3; // Options regarding transactions. optional TransactionOptions transaction_options = 4; @@ -552,9 +885,8 @@ message GetContextTypesByIDResponse { // If option is not specified then all Contexts are returned. message GetContextsRequest { // Specify options. - // Currently supports: - // 1. Field to order the results. - // 2. Page size. + // Please refer to the documentation of ListOperationOptions for the supported + // functionalities. optional ListOperationOptions options = 1; // Options regarding transactions. optional TransactionOptions transaction_options = 2; @@ -575,6 +907,12 @@ message GetContextsByTypeRequest { // Currently supports: // 1. Field to order the results. // 2. Page size. + // If set, the request will + // first fetch all contexts with specified `type_name` and `type_version`, + // then order by a specifield field + // finally find the correct page and return #Contexts of the page size. + // Higher-level APIs may only use the functionalies partially. + // Please reference the API documentation for the API behaviors. optional ListOperationOptions options = 2; // If not set, it looks for the type with type_name and options with default // type_version. @@ -657,6 +995,32 @@ message GetChildrenContextsByContextResponse { repeated Context contexts = 1; } +message GetParentContextsByContextsRequest { + repeated int64 context_ids = 1 [packed = true]; + // Options regarding transactions. + optional TransactionOptions transaction_options = 2; +} + +message GetParentContextsByContextsResponse { + message ParentContextsPerChild { + repeated Context parent_contexts = 1; + } + map contexts = 2; +} + +message GetChildrenContextsByContextsRequest { + repeated int64 context_ids = 1 [packed = true]; + // Options regarding transactions. 
+ optional TransactionOptions transaction_options = 2; +} + +message GetChildrenContextsByContextsResponse { + message ChildrenContextsPerParent { + repeated Context children_contexts = 1; + } + map contexts = 2; +} + message GetArtifactsByContextRequest { optional int64 context_id = 1; @@ -700,8 +1064,10 @@ message GetExecutionsByContextResponse { } -// A lineage query request to specify the query nodes of interests and the -// boundary conditions for the returned graph. +// TODO(b/283852485): Deprecate GetLineageGraph API after migration to +// GetLineageSubgraph API. +// A lineage query request to specify the query nodes of interest and the +// boundary conditions for pruning the returned graph. message GetLineageGraphRequest { optional LineageGraphQueryOptions options = 1; // Options regarding transactions. @@ -714,6 +1080,44 @@ message GetLineageGraphResponse { optional LineageGraph subgraph = 1; } +message GetLineageSubgraphRequest { + // Query options for lineage graph tracing from a list of interested + // nodes. + // A lineage subgraph without node details (e.g., external_id, properties) + // will be returned. Please refer to `LineageSubgraphQueryOptions` for more + // details. + optional LineageSubgraphQueryOptions lineage_subgraph_query_options = 1; + // `read_mask` contains user specified paths of fields that should be included + // in the returned lineage subgraph. + // Supported field paths are: 'artifacts', 'executions', 'contexts', + // 'artifact_types', 'execution_types', 'context_types', and 'events'. + // TODO(b/283852485): Include 'associations' or 'attributions' in the + // returned graph. + // If 'artifacts', 'executions', or 'contexts' is specified in `read_mask`, + // the dehydrated nodes will be included. + // Note: A dehydrated node means a node containing only its id and no + // other information. User should call GetNodesByID or other APIs to get + // node details later on. + // If 'artifact_types', 'execution_types', or 'context_types' is specified + // in `read_mask`, all the node types will be included. + // If 'events' is specified in `read_mask`, the events will be included. + // If `read_mask` is not set, the API will return all the fields in + // the returned graph. + // Note: Only paths of fields in LineageGraph message are supported. Paths + // of fields in the submessage, such as "artifacts.id", "contexts.name" are + // not acknowledged. + optional google.protobuf.FieldMask read_mask = 3; + optional TransactionOptions transaction_options = 2; +} + +message GetLineageSubgraphResponse { + // A lineage subgraph of MLMD nodes and relations retrieved from lineage + // graph tracing. + optional LineageGraph lineage_subgraph = 1; +} + + + // LINT.IfChange service MetadataStoreService { // Inserts or updates an ArtifactType. @@ -787,6 +1191,10 @@ service MetadataStoreService { // For new artifacts, type must be specified. // For old artifacts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated artifacts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // artifacts: A list of artifacts to insert or update. // @@ -801,6 +1209,10 @@ service MetadataStoreService { // For new executions, type must be specified. // For old executions, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated executions will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. 
+ // // Args: // executions: A list of executions to insert or update. // @@ -813,6 +1225,10 @@ service MetadataStoreService { // // The execution_id and artifact_id must already exist. // Once created, events cannot be modified. + // AlreadyExists error will be raised if duplicated events are found. + // + // It is not guaranteed that the created or updated events will share the + // same `milliseconds_since_epoch` timestamps. // // Args: // events: A list of events to insert or update. @@ -824,9 +1240,16 @@ service MetadataStoreService { // input/output Event. The `contexts` describe the associations of the // execution and the attributions of the artifacts. // - // If an execution_id, artifact_id or context_id is specified, it is an - // update, otherwise it does an insertion. For insertion, type must be - // specified. + // If an execution_id is specified, it is an update on the corresponding + // execution, otherwise it does an insertion. + // For insertion, type must be specified. Same rule applies to artifacts + // and contexts in the request. Corresponding errors may raised. For example: + // AlreadyExists error will be raised if duplicated executions, artifacts + // or events are found. + // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. // // Args: // execution: An execution to insert or update. @@ -838,6 +1261,38 @@ service MetadataStoreService { // with the input. rpc PutExecution(PutExecutionRequest) returns (PutExecutionResponse) {} + // Inserts or updates a lineage subgraph (i.e. a collection of event edges + // and its executions, artifacts, and related contexts) atomically. The + // `event_edges` include an Event and the indices of the corresponding + // execution and artifact from the input list of executions and artifacts. The + // `contexts` describe the associations of the Execution and the attributions + // of the Artifact. + // + // If an execution_id is specified, it is an update on the corresponding + // Execution, otherwise it does an insertion. For insertion, type must be + // specified. These rules apply to Artifacts and Contexts as well. + // Corresponding errors may be raised. For example: AlreadyExists error will + // be raised if duplicated executions, artifacts, or events are found. + // + // It is not guaranteed that the created or updated executions, artifacts, + // contexts and events will share the same `create_time_since_epoch`, + // `last_update_time_since_epoch`, or `milliseconds_since_epoch` timestamps. + // + // Args: + // executions: A list of executions to insert or update. + // artifacts: A list of artifacts to insert or update. + // contexts: A list of contexts to insert and/or create associations and + // attributions with. + // event_edges: A list of events to insert with the indices of the + // corresponding execution and artifact from the input lists of + // executions and artifacts. + // + // Returns: + // Lists of execution, artifact, and context ids index-aligned with the + // inputs. + rpc PutLineageSubgraph(PutLineageSubgraphRequest) + returns (PutLineageSubgraphResponse) {} + // Inserts or updates contexts in database and returns a list of context ids. // // If an context_id is specified for a context, it is an update. @@ -845,6 +1300,10 @@ service MetadataStoreService { // For new contexts, type must be specified. 
// For old contexts, type must be unchanged or unspecified. // + // It is not guaranteed that the created or updated contexts will share the + // same `create_time_since_epoch` or `last_update_time_since_epoch` + // timestamps. + // // Args: // contexts: A list of contexts to insert or update. // @@ -986,6 +1445,30 @@ service MetadataStoreService { rpc GetEventsByArtifactIDs(GetEventsByArtifactIDsRequest) returns (GetEventsByArtifactIDsResponse) {} + // Gets all the artifacts with matching external ids. + rpc GetArtifactsByExternalIds(GetArtifactsByExternalIdsRequest) + returns (GetArtifactsByExternalIdsResponse) {} + + // Gets all the artifacts with matching external ids. + rpc GetExecutionsByExternalIds(GetExecutionsByExternalIdsRequest) + returns (GetExecutionsByExternalIdsResponse) {} + + // Gets all the artifacts with matching external ids. + rpc GetContextsByExternalIds(GetContextsByExternalIdsRequest) + returns (GetContextsByExternalIdsResponse) {} + + // Gets all the artifacts with matching external ids. + rpc GetArtifactTypesByExternalIds(GetArtifactTypesByExternalIdsRequest) + returns (GetArtifactTypesByExternalIdsResponse) {} + + // Gets all the artifacts with matching external ids. + rpc GetExecutionTypesByExternalIds(GetExecutionTypesByExternalIdsRequest) + returns (GetExecutionTypesByExternalIdsResponse) {} + + // Gets all the artifacts with matching external ids. + rpc GetContextTypesByExternalIds(GetContextTypesByExternalIdsRequest) + returns (GetContextTypesByExternalIdsResponse) {} + // Gets all context that an artifact is attributed to. rpc GetContextsByArtifact(GetContextsByArtifactRequest) @@ -1003,6 +1486,15 @@ service MetadataStoreService { rpc GetChildrenContextsByContext(GetChildrenContextsByContextRequest) returns (GetChildrenContextsByContextResponse) {} + // Batch getting all the parent contexts that a list of contexts are related. + rpc GetParentContextsByContexts(GetParentContextsByContextsRequest) + returns (GetParentContextsByContextsResponse) {} + + // Batch getting all the children contexts that a list of contexts are + // related. + rpc GetChildrenContextsByContexts(GetChildrenContextsByContextsRequest) + returns (GetChildrenContextsByContextsResponse) {} + // Gets all direct artifacts that a context attributes to. rpc GetArtifactsByContext(GetArtifactsByContextRequest) returns (GetArtifactsByContextResponse) {} @@ -1012,10 +1504,21 @@ service MetadataStoreService { returns (GetExecutionsByContextResponse) {} + // TODO(b/283852485): Deprecate GetLineageGraph API after migration to + // GetLineageSubgraph API. // The transaction performs a constrained transitive closure and returns a // lineage subgraph satisfying the conditions and constraints specified in // the GetLineageGraphRequest. rpc GetLineageGraph(GetLineageGraphRequest) returns (GetLineageGraphResponse) {} + + // Gets a lineage subgraph by performing graph traversal from a list of + // interested nodes. + // A lineage subgraph without node details (e.g., external_id, properties) + // will be returned. 
+ rpc GetLineageSubgraph(GetLineageSubgraphRequest) + returns (GetLineageSubgraphResponse) {} + + } // LINT.ThenChange(../metadata_store/metadata_store_service_interface.h) diff --git a/third_party/ml-metadata/update_version.sh b/third_party/ml-metadata/update_version.sh index 9b907e5759..feffc20751 100755 --- a/third_party/ml-metadata/update_version.sh +++ b/third_party/ml-metadata/update_version.sh @@ -20,7 +20,7 @@ image_files=( "${REPO_ROOT}/.cloudbuild.yaml" \ "${REPO_ROOT}/.release.cloudbuild.yaml" \ "${REPO_ROOT}/manifests/kustomize/base/metadata/base/metadata-grpc-deployment.yaml" \ "${REPO_ROOT}/test/tag_for_hosted.sh" \ - "${REPO_ROOT}/v2/Makefile" \ + "${REPO_ROOT}/backend/src/v2/Makefile" \ ) for i in "${image_files[@]}" do @@ -29,7 +29,7 @@ do done requirement_files=( "${REPO_ROOT}/backend/metadata_writer/requirements.in" \ - "${REPO_ROOT}/v2/test/requirements.txt" + "${REPO_ROOT}/backend/src/v2/test/requirements.txt" ) for i in "${requirement_files[@]}" do From f43272dee8c40563ee05c07d9e1de56c4ba7c08f Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 22 Aug 2023 13:48:35 -0700 Subject: [PATCH 120/253] feat(components): Update container URIs for embedding eval components PiperOrigin-RevId: 559213412 --- .../model_evaluation/llm_embedding_retrieval/component.py | 4 +--- .../llm_information_retrieval_preprocessor/component.py | 4 +--- .../model_evaluation/llm_retrieval_metrics/component.py | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py index 0a492e4b5c..ba0a22dd73 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py @@ -22,9 +22,7 @@ from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' -# TODO(b/290838262): Update the image URI to point to -# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. +_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' @container_component diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py index 1d9c728c81..63428f4485 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -20,9 +20,7 @@ from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' -# TODO(b/290838262): Update the image URI to point to -# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. 
+_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' @container_component diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py index ff71d2c496..7a4be67fec 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py @@ -9,9 +9,7 @@ from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:navekshasood-test' -# TODO(b/290838262): Update the image URI to point to -# us-docker.pkg.dev/vertex-evaluation/public/llm:va.b before we launch. +_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' @container_component From 9ce28665276a74184339ee86c8ca84f8368fb8b4 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 23 Aug 2023 11:35:37 -0700 Subject: [PATCH 121/253] feat(components): Update supported large model reference names that can be resolved by function based component in _implementation/llm PiperOrigin-RevId: 559493244 --- .../_implementation/llm/bulk_inferrer.py | 213 ------------------ .../_implementation/llm/function_based.py | 187 ++++++++++----- .../preview/llm/infer/component.py | 6 +- .../preview/llm/rlhf/component.py | 6 +- 4 files changed, 142 insertions(+), 270 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py index 4d605ff87b..15be32859a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py @@ -13,225 +13,12 @@ # limitations under the License. """KFP Container component that performs bulk inference.""" -from typing import NamedTuple, Optional - from google_cloud_pipeline_components import _image from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.llm import utils import kfp -@kfp.dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) -def get_default_bulk_inference_machine_specs( - large_model_reference: str, - use_gpu_defaults: bool = False, - accelerator_type_override: Optional[str] = None, - accelerator_count_override: Optional[int] = None, -) -> NamedTuple( - 'MachineSpec', accelerator_type=str, accelerator_count=int, machine_type=str -): - """Gets default machine specs for bulk inference and overrides params if provided. - - Args: - large_model_reference: Foundational model to use for default specs. - use_gpu_defaults: Whether to get default gpu specs (otherwise will get TPU - specs). - accelerator_type_override: Accelerator type to override the default. - accelerator_count_override: Accelerator count to override the default. - - Returns: - MachineSpec, including accelerator_type, accelerator_count, machine_type. - - Raises: - ValueError: If large_model_reference is invalid or overridden values are - invalid. 
- """ - # pylint: disable=g-import-not-at-top,redefined-outer-name,reimported - import collections - # pylint: enable=g-import-not-at-top,redefined-outer-name,reimported - - machine_spec = collections.namedtuple( - 'MachineSpec', ['accelerator_type', 'accelerator_count', 'machine_type'] - ) - - # machine types - cloud_tpu = 'cloud-tpu' - ultra_gpu_1g = 'a2-ultragpu-1g' - ultra_gpu_2g = 'a2-ultragpu-2g' - ultra_gpu_4g = 'a2-ultragpu-4g' - ultra_gpu_8g = 'a2-ultragpu-8g' - high_gpu_1g = 'a2-highgpu-1g' - high_gpu_2g = 'a2-highgpu-2g' - high_gpu_4g = 'a2-highgpu-4g' - high_gpu_8g = 'a2-highgpu-8g' - mega_gpu_16g = 'a2-megagpu-16g' - - # accelerator types - tpu_v2 = 'TPU_V2' - tpu_v3 = 'TPU_V3' - nvidia_a100_40g = 'NVIDIA_TESLA_A100' - nvidia_a100_80g = 'NVIDIA_A100_80GB' - tpu_accelerator_types = frozenset([tpu_v2, tpu_v3]) - gpu_accelerator_types = frozenset([nvidia_a100_40g, nvidia_a100_80g]) - valid_accelerator_types = frozenset( - list(gpu_accelerator_types) + list(tpu_accelerator_types) - ) - - # base models - palm_tiny = 'PALM_TINY' - gecko = 'GECKO' - otter = 'OTTER' - bison = 'BISON' - elephant = 'ELEPHANT' - t5_small = 'T5_SMALL' - t5_large = 'T5_LARGE' - t5_xl = 'T5_XL' - t5_xxl = 'T5_XXL' - - def _get_machine_type(accelerator_type: str, accelerator_count: int) -> str: - if accelerator_count < 1: - raise ValueError('accelerator_count must be at least 1.') - - if accelerator_type in tpu_accelerator_types: - return cloud_tpu - - elif accelerator_type == nvidia_a100_40g: - if accelerator_count == 1: - return high_gpu_1g - - elif accelerator_count == 2: - return high_gpu_2g - - elif accelerator_count <= 4: - return high_gpu_4g - - elif accelerator_count <= 8: - return high_gpu_8g - - elif accelerator_count <= 16: - return mega_gpu_16g - - else: - raise ValueError( - f'Too many {accelerator_type} requested. Must be <= 16.' - ) - - elif accelerator_type == nvidia_a100_80g: - if accelerator_count == 1: - return ultra_gpu_1g - - elif accelerator_count == 2: - return ultra_gpu_2g - - elif accelerator_count <= 4: - return ultra_gpu_4g - - elif accelerator_count <= 8: - return ultra_gpu_8g - - else: - raise ValueError( - f'Too many {accelerator_type} requested. Must be <= 8.' - ) - - else: - raise ValueError( - 'accelerator_type_override must be one of' - f' {sorted(valid_accelerator_types)}.' 
- ) - - accepted_reference_models = frozenset( - [palm_tiny, gecko, otter, bison, elephant, t5_small, t5_xxl] - ) - - # Default GPU specs are based on study here: - # https://docs.google.com/spreadsheets/d/1_ZKqfyLQ5vYrOQH5kfdMb_OoNT48r6vNbqv3dKDxDTw/edit?resourcekey=0-3kgDrn4XDdvlJAc8Kils-Q#gid=255356424 - reference_model_to_model_specs_gpu = { - palm_tiny: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=1, - machine_type=high_gpu_1g, - ), - gecko: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=1, - machine_type=high_gpu_1g, - ), - otter: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=2, - machine_type=high_gpu_2g, - ), - bison: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=8, - machine_type=high_gpu_8g, - ), - elephant: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=8, - machine_type=high_gpu_8g, - ), - t5_small: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=1, - machine_type=high_gpu_1g, - ), - t5_large: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=1, - machine_type=high_gpu_1g, - ), - t5_xl: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=1, - machine_type=high_gpu_1g, - ), - t5_xxl: machine_spec( - accelerator_type=nvidia_a100_40g, - accelerator_count=2, - machine_type=high_gpu_2g, - ), - } - - # Get defaults - if large_model_reference not in accepted_reference_models: - raise ValueError( - 'large_model_reference must be one of' - f' {sorted(accepted_reference_models)}.' - ) - - if use_gpu_defaults: - default_machine_spec = reference_model_to_model_specs_gpu[ - large_model_reference - ] - - else: - # This is the only config available for TPUs in our shared reservation pool. - default_machine_spec = machine_spec( - accelerator_type=tpu_v3, - accelerator_count=32, - machine_type=cloud_tpu, - ) - - # Override default behavior we defer validations of these to the resource - # provisioner. - if any([accelerator_type_override, accelerator_count_override]): - if not all([accelerator_type_override, accelerator_count_override]): - raise ValueError('Accelerator type and count must both be set.') - accelerator_type = accelerator_type_override - accelerator_count = accelerator_count_override - else: - accelerator_type = default_machine_spec.accelerator_type - accelerator_count = default_machine_spec.accelerator_count - - return machine_spec( - accelerator_type, - accelerator_count, - _get_machine_type(accelerator_type, accelerator_count), - ) - - @kfp.dsl.container_component def BulkInferrer( # pylint: disable=invalid-name project: str, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index 9dfb057c1c..5cf77e053a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -160,7 +160,7 @@ def resolve_reference_model_metadata( large_model_reference: str, reference_model_path: Optional[str] = None, ) -> NamedTuple( - 'BaseModelMetadata', + 'Outputs', large_model_reference=str, reference_model_path=str, reward_model_reference=str, @@ -181,75 +181,160 @@ def resolve_reference_model_metadata( Raises: ValueError: if no metadata exists for the given base model. 
""" - - # TODO(latture): Move this logic to a container component and use - # PredefinedModels enum to resolve model paths. - outputs = NamedTuple( - 'BaseModelMetadata', + reference_model_metadata = NamedTuple( + 'ReferenceModelMetadata', large_model_reference=str, reference_model_path=str, reward_model_reference=str, reward_model_path=str, + is_supported=bool, ) - reference_model_key = large_model_reference.upper().replace('-', '_') - predefined_model_paths = { - 'PALM_TINY': ( - 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny/' + + reference_models = { + 't5-small': reference_model_metadata( + large_model_reference='T5_SMALL', + reference_model_path=( + 'gs://t5-data/pretrained_models/t5x/flan_t5_small/' + ), + reward_model_reference='T5_SMALL', + reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_small', + is_supported=True, ), - 'GECKO': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko/', - 'OTTER': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter/', - 'BISON': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_bison/', - 'ELEPHANT': ( - 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_elephant/' + 't5-large': reference_model_metadata( + large_model_reference='T5_LARGE', + reference_model_path=( + 'gs://t5-data/pretrained_models/t5x/flan_t5_large/' + ), + reward_model_reference='T5_LARGE', + reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_large', + is_supported=True, ), - 'T5_SMALL': 'gs://t5-data/pretrained_models/t5x/flan_t5_small/', - 'T5_LARGE': 'gs://t5-data/pretrained_models/t5x/flan_t5_large/', - 'T5_XL': 'gs://t5-data/pretrained_models/t5x/flan_t5_xl/', - 'T5_XXL': 'gs://t5-data/pretrained_models/t5x/flan_t5_xxl/', - } - predefined_reward_model_paths = { - 'PALM_TINY': ( - 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny' + 't5-xl': reference_model_metadata( + large_model_reference='T5_XL', + reference_model_path='gs://t5-data/pretrained_models/t5x/flan_t5_xl/', + reward_model_reference='T5_XL', + reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_xl', + is_supported=True, + ), + 't5-xxl': reference_model_metadata( + large_model_reference='T5_XXL', + reference_model_path=( + 'gs://t5-data/pretrained_models/t5x/flan_t5_xxl/' + ), + reward_model_reference='T5_XXL', + reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_xxl', + is_supported=True, + ), + 'palm-tiny': reference_model_metadata( + large_model_reference='PALM_TINY', + reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny/', + reward_model_reference='PALM_TINY', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_palm_tiny/', + is_supported=False, + ), + 'gecko': reference_model_metadata( + large_model_reference='GECKO', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko/' + ), + reward_model_reference='GECKO', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko_pretrain/', + is_supported=False, + ), + 'otter': reference_model_metadata( + large_model_reference='OTTER', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter/' + ), + reward_model_reference='OTTER', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', + is_supported=False, + ), + 'bison': reference_model_metadata( + large_model_reference='BISON', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_bison/' + ), + 
reward_model_reference='OTTER', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', + is_supported=False, # Deprecated: Use text-bision@001 instead. + ), + 'text-bison@001': reference_model_metadata( + large_model_reference='BISON', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_bison/' + ), + reward_model_reference='OTTER', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', + is_supported=True, + ), + 'elephant': reference_model_metadata( + large_model_reference='ELEPHANT', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_elephant/' + ), + reward_model_reference='OTTER', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', + is_supported=False, ), - 'GECKO': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_gecko_pretrain', - 'OTTER': 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain', - 'ELEPHANT': ( - 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_elephant/' + 'llama-2-7b': reference_model_metadata( + large_model_reference='LLAMA_2_7B', + reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b/', + reward_model_reference='LLAMA_2_7B', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b/', + is_supported=True, + ), + 'llama-2-13b': reference_model_metadata( + large_model_reference='LLAMA_2_13B', + reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b/', + reward_model_reference='LLAMA_2_13B', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b/', + is_supported=True, + ), + 'llama-2-7b-chat': reference_model_metadata( + large_model_reference='LLAMA_2_7B_CHAT', + reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b_chat/', + reward_model_reference='LLAMA_2_7B_CHAT', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b_chat/', + is_supported=True, + ), + 'llama-2-13b-chat': reference_model_metadata( + large_model_reference='LLAMA_2_13B_CHAT', + reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b_chat/', + reward_model_reference='LLAMA_2_13B_CHAT', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b_chat/', + is_supported=True, ), - 'T5_SMALL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_small', - 'T5_LARGE': 'gs://t5-data/pretrained_models/t5x/t5_1_1_large', - 'T5_XL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_xl', - 'T5_XXL': 'gs://t5-data/pretrained_models/t5x/t5_1_1_xxl', } - if reference_model_key not in predefined_model_paths: + reference_model_key = large_model_reference.lower().replace('_', '-') + if reference_model_key not in reference_models: + supported_models = [ + k for k, v in reference_models.items() if v.is_supported + ] raise ValueError( - f'No metadata found for `{reference_model_key}`. ' - f'Base model must be one of {list(predefined_model_paths.keys())}.' + f'Unknown reference model {large_model_reference}.' + ' large_model_reference must be one of' + f' {sorted(supported_models)}.' ) - # Mapping from base model to its corresponding reward model. 
- reference_model_to_reward_model = { - 'PALM_TINY': 'PALM_TINY', - 'GECKO': 'GECKO', - 'OTTER': 'OTTER', - 'BISON': 'OTTER', - 'ELEPHANT': 'ELEPHANT', - 'T5_SMALL': 'T5_SMALL', - 'T5_LARGE': 'T5_LARGE', - 'T5_XL': 'T5_XL', - 'T5_XXL': 'T5_XXL', - } + reference_model = reference_models[reference_model_key] - reward_model_key = reference_model_to_reward_model[reference_model_key] + outputs = NamedTuple( + 'Outputs', + large_model_reference=str, + reference_model_path=str, + reward_model_reference=str, + reward_model_path=str, + ) return outputs( - large_model_reference=reference_model_key, + large_model_reference=reference_model.large_model_reference, reference_model_path=( - reference_model_path or predefined_model_paths[reference_model_key] + reference_model_path or reference_model.reference_model_path ), - reward_model_reference=reward_model_key, - reward_model_path=predefined_reward_model_paths[reward_model_key], + reward_model_reference=reference_model.reward_model_reference, + reward_model_path=reference_model.reward_model_path, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index ae7b50aaef..161b6202c2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -46,9 +46,9 @@ def infer_pipeline( Args: large_model_reference: Name of the base model. Supported values are - ``BISON``, ``T5_SMALL``, ``T5_LARGE``, ``T5_XL``, and ``T5_XXL``. - ``BISON`` and ``T5_SMALL`` are supported in ``us-central1` and - ``europe-west4``. ``T5_LARGE``, ``T5_XL`` and ``T5_XXL`` are only + ``text-bison@001``, ``t5-small``, ``t5-large``, ``t5-xl`` and ``t5-xxl``. + ``text-bison@001`` and ``t5-small`` are supported in ``us-central1` and + ``europe-west4``. ``t5-large``, ``t5-xl`` and ``t5-xxl`` are only supported in ``europe-west4``. model_checkpoint: Cloud storage path to the model checkpoint. prompt_dataset: Cloud storage path to an unlabled prompt dataset used for diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index 2c104f6fd2..d2b0da0f97 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -68,9 +68,9 @@ def rlhf_pipeline( the prompt, ``candidate_0`` and ``candidate_1`` that contain candidate responses, ``choice`` that specifies the preferred candidate. large_model_reference: Name of the base model. Supported values are - ``BISON``, ``T5_SMALL``, ``T5_LARGE``, ``T5_XL``, and ``T5_XXL``. - ``BISON`` and ``T5_SMALL`` are supported in ``us-central1` and - ``europe-west4``. ``T5_LARGE``, ``T5_XL`` and ``T5_XXL`` are only + ``text-bison@001``, ``t5-small``, ``t5-large``, ``t5-xl`` and ``t5-xxl``. + ``text-bison@001`` and ``t5-small`` are supported in ``us-central1` and + ``europe-west4``. ``t5-large``, ``t5-xl`` and ``t5-xxl`` are only supported in ``europe-west4``. model_display_name: Name of the fine-tuned model shown in the Model Registry. If not provided, a default name will be created. 
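
For reference, the resolve_reference_model_metadata change in the commit above switches the lookup table to lowercase, hyphenated model names and normalizes the caller-supplied value before the lookup, so a legacy spelling such as 'T5_SMALL' still resolves while 'text-bison@001' and the 'llama-2-*' names become the documented form. The following is a minimal sketch of that normalization only, not the component's public API; the name set is copied from the entries marked is_supported=True in the patch, and the real table additionally keeps deprecated entries ('gecko', 'otter', bare 'bison', 'elephant') that still resolve but are flagged unsupported.

    SUPPORTED = {
        't5-small', 't5-large', 't5-xl', 't5-xxl',
        'text-bison@001',
        'llama-2-7b', 'llama-2-13b', 'llama-2-7b-chat', 'llama-2-13b-chat',
    }

    def resolve_key(large_model_reference: str) -> str:
        # Same normalization used by resolve_reference_model_metadata above:
        # lowercase the value and swap underscores for hyphens before lookup.
        key = large_model_reference.lower().replace('_', '-')
        if key not in SUPPORTED:
            raise ValueError(
                f'Unknown reference model {large_model_reference}. '
                f'large_model_reference must be one of {sorted(SUPPORTED)}.')
        return key

    assert resolve_key('T5_SMALL') == 't5-small'            # legacy spelling still resolves
    assert resolve_key('text-bison@001') == 'text-bison@001'

Consistent with the updated infer_pipeline and rlhf_pipeline docstrings in this commit, callers are expected to pass the lowercase names (for example 'text-bison@001' rather than 'BISON').
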
From b6be4ea79bd4828e48f78eba5d69ef332524f309 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Thu, 24 Aug 2023 10:16:16 -0700 Subject: [PATCH 122/253] feat(mlmd): Introduce PostgreSQL kustomization for MLMD. (#9927) --- .../base/cache/postgresql/kustomization.yaml | 13 -- .../generic/postgres/kustomization.yaml | 6 +- ...ret.yaml => postgres-secret-extended.yaml} | 2 +- .../overlays/postgres/kustomization.yaml | 38 +++++ .../postgres/metadata-db-deployment.yaml | 43 +++++ .../overlays/postgres/metadata-db-pvc.yaml | 10 ++ .../postgres/metadata-db-service.yaml | 14 ++ .../metadata/overlays/postgres/params.env | 2 + .../patches/metadata-grpc-deployment.yaml | 28 +++ .../metadata/overlays/postgres/secrets.env | 2 + .../base/pipeline/postgres/kustomization.yaml | 50 ------ .../ml-pipeline-apiserver-deployment.yaml | 161 ------------------ .../cache/cache-deployment-patch.yaml} | 32 +--- .../base/postgresql/cache/kustomization.yaml | 6 + .../postgresql/pipeline/kustomization.yaml | 6 + ...l-pipeline-apiserver-deployment-patch.yaml | 74 ++++++++ .../kustomization.yaml | 2 +- 17 files changed, 233 insertions(+), 256 deletions(-) delete mode 100644 manifests/kustomize/base/cache/postgresql/kustomization.yaml rename manifests/kustomize/base/installs/generic/postgres/{postgres-secret.yaml => postgres-secret-extended.yaml} (73%) create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/kustomization.yaml create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/metadata-db-deployment.yaml create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/metadata-db-pvc.yaml create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/metadata-db-service.yaml create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/params.env create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/patches/metadata-grpc-deployment.yaml create mode 100644 manifests/kustomize/base/metadata/overlays/postgres/secrets.env delete mode 100644 manifests/kustomize/base/pipeline/postgres/kustomization.yaml delete mode 100644 manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml rename manifests/kustomize/base/{cache/postgresql/cache-deployment.yaml => postgresql/cache/cache-deployment-patch.yaml} (75%) create mode 100644 manifests/kustomize/base/postgresql/cache/kustomization.yaml create mode 100644 manifests/kustomize/base/postgresql/pipeline/kustomization.yaml create mode 100644 manifests/kustomize/base/postgresql/pipeline/ml-pipeline-apiserver-deployment-patch.yaml diff --git a/manifests/kustomize/base/cache/postgresql/kustomization.yaml b/manifests/kustomize/base/cache/postgresql/kustomization.yaml deleted file mode 100644 index 89adcab9d9..0000000000 --- a/manifests/kustomize/base/cache/postgresql/kustomization.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - cache-deployment.yaml - - ../cache-role.yaml - - ../cache-rolebinding.yaml - - ../cache-sa.yaml - - ../cache-service.yaml -commonLabels: - app: cache-server -images: - - name: gcr.io/ml-pipeline/cache-server - newTag: 2.0.0 diff --git a/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml b/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml index bd435cc712..21ca80694a 100644 --- a/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml +++ b/manifests/kustomize/base/installs/generic/postgres/kustomization.yaml 
@@ -2,12 +2,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: kubeflow bases: -- ../../../pipeline/postgres -- ../../../cache/postgres +- ../../../postgresql/pipeline +- ../../../postgresql/cache - ../../../cache-deployer resources: - pipeline-install-config.yaml -- postgres-secret.yaml +- postgres-secret-extended.yaml vars: - name: kfp-namespace objref: diff --git a/manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml b/manifests/kustomize/base/installs/generic/postgres/postgres-secret-extended.yaml similarity index 73% rename from manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml rename to manifests/kustomize/base/installs/generic/postgres/postgres-secret-extended.yaml index 734ce0b5f5..b67369ba1c 100644 --- a/manifests/kustomize/base/installs/generic/postgres/postgres-secret.yaml +++ b/manifests/kustomize/base/installs/generic/postgres/postgres-secret-extended.yaml @@ -1,7 +1,7 @@ kind: Secret apiVersion: v1 metadata: - name: postgres-secret + name: postgres-secret-extended stringData: username: user password: "password" diff --git a/manifests/kustomize/base/metadata/overlays/postgres/kustomization.yaml b/manifests/kustomize/base/metadata/overlays/postgres/kustomization.yaml new file mode 100644 index 0000000000..9f78bf3bbc --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/kustomization.yaml @@ -0,0 +1,38 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow + +bases: +- ../../base +resources: +- metadata-db-pvc.yaml +- metadata-db-deployment.yaml +- metadata-db-service.yaml + +patchesStrategicMerge: +- patches/metadata-grpc-deployment.yaml + +configMapGenerator: +- name: metadata-postgres-db-parameters + envs: + - params.env +secretGenerator: +- name: metadata-postgres-db-secrets + envs: + - secrets.env +generatorOptions: + disableNameSuffixHash: true + +images: +- name: postgres + newName: postgres + newTag: 14.7-alpine3.17 + +vars: +- name: MLMD_DB_HOST + objref: + kind: Service + name: metadata-postgres-db + apiVersion: v1 + fieldref: + fieldpath: metadata.name diff --git a/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-deployment.yaml b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-deployment.yaml new file mode 100644 index 0000000000..061d109e1e --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-deployment.yaml @@ -0,0 +1,43 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: metadata-postgres-db + labels: + component: db +spec: + selector: + matchLabels: + component: db + replicas: 1 + strategy: + type: Recreate + template: + metadata: + name: db + labels: + component: db + annotations: + sidecar.istio.io/inject: "false" + spec: + containers: + - name: db-container + image: postgres + env: + - name: PGDATA + value: /var/lib/postgresql/data/pgdata + envFrom: + - configMapRef: + name: metadata-postgres-db-parameters + - secretRef: + name: metadata-postgres-db-secrets + ports: + - name: postgres + containerPort: 5432 + volumeMounts: + - name: metadata-postgres + mountPath: /var/lib/postgresql/data + volumes: + - name: metadata-postgres + persistentVolumeClaim: + claimName: metadata-postgres + diff --git a/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-pvc.yaml b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-pvc.yaml new file mode 100644 index 0000000000..13790489fa --- /dev/null +++ 
b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: metadata-postgres +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi diff --git a/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-service.yaml b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-service.yaml new file mode 100644 index 0000000000..63902a6661 --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/metadata-db-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: metadata-postgres-db + labels: + component: db +spec: + type: ClusterIP + ports: + - port: 5432 + protocol: TCP + name: postgres + selector: + component: db diff --git a/manifests/kustomize/base/metadata/overlays/postgres/params.env b/manifests/kustomize/base/metadata/overlays/postgres/params.env new file mode 100644 index 0000000000..fce7e26772 --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/params.env @@ -0,0 +1,2 @@ +POSTGRES_PORT=5432 +POSTGRES_DBNAME=mlmdpostgres \ No newline at end of file diff --git a/manifests/kustomize/base/metadata/overlays/postgres/patches/metadata-grpc-deployment.yaml b/manifests/kustomize/base/metadata/overlays/postgres/patches/metadata-grpc-deployment.yaml new file mode 100644 index 0000000000..9f3c052b7b --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/patches/metadata-grpc-deployment.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: metadata-grpc-deployment +spec: + template: + spec: + containers: + - name: container + # Remove existing environment variables + env: + - $patch: replace + envFrom: + - configMapRef: + name: metadata-postgres-db-parameters + - secretRef: + name: metadata-postgres-db-secrets + - configMapRef: + name: metadata-grpc-configmap + args: ["--grpc_port=$(METADATA_GRPC_SERVICE_PORT)", + "--metadata_source_config_type=postgresql", + "--postgres_config_host=$(MLMD_DB_HOST)", + "--postgres_config_port=$(POSTGRES_PORT)", + "--postgres_config_dbname=$(POSTGRES_DBNAME)", + "--postgres_config_user=$(POSTGRES_USER)", + "--postgres_config_password=$(POSTGRES_PASSWORD)", + # "--postgres_config_skip_db_creation=true", + "--enable_database_upgrade=true"] diff --git a/manifests/kustomize/base/metadata/overlays/postgres/secrets.env b/manifests/kustomize/base/metadata/overlays/postgres/secrets.env new file mode 100644 index 0000000000..973d158283 --- /dev/null +++ b/manifests/kustomize/base/metadata/overlays/postgres/secrets.env @@ -0,0 +1,2 @@ +POSTGRES_USER=root +POSTGRES_PASSWORD=password \ No newline at end of file diff --git a/manifests/kustomize/base/pipeline/postgres/kustomization.yaml b/manifests/kustomize/base/pipeline/postgres/kustomization.yaml deleted file mode 100644 index 08e991d972..0000000000 --- a/manifests/kustomize/base/pipeline/postgres/kustomization.yaml +++ /dev/null @@ -1,50 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -bases: - - ../metadata-writer -resources: - - ml-pipeline-apiserver-deployment.yaml - - ../ml-pipeline-apiserver-role.yaml - - ../ml-pipeline-apiserver-rolebinding.yaml - - ../ml-pipeline-apiserver-sa.yaml - - ../ml-pipeline-apiserver-service.yaml - - ../ml-pipeline-persistenceagent-deployment.yaml - - ../ml-pipeline-persistenceagent-role.yaml - - ../ml-pipeline-persistenceagent-rolebinding.yaml - - ../ml-pipeline-persistenceagent-sa.yaml - - 
../ml-pipeline-scheduledworkflow-deployment.yaml - - ../ml-pipeline-scheduledworkflow-role.yaml - - ../ml-pipeline-scheduledworkflow-rolebinding.yaml - - ../ml-pipeline-scheduledworkflow-sa.yaml - - ../ml-pipeline-ui-deployment.yaml - - ../ml-pipeline-ui-configmap.yaml - - ../ml-pipeline-ui-role.yaml - - ../ml-pipeline-ui-rolebinding.yaml - - ../ml-pipeline-ui-sa.yaml - - ../ml-pipeline-ui-service.yaml - - ../ml-pipeline-viewer-crd-role.yaml - - ../ml-pipeline-viewer-crd-rolebinding.yaml - - ../ml-pipeline-viewer-crd-deployment.yaml - - ../ml-pipeline-viewer-crd-sa.yaml - - ../ml-pipeline-visualization-deployment.yaml - - ../ml-pipeline-visualization-sa.yaml - - ../ml-pipeline-visualization-service.yaml - - ../pipeline-runner-role.yaml - - ../pipeline-runner-rolebinding.yaml - - ../pipeline-runner-sa.yaml - - ../container-builder-sa.yaml - - ../viewer-sa.yaml - - ../kfp-launcher-configmap.yaml -images: - - name: gcr.io/ml-pipeline/api-server - newTag: 2.0.0 - - name: gcr.io/ml-pipeline/persistenceagent - newTag: 2.0.0 - - name: gcr.io/ml-pipeline/scheduledworkflow - newTag: 2.0.0 - - name: gcr.io/ml-pipeline/frontend - newTag: 2.0.0 - - name: gcr.io/ml-pipeline/viewer-crd-controller - newTag: 2.0.0 - - name: gcr.io/ml-pipeline/visualization-server - newTag: 2.0.0 diff --git a/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml b/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml deleted file mode 100644 index 0cbd4bd729..0000000000 --- a/manifests/kustomize/base/pipeline/postgres/ml-pipeline-apiserver-deployment.yaml +++ /dev/null @@ -1,161 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: ml-pipeline - name: ml-pipeline -spec: - selector: - matchLabels: - app: ml-pipeline - template: - metadata: - labels: - app: ml-pipeline - annotations: - cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - spec: - containers: - - env: - - name: AUTO_UPDATE_PIPELINE_DEFAULT_VERSION - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: autoUpdatePipelineDefaultVersion - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: OBJECTSTORECONFIG_SECURE - value: "false" - - name: OBJECTSTORECONFIG_BUCKETNAME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: bucketName - # relic variables - - name: DBCONFIG_USER - valueFrom: - secretKeyRef: - name: mysql-secret - key: username - - name: DBCONFIG_PASSWORD - valueFrom: - secretKeyRef: - name: mysql-secret - key: password - - name: DBCONFIG_DBNAME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: pipelineDb - - name: DBCONFIG_HOST - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: dbHost - - name: DBCONFIG_PORT - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: dbPort - # end of relic variables - - name: DBCONFIG_CONMAXLIFETIME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: ConMaxLifeTime - - name: DB_DRIVER_NAME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: dbType - # PostgreSQL Config - - name: DBCONFIG_POSTGRESQLCONFIG_USER - valueFrom: - secretKeyRef: - name: postgres-secret - key: username - - name: DBCONFIG_POSTGRESQLCONFIG_PASSWORD - valueFrom: - secretKeyRef: - name: postgres-secret - key: password - - name: DBCONFIG_POSTGRESQLCONFIG_DBNAME - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: pipelineDb - - name: DBCONFIG_POSTGRESQLCONFIG_HOST - valueFrom: - 
configMapKeyRef: - name: pipeline-install-config - key: postgresHost - - name: DBCONFIG_POSTGRESQLCONFIG_PORT - valueFrom: - configMapKeyRef: - name: pipeline-install-config - key: postgresPort - # end of PostgreSQL variables - - name: OBJECTSTORECONFIG_ACCESSKEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: accesskey - - name: OBJECTSTORECONFIG_SECRETACCESSKEY - valueFrom: - secretKeyRef: - name: mlpipeline-minio-artifact - key: secretkey - image: gcr.io/ml-pipeline/api-server:dummy - imagePullPolicy: IfNotPresent - name: ml-pipeline-api-server - ports: - - name: http - containerPort: 8888 - - name: grpc - containerPort: 8887 - readinessProbe: - exec: - command: - - wget - - -q # quiet - - -S # show server response - - -O - - "-" # Redirect output to stdout - - http://localhost:8888/apis/v1beta1/healthz - initialDelaySeconds: 3 - periodSeconds: 5 - timeoutSeconds: 2 - livenessProbe: - exec: - command: - - wget - - -q # quiet - - -S # show server response - - -O - - "-" # Redirect output to stdout - - http://localhost:8888/apis/v1beta1/healthz - initialDelaySeconds: 3 - periodSeconds: 5 - timeoutSeconds: 2 - # This startup probe provides up to a 60 second grace window before the - # liveness probe takes over to accomodate the occasional database - # migration. - startupProbe: - exec: - command: - - wget - - -q # quiet - - -S # show server response - - -O - - "-" # Redirect output to stdout - - http://localhost:8888/apis/v1beta1/healthz - failureThreshold: 12 - periodSeconds: 5 - timeoutSeconds: 2 - resources: - requests: - cpu: 250m - memory: 500Mi - serviceAccountName: ml-pipeline diff --git a/manifests/kustomize/base/cache/postgresql/cache-deployment.yaml b/manifests/kustomize/base/postgresql/cache/cache-deployment-patch.yaml similarity index 75% rename from manifests/kustomize/base/cache/postgresql/cache-deployment.yaml rename to manifests/kustomize/base/postgresql/cache/cache-deployment-patch.yaml index 8e6cc316ce..324925767a 100644 --- a/manifests/kustomize/base/cache/postgresql/cache-deployment.yaml +++ b/manifests/kustomize/base/postgresql/cache/cache-deployment-patch.yaml @@ -2,22 +2,13 @@ apiVersion: apps/v1 kind: Deployment metadata: name: cache-server - labels: - app: cache-server spec: - replicas: 1 - selector: - matchLabels: - app: cache-server template: - metadata: - labels: - app: cache-server spec: containers: - name: server - image: gcr.io/ml-pipeline/cache-server:dummy env: + - $patch: replace - name: DEFAULT_CACHE_STALENESS valueFrom: configMapKeyRef: @@ -49,21 +40,21 @@ spec: valueFrom: configMapKeyRef: name: pipeline-install-config - key: postgresDbHost + key: postgresHost - name: DBCONFIG_PORT valueFrom: configMapKeyRef: name: pipeline-install-config - key: postgresDbPort + key: postgresPort - name: DBCONFIG_USER valueFrom: secretKeyRef: - name: postgres-secret + name: postgres-secret-extended key: username - name: DBCONFIG_PASSWORD valueFrom: secretKeyRef: - name: postgres-secret + name: postgres-secret-extended key: password - name: NAMESPACE_TO_WATCH valueFrom: @@ -82,16 +73,3 @@ spec: "--namespace_to_watch=$(NAMESPACE_TO_WATCH)", "--listen_port=$(WEBHOOK_PORT)", ] - imagePullPolicy: Always - ports: - - containerPort: 8443 - name: webhook-api - volumeMounts: - - name: webhook-tls-certs - mountPath: /etc/webhook/certs - readOnly: true - volumes: - - name: webhook-tls-certs - secret: - secretName: webhook-server-tls - serviceAccountName: kubeflow-pipelines-cache diff --git a/manifests/kustomize/base/postgresql/cache/kustomization.yaml 
b/manifests/kustomize/base/postgresql/cache/kustomization.yaml new file mode 100644 index 0000000000..d4935432cc --- /dev/null +++ b/manifests/kustomize/base/postgresql/cache/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +bases: +- ../../cache +patchesStrategicMerge: +- cache-deployment-patch.yaml diff --git a/manifests/kustomize/base/postgresql/pipeline/kustomization.yaml b/manifests/kustomize/base/postgresql/pipeline/kustomization.yaml new file mode 100644 index 0000000000..3f87400eb3 --- /dev/null +++ b/manifests/kustomize/base/postgresql/pipeline/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +bases: + - ../../pipeline +patchesStrategicMerge: + - ml-pipeline-apiserver-deployment-patch.yaml diff --git a/manifests/kustomize/base/postgresql/pipeline/ml-pipeline-apiserver-deployment-patch.yaml b/manifests/kustomize/base/postgresql/pipeline/ml-pipeline-apiserver-deployment-patch.yaml new file mode 100644 index 0000000000..7d621691c3 --- /dev/null +++ b/manifests/kustomize/base/postgresql/pipeline/ml-pipeline-apiserver-deployment-patch.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-pipeline +spec: + template: + spec: + containers: + - name: ml-pipeline-api-server + env: + - $patch: replace + - name: AUTO_UPDATE_PIPELINE_DEFAULT_VERSION + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: autoUpdatePipelineDefaultVersion + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: OBJECTSTORECONFIG_SECURE + value: "false" + - name: OBJECTSTORECONFIG_BUCKETNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: bucketName + - name: DBCONFIG_CONMAXLIFETIME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: ConMaxLifeTime + - name: DB_DRIVER_NAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: dbType + # PostgreSQL Config + - name: DBCONFIG_POSTGRESQLCONFIG_USER + valueFrom: + secretKeyRef: + name: postgres-secret-extended + key: username + - name: DBCONFIG_POSTGRESQLCONFIG_PASSWORD + valueFrom: + secretKeyRef: + name: postgres-secret-extended + key: password + - name: DBCONFIG_POSTGRESQLCONFIG_DBNAME + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: pipelineDb + - name: DBCONFIG_POSTGRESQLCONFIG_HOST + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresHost + - name: DBCONFIG_POSTGRESQLCONFIG_PORT + valueFrom: + configMapKeyRef: + name: pipeline-install-config + key: postgresPort + # end of PostgreSQL variables + - name: OBJECTSTORECONFIG_ACCESSKEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: accesskey + - name: OBJECTSTORECONFIG_SECRETACCESSKEY + valueFrom: + secretKeyRef: + name: mlpipeline-minio-artifact + key: secretkey \ No newline at end of file diff --git a/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml index 99e03a3e6c..00a9d4613b 100644 --- a/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml +++ b/manifests/kustomize/env/platform-agnostic-postgresql/kustomization.yaml @@ -3,7 +3,7 @@ kind: Kustomization bases: - ../../base/installs/generic/postgres - - ../../base/metadata/base + - ../../base/metadata/overlays/postgres - ../../third-party/argo/installs/namespace - ../../third-party/minio/base - ../../third-party/postgresql/base From 
cf0e0cf87c7a33f906e2bc31da8c5356ed75a831 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 24 Aug 2023 11:31:16 -0700 Subject: [PATCH 123/253] feat(sdk): enable dependency-free runtime install of kfp (#9886) * handle problematic imports * separate runtime and compile-time symbols * wrap kfp/__init__.py imports unavailable at runtime * update component factory + tests * add runtime tests * add --no-deps flag to component factory * update release notes * clean up * handle containerized python components * update golden snapshots * update component_factory unit tests * respond to review feedback * fix runtime test and compilation logic * update tests --- sdk/RELEASE.md | 12 +- sdk/python/kfp/__init__.py | 12 +- sdk/python/kfp/cli/component.py | 2 - sdk/python/kfp/dsl/__init__.py | 86 ++++---- sdk/python/kfp/dsl/component_factory.py | 64 ++++-- sdk/python/kfp/dsl/component_factory_test.py | 103 +++++++++- sdk/python/kfp/dsl/executor.py | 8 +- sdk/python/kfp/dsl/structures.py | 15 +- sdk/python/kfp/dsl/types/type_utils.py | 3 +- sdk/python/kfp/dsl/v1_components.py | 44 ---- sdk/python/kfp/dsl/v1_structures.py | 12 -- .../test_data/components/add_numbers.yaml | 10 +- .../component_with_metadata_fields.yaml | 10 +- .../component_with_pip_install.yaml | 8 +- .../component_with_task_final_status.yaml | 10 +- .../test_data/components/concat_message.yaml | 10 +- .../containerized_python_component.py | 26 +++ .../containerized_python_component.yaml | 70 +++++++ .../test_data/components/dict_input.yaml | 10 +- sdk/python/test_data/components/identity.yaml | 10 +- .../test_data/components/input_artifact.yaml | 10 +- .../test_data/components/nested_return.yaml | 10 +- .../test_data/components/output_metrics.yaml | 10 +- .../test_data/components/preprocess.yaml | 10 +- .../component_with_optional_inputs.yaml | 10 +- .../component_with_pip_index_urls.yaml | 8 +- .../components_with_optional_artifacts.yaml | 18 +- ...lightweight_python_functions_pipeline.yaml | 18 +- ...tweight_python_functions_with_outputs.yaml | 34 ++-- .../parallelfor_fan_in/artifacts_complex.yaml | 42 ++-- .../parallelfor_fan_in/artifacts_simple.yaml | 18 +- .../conditional_producer_and_consumers.yaml | 18 +- .../nested_with_parameters.yaml | 34 ++-- .../parameters_complex.yaml | 58 ++++-- .../parallelfor_fan_in/parameters_simple.yaml | 18 +- .../pipeline_producer_consumer.yaml | 34 ++-- .../pipelines/pipeline_as_exit_task.yaml | 34 ++-- .../pipelines/pipeline_in_pipeline.yaml | 18 +- .../pipeline_in_pipeline_complex.yaml | 18 +- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 26 ++- .../pipelines/pipeline_with_condition.yaml | 42 ++-- ...peline_with_dynamic_importer_metadata.yaml | 10 +- .../pipelines/pipeline_with_env.yaml | 10 +- .../pipelines/pipeline_with_exit_handler.yaml | 26 ++- .../pipeline_with_google_artifact_type.yaml | 14 +- .../pipelines/pipeline_with_importer.yaml | 18 +- .../pipelines/pipeline_with_loops.yaml | 66 +++--- .../pipeline_with_loops_and_conditions.yaml | 106 ++++++---- .../pipeline_with_metadata_fields.yaml | 18 +- .../pipeline_with_metrics_outputs.yaml | 18 +- .../pipeline_with_multiple_exit_handlers.yaml | 58 ++++-- .../pipeline_with_nested_conditions.yaml | 66 +++--- .../pipelines/pipeline_with_nested_loops.yaml | 26 ++- .../pipelines/pipeline_with_outputs.yaml | 18 +- ...pipeline_with_parallelfor_parallelism.yaml | 50 +++-- ...ipeline_with_params_containing_format.yaml | 26 ++- .../pipelines/pipeline_with_placeholders.yaml | 42 ++-- .../pipelines/pipeline_with_retry.yaml | 10 +- 
.../pipeline_with_task_final_status.yaml | 26 ++- ...th_task_using_ignore_upstream_failure.yaml | 18 +- sdk/python/test_data/test_data_config.yaml | 3 + .../execute_commands_args_test.py | 163 +++++++++++++++ .../pipeline_with_task_final_status.py | 58 ++++++ .../pipeline_with_task_final_status.yaml | 189 ++++++++++++++++++ test/presubmit-test-kfp-runtime-code.sh | 33 +++ 65 files changed, 1478 insertions(+), 607 deletions(-) delete mode 100644 sdk/python/kfp/dsl/v1_components.py create mode 100644 sdk/python/test_data/components/containerized_python_component.py create mode 100644 sdk/python/test_data/components/containerized_python_component.yaml create mode 100644 sdk/runtime_tests/execute_commands_args_test.py create mode 100644 sdk/runtime_tests/test_data/pipeline_with_task_final_status.py create mode 100644 sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml create mode 100755 test/presubmit-test-kfp-runtime-code.sh diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index a30a3d809d..2f77057d37 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -1,18 +1,8 @@ # Current Version (in development) -## Features - -## Breaking changes - -## Deprecations - -## Bug fixes and other changes - -## Documentation updates -# 2.1.2 ## Features -* Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9738](https://github.com/kubeflow/pipelines/pull/9738) +* Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9886](https://github.com/kubeflow/pipelines/pull/9886) ## Breaking changes diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 31a1d8253d..5bcc914a18 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -20,6 +20,12 @@ TYPE_CHECK = True -from kfp import components -from kfp import dsl -from kfp.client import Client +import os + +# compile-time only dependencies +if os.environ.get('_KFP_RUNTIME', 'false') != 'true': + # make `from kfp import components` and `from kfp import dsl` valid; + # related to namespace packaging issue + from kfp import components # noqa: keep unused import + from kfp import dsl # noqa: keep unused import + from kfp.client import Client # noqa: keep unused import diff --git a/sdk/python/kfp/cli/component.py b/sdk/python/kfp/cli/component.py index e09bd7b794..079c200fe3 100644 --- a/sdk/python/kfp/cli/component.py +++ b/sdk/python/kfp/cli/component.py @@ -39,8 +39,6 @@ _DOCKERFILE = 'Dockerfile' -# TODO: merge kfp_package_path into runtime-requirements.txt, once we have -# kfp_runtime package that is dependency-free. _DOCKERFILE_TEMPLATE = ''' FROM {base_image} diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index d3502a7287..a23b640fdb 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -14,31 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# runtime dependencies __all__ = [ - 'component', - 'container_component', - 'pipeline', - 'importer', - 'ContainerSpec', - 'Condition', - 'ExitHandler', - 'ParallelFor', - 'Collected', 'Input', 'Output', 'InputPath', 'OutputPath', - 'IfPresentPlaceholder', - 'ConcatPlaceholder', 'PipelineTaskFinalStatus', - 'PIPELINE_JOB_NAME_PLACEHOLDER', - 'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER', - 'PIPELINE_JOB_ID_PLACEHOLDER', - 'PIPELINE_TASK_NAME_PLACEHOLDER', - 'PIPELINE_TASK_ID_PLACEHOLDER', - 'PIPELINE_ROOT_PLACEHOLDER', - 'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER', - 'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER', 'Artifact', 'ClassificationMetrics', 'Dataset', @@ -47,29 +29,18 @@ 'Metrics', 'Model', 'SlicedClassificationMetrics', - 'PipelineTask', + 'PIPELINE_JOB_NAME_PLACEHOLDER', + 'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER', + 'PIPELINE_JOB_ID_PLACEHOLDER', + 'PIPELINE_TASK_NAME_PLACEHOLDER', + 'PIPELINE_TASK_ID_PLACEHOLDER', + 'PIPELINE_ROOT_PLACEHOLDER', + 'PIPELINE_JOB_CREATE_TIME_UTC_PLACEHOLDER', + 'PIPELINE_JOB_SCHEDULE_TIME_UTC_PLACEHOLDER', ] +import os -try: - from typing import Annotated -except ImportError: - from typing_extensions import Annotated - -from typing import TypeVar - -from kfp.dsl.component_decorator import component -from kfp.dsl.container_component_decorator import container_component -from kfp.dsl.for_loop import Collected -from kfp.dsl.importer_node import importer -from kfp.dsl.pipeline_context import pipeline -from kfp.dsl.pipeline_task import PipelineTask -from kfp.dsl.placeholders import ConcatPlaceholder -from kfp.dsl.placeholders import IfPresentPlaceholder -from kfp.dsl.structures import ContainerSpec from kfp.dsl.task_final_status import PipelineTaskFinalStatus -from kfp.dsl.tasks_group import Condition -from kfp.dsl.tasks_group import ExitHandler -from kfp.dsl.tasks_group import ParallelFor from kfp.dsl.types.artifact_types import Artifact from kfp.dsl.types.artifact_types import ClassificationMetrics from kfp.dsl.types.artifact_types import Dataset @@ -83,8 +54,14 @@ from kfp.dsl.types.type_annotations import OutputAnnotation from kfp.dsl.types.type_annotations import OutputPath -# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py +try: + from typing import Annotated +except ImportError: + from typing_extensions import Annotated +from typing import TypeVar + +# hack: constants and custom type generics have to be defined here to be captured by autodoc and autodocsumm used in ./docs/conf.py PIPELINE_JOB_NAME_PLACEHOLDER = '{{$.pipeline_job_name}}' """A placeholder used to obtain a pipeline job name within a task at pipeline runtime. 
@@ -247,3 +224,32 @@ def my_pipeline(): producer_task = artifact_producer() artifact_consumer(model=producer_task.output) """ + +# compile-time only dependencies +if os.environ.get('_KFP_RUNTIME', 'false') != 'true': + from kfp.dsl.component_decorator import component + from kfp.dsl.container_component_decorator import container_component + from kfp.dsl.for_loop import Collected + from kfp.dsl.importer_node import importer + from kfp.dsl.pipeline_context import pipeline + from kfp.dsl.pipeline_task import PipelineTask + from kfp.dsl.placeholders import ConcatPlaceholder + from kfp.dsl.placeholders import IfPresentPlaceholder + from kfp.dsl.structures import ContainerSpec + from kfp.dsl.tasks_group import Condition + from kfp.dsl.tasks_group import ExitHandler + from kfp.dsl.tasks_group import ParallelFor + __all__.extend([ + 'component', + 'container_component', + 'pipeline', + 'importer', + 'ContainerSpec', + 'Condition', + 'ExitHandler', + 'ParallelFor', + 'Collected', + 'IfPresentPlaceholder', + 'ConcatPlaceholder', + 'PipelineTask', + ]) diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index 99d34f7828..cb43340b1c 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -21,6 +21,7 @@ import warnings import docstring_parser +import kfp from kfp.dsl import container_component_artifact_channel from kfp.dsl import container_component_class from kfp.dsl import graph_component @@ -109,24 +110,43 @@ def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str: def _get_packages_to_install_command( - package_list: Optional[List[str]] = None, - pip_index_urls: Optional[List[str]] = None) -> List[str]: + kfp_package_path: Optional[str] = None, + pip_index_urls: Optional[List[str]] = None, + packages_to_install: Optional[List[str]] = None, + install_kfp_package: bool = True, + target_image: Optional[str] = None, +) -> List[str]: + packages_to_install = packages_to_install or [] + kfp_in_user_pkgs = any(pkg.startswith('kfp') for pkg in packages_to_install) + # if the user doesn't say "don't install", they aren't building a + # container component, and they haven't already specified a KFP dep + # themselves, we install KFP for them + inject_kfp_install = install_kfp_package and target_image is None and not kfp_in_user_pkgs + if inject_kfp_install: + if kfp_package_path: + packages_to_install.append(kfp_package_path) + else: + packages_to_install.extend(_get_injected_kfp_imports()) + + if packages_to_install: + concat_package_list = ' '.join( + [repr(str(package)) for package in packages_to_install]) + index_url_options = make_index_url_options(pip_index_urls) - if not package_list: - return [] + install_python_packages_script = _install_python_packages_script_template.format( + index_url_options=index_url_options, + concat_package_list=concat_package_list) + return ['sh', '-c', install_python_packages_script] - concat_package_list = ' '.join( - [repr(str(package)) for package in package_list]) - index_url_options = make_index_url_options(pip_index_urls) - install_python_packages_script = _install_python_packages_script_template.format( - index_url_options=index_url_options, - concat_package_list=concat_package_list) - return ['sh', '-c', install_python_packages_script] + return [] -def _get_default_kfp_package_path() -> str: - import kfp - return f'kfp=={kfp.__version__}' +def _get_injected_kfp_imports() -> List[str]: + return [ + f'kfp=={kfp.__version__}', + '--no-deps', + 
'typing-extensions>=3.7.4,<5; python_version<"3.9"', + ] def _get_function_source_definition(func: Callable) -> str: @@ -420,8 +440,9 @@ def _get_command_and_args_for_lightweight_component( '-ec', textwrap.dedent('''\ program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main \ + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main \ --component_module_path \ "$program_path/ephemeral_component.py" \ "$@" @@ -471,15 +492,14 @@ def create_component_from_func( The decorator is defined under component_decorator.py. See the decorator for the canonical documentation for this function. """ - packages_to_install = packages_to_install or [] - - if install_kfp_package and target_image is None: - if kfp_package_path is None: - kfp_package_path = _get_default_kfp_package_path() - packages_to_install.append(kfp_package_path) packages_to_install_command = _get_packages_to_install_command( - package_list=packages_to_install, pip_index_urls=pip_index_urls) + install_kfp_package=install_kfp_package, + target_image=target_image, + kfp_package_path=kfp_package_path, + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + ) command = [] args = [] diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py index 8f935ae3f0..883c406efd 100644 --- a/sdk/python/kfp/dsl/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -28,31 +28,112 @@ class TestGetPackagesToInstallCommand(unittest.TestCase): - def test_with_no_packages_to_install(self): + def test_with_no_user_packages_to_install(self): packages_to_install = [] command = component_factory._get_packages_to_install_command( - packages_to_install) + packages_to_install=packages_to_install) + + self.assertEqual(command, [ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ]) + + def test_with_no_user_packages_to_install_and_install_kfp_false(self): + packages_to_install = [] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + install_kfp_package=False, + ) + self.assertEqual(command, []) + + def test_with_no_user_packages_to_install_and_kfp_package_path(self): + packages_to_install = [] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python' + ) + + self.assertEqual(command, [ + 'sh', '-c', + '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n' + ]) + + def test_with_no_user_packages_to_install_and_kfp_package_path_and_install_kfp_false( + self): + packages_to_install = [] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python', + install_kfp_package=False, + ) self.assertEqual(command, []) - def test_with_packages_to_install_and_no_pip_index_url(self): + def test_with_user_packages_to_install_and_kfp_package_path_and_install_kfp_false( + self): + packages_to_install = ['sklearn'] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python', + install_kfp_package=False, + ) + + self.assertEqual(command, [ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n' + ]) + + def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image( + self): + packages_to_install = [] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + target_image='gcr.io/my-kfp-image', + kfp_package_path='./sdk/python') + + self.assertEqual(command, []) + + def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image_and_install_kfp_false( + self): + packages_to_install = [] + + command = component_factory._get_packages_to_install_command( + packages_to_install=packages_to_install, + target_image='gcr.io/my-kfp-image', + kfp_package_path='./sdk/python', + install_kfp_package=False) + + self.assertEqual(command, []) + + def test_with_user_packages_to_install_and_no_pip_index_url(self): packages_to_install = ['package1', 'package2'] command = component_factory._get_packages_to_install_command( - packages_to_install) - concat_command = ' '.join(command) - for package in packages_to_install: - self.assertTrue(package in concat_command) + packages_to_install=packages_to_install) + + self.assertEqual(command, [ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ]) def test_with_packages_to_install_with_pip_index_url(self): packages_to_install = ['package1', 'package2'] pip_index_urls = ['https://myurl.org/simple'] command = component_factory._get_packages_to_install_command( - packages_to_install, pip_index_urls) - concat_command = ' '.join(command) - for package in packages_to_install + pip_index_urls: - self.assertTrue(package in concat_command) + packages_to_install=packages_to_install, + pip_index_urls=pip_index_urls, + ) + + self.assertEqual(command, [ + 'sh', '-c', + '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ]) class TestInvalidParameterName(unittest.TestCase): diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp/dsl/executor.py index db8a8a89bd..e153f42f3f 100644 --- a/sdk/python/kfp/dsl/executor.py +++ b/sdk/python/kfp/dsl/executor.py @@ -16,7 +16,6 @@ import os from typing import Any, Callable, Dict, List, Optional, Union -from kfp.dsl import python_component from kfp.dsl import task_final_status from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations @@ -25,9 +24,10 @@ class Executor(): """Executor executes v2-based Python function components.""" - def __init__(self, executor_input: Dict, - function_to_execute: Union[Callable, - python_component.PythonComponent]): + def __init__( + self, executor_input: Dict, + function_to_execute: Union[Callable, + 'python_component.PythonComponent']): if hasattr(function_to_execute, 'python_func'): self._func = function_to_execute.python_func else: diff --git a/sdk/python/kfp/dsl/structures.py b/sdk/python/kfp/dsl/structures.py index 3e627617c8..d9e03dd947 100644 --- a/sdk/python/kfp/dsl/structures.py +++ b/sdk/python/kfp/dsl/structures.py @@ -25,7 +25,6 @@ import kfp from kfp.dsl import placeholders from kfp.dsl import utils -from kfp.dsl import v1_components from kfp.dsl import v1_structures from kfp.dsl.container_component_artifact_channel import \ ContainerComponentArtifactChannel @@ -872,7 +871,7 @@ def extract_description(component_yaml: str) -> Union[str, None]: is_v1 = 'implementation' in set(pipeline_spec_dict.keys()) if is_v1: - v1_component = v1_components._load_component_spec_from_component_text( + v1_component = _load_component_spec_from_component_text( component_yaml) return cls.from_v1_component_spec(v1_component) else: @@ -1073,3 +1072,15 @@ def load_documents_from_yaml(component_yaml: str) -> Tuple[dict, dict]: f'Expected one or two YAML documents in the IR YAML file. Got: {num_docs}.' 
) return pipeline_spec_dict, platform_spec_dict + + +def _load_component_spec_from_component_text( + text) -> v1_structures.ComponentSpec: + component_dict = yaml.safe_load(text) + component_spec = v1_structures.ComponentSpec.from_dict(component_dict) + + # Calculating hash digest for the component + data = text if isinstance(text, bytes) else text.encode('utf-8') + data = data.replace(b'\r\n', b'\n') # Normalizing line endings + + return component_spec diff --git a/sdk/python/kfp/dsl/types/type_utils.py b/sdk/python/kfp/dsl/types/type_utils.py index 40723f4f1f..12a78eda38 100644 --- a/sdk/python/kfp/dsl/types/type_utils.py +++ b/sdk/python/kfp/dsl/types/type_utils.py @@ -20,7 +20,6 @@ import warnings import kfp -from kfp.dsl import structures from kfp.dsl import task_final_status from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations @@ -231,7 +230,7 @@ def _get_type_string_from_component_argument( def verify_type_compatibility( given_value: Union['pipeline_channel.PipelineChannel', str, bool, int, float, dict, list], - expected_spec: Union[structures.InputSpec, structures.OutputSpec], + expected_spec: Union['structures.InputSpec', 'structures.OutputSpec'], error_message_prefix: str, checks_input: bool = True, raise_on_error: bool = True, diff --git a/sdk/python/kfp/dsl/v1_components.py b/sdk/python/kfp/dsl/v1_components.py deleted file mode 100644 index 9714d56eef..0000000000 --- a/sdk/python/kfp/dsl/v1_components.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2018-2022 The Kubeflow Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import hashlib -import warnings - -from kfp.dsl import v1_structures -import yaml - - -def _load_component_spec_from_component_text( - text) -> v1_structures.ComponentSpec: - component_dict = yaml.safe_load(text) - component_spec = v1_structures.ComponentSpec.from_dict(component_dict) - - if isinstance(component_spec.implementation, - v1_structures.ContainerImplementation) and ( - component_spec.implementation.container.command is None): - warnings.warn( - 'Container component must specify command to be compatible with KFP ' - 'v2 compatible mode and emissary executor, which will be the default' - ' executor for KFP v2.' 
- 'https://www.kubeflow.org/docs/components/pipelines/installation/choose-executor/', - category=FutureWarning, - ) - - # Calculating hash digest for the component - data = text if isinstance(text, bytes) else text.encode('utf-8') - data = data.replace(b'\r\n', b'\n') # Normalizing line endings - digest = hashlib.sha256(data).hexdigest() - component_spec._digest = digest - - return component_spec diff --git a/sdk/python/kfp/dsl/v1_structures.py b/sdk/python/kfp/dsl/v1_structures.py index 661cef196f..57cc7c6375 100644 --- a/sdk/python/kfp/dsl/v1_structures.py +++ b/sdk/python/kfp/dsl/v1_structures.py @@ -16,7 +16,6 @@ from typing import Any, Dict, List, Mapping, Optional, Union from kfp.dsl.v1_modelbase import ModelBase -import yaml PrimitiveTypes = Union[str, int, float, bool] PrimitiveTypesIncludingNone = Optional[PrimitiveTypes] @@ -437,17 +436,6 @@ def verify_arg(arg): f'Argument "{argument}" references non-existing input.' ) - def save(self, file_path: str): - """Saves the component definition to file. - - It can be shared online and later loaded using the - load_component function. - """ - - component_yaml = yaml.dump(self.to_dict(), sort_keys=True) - with open(file_path, 'w') as f: - f.write(component_yaml) - class ComponentReference(ModelBase): """Component reference. diff --git a/sdk/python/test_data/components/add_numbers.yaml b/sdk/python/test_data/components/add_numbers.yaml index 5b5486da36..9831bb3943 100644 --- a/sdk/python/test_data/components/add_numbers.yaml +++ b/sdk/python/test_data/components/add_numbers.yaml @@ -32,15 +32,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -81,4 +83,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/component_with_metadata_fields.yaml b/sdk/python/test_data/components/component_with_metadata_fields.yaml index 61a41867cf..d83c24412d 100644 --- a/sdk/python/test_data/components/component_with_metadata_fields.yaml +++ b/sdk/python/test_data/components/component_with_metadata_fields.yaml @@ -48,15 +48,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -124,4 +126,4 @@ root: description: The concatenated string. parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/component_with_pip_install.yaml b/sdk/python/test_data/components/component_with_pip_install.yaml index 4e4335a204..5a867befd1 100644 --- a/sdk/python/test_data/components/component_with_pip_install.yaml +++ b/sdk/python/test_data/components/component_with_pip_install.yaml @@ -19,14 +19,16 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ + 3.9\"' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -46,4 +48,4 @@ root: taskInfo: name: component-with-pip-install schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/component_with_task_final_status.yaml b/sdk/python/test_data/components/component_with_task_final_status.yaml index ac138f7055..2f8f36a303 100644 --- a/sdk/python/test_data/components/component_with_task_final_status.yaml +++ b/sdk/python/test_data/components/component_with_task_final_status.yaml @@ -24,15 +24,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -61,4 +63,4 @@ root: isOptional: true parameterType: TASK_FINAL_STATUS schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/concat_message.yaml b/sdk/python/test_data/components/concat_message.yaml index 5dc62f9620..978f67b5d5 100644 --- a/sdk/python/test_data/components/concat_message.yaml +++ b/sdk/python/test_data/components/concat_message.yaml @@ -32,15 +32,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -82,4 +84,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/containerized_python_component.py b/sdk/python/test_data/components/containerized_python_component.py new file mode 100644 index 0000000000..041722d97f --- /dev/null +++ b/sdk/python/test_data/components/containerized_python_component.py @@ -0,0 +1,26 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from kfp import dsl + + +@dsl.component(base_image='python:3.7', target_image='kfp-image') +def concat_message(message1: str, message2: str) -> str: + return message1 + message2 + + +if __name__ == '__main__': + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=concat_message, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/components/containerized_python_component.yaml b/sdk/python/test_data/components/containerized_python_component.yaml new file mode 100644 index 0000000000..17c146a193 --- /dev/null +++ b/sdk/python/test_data/components/containerized_python_component.yaml @@ -0,0 +1,70 @@ +# PIPELINE DEFINITION +# Name: concat-message +# Inputs: +# message1: str +# message2: str +# Outputs: +# Output: str +components: + comp-concat-message: + executorLabel: exec-concat-message + inputDefinitions: + parameters: + message1: + parameterType: STRING + message2: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING +deploymentSpec: + executors: + exec-concat-message: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - concat_message + command: + - python3 + - -m + - kfp.dsl.executor_main + image: kfp-image +pipelineInfo: + name: concat-message +root: + dag: + outputs: + parameters: + Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: concat-message + tasks: + concat-message: + cachingOptions: + enableCache: true + componentRef: + name: comp-concat-message + inputs: + parameters: + message1: + componentInputParameter: message1 + message2: + componentInputParameter: message2 + taskInfo: + name: concat-message + inputDefinitions: + parameters: + message1: + parameterType: STRING + message2: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/dict_input.yaml b/sdk/python/test_data/components/dict_input.yaml index 977103a338..a3acf422be 100644 --- a/sdk/python/test_data/components/dict_input.yaml +++ b/sdk/python/test_data/components/dict_input.yaml @@ -23,15 +23,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -58,4 +60,4 @@ root: struct: parameterType: STRUCT schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/identity.yaml b/sdk/python/test_data/components/identity.yaml index b8a4551a9f..afb45e1bf4 100644 --- a/sdk/python/test_data/components/identity.yaml +++ b/sdk/python/test_data/components/identity.yaml @@ -29,15 +29,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -74,4 +76,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/input_artifact.yaml b/sdk/python/test_data/components/input_artifact.yaml index e029dd8161..935ccf999f 100644 --- a/sdk/python/test_data/components/input_artifact.yaml +++ b/sdk/python/test_data/components/input_artifact.yaml @@ -25,15 +25,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -63,4 +65,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/nested_return.yaml b/sdk/python/test_data/components/nested_return.yaml index 810215dcf3..db89274404 100644 --- a/sdk/python/test_data/components/nested_return.yaml +++ b/sdk/python/test_data/components/nested_return.yaml @@ -23,15 +23,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -61,4 +63,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/output_metrics.yaml b/sdk/python/test_data/components/output_metrics.yaml index 6a18a32d0b..59ff838903 100644 --- a/sdk/python/test_data/components/output_metrics.yaml +++ b/sdk/python/test_data/components/output_metrics.yaml @@ -27,15 +27,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -77,4 +79,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/components/preprocess.yaml b/sdk/python/test_data/components/preprocess.yaml index 03c46dbdac..8b117f75d2 100644 --- a/sdk/python/test_data/components/preprocess.yaml +++ b/sdk/python/test_data/components/preprocess.yaml @@ -56,15 +56,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,4 +173,4 @@ root: output_parameter_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index f53f6ae05d..c17a2dda7b 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,15 +29,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -68,4 +70,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index 59ebc83433..069b56c836 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -19,14 +19,16 @@ deploymentSpec: \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location --index-url\ \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ + 3.9\"' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -45,4 +47,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 
-sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index 5bcf95a08e..be6e3b8456 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,15 +126,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -155,15 +157,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -237,4 +241,4 @@ root: schemaVersion: 0.0.1 isOptional: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index abc9a2995d..86942c1035 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -78,15 +78,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -130,15 +132,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -238,4 +242,4 @@ root: message: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index b7525f874c..34a2d445eb 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -81,15 +81,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -108,15 +110,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -135,15 +139,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -162,15 +168,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -273,4 +281,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml index ad5e32ce02..efaf520b65 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml @@ -285,15 +285,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -315,15 +317,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -345,15 +349,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -375,15 +381,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -403,15 +411,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -484,4 +494,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml index 55f5c8ae24..ebfe1626dc 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml @@ -90,15 +90,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -136,15 +138,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -209,4 +213,4 @@ root: schemaVersion: 0.0.1 isArtifactList: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index c2d8aae620..920854731b 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -132,15 +132,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -158,15 +160,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -225,4 +229,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index af4379d557..9d605894d6 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -150,15 +150,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -177,15 +179,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,15 +207,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -229,15 +235,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -283,4 +291,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index b76f1ad5b6..1c3ac78cff 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -224,15 +224,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -251,15 +253,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -277,15 +281,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -303,15 +309,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -330,15 +338,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -357,15 +367,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -383,15 +395,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -477,4 +491,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index 9bc16ff5b2..1775baf68f 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -75,15 +75,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -111,15 +113,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -180,4 +184,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index 18fc3aa052..84703103ae 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -206,15 +206,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -233,15 +235,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -259,15 +263,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -286,15 +292,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -356,4 +364,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index 42c88e3a68..acee25db35 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,15 +129,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -156,15 +158,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -183,15 +187,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -210,15 +216,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -262,4 +270,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index 9c8f5e0993..b5ccf82dc6 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,15 +74,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -101,15 +103,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -152,4 +156,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index 63ce9aceb0..89b94ee481 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,15 +161,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -188,15 +190,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -241,4 +245,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index ab7d67cac7..299f167fca 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,15 +152,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -179,15 +181,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -206,15 +210,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -264,4 +270,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index 5eed3984a5..fb3b2a18bf 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -88,15 +88,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -116,15 +118,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -144,15 +148,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,15 +177,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -198,15 +206,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -264,4 +274,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 6443b13909..881e90e849 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -94,15 +94,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -181,4 +183,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index 789a1e975d..190dcddb41 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -41,15 +41,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -79,4 +81,4 @@ root: taskInfo: name: print-env-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index b1c6091fe2..77b304058a 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,15 +65,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -92,15 +94,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -119,15 +123,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -171,4 +177,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index 6753ae29a0..ca47a62006 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -57,14 +57,16 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.1' 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ + 3.9\"' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -90,14 +92,16 @@ deploymentSpec: - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.0.1' && \"$0\" \"$@\"\n" + \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ + 3.9\"' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -150,4 +154,4 @@ root: taskInfo: name: model-producer schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index a7678237f6..7cbd1febcc 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -127,15 +127,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -159,15 +161,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -235,4 +239,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index 13999d852c..4ece667f08 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,15 +171,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -198,15 +200,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -224,15 +228,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -250,15 +256,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -276,15 +284,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -302,15 +312,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -328,15 +340,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -354,15 +368,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -424,4 +440,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index fbf6dd967b..2ee2812445 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,15 +602,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -631,15 +633,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -660,15 +664,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -688,15 +694,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -714,15 +722,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -741,15 +751,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -768,15 +780,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -795,15 +809,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -822,15 +838,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -849,15 +867,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -876,15 +896,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -903,15 +925,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -930,15 +954,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -1022,4 +1048,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 1aa009e344..66c29bd1f8 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,15 +60,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -95,15 +97,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -172,4 +176,4 @@ root: schemaVersion: 0.0.1 description: The final concatenated dataset. schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index d2091815bf..c77082feb7 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -60,15 +60,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -89,15 +91,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -148,4 +152,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index 3bbec7526c..f8f7a3a20b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,15 +125,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -152,15 +154,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -179,15 +183,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -206,15 +212,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -233,15 +241,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -260,15 +270,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -287,15 +299,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -389,4 +403,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index e81a303531..0acc74c83b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,15 +147,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -175,15 +177,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,15 +207,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -231,15 +237,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -259,15 +267,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -286,15 +296,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -313,15 +325,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -340,15 +354,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -426,4 +442,4 @@ root: taskInfo: name: print-op-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index 9b601893ed..32f83fc03b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -145,15 +145,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -172,15 +174,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -199,15 +203,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -256,4 +262,4 @@ root: isOptional: true parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index 1cba4dd0a2..478e3b776b 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,15 +104,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -131,15 +133,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -203,4 +207,4 @@ root: schemaTitle: system.Artifact schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index f1f3a5fa23..940b9e3673 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,15 +179,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -205,15 +207,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -231,15 +235,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -257,15 +263,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -283,15 +291,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -309,15 +319,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -357,4 +369,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index 6f31bc7deb..e00a15a3f0 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,15 +74,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -101,15 +103,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -128,15 +132,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -201,4 +207,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index 5a313c4ed4..df2aa2cfa3 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,15 +55,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -81,15 +83,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -107,15 +111,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -133,15 +139,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -159,15 +167,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -254,4 +264,4 @@ root: taskInfo: name: print-op-5 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index 34c474435b..137162068c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,15 +30,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -78,4 +80,4 @@ root: isOptional: true parameterType: NUMBER_DOUBLE schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index e53e19ac60..b95c0cebf4 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,15 +68,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -99,15 +101,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -126,15 +130,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -180,4 +186,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index 385cb4a1d4..da4c224ed7 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,15 +35,17 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -62,15 +64,17 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -117,4 +121,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.1 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/test_data_config.yaml b/sdk/python/test_data/test_data_config.yaml index 87958e130a..02aae9d1da 100644 --- a/sdk/python/test_data/test_data_config.yaml +++ b/sdk/python/test_data/test_data_config.yaml @@ -233,6 +233,9 @@ components: - module: component_with_task_final_status name: exit_comp execute: false + - module: containerized_python_component + name: concat_message + execute: false v1_components: test_data_dir: sdk/python/test_data/v1_component_yaml read: true diff --git a/sdk/runtime_tests/execute_commands_args_test.py b/sdk/runtime_tests/execute_commands_args_test.py new file mode 100644 index 0000000000..42b7672b32 --- /dev/null +++ b/sdk/runtime_tests/execute_commands_args_test.py @@ -0,0 +1,163 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import dataclasses +import json +import os +import re +import shutil +import subprocess +import tempfile +from typing import Any, Dict + +from absl.testing import parameterized +import yaml + +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'test_data') + + +@dataclasses.dataclass +class RuntimeTestConfig: + pipeline_file_relpath: str + executor_name: str + executor_input: Dict[str, Any] + + +TEST_CONFIGS = [ + RuntimeTestConfig( + pipeline_file_relpath=os.path.join( + TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), + executor_name='exec-print-op', + executor_input={ + 'inputs': { + 'parameterValues': { + 'message': 'Hello World!' + }, + 'parameters': { + 'message': { + 'stringValue': 'Hello World!' 
+ } + } + }, + 'outputs': { + 'outputFile': + '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-18-50-32/print-op_-9063136771365142528/executor_output.json' + } + }, + ), + RuntimeTestConfig( + pipeline_file_relpath=os.path.join( + TEST_DATA_DIR, 'pipeline_with_task_final_status.yaml'), + executor_name='exec-exit-op', + executor_input={ + 'inputs': { + 'parameterValues': { + 'status': { + 'error': { + 'code': + 9, + 'message': + 'The DAG failed because some tasks failed. The failed tasks are: [print-op, fail-op].' + }, + 'pipelineJobResourceName': + 'projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11', + 'pipelineTaskName': + 'my-pipeline', + 'state': + 'FAILED' + }, + 'user_input': 'Hello World!' + }, + 'parameters': { + 'status': { + 'stringValue': + "{\"error\":{\"code\":9,\"message\":\"The DAG failed because some tasks failed. The failed tasks are: [print-op, fail-op].\"},\"pipelineJobResourceName\":\"projects/271009669852/locations/us-central1/pipelineJobs/pipeline-with-task-final-status-07-14-2023-19-07-11\",\"pipelineTaskName\":\"my-pipeline\",\"state\":\"FAILED\"}" + }, + 'user_input': { + 'stringValue': 'Hello World!' + } + } + }, + 'outputs': { + 'outputFile': + '/gcs/cjmccarthy-kfp-default-bucket/271009669852/pipeline-with-task-final-status-07-14-2023-19-07-11/exit-op_-6100894116462198784/executor_output.json' + } + }, + ) +] + + +PULL_NUMBER = None + + +def run_commands_and_args( + config: RuntimeTestConfig, + temp_dir: str, +) -> subprocess.CompletedProcess: + with open(config.pipeline_file_relpath) as f: + pipeline_spec_dict = yaml.safe_load(f) + container = pipeline_spec_dict['deploymentSpec']['executors'][ + config.executor_name]['container'] + + command_and_args = container['command'] + container['args'] + # https://docs.prow.k8s.io/docs/jobs/#job-environment-variables + # pip install from source in a container via a subprocess causes many + # permission issues + # resolving by modifying the commands/args changes the commands/args + # so much that it renders the test less valuable, since the + # commands/args resemble the true runtime commands/args less well + # prefer the less invasive approach of installing from a PR + global PULL_NUMBER + if PULL_NUMBER is None: + if 'PULL_NUMBER' in os.environ: + PULL_NUMBER = os.environ['PULL_NUMBER'] + else: + PULL_NUMBER = input( + "Please provide the PR number for the kubeflow/pipelines PR that contains the changes you'd like to test:" + ) + + kfp_package_path = f'git+https://github.com/kubeflow/pipelines.git@refs/pull/{PULL_NUMBER}/merge#subdirectory=sdk/python' + command_and_args = [ + re.sub(r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", kfp_package_path, + cmd) for cmd in command_and_args + ] + executor_input_json = json.dumps(config.executor_input).replace( + '/gcs/', temp_dir) + command_and_args = [ + v.replace('{{$}}', executor_input_json) for v in command_and_args + ] + + return subprocess.run( + command_and_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + +class TestRuntime(parameterized.TestCase): + + @classmethod + def setUp(cls): + cls.temp_dir = tempfile.mkdtemp() + + @classmethod + def tearDown(cls): + shutil.rmtree(cls.temp_dir) + + @parameterized.parameters(TEST_CONFIGS) + def test(self, config: RuntimeTestConfig): + process = run_commands_and_args( + config=config, + temp_dir=self.temp_dir, + ) + self.assertEqual(process.returncode, 0, process.stderr) diff --git 
a/sdk/runtime_tests/test_data/pipeline_with_task_final_status.py b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.py new file mode 100644 index 0000000000..27d418a333 --- /dev/null +++ b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.py @@ -0,0 +1,58 @@ +# Copyright 2022 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Pipeline using ExitHandler with PipelineTaskFinalStatus.""" + +from kfp import compiler +from kfp import dsl +from kfp.dsl import component +from kfp.dsl import PipelineTaskFinalStatus + + +@component +def exit_op(user_input: str, status: PipelineTaskFinalStatus): + """Checks pipeline run status.""" + print('Pipeline status: ', status.state) + print('Job resource name: ', status.pipeline_job_resource_name) + print('Pipeline task name: ', status.pipeline_task_name) + print('Error code: ', status.error_code) + print('Error message: ', status.error_message) + + +@component +def print_op(message: str): + """Prints a message.""" + print(message) + + +@component +def fail_op(message: str): + """Fails.""" + import sys + print(message) + sys.exit(1) + + +@dsl.pipeline(name='pipeline-with-task-final-status') +def my_pipeline(message: str = 'Hello World!'): + exit_task = exit_op(user_input=message) + + with dsl.ExitHandler(exit_task, name='my-pipeline'): + print_op(message=message) + fail_op(message='Task failed.') + + +if __name__ == '__main__': + compiler.Compiler().compile( + pipeline_func=my_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml new file mode 100644 index 0000000000..b95c0cebf4 --- /dev/null +++ b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml @@ -0,0 +1,189 @@ +# PIPELINE DEFINITION +# Name: pipeline-with-task-final-status +# Inputs: +# message: str [Default: 'Hello World!'] +components: + comp-exit-handler-1: + dag: + tasks: + fail-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-fail-op + inputs: + parameters: + message: + runtimeValue: + constant: Task failed. 
+ taskInfo: + name: fail-op + print-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-op + inputs: + parameters: + message: + componentInputParameter: pipelinechannel--message + taskInfo: + name: print-op + inputDefinitions: + parameters: + pipelinechannel--message: + parameterType: STRING + comp-exit-op: + executorLabel: exec-exit-op + inputDefinitions: + parameters: + status: + isOptional: true + parameterType: TASK_FINAL_STATUS + user_input: + parameterType: STRING + comp-fail-op: + executorLabel: exec-fail-op + inputDefinitions: + parameters: + message: + parameterType: STRING + comp-print-op: + executorLabel: exec-print-op + inputDefinitions: + parameters: + message: + parameterType: STRING +deploymentSpec: + executors: + exec-exit-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - exit_op + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef exit_op(user_input: str, status: PipelineTaskFinalStatus):\n\ + \ \"\"\"Checks pipeline run status.\"\"\"\n print('Pipeline status:\ + \ ', status.state)\n print('Job resource name: ', status.pipeline_job_resource_name)\n\ + \ print('Pipeline task name: ', status.pipeline_task_name)\n print('Error\ + \ code: ', status.error_code)\n print('Error message: ', status.error_message)\n\ + \n" + image: python:3.7 + exec-fail-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - fail_op + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n\ + \ print(message)\n sys.exit(1)\n\n" + image: python:3.7 + exec-print-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_op + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n\ + \ print(message)\n\n" + image: python:3.7 +pipelineInfo: + name: pipeline-with-task-final-status +root: + dag: + tasks: + exit-handler-1: + componentRef: + name: comp-exit-handler-1 + inputs: + parameters: + pipelinechannel--message: + componentInputParameter: message + taskInfo: + name: my-pipeline + exit-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-exit-op + dependentTasks: + - exit-handler-1 + inputs: + parameters: + status: + taskFinalStatus: + producerTask: exit-handler-1 + user_input: + componentInputParameter: message + taskInfo: + name: exit-op + triggerPolicy: + strategy: ALL_UPSTREAM_TASKS_COMPLETED + inputDefinitions: + parameters: + message: + defaultValue: Hello World! + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.1.2 diff --git a/test/presubmit-test-kfp-runtime-code.sh b/test/presubmit-test-kfp-runtime-code.sh new file mode 100755 index 0000000000..3e1196c647 --- /dev/null +++ b/test/presubmit-test-kfp-runtime-code.sh @@ -0,0 +1,33 @@ +#!/bin/bash -ex +# Copyright 2023 Kubeflow Pipelines contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex +source_root=$(pwd) + +pip install --upgrade pip +pip install pyyaml +pip install $(grep 'absl-py==' sdk/python/requirements-dev.txt) + +# precautionarilty uninstall typing-extensions, in case any of the test libs +# installed require this dep. we want to test that the kfp sdk installs it, so +# it cannot be present in the environment prior to test execution. +# we'd rather tests fail to execute (false positive failure) because a test +# lib was missing its dependency on typing-extensions than get a false +# negative from the actual kfp sdk test because typing-extensions was already +# present in the environment. +pip uninstall typing-extensions -y + +# run with unittest because pytest requires typing-extensions +python -m unittest discover -s sdk/runtime_tests -p '*_test.py' From 43a3c5c94bb15e6c46b5c537c32b0ba1b2341aa7 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Thu, 24 Aug 2023 15:44:53 -0700 Subject: [PATCH 124/253] chore(stalebot): Increase stalebot operations from 30 to 200. 
(#9928) --- .github/stale.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/stale.yml b/.github/stale.yml index ad4197e7f0..37fa74a5c8 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -19,3 +19,5 @@ markComment: > closeComment: > This issue has been automatically closed because it has not had recent activity. Please comment "/reopen" to reopen it. +# Learn more about operations: https://github.com/actions/stale#operations-per-run. +operations-per-run: 200 \ No newline at end of file From 63a0803e3a355b7cade2ddef69e7b57d96707436 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Fri, 25 Aug 2023 14:22:33 -0700 Subject: [PATCH 125/253] fix: Move stale GHA operation config to the right place (#9935) --- .github/stale.yml | 4 +--- .github/workflows/stale.yml | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/stale.yml b/.github/stale.yml index 37fa74a5c8..7232a69fed 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -18,6 +18,4 @@ markComment: > # Comment to post when closing a stale issue. Set to `false` to disable closeComment: > This issue has been automatically closed because it has not had recent - activity. Please comment "/reopen" to reopen it. -# Learn more about operations: https://github.com/actions/stale#operations-per-run. -operations-per-run: 200 \ No newline at end of file + activity. Please comment "/reopen" to reopen it. \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 725e6b632c..1d4fa1c740 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -35,3 +35,5 @@ jobs: stale-issue-label: 'lifecycle/stale' exempt-issue-labels: lifecycle/frozen exempt-pr-labels: lifecycle/frozen + # Learn more about operations: https://github.com/actions/stale#operations-per-run. 
+ operations-per-run: 200 \ No newline at end of file From 110e0824812883b74c73b26603a78d8cc00548d5 Mon Sep 17 00:00:00 2001 From: Googler Date: Sun, 27 Aug 2023 03:44:58 -0700 Subject: [PATCH 126/253] feat(components): Update policy to reward model name mapping in function based component in _implementation/llm PiperOrigin-RevId: 560479276 --- .../_implementation/llm/function_based.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index 5cf77e053a..a71783d296 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -294,15 +294,15 @@ def resolve_reference_model_metadata( 'llama-2-7b-chat': reference_model_metadata( large_model_reference='LLAMA_2_7B_CHAT', reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b_chat/', - reward_model_reference='LLAMA_2_7B_CHAT', - reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b_chat/', + reward_model_reference='LLAMA_2_7B', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_7b/', is_supported=True, ), 'llama-2-13b-chat': reference_model_metadata( large_model_reference='LLAMA_2_13B_CHAT', reference_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b_chat/', - reward_model_reference='LLAMA_2_13B_CHAT', - reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b_chat/', + reward_model_reference='LLAMA_2_13B', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/llama/t5x_llama_2_13b/', is_supported=True, ), } From e3bf085997aabc9024eed1fd2e002f77cc4fc43e Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 28 Aug 2023 13:19:34 -0700 Subject: [PATCH 127/253] fix(components):Update batch_prediction_*_gcs_source to predictions_*_gcs_source in information retrieval preprocessor PiperOrigin-RevId: 560800444 --- .../component.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py index 63428f4485..47033772af 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -26,8 +26,8 @@ @container_component def llm_information_retrieval_preprocessor( gcp_resources: OutputPath(str), - batch_prediction_query_gcs_source: OutputPath(list), - batch_prediction_corpus_gcs_source: OutputPath(list), + predictions_query_gcs_source: OutputPath(list), + predictions_corpus_gcs_source: OutputPath(list), embedding_retrieval_gcs_source: OutputPath(str), project: str, location: str, @@ -103,10 +103,10 @@ def llm_information_retrieval_preprocessor( Returns: gcp_resources (str): Serialized gcp_resources proto tracking the custom job. 
- batch_prediction_query_gcs_source (list): + predictions_query_gcs_source (list): The GCS directory to save preprocessed query data to run batch prediction. - batch_prediction_corpus_gcs_source (list): + predictions_corpus_gcs_source (list): The GCS directory to save preprocessed corpus data to run batch prediction. embedding_retrieval_gcs_source (str): @@ -129,8 +129,8 @@ def llm_information_retrieval_preprocessor( f'--golden_docs_gcs_source={golden_docs_gcs_source}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--gcp_resources={gcp_resources}', - f'--batch_prediction_query_gcs_source={batch_prediction_query_gcs_source}', - f'--batch_prediction_corpus_gcs_source={batch_prediction_corpus_gcs_source}', + f'--predictions_query_gcs_source={predictions_query_gcs_source}', + f'--predictions_corpus_gcs_source={predictions_corpus_gcs_source}', f'--embedding_retrieval_gcs_source={embedding_retrieval_gcs_source}', f'--runner={runner}', f'--dataflow_service_account={dataflow_service_account}', From ba2440a842ca51966f4eccbbf35d6a54ed7c75ed Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Mon, 28 Aug 2023 19:07:33 -0700 Subject: [PATCH 128/253] chore(test): update marketplace snapshot (#9900) From bc5fe57378ab672ce87df468325d02da36eb580c Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Wed, 30 Aug 2023 13:28:07 -0700 Subject: [PATCH 129/253] chore(frontend): Refactor RecurringRunDetails to functional component (#9939) * Add alternative functional component for recurring run v2 details. * Remove unnecessary recurringRunDetailsV2FCProps * Add unit tests. Move the file to FC folder. Add updatebanner logic for error case Simplify the getInitialToolBar() helper. * Add new feature key "functional" to enable rendering functional component. (only for validation test now) * Remove handling error in useQuery. Change feature flags. Rename folder. * Resolve eslint warning * Avoid use recurringRun and experiment (object) as trigger for useEffect(). * Remove unused import. * Extract set() logic from useQuery. Add documentation for error handle useEffect(). --- frontend/src/features.ts | 11 +- .../src/pages/RecurringRunDetailsRouter.tsx | 8 +- .../src/pages/RecurringRunDetailsV2.test.tsx | 4 +- .../RecurringRunDetailsV2FC.test.tsx | 370 ++++++++++++++++++ .../RecurringRunDetailsV2FC.tsx | 249 ++++++++++++ 5 files changed, 639 insertions(+), 3 deletions(-) create mode 100644 frontend/src/pages/functional_components/RecurringRunDetailsV2FC.test.tsx create mode 100644 frontend/src/pages/functional_components/RecurringRunDetailsV2FC.tsx diff --git a/frontend/src/features.ts b/frontend/src/features.ts index a9e41f3567..769195139f 100644 --- a/frontend/src/features.ts +++ b/frontend/src/features.ts @@ -7,6 +7,9 @@ export interface Feature { export enum FeatureKey { V2 = 'v2', // Please start using V2_ALPHA instead of V2, because we have switched to V2_ALPHA as V2 feature is enabled by default. V2_ALPHA = 'v2_alpha', + FUNCTIONAL_COMPONENT = 'functional_component', + // We plan to refactor the class component to functional component. + // To avoid breacking current behavior, enable this feature to do the bugbash / validation test for functional components. 
} const FEATURE_V2 = { @@ -21,7 +24,13 @@ const FEATURE_V2_ALPHA = { active: true, }; -const features: Feature[] = [FEATURE_V2, FEATURE_V2_ALPHA]; +const FEATURE_FUNCTIONAL_COMPONENT = { + name: FeatureKey.FUNCTIONAL_COMPONENT, + description: 'Use functional component', + active: false, +}; + +const features: Feature[] = [FEATURE_V2, FEATURE_V2_ALPHA, FEATURE_FUNCTIONAL_COMPONENT]; declare global { var __FEATURE_FLAGS__: string; diff --git a/frontend/src/pages/RecurringRunDetailsRouter.tsx b/frontend/src/pages/RecurringRunDetailsRouter.tsx index c9a276056d..31919e3fa7 100644 --- a/frontend/src/pages/RecurringRunDetailsRouter.tsx +++ b/frontend/src/pages/RecurringRunDetailsRouter.tsx @@ -24,6 +24,8 @@ import * as WorkflowUtils from 'src/lib/v2/WorkflowUtils'; import { PageProps } from './Page'; import RecurringRunDetails from './RecurringRunDetails'; import RecurringRunDetailsV2 from './RecurringRunDetailsV2'; +import { RecurringRunDetailsV2FC } from 'src/pages/functional_components/RecurringRunDetailsV2FC'; +import { FeatureKey, isFeatureEnabled } from 'src/features'; // This is a router to determine whether to show V1 or V2 recurring run details page. export default function RecurringRunDetailsRouter(props: PageProps) { @@ -76,7 +78,11 @@ export default function RecurringRunDetailsRouter(props: PageProps) { if (getRecurringRunSuccess && v2RecurringRun && templateString) { const isV2Pipeline = WorkflowUtils.isPipelineSpec(templateString); if (isV2Pipeline) { - return ; + return isFeatureEnabled(FeatureKey.FUNCTIONAL_COMPONENT) ? ( + + ) : ( + + ); } } diff --git a/frontend/src/pages/RecurringRunDetailsV2.test.tsx b/frontend/src/pages/RecurringRunDetailsV2.test.tsx index e723c639a8..3e9eb3829a 100644 --- a/frontend/src/pages/RecurringRunDetailsV2.test.tsx +++ b/frontend/src/pages/RecurringRunDetailsV2.test.tsx @@ -98,7 +98,9 @@ describe('RecurringRunDetailsV2', () => { }; jest.clearAllMocks(); - jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + jest + .spyOn(features, 'isFeatureEnabled') + .mockImplementation(featureKey => featureKey === features.FeatureKey.V2_ALPHA); getRecurringRunSpy.mockImplementation(() => fullTestV2RecurringRun); getPipelineVersionSpy.mockImplementation(() => testPipelineVersion); diff --git a/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.test.tsx b/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.test.tsx new file mode 100644 index 0000000000..800a07e305 --- /dev/null +++ b/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.test.tsx @@ -0,0 +1,370 @@ +/* + * Copyright 2023 The Kubeflow Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { render, screen, waitFor } from '@testing-library/react'; +import * as React from 'react'; +import fs from 'fs'; +import * as JsYaml from 'js-yaml'; +import { CommonTestWrapper } from 'src/TestWrapper'; +import RecurringRunDetailsRouter from 'src/pages/RecurringRunDetailsRouter'; +import TestUtils from 'src/TestUtils'; +import { V2beta1RecurringRun, V2beta1RecurringRunStatus } from 'src/apisv2beta1/recurringrun'; +import { V2beta1PipelineVersion } from 'src/apisv2beta1/pipeline'; +import { Apis } from 'src/lib/Apis'; +import { PageProps } from 'src/pages/Page'; +import { RouteParams, RoutePage } from 'src/components/Router'; +import * as features from 'src/features'; + +const V2_PIPELINESPEC_PATH = 'src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml'; +const v2YamlTemplateString = fs.readFileSync(V2_PIPELINESPEC_PATH, 'utf8'); + +describe('RecurringRunDetailsV2FC', () => { + const updateBannerSpy = jest.fn(); + const updateDialogSpy = jest.fn(); + const updateSnackbarSpy = jest.fn(); + const updateToolbarSpy = jest.fn(); + const historyPushSpy = jest.fn(); + const historyReplaceSpy = jest.fn(); + const getRecurringRunSpy = jest.spyOn(Apis.recurringRunServiceApi, 'getRecurringRun'); + const deleteRecurringRunSpy = jest.spyOn(Apis.recurringRunServiceApi, 'deleteRecurringRun'); + const enableRecurringRunSpy = jest.spyOn(Apis.recurringRunServiceApi, 'enableRecurringRun'); + const disableRecurringRunSpy = jest.spyOn(Apis.recurringRunServiceApi, 'disableRecurringRun'); + const getExperimentSpy = jest.spyOn(Apis.experimentServiceApiV2, 'getExperiment'); + const getPipelineVersionSpy = jest.spyOn(Apis.pipelineServiceApiV2, 'getPipelineVersion'); + + let fullTestV2RecurringRun: V2beta1RecurringRun = {}; + let testPipelineVersion: V2beta1PipelineVersion = {}; + + function generateProps(): PageProps { + return { + history: { push: historyPushSpy, replace: historyReplaceSpy } as any, + location: '' as any, + match: { + params: { [RouteParams.recurringRunId]: fullTestV2RecurringRun.recurring_run_id }, + isExact: true, + path: '', + url: '', + }, + toolbarProps: { actions: {}, breadcrumbs: [], pageTitle: '' }, + updateBanner: updateBannerSpy, + updateDialog: updateDialogSpy, + updateSnackbar: updateSnackbarSpy, + updateToolbar: updateToolbarSpy, + }; + } + + beforeEach(() => { + fullTestV2RecurringRun = { + created_at: new Date(2018, 8, 5, 4, 3, 2), + description: 'test recurring run description', + display_name: 'test recurring run', + max_concurrency: '50', + no_catchup: true, + pipeline_version_reference: { + pipeline_id: 'test-pipeline-id', + pipeline_version_id: 'test-pipeline-version-id', + }, + recurring_run_id: 'test-recurring-run-id', + runtime_config: { parameters: { param1: 'value1' } }, + status: V2beta1RecurringRunStatus.ENABLED, + trigger: { + periodic_schedule: { + end_time: new Date(2018, 10, 9, 8, 7, 6), + interval_second: '3600', + start_time: new Date(2018, 9, 8, 7, 6), + }, + }, + } as V2beta1RecurringRun; + + testPipelineVersion = { + display_name: 'test_pipeline_version', + pipeline_id: 'test_pipeline_id', + pipeline_version_id: 'test_pipeline_version_id', + pipeline_spec: JsYaml.safeLoad(v2YamlTemplateString), + }; + + jest.clearAllMocks(); + // mock both v2_alpha and functional feature keys are enable. 
+ jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + + getRecurringRunSpy.mockImplementation(() => fullTestV2RecurringRun); + getPipelineVersionSpy.mockImplementation(() => testPipelineVersion); + + deleteRecurringRunSpy.mockImplementation(); + enableRecurringRunSpy.mockImplementation(); + disableRecurringRunSpy.mockImplementation(); + getExperimentSpy.mockImplementation(); + }); + + it('renders a recurring run with periodic schedule', async () => { + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalledTimes(2); + expect(getPipelineVersionSpy).toHaveBeenCalled(); + }); + + screen.getByText('Enabled'); + screen.getByText('Yes'); + screen.getByText('Trigger'); + screen.getByText('Every 1 hours'); + screen.getByText('Max. concurrent runs'); + screen.getByText('50'); + screen.getByText('Catchup'); + screen.getByText('false'); + screen.getByText('param1'); + screen.getByText('value1'); + }); + + it('renders a recurring run with cron schedule', async () => { + const cronTestRecurringRun = { + ...fullTestV2RecurringRun, + no_catchup: undefined, // in api requests, it's undefined when false + trigger: { + cron_schedule: { + cron: '* * * 0 0 !', + end_time: new Date(2018, 10, 9, 8, 7, 6), + start_time: new Date(2018, 9, 8, 7, 6), + }, + }, + }; + getRecurringRunSpy.mockImplementation(() => cronTestRecurringRun); + + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + screen.getByText('Enabled'); + screen.getByText('Yes'); + screen.getByText('Trigger'); + screen.getByText('* * * 0 0 !'); + screen.getByText('Max. concurrent runs'); + screen.getByText('50'); + screen.getByText('Catchup'); + screen.getByText('true'); + }); + + it('loads the recurring run given its id in query params', async () => { + // The run id is in the router match object, defined inside generateProps + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + expect(getRecurringRunSpy).toHaveBeenLastCalledWith(fullTestV2RecurringRun.recurring_run_id); + expect(getExperimentSpy).not.toHaveBeenCalled(); + }); + + it('shows All runs -> run name when there is no experiment', async () => { + // The run id is in the router match object, defined inside generateProps + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + expect(updateToolbarSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + breadcrumbs: [{ displayName: 'All runs', href: RoutePage.RUNS }], + pageTitle: fullTestV2RecurringRun.display_name, + }), + ); + }); + + it('loads the recurring run and its experiment if it has one', async () => { + fullTestV2RecurringRun.experiment_id = 'test-experiment-id'; + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + expect(getRecurringRunSpy).toHaveBeenLastCalledWith(fullTestV2RecurringRun.recurring_run_id); + expect(getExperimentSpy).toHaveBeenLastCalledWith('test-experiment-id'); + }); + + it('shows Experiments -> Experiment name -> run name when there is an experiment', async () => { + fullTestV2RecurringRun.experiment_id = 'test-experiment-id'; + getExperimentSpy.mockImplementation(id => ({ + experiment_id: id, + display_name: 'test experiment name', + })); + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + expect(getExperimentSpy).toHaveBeenCalled(); + }); + + expect(updateToolbarSpy).toHaveBeenLastCalledWith( + 
expect.objectContaining({ + breadcrumbs: [ + { displayName: 'Experiments', href: RoutePage.EXPERIMENTS }, + { + displayName: 'test experiment name', + href: RoutePage.EXPERIMENT_DETAILS.replace( + ':' + RouteParams.experimentId, + 'test-experiment-id', + ), + }, + ], + pageTitle: fullTestV2RecurringRun.display_name, + }), + ); + }); + + it('shows error banner if run cannot be fetched', async () => { + TestUtils.makeErrorResponseOnce(getRecurringRunSpy, 'woops!'); + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + expect(updateBannerSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + additionalInfo: 'woops!', + message: `Error: failed to retrieve recurring run: ${fullTestV2RecurringRun.recurring_run_id}. Click Details for more information.`, + mode: 'error', + }), + ); + }); + + it('shows warning banner if has experiment but experiment cannot be fetched. still loads run', async () => { + fullTestV2RecurringRun.experiment_id = 'test-experiment-id'; + TestUtils.makeErrorResponseOnce(getExperimentSpy, 'woops!'); + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + }); + + expect(updateBannerSpy).toHaveBeenLastCalledWith( + expect.objectContaining({ + additionalInfo: 'woops!', + message: `Error: failed to retrieve this recurring run's experiment. Click Details for more information.`, + mode: 'warning', + }), + ); + + // "Still loads run" means that the details are still rendered successfully. + screen.getByText('Enabled'); + screen.getByText('Yes'); + screen.getByText('Trigger'); + screen.getByText('Every 1 hours'); + screen.getByText('Max. concurrent runs'); + screen.getByText('50'); + screen.getByText('Catchup'); + screen.getByText('false'); + screen.getByText('param1'); + screen.getByText('value1'); + }); + + it('shows top bar buttons', async () => { + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + expect(updateToolbarSpy).toHaveBeenCalledWith( + expect.objectContaining({ + actions: expect.objectContaining({ + cloneRecurringRun: expect.objectContaining({ title: 'Clone recurring run' }), + refresh: expect.objectContaining({ title: 'Refresh' }), + enableRecurringRun: expect.objectContaining({ title: 'Enable', disabled: true }), + disableRecurringRun: expect.objectContaining({ title: 'Disable', disabled: false }), + deleteRun: expect.objectContaining({ title: 'Delete' }), + }), + }), + ); + }); + }); + + it('enables Enable buttons if the run is disabled', async () => { + fullTestV2RecurringRun.status = V2beta1RecurringRunStatus.DISABLED; + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + expect(updateToolbarSpy).toHaveBeenCalledWith( + expect.objectContaining({ + actions: expect.objectContaining({ + cloneRecurringRun: expect.objectContaining({ title: 'Clone recurring run' }), + refresh: expect.objectContaining({ title: 'Refresh' }), + enableRecurringRun: expect.objectContaining({ title: 'Enable', disabled: false }), + disableRecurringRun: expect.objectContaining({ title: 'Disable', disabled: true }), + deleteRun: expect.objectContaining({ title: 'Delete' }), + }), + }), + ); + }); + }); + + it('shows enables Enable buttons if the run is undefined', async () => { + fullTestV2RecurringRun.status = undefined; + render( + + + , + ); + await waitFor(() => { + expect(getRecurringRunSpy).toHaveBeenCalled(); + expect(updateToolbarSpy).toHaveBeenCalledWith( + expect.objectContaining({ + actions: 
expect.objectContaining({ + cloneRecurringRun: expect.objectContaining({ title: 'Clone recurring run' }), + refresh: expect.objectContaining({ title: 'Refresh' }), + enableRecurringRun: expect.objectContaining({ title: 'Enable', disabled: false }), + disableRecurringRun: expect.objectContaining({ title: 'Disable', disabled: true }), + deleteRun: expect.objectContaining({ title: 'Delete' }), + }), + }), + ); + }); + }); +}); diff --git a/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.tsx b/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.tsx new file mode 100644 index 0000000000..0972d50fa2 --- /dev/null +++ b/frontend/src/pages/functional_components/RecurringRunDetailsV2FC.tsx @@ -0,0 +1,249 @@ +/* + * Copyright 2023 The Kubeflow Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React, { useEffect, useState } from 'react'; +import { useQuery } from 'react-query'; +import Buttons, { ButtonKeys } from 'src/lib/Buttons'; +import DetailsTable from 'src/components/DetailsTable'; +import { V2beta1RecurringRun, V2beta1RecurringRunStatus } from 'src/apisv2beta1/recurringrun'; +import { V2beta1Experiment } from 'src/apisv2beta1/experiment'; +import { Apis } from 'src/lib/Apis'; +import { PageProps } from 'src/pages/Page'; +import { RoutePage, RouteParams } from 'src/components/Router'; +import { Breadcrumb, ToolbarProps } from 'src/components/Toolbar'; +import { classes } from 'typestyle'; +import { commonCss, padding } from 'src/Css'; +import { KeyValue } from 'src/lib/StaticGraphParser'; +import { formatDateString, enabledDisplayStringV2, errorToMessage } from 'src/lib/Utils'; +import { triggerDisplayString } from 'src/lib/TriggerUtils'; + +export function RecurringRunDetailsV2FC(props: PageProps) { + const { updateBanner, updateToolbar } = props; + const [refresh, setRefresh] = useState(true); + const [getRecurringRunErrMsg, setGetRecurringRunErrMsg] = useState(''); + const [getExperimentErrMsg, setGetExperimentErrMsg] = useState(''); + + // Related to Api Response + const [experimentName, setExperimentName] = useState(); + const [experimentIdFromApi, setExperimentIdFromApi] = useState(); + const [recurringRunName, setRecurringRunName] = useState(); + const [recurringRunIdFromApi, setRecurringRunIdFromApi] = useState(); + const [recurringRunStatus, setRecurringRunStatus] = useState(); + + const recurringRunId = props.match.params[RouteParams.recurringRunId]; + const Refresh = () => setRefresh(refreshed => !refreshed); + + const { + data: recurringRun, + error: getRecurringRunError, + refetch: refetchRecurringRun, + } = useQuery( + ['recurringRun', recurringRunId], + async () => { + return await Apis.recurringRunServiceApi.getRecurringRun(recurringRunId); + }, + { enabled: !!recurringRunId, staleTime: 0, cacheTime: 0 }, + ); + + const experimentId = recurringRun?.experiment_id!; + const { data: experiment, error: getExperimentError } = useQuery( + ['experiment'], + async () => { + return await 
Apis.experimentServiceApiV2.getExperiment(experimentId); + }, + { enabled: !!experimentId, staleTime: 0 }, + ); + + useEffect(() => { + if (recurringRun) { + setRecurringRunName(recurringRun.display_name); + setRecurringRunStatus(recurringRun.status); + setRecurringRunIdFromApi(recurringRun.recurring_run_id); + } + }, [recurringRun]); + + useEffect(() => { + if (experiment) { + setExperimentName(experiment.display_name); + setExperimentIdFromApi(experiment.experiment_id); + } + }, [experiment]); + + useEffect(() => { + const toolbarState = getInitialToolbarState(); + + toolbarState.actions[ButtonKeys.ENABLE_RECURRING_RUN].disabled = + recurringRunStatus === V2beta1RecurringRunStatus.ENABLED; + toolbarState.actions[ButtonKeys.DISABLE_RECURRING_RUN].disabled = + recurringRunStatus !== V2beta1RecurringRunStatus.ENABLED; + toolbarState.pageTitle = recurringRunName || recurringRunIdFromApi || 'Unknown recurring run'; + toolbarState.breadcrumbs = getBreadcrumbs(experimentIdFromApi, experimentName); + updateToolbar(toolbarState); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [ + recurringRunIdFromApi, + recurringRunName, + recurringRunStatus, + experimentIdFromApi, + experimentName, + ]); + + useEffect(() => { + if (getRecurringRunError) { + (async () => { + const errorMessage = await errorToMessage(getRecurringRunError); + setGetRecurringRunErrMsg(errorMessage); + })(); + } + + // getExperimentError is from the getExperiment useQuery which is enabled by the + // experiment ID in recurringRun object. => when getExperimentError changed, + // getRecurringRun useQuery must be successful (getRecurringRunError is null) + if (getExperimentError) { + (async () => { + const errorMessage = await errorToMessage(getExperimentError); + setGetExperimentErrMsg(errorMessage); + })(); + } + }, [getRecurringRunError, getExperimentError]); + + useEffect(() => { + if (getRecurringRunErrMsg) { + updateBanner({ + additionalInfo: getRecurringRunErrMsg ? getRecurringRunErrMsg : undefined, + message: + `Error: failed to retrieve recurring run: ${recurringRunId}.` + + (getRecurringRunErrMsg ? ' Click Details for more information.' : ''), + mode: 'error', + }); + } + + if (getExperimentErrMsg) { + updateBanner({ + additionalInfo: getExperimentErrMsg ? getExperimentErrMsg : undefined, + message: + `Error: failed to retrieve this recurring run's experiment.` + + (getExperimentErrMsg ? ' Click Details for more information.' : ''), + mode: 'warning', + }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [getRecurringRunErrMsg, getExperimentErrMsg]); + + useEffect(() => { + refetchRecurringRun(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [refresh]); + + const deleteCallback = (_: string[], success: boolean) => { + if (success) { + const breadcrumbs = props.toolbarProps.breadcrumbs; + const previousPage = breadcrumbs.length + ? breadcrumbs[breadcrumbs.length - 1].href + : RoutePage.EXPERIMENTS; + props.history.push(previousPage); + } + }; + + const getInitialToolbarState = (): ToolbarProps => { + const buttons = new Buttons(props, Refresh); + return { + actions: buttons + .cloneRecurringRun(() => (recurringRun ? [recurringRun.recurring_run_id!] : []), true) + .refresh(Refresh) + .enableRecurringRun(() => (recurringRun ? recurringRun.recurring_run_id! : '')) + .disableRecurringRun(() => (recurringRun ? recurringRun.recurring_run_id! : '')) + .delete( + () => (recurringRun ? [recurringRun.recurring_run_id!] 
: []), + 'recurring run config', + deleteCallback, + true /* useCurrentResource */, + ) + .getToolbarActionMap(), + breadcrumbs: [], + pageTitle: '', + }; + }; + + return ( +
<div className={classes(commonCss.page, padding(20, 'lr'))}>
+      {recurringRun && (
+        <div className={commonCss.page}>
+          <DetailsTable
+            title='Recurring run details'
+            fields={getRecurringRunDetails(recurringRun)}
+          />
+          <DetailsTable title='Run trigger' fields={getRunTriggers(recurringRun)} />
+          <DetailsTable title='Run parameters' fields={getRunParameters(recurringRun)} />
+        </div>
+      )}
+    </div>
+ ); +} + +function getBreadcrumbs(experimentId?: string, experimentName?: string): Breadcrumb[] { + const breadcrumbs: Breadcrumb[] = []; + if (experimentId) { + breadcrumbs.push( + { displayName: 'Experiments', href: RoutePage.EXPERIMENTS }, + { + displayName: experimentName || 'Unknown experiment name', + href: RoutePage.EXPERIMENT_DETAILS.replace(':' + RouteParams.experimentId, experimentId), + }, + ); + } else { + breadcrumbs.push({ displayName: 'All runs', href: RoutePage.RUNS }); + } + + return breadcrumbs; +} + +function getRecurringRunDetails(recurringRun: V2beta1RecurringRun): Array> { + let details: Array> = []; + + details.push(['Description', recurringRun.description!]); + details.push(['Created at', formatDateString(recurringRun.created_at)]); + + return details; +} + +function getRunTriggers(recurringRun: V2beta1RecurringRun): Array> { + let triggers: Array> = []; + + triggers.push(['Enabled', enabledDisplayStringV2(recurringRun.trigger, recurringRun.status!)]); + triggers.push(['Trigger', triggerDisplayString(recurringRun.trigger)]); + triggers.push(['Max. concurrent runs', recurringRun.max_concurrency]); + triggers.push(['Catchup', `${!recurringRun.no_catchup}`]); + triggers.push(['Start time', '']); + + return triggers; +} + +function getRunParameters(recurringRun: V2beta1RecurringRun): Array> { + let parameters: Array> = []; + + parameters = Object.entries(recurringRun.runtime_config?.parameters || []).map(param => [ + param[0] || '', + param[1] || '', + ]); + + return parameters; +} From 6b7739d8b6317081675024557d6b6c91ac6e1d60 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Fri, 1 Sep 2023 10:39:35 -0700 Subject: [PATCH 130/253] chore(frontend): Refactor NewExperiment to functional component (#9948) * Add an alternative component for new experiment page. * Add unit tests. Assign default values for useState(). * Move the files to functional_components folder. Update the imported files from absolute path. * Move handing experiemnt creation error to useEffect rename the create helper to createExperiment. * Move redirection logic after createExperiment() is succeed to useEffect(). * Fix dependency array for handling createExperiment() succeed case. * Remove pipeline id in dependency array. 
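In short, the pattern behind these bullets: the react-query mutation only records its
response in component state, and the success snackbar plus the redirect to the New Run
page run from a useEffect keyed on that state rather than from inside the creation
callback. A rough, hypothetical sketch of that shape (the hook name and callback below
are illustrative placeholders, not identifiers from this patch):

    import { useEffect, useState } from 'react';
    import { useMutation } from 'react-query';

    // Create a resource, then run side effects (snackbar, navigation) in an effect
    // that fires only after the response has landed in state.
    export function useCreateThenRedirect<T>(
      create: () => Promise<T>,
      onCreated: (created: T) => void,
    ) {
      const [created, setCreated] = useState<T>();
      const mutation = useMutation(create, { onSuccess: setCreated });
      useEffect(() => {
        if (created) {
          onCreated(created);
        }
      }, [created, onCreated]);
      return mutation;
    }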
--- frontend/src/pages/NewExperiment.tsx | 28 +- .../NewExperimentFC.test.tsx | 299 ++++++++++++++++++ .../functional_components/NewExperimentFC.tsx | 194 ++++++++++++ 3 files changed, 510 insertions(+), 11 deletions(-) create mode 100644 frontend/src/pages/functional_components/NewExperimentFC.test.tsx create mode 100644 frontend/src/pages/functional_components/NewExperimentFC.tsx diff --git a/frontend/src/pages/NewExperiment.tsx b/frontend/src/pages/NewExperiment.tsx index b3bd154696..7f1dc8fb13 100644 --- a/frontend/src/pages/NewExperiment.tsx +++ b/frontend/src/pages/NewExperiment.tsx @@ -15,21 +15,23 @@ */ import * as React from 'react'; -import BusyButton from '../atoms/BusyButton'; +import BusyButton from 'src/atoms/BusyButton'; import Button from '@material-ui/core/Button'; -import Input from '../atoms/Input'; +import Input from 'src/atoms/Input'; import { V2beta1Experiment } from 'src/apisv2beta1/experiment'; -import { Apis } from '../lib/Apis'; -import { Page, PageProps } from './Page'; -import { RoutePage, QUERY_PARAMS } from '../components/Router'; +import { Apis } from 'src/lib/Apis'; +import { Page, PageProps } from 'src/pages/Page'; +import { RoutePage, QUERY_PARAMS } from 'src/components/Router'; import { TextFieldProps } from '@material-ui/core/TextField'; -import { ToolbarProps } from '../components/Toolbar'; -import { URLParser } from '../lib/URLParser'; +import { ToolbarProps } from 'src/components/Toolbar'; +import { URLParser } from 'src/lib/URLParser'; import { classes, stylesheet } from 'typestyle'; -import { commonCss, padding, fontsize } from '../Css'; -import { logger, errorToMessage } from '../lib/Utils'; +import { commonCss, padding, fontsize } from 'src/Css'; +import { logger, errorToMessage } from 'src/lib/Utils'; import { NamespaceContext } from 'src/lib/KubeflowClient'; -import { getLatestVersion } from './NewRunV2'; +import { getLatestVersion } from 'src/pages/NewRunV2'; +import { NewExperimentFC } from 'src/pages/functional_components/NewExperimentFC'; +import { FeatureKey, isFeatureEnabled } from 'src/features'; interface NewExperimentState { description: string; @@ -201,7 +203,11 @@ export class NewExperiment extends Page<{ namespace?: string }, NewExperimentSta const EnhancedNewExperiment: React.FC = props => { const namespace = React.useContext(NamespaceContext); - return ; + return isFeatureEnabled(FeatureKey.FUNCTIONAL_COMPONENT) ? ( + + ) : ( + + ); }; export default EnhancedNewExperiment; diff --git a/frontend/src/pages/functional_components/NewExperimentFC.test.tsx b/frontend/src/pages/functional_components/NewExperimentFC.test.tsx new file mode 100644 index 0000000000..7ab083a6c5 --- /dev/null +++ b/frontend/src/pages/functional_components/NewExperimentFC.test.tsx @@ -0,0 +1,299 @@ +/* + * Copyright 2023 The Kubeflow Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; +import * as React from 'react'; +import { CommonTestWrapper } from 'src/TestWrapper'; +import TestUtils from 'src/TestUtils'; +import { NewExperimentFC } from './NewExperimentFC'; +import { Apis } from 'src/lib/Apis'; +import { PageProps } from 'src/pages/Page'; +import * as features from 'src/features'; +import { RoutePage, QUERY_PARAMS } from 'src/components/Router'; + +describe('NewExperiment', () => { + const TEST_EXPERIMENT_ID = 'new-experiment-id'; + const createExperimentSpy = jest.spyOn(Apis.experimentServiceApiV2, 'createExperiment'); + const historyPushSpy = jest.fn(); + const updateDialogSpy = jest.fn(); + const updateSnackbarSpy = jest.fn(); + const updateToolbarSpy = jest.fn(); + + function generateProps(): PageProps { + return { + history: { push: historyPushSpy } as any, + location: { pathname: RoutePage.NEW_EXPERIMENT } as any, + match: '' as any, + toolbarProps: { actions: {}, breadcrumbs: [], pageTitle: TEST_EXPERIMENT_ID }, + updateBanner: () => null, + updateDialog: updateDialogSpy, + updateSnackbar: updateSnackbarSpy, + updateToolbar: updateToolbarSpy, + }; + } + + beforeEach(() => { + jest.clearAllMocks(); + // mock both v2_alpha and functional feature keys are enable. + jest.spyOn(features, 'isFeatureEnabled').mockReturnValue(true); + + createExperimentSpy.mockImplementation(() => ({ + experiment_id: 'new-experiment-id', + display_name: 'new-experiment-name', + })); + }); + + it('does not include any action buttons in the toolbar', () => { + render( + + + , + ); + + expect(updateToolbarSpy).toHaveBeenCalledWith({ + actions: {}, + breadcrumbs: [{ displayName: 'Experiments', href: RoutePage.EXPERIMENTS }], + pageTitle: 'New experiment', + }); + }); + + it("enables the 'Next' button when an experiment name is entered", () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + }); + + it("re-disables the 'Next' button when an experiment name is cleared after having been entered", () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + // Remove experiment name + fireEvent.change(experimentNameInput, { target: { value: '' } }); + expect(nextButton.closest('button')?.disabled).toEqual(true); + }); + + it('updates the experiment name', () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + expect(experimentNameInput.closest('input')?.value).toBe('new-experiment-name'); + }); + + it('create new experiment', async () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const experimentDescriptionInput = screen.getByLabelText('Description'); + fireEvent.change(experimentDescriptionInput, { + target: { value: 'new-experiment-description' }, + }); + const nextButton = screen.getByText('Next'); + 
expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'new-experiment-description', + display_name: 'new-experiment-name', + }), + ); + }); + }); + + it('create new experiment with namespace provided', async () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalledWith( + expect.objectContaining({ + description: '', + display_name: 'new-experiment-name', + namespace: 'test-ns', + }), + ); + }); + }); + + it('navigates to NewRun page upon successful creation', async () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalledWith( + expect.objectContaining({ + description: '', + display_name: 'new-experiment-name', + }), + ); + }); + expect(historyPushSpy).toHaveBeenCalledWith( + RoutePage.NEW_RUN + `?experimentId=${TEST_EXPERIMENT_ID}` + `&firstRunInExperiment=1`, + ); + }); + + it('includes pipeline ID and version ID in NewRun page query params if present', async () => { + const pipelineId = 'some-pipeline-id'; + const pipelineVersionId = 'version-id'; + const listPipelineVersionsSpy = jest.spyOn(Apis.pipelineServiceApiV2, 'listPipelineVersions'); + listPipelineVersionsSpy.mockImplementation(() => ({ + pipeline_versions: [{ pipeline_version_id: pipelineVersionId }], + })); + + const props = generateProps(); + props.location.search = `?${QUERY_PARAMS.pipelineId}=${pipelineId}`; + + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalledWith( + expect.objectContaining({ + description: '', + display_name: 'new-experiment-name', + }), + ); + }); + + expect(historyPushSpy).toHaveBeenCalledWith( + RoutePage.NEW_RUN + + `?experimentId=${TEST_EXPERIMENT_ID}` + + `&pipelineId=${pipelineId}` + + `&pipelineVersionId=${pipelineVersionId}` + + `&firstRunInExperiment=1`, + ); + }); + + it('shows snackbar confirmation after experiment is created', async () => { + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalledWith( + expect.objectContaining({ + description: '', + display_name: 'new-experiment-name', + }), + ); + }); + expect(updateSnackbarSpy).toHaveBeenLastCalledWith({ + 
autoHideDuration: 10000, + message: 'Successfully created new Experiment: new-experiment-name', + open: true, + }); + }); + + it('shows error dialog when experiment creation fails', async () => { + TestUtils.makeErrorResponseOnce(createExperimentSpy, 'There was something wrong!'); + render( + + + , + ); + + const experimentNameInput = screen.getByLabelText(/Experiment name/); + fireEvent.change(experimentNameInput, { target: { value: 'new-experiment-name' } }); + const nextButton = screen.getByText('Next'); + expect(nextButton.closest('button')?.disabled).toEqual(false); + + fireEvent.click(nextButton); + await waitFor(() => { + expect(createExperimentSpy).toHaveBeenCalled(); + }); + + expect(updateDialogSpy).toHaveBeenCalledWith( + expect.objectContaining({ + content: 'There was something wrong!', + title: 'Experiment creation failed', + }), + ); + }); + + it('navigates to experiment list page upon cancellation', () => { + render( + + + , + ); + + const cancelButton = screen.getByText('Cancel'); + fireEvent.click(cancelButton); + + expect(historyPushSpy).toHaveBeenCalledWith(RoutePage.EXPERIMENTS); + }); +}); diff --git a/frontend/src/pages/functional_components/NewExperimentFC.tsx b/frontend/src/pages/functional_components/NewExperimentFC.tsx new file mode 100644 index 0000000000..4cc53e8caf --- /dev/null +++ b/frontend/src/pages/functional_components/NewExperimentFC.tsx @@ -0,0 +1,194 @@ +/* + * Copyright 2023 The Kubeflow Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import React, { useEffect, useState } from 'react'; +import BusyButton from 'src/atoms/BusyButton'; +import Button from '@material-ui/core/Button'; +import Input from 'src/atoms/Input'; +import { V2beta1Experiment } from 'src/apisv2beta1/experiment'; +import { Apis } from 'src/lib/Apis'; +import { PageProps } from 'src/pages/Page'; +import { RoutePage, QUERY_PARAMS } from 'src/components/Router'; +import { URLParser } from 'src/lib/URLParser'; +import { classes, stylesheet } from 'typestyle'; +import { commonCss, padding, fontsize } from 'src/Css'; +import { errorToMessage } from 'src/lib/Utils'; +import { getLatestVersion } from 'src/pages/NewRunV2'; +import { useMutation } from 'react-query'; +import { V2beta1PipelineVersion } from 'src/apisv2beta1/pipeline'; + +const css = stylesheet({ + errorMessage: { + color: 'red', + }, + // TODO: move to Css.tsx and probably rename. 
+ explanation: { + fontSize: fontsize.small, + }, +}); + +interface ExperimentProps { + namespace?: string; +} + +type NewExperimentFCProps = ExperimentProps & PageProps; + +export function NewExperimentFC(props: NewExperimentFCProps) { + const urlParser = new URLParser(props); + const { namespace, updateDialog, updateSnackbar, updateToolbar } = props; + const [description, setDescription] = useState(''); + const [experimentName, setExperimentName] = useState(''); + const [isbeingCreated, setIsBeingCreated] = useState(false); + const [errorMessage, setErrorMessage] = useState(''); + const [latestVersion, setLatestVersion] = useState(); + const [experimentResponse, setExperimentResponse] = useState(); + const [errMsgFromApi, setErrMsgFromApi] = useState(); + const pipelineId = urlParser.get(QUERY_PARAMS.pipelineId); + + useEffect(() => { + updateToolbar({ + actions: {}, + breadcrumbs: [{ displayName: 'Experiments', href: RoutePage.EXPERIMENTS }], + pageTitle: 'New experiment', + }); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + useEffect(() => { + if (pipelineId) { + (async () => { + setLatestVersion(await getLatestVersion(pipelineId)); + })(); + } + }, [pipelineId]); + + // Handle the redirection work when createExperiment is succeed + useEffect(() => { + if (experimentResponse) { + const searchString = pipelineId + ? new URLParser(props).build({ + [QUERY_PARAMS.experimentId]: experimentResponse.experiment_id || '', + [QUERY_PARAMS.pipelineId]: pipelineId, + [QUERY_PARAMS.pipelineVersionId]: latestVersion?.pipeline_version_id || '', + [QUERY_PARAMS.firstRunInExperiment]: '1', + }) + : new URLParser(props).build({ + [QUERY_PARAMS.experimentId]: experimentResponse.experiment_id || '', + [QUERY_PARAMS.firstRunInExperiment]: '1', + }); + props.history.push(RoutePage.NEW_RUN + searchString); + + updateSnackbar({ + autoHideDuration: 10000, + message: `Successfully created new Experiment: ${experimentResponse.display_name}`, + open: true, + }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [experimentResponse]); + + // Handle the error when createExperiment() is failed + useEffect(() => { + if (!experimentName) { + setErrorMessage('Experiment name is required'); + } else { + setErrorMessage(''); + } + }, [experimentName]); + + useEffect(() => { + if (errMsgFromApi) { + updateDialog({ + buttons: [{ text: 'Dismiss' }], + onClose: () => setIsBeingCreated(false), + content: errMsgFromApi, + title: 'Experiment creation failed', + }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [errMsgFromApi]); + + const newExperimentMutation = useMutation((experiment: V2beta1Experiment) => { + return Apis.experimentServiceApiV2.createExperiment(experiment); + }); + + const createExperiment = () => { + let newExperiment: V2beta1Experiment = { + display_name: experimentName, + description: description, + namespace: namespace, + }; + setIsBeingCreated(true); + + newExperimentMutation.mutate(newExperiment, { + onSuccess: response => { + setExperimentResponse(response); + }, + onError: async err => { + setErrMsgFromApi(await errorToMessage(err)); + }, + }); + }; + + return ( +
+
+
Experiment details
+
+ Think of an Experiment as a space that contains the history of all pipelines and their + associated runs +
+ + setExperimentName(event.target.value)} + value={experimentName} + autoFocus={true} + variant='outlined' + /> + setDescription(event.target.value)} + required={false} + value={description} + variant='outlined' + /> + +
+ + +
{errorMessage}
+
+
+
+ ); +} From 3fb199658f68e7debf4906d9ce32a9a307e39243 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 1 Sep 2023 12:07:33 -0700 Subject: [PATCH 131/253] chore(components): convert GCPC docstrings to markdown PiperOrigin-RevId: 562015575 --- components/google-cloud/docs/source/conf.py | 12 + .../feature_attribution_graph_component.py | 90 +++--- .../component.py | 26 +- .../llm_evaluation/component.py | 32 +- .../llm_retrieval_metrics/component.py | 5 +- .../llm_safety_bias/component.py | 17 +- .../evaluation_llm_safety_bias_pipeline.py | 12 +- .../container/utils/execution_context.py | 2 +- .../learn_to_learn_forecasting_pipeline.yaml | 80 ++--- ...ence_to_sequence_forecasting_pipeline.yaml | 80 ++--- ...sion_transformer_forecasting_pipeline.yaml | 80 ++--- ...es_dense_encoder_forecasting_pipeline.yaml | 80 ++--- ...ml_tabular_feature_selection_pipeline.yaml | 200 ++++++------ .../tabular/automl_tabular_v2_pipeline.yaml | 80 ++--- ...et_hyperparameter_tuning_job_pipeline.yaml | 42 +-- .../tabular/tabnet_trainer_pipeline.yaml | 42 +-- ...ep_hyperparameter_tuning_job_pipeline.yaml | 42 +-- .../wide_and_deep_trainer_pipeline.yaml | 42 +-- ...st_hyperparameter_tuning_job_pipeline.yaml | 42 +-- .../tabular/xgboost_trainer_pipeline.yaml | 42 +-- .../preview/llm/infer/component.py | 11 +- .../preview/llm/rlhf/component.py | 21 +- .../model_evaluation/data_bias_component.py | 36 +-- .../evaluation_llm_classification_pipeline.py | 23 +- ...evaluation_llm_text_generation_pipeline.py | 23 +- .../feature_attribution_graph_component.py | 90 +++--- .../model_evaluation/model_bias_component.py | 28 +- .../types/artifact_types.py | 31 +- .../google_cloud_pipeline_components/utils.py | 3 +- .../forecasting/prophet_predict_pipeline.yaml | 40 +-- .../forecasting/prophet_trainer_pipeline.yaml | 2 +- .../tabular/automl_tabular_pipeline.yaml | 200 ++++++------ .../v1/automl/training_job/__init__.py | 2 +- .../component.py | 34 +- .../automl_image_training_job/component.py | 14 +- .../automl_tabular_training_job/component.py | 20 +- .../automl_text_training_job/component.py | 10 +- .../automl_video_training_job/component.py | 8 +- .../v1/batch_predict_job/__init__.py | 2 +- .../v1/batch_predict_job/component.py | 156 +++++----- .../v1/bigquery/__init__.py | 2 +- .../v1/custom_job/__init__.py | 8 +- .../v1/custom_job/component.py | 36 +-- .../v1/custom_job/utils.py | 182 +++++------ .../v1/dataflow/__init__.py | 2 +- .../v1/dataflow/python_job/component.py | 2 +- .../v1/dataproc/__init__.py | 2 +- .../create_pyspark_batch/component.py | 20 +- .../dataproc/create_spark_batch/component.py | 14 +- .../create_spark_r_batch/component.py | 16 +- .../create_spark_sql_batch/component.py | 16 +- .../v1/dataset/__init__.py | 2 +- .../dataset/create_image_dataset/component.py | 14 +- .../create_tabular_dataset/component.py | 8 +- .../dataset/create_text_dataset/component.py | 14 +- .../create_time_series_dataset/component.py | 8 +- .../dataset/create_video_dataset/component.py | 16 +- .../dataset/export_image_dataset/component.py | 4 +- .../export_tabular_dataset/component.py | 4 +- .../dataset/export_text_dataset/component.py | 4 +- .../export_time_series_dataset/component.py | 4 +- .../dataset/export_video_dataset/component.py | 4 +- .../dataset/get_vertex_dataset/component.py | 6 +- .../dataset/import_image_dataset/component.py | 6 +- .../dataset/import_text_dataset/component.py | 8 +- .../dataset/import_video_dataset/component.py | 8 +- .../v1/endpoint/__init__.py | 2 +- 
.../v1/endpoint/create_endpoint/component.py | 14 +- .../v1/endpoint/delete_endpoint/component.py | 6 +- .../v1/endpoint/deploy_model/component.py | 26 +- .../v1/endpoint/undeploy_model/component.py | 6 +- .../v1/forecasting/__init__.py | 2 +- .../v1/hyperparameter_tuning_job/__init__.py | 8 +- .../v1/hyperparameter_tuning_job/component.py | 36 +-- .../v1/hyperparameter_tuning_job/utils.py | 15 +- .../v1/model/__init__.py | 2 +- .../v1/model/delete_model/component.py | 12 +- .../v1/model/export_model/component.py | 20 +- .../v1/model/upload_model/component.py | 22 +- .../classification_component.py | 44 +-- .../error_analysis_pipeline.py | 64 ++-- .../evaluated_annotation_pipeline.py | 58 ++-- ...ml_tabular_feature_attribution_pipeline.py | 284 +++++++++-------- .../evaluation_automl_tabular_pipeline.py | 215 +++++++------ ...uation_automl_unstructure_data_pipeline.py | 225 +++++++------- ...evaluation_feature_attribution_pipeline.py | 294 +++++++++--------- .../model_evaluation/forecasting_component.py | 26 +- .../model_evaluation/regression_component.py | 18 +- .../v1/vertex_notification_email/component.py | 4 +- .../v1/wait_gcp_resources/component.py | 10 +- components/google-cloud/setup.py | 1 + 91 files changed, 1804 insertions(+), 1822 deletions(-) diff --git a/components/google-cloud/docs/source/conf.py b/components/google-cloud/docs/source/conf.py index dc5f68c3ed..d8d574af93 100644 --- a/components/google-cloud/docs/source/conf.py +++ b/components/google-cloud/docs/source/conf.py @@ -19,6 +19,7 @@ import textwrap from typing import List +import commonmark import docstring_parser from google_cloud_pipeline_components import utils from kfp import components @@ -334,6 +335,17 @@ def process_named_docstring_returns(app, what, name, obj, options, lines): lines.extend([':returns:', '']) lines.extend(returns_section) + markdown_to_rst(app, what, name, obj, options, lines) + + +def markdown_to_rst(app, what, name, obj, options, lines): + md = '\n'.join(lines) + ast = commonmark.Parser().parse(md) + rst = commonmark.ReStructuredTextRenderer().render(ast) + lines.clear() + lines += rst.splitlines() + + def setup(app): app.connect('autodoc-process-docstring', process_named_docstring_returns) app.connect( diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py index f0ed330f1f..63d827946d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/feature_attribution/feature_attribution_graph_component.py @@ -69,21 +69,21 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. 
If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -101,20 +101,19 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. 
The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -126,37 +125,36 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. 
Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -171,11 +169,11 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py index ea29082db0..24cfa5da95 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_classification_postprocessor/component.py @@ -81,11 +81,11 @@ def llm_classification_predictions_postprocessor_internal( (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC @@ -95,7 +95,7 @@ def llm_classification_predictions_postprocessor_internal( encryption_spec_key_name: Customer-managed encryption key options. 
If set, resources created by this Custom Job will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. @@ -179,18 +179,18 @@ def llm_classification_predictions_postprocessor_graph_component( same order they appear in the batch predictions input file. display_name: The name of the custom job. machine_type: The machine type of this custom job. If not set, defaulted to - ``e2-highmem-16``. More details: + `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC @@ -200,7 +200,7 @@ def llm_classification_predictions_postprocessor_graph_component( encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this Custom Job will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py index 8628bc66fc..972e752097 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_evaluation/component.py @@ -48,26 +48,26 @@ def model_evaluation_text_generation( """Computes evaluation metrics of a text generation model. Supports evaluating large language models performing the following generative - tasks: ``summarization``, ``question-answering``, and ``text-generation``. + tasks: `summarization`, `question-answering`, and `text-generation`. Args: project: The GCP project that runs the pipeline component. location: The GCP region that runs the pipeline component. 
evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that - specific task. Currently supported tasks are: ``summarization``, - ``question-answering`, and ``text-generation``. + specific task. Currently supported tasks are: `summarization`, + `question-answering`, and `text-generation`. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited - by ``.``. Alternatively referred to as the ground truth (or + by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. If not set, defaulted to - ``inputs.ground_truth``. + `inputs.ground_truth`. prediction_field_name: The full name path of the prediction field in the prediction file. Formatted to be able to find nested columns, delimited by - ``.``. If not set, defaulted to ``predictions.content``. + `.`. If not set, defaulted to `predictions.content`. predictions_format: The file format for the LLM Batch Prediction results. - ``jsonl`` is currently the only allowed format. If not set, defaulted to - ``jsonl``. + `jsonl` is currently the only allowed format. If not set, defaulted to + `jsonl`. joined_predictions_gcs_source: An Artifact with an URI pointing toward a GCS directory or a GCS file with joined prediction & ground truth files to be used for this evaluation. @@ -77,7 +77,7 @@ def model_evaluation_text_generation( only ground truth files to be used for this evaluation. display_name: The name of the evaluation custom job. machine_type: The machine type of this custom job. If not set, defaulted to - ``e2-highmem-16``. More details: + `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline @@ -87,11 +87,11 @@ def model_evaluation_text_generation( Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. reserved_ip_ranges: A list of names for the reserved ip ranges under the VPC @@ -101,12 +101,12 @@ def model_evaluation_text_generation( encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. 
Returns: - evaluation_metrics: ``Metrics`` artifact representing the language model + evaluation_metrics: `Metrics` artifact representing the language model evaluation metrics. gcp_resources: Serialized gcp_resources proto tracking the custom job. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py index 7a4be67fec..3dfe2d4ff0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py @@ -8,7 +8,6 @@ from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER - _IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' @@ -42,10 +41,10 @@ def llm_retrieval_metrics( location: Required. The GCP region that runs the pipeline component. golden_docs_pattern: Required. Files where queries and corresponding golden doc ids are saved. The path pattern can contain glob characters - (``*``, ``?``, and ``[...]`` sets). + (`*`, `?`, and `[...]` sets). embedding_retrieval_results_pattern: Required. Files where doc retrieval results for each query are saved. The path pattern can contain glob - characters (``*``, ``?``, and ``[...]`` sets). + characters (`*`, `?`, and `[...]` sets). retrieval_metrics_top_k_list: Required. k values for retrieval metrics, for example, precision@k, accuracy@k, etc. If more than one value, separated by comma. e.g., "1,5,10". diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py index 002bb7aeae..24ab20fa97 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/component.py @@ -21,7 +21,6 @@ from kfp.dsl import Output from kfp.dsl import OutputPath - _IMAGE_URI = 'us-docker.pkg.dev/vertex-ai-restricted/llm-eval/llm-bias:v0.2' @@ -49,7 +48,7 @@ def llm_safety_bias_metrics( directory with prediction results to be used for this evaluation. display_name: The display name of the evaluation custom job. machine_type: The machine type of this custom job. If not set, defaulted to - ``e2-highmem-16``. More details: + `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline @@ -59,24 +58,24 @@ def llm_safety_bias_metrics( Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. 
For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. Returns: - llm_safety_bias_evaluation_metrics: ``Artifact`` tracking the LLM safety + llm_safety_bias_evaluation_metrics: `Artifact` tracking the LLM safety bias evaluation metrics output. gcp_resources: Serialized gcp_resources proto tracking the custom job. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py index 0c57b6d7ef..c0c5fcf75b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_safety_bias/evaluation_llm_safety_bias_pipeline.py @@ -129,17 +129,17 @@ def evaluation_llm_safety_bias_pipeline( Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. 
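The docstring hunks above all make the same mechanical change — RST double-backtick literals become Markdown single-backtick code spans — and the conf.py hunk earlier in this patch is what keeps the rendered docs intact: it renders the Markdown back to reStructuredText inside Sphinx's `autodoc-process-docstring` hook. A minimal standalone sketch of that conversion step, using the same `commonmark` calls the patch adds (the helper name and sample input here are illustrative, not part of the patch):

```python
# Minimal sketch, assuming the `commonmark` package (the same one added to
# conf.py in this patch). Renders Markdown docstring lines to RST, mirroring
# the markdown_to_rst hook wired into autodoc-process-docstring.
import commonmark


def md_lines_to_rst(lines):
    """Convert a list of Markdown docstring lines to RST lines."""
    md = '\n'.join(lines)
    ast = commonmark.Parser().parse(md)
    rst = commonmark.ReStructuredTextRenderer().render(ast)
    return rst.splitlines()


# A single-backtick code span should come back as an RST ``literal``, so the
# converted docstrings still render the same way in Sphinx.
print(md_lines_to_rst(['Only used if `machine_type` is set.']))
```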
diff --git a/components/google-cloud/google_cloud_pipeline_components/container/utils/execution_context.py b/components/google-cloud/google_cloud_pipeline_components/container/utils/execution_context.py index c336cb8033..f43b53680f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/utils/execution_context.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/utils/execution_context.py @@ -21,7 +21,7 @@ class ExecutionContext: """Execution context for running inside Google Cloud Pipeline Components. The base class is aware of the GCPC environment and can cascade - a pipeline cancel event to the operation through ``on_cancel`` handler. + a pipeline cancel event to the operation through `on_cancel` handler. Args: on_cancel: optional, function to handle KFP cancel event. """ diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml index 3fad373d5c..03944e1674 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -4037,7 +4037,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4045,25 +4045,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4089,7 +4089,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4166,37 +4166,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4499,7 +4499,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4507,25 +4507,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4551,7 +4551,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4628,37 +4628,37 @@ components: to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml index 4eb9e7da3c..469ebc3139 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -4019,7 +4019,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4027,25 +4027,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4071,7 +4071,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4148,37 +4148,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4481,7 +4481,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4489,25 +4489,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4533,7 +4533,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4610,37 +4610,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml index c7f96df41c..eb1ab81b2a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -4012,7 +4012,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4020,25 +4020,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4064,7 +4064,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4141,37 +4141,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. 
If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4474,7 +4474,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4482,25 +4482,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4526,7 +4526,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4603,37 +4603,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml index e7b6bcedd8..1280f38b7d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -4037,7 +4037,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4045,25 +4045,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4089,7 +4089,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -4166,37 +4166,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4499,7 +4499,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4507,25 +4507,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4551,7 +4551,7 @@ components: provided encryption key. 
Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4628,37 +4628,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml index 4993452158..2ff954e4ac 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -5346,7 +5346,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5354,25 +5354,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5398,7 +5398,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5475,37 +5475,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5808,7 +5808,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5816,25 +5816,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5860,7 +5860,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5937,37 +5937,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6270,7 +6270,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6278,25 +6278,25 @@ components: "based on ISO-8601" format. 
In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6322,7 +6322,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6399,37 +6399,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -6732,7 +6732,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6740,25 +6740,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6784,7 +6784,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6861,37 +6861,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7194,7 +7194,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7202,25 +7202,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7246,7 +7246,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -7323,37 +7323,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml index 8889594111..c2ac05de8e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -4441,7 +4441,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4449,25 +4449,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4493,7 +4493,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4570,37 +4570,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4903,7 +4903,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4911,25 +4911,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4955,7 +4955,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5032,37 +5032,37 @@ components: to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml index 59208869c2..10076631fb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -1768,7 +1768,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1776,25 +1776,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1820,7 +1820,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1897,37 +1897,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2410,7 +2410,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml index f0133e6d52..9cf550cd89 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -1721,7 +1721,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1729,25 +1729,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1773,7 +1773,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1850,37 +1850,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. 
If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2363,7 +2363,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml index 42683860a8..d4bc2cdf2f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -1706,7 +1706,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1714,25 +1714,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1758,7 +1758,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
+ `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1835,37 +1835,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2243,7 +2243,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml index 06e25c73d9..625b0b3b19 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -1674,7 +1674,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1682,25 +1682,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1726,7 +1726,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1803,37 +1803,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2211,7 +2211,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml index 13071125a4..1c37b19c30 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -1815,7 +1815,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1823,25 +1823,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1867,7 +1867,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1944,37 +1944,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. 
If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2352,7 +2352,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml index 185af0e76a..9101e59a3d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -2079,7 +2079,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -2087,25 +2087,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has `google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2131,7 +2131,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -2208,37 +2208,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2616,7 +2616,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index 161b6202c2..03be883b3c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -22,7 +22,6 @@ from google_cloud_pipeline_components._implementation.llm import private_text_importer import kfp - PipelineOutput = NamedTuple('Outputs', output_prediction_gcs_path=str) @@ -46,14 +45,14 @@ def infer_pipeline( Args: large_model_reference: Name of the base model. Supported values are - ``text-bison@001``, ``t5-small``, ``t5-large``, ``t5-xl`` and ``t5-xxl``. - ``text-bison@001`` and ``t5-small`` are supported in ``us-central1` and - ``europe-west4``. ``t5-large``, ``t5-xl`` and ``t5-xxl`` are only - supported in ``europe-west4``. + `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. + `text-bison@001` and `t5-small` are supported in `us-central1` and + `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in + `europe-west4`. model_checkpoint: Cloud storage path to the model checkpoint. prompt_dataset: Cloud storage path to an unlabled prompt dataset used for reinforcement learning. The dataset format is jsonl. 
Each example in the - dataset must have an ``input_text`` field that contains the prompt. + dataset must have an `input_text` field that contains the prompt. prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index d2b0da0f97..175ea233b5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -28,7 +28,6 @@ from google_cloud_pipeline_components.preview.llm.infer import component import kfp - PipelineOutput = NamedTuple( 'Outputs', model_resource_name=str, endpoint_resource_name=str ) @@ -61,17 +60,17 @@ def rlhf_pipeline( Args: prompt_dataset: Cloud storage path to an unlabled prompt dataset used for reinforcement learning. The dataset format is jsonl. Each example in the - dataset must have an ``input_text`` field that contains the prompt. + dataset must have an `input_text` field that contains the prompt. preference_dataset: Cloud storage path to a human preference dataset used to train a reward model. The dataset format is jsonl. Each example in the - dataset must contain the following fields: ``input_text`` that contains - the prompt, ``candidate_0`` and ``candidate_1`` that contain candidate - responses, ``choice`` that specifies the preferred candidate. + dataset must contain the following fields: `input_text` that contains the + prompt, `candidate_0` and `candidate_1` that contain candidate responses, + `choice` that specifies the preferred candidate. large_model_reference: Name of the base model. Supported values are - ``text-bison@001``, ``t5-small``, ``t5-large``, ``t5-xl`` and ``t5-xxl``. - ``text-bison@001`` and ``t5-small`` are supported in ``us-central1` and - ``europe-west4``. ``t5-large``, ``t5-xl`` and ``t5-xxl`` are only - supported in ``europe-west4``. + `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. + `text-bison@001` and `t5-small` are supported in `us-central1` and + `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in + `europe-west4`. model_display_name: Name of the fine-tuned model shown in the Model Registry. If not provided, a default name will be created. prompt_sequence_length: Maximum tokenized sequence length for input text. @@ -103,12 +102,12 @@ def rlhf_pipeline( follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field. - deploy_model: Whether to deploy the model to an endpoint in ``us-central1``. + deploy_model: Whether to deploy the model to an endpoint in `us-central1`. Default is True. eval_dataset: Optional Cloud storage path to an evaluation dataset. If provided, inference will be performed on this dataset after training. The dataset format is jsonl. Each example in the dataset must contain a field - ``input_text`` that contains the prompt. + `input_text` that contains the prompt. project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used. location: Location used to run custom jobs. 
If not specified the location diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py index 3b25fac5a0..c3d2d6f7ae 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py @@ -53,43 +53,43 @@ def detect_data_bias( location: Location for running data bias detection. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, - delimited by ``.``. Alternatively referred to as the ground truth (or + delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. bias_configs: A list of - ``google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig``. + `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. When provided, compute data bias metrics for each defined slice. Below is an example of how to format this input. 1: First, create a BiasConfig. - ``from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig`` + `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - ``bias_config = BiasConfig(bias_slices=SliceSpec(configs={ - 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))`` + `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ + 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` 2: Create a list to store the bias configs into. - ``bias_configs = []`` + `bias_configs = []` 3: Format each BiasConfig into a JSON or Dict. - ``bias_config_json = json_format.MessageToJson(bias_config`` or - ``bias_config_dict = json_format.MessageToDict(bias_config).`` + `bias_config_json = json_format.MessageToJson(bias_config` or + `bias_config_dict = json_format.MessageToDict(bias_config).` 4: Combine each bias_config JSON into a list. - ``bias_configs.append(bias_config_json)`` + `bias_configs.append(bias_config_json)` 5: Finally, pass bias_configs as an parameter for this component. - ``DetectDataBiasOp(bias_configs=bias_configs)`` - dataset_format: The file format for the dataset. ``jsonl`` and ``csv`` are the + `DetectDataBiasOp(bias_configs=bias_configs)` + dataset_format: The file format for the dataset. `jsonl` and `csv` are the currently allowed formats. dataset_storage_source_uris: Google Cloud - Storage URI(-s) to unmanaged test datasets.``jsonl`` and ``csv`` is currently - allowed format. If ``dataset`` is also provided, this field will be + Storage URI(-s) to unmanaged test datasets.`jsonl` and `csv` is currently + allowed format. If `dataset` is also provided, this field will be overriden by the provided Vertex Dataset. - dataset: A ``google.VertexDataset`` - artifact of the dataset. If ``dataset_gcs_source`` is also provided, this + dataset: A `google.VertexDataset` + artifact of the dataset. 
If `dataset_gcs_source` is also provided, this Vertex Dataset argument will override the GCS source. encryption_spec_key_name: Customer-managed encryption key options for the Dataflow. If this is set, then all resources created by the Dataflow will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index 2df0303395..4461332987 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -23,7 +23,6 @@ from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp from kfp import dsl - _PIPELINE_NAME = 'evaluation-llm-classification-pipeline' @@ -60,7 +59,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -78,7 +77,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that specific task. Currently supported Classification tasks is: - ``text-classification``. + `text-classification`. evaluation_class_labels: The JSON array of class names for the target_field, in the same order they appear in the batch predictions input file. batch_predict_instances_format: The format in which instances are given, @@ -90,7 +89,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. machine_type: The machine type of the custom jobs in this pipeline. If not - set, defaulted to ``e2-highmem-16`. More details: + set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline @@ -100,18 +99,18 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. 
network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_disk_size_gb: The disk size (in GB) of the machine executing the - evaluation run. If not set, defaulted to ``50``. + evaluation run. If not set, defaulted to `50`. dataflow_max_num_workers: The max number of workers executing the evaluation - run. If not set, defaulted to ``5``. + run. If not set, defaulted to `5`. dataflow_service_account: Custom service account to run Dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: @@ -121,7 +120,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index 827bf9dcc5..d93487ade3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -22,7 +22,6 @@ from kfp import dsl from kfp.dsl import Metrics - _PIPELINE_NAME = 'evaluation-llm-text-generation-pipeline' @@ -46,8 +45,8 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul """LLM Text Generation Evaluation pipeline. This pipeline supports evaluating large language models, publisher or managed - models, performing the following generative tasks: ``summarization``, - ``question-answering``, and ``text-generation``. + models, performing the following generative tasks: `summarization`, + `question-answering`, and `text-generation`. Args: project: The GCP project that runs the pipeline components. @@ -67,8 +66,8 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul resources. evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that - specific task. 
Currently supported tasks are: ``summarization``, - ``question-answering``, ``text-generation``. + specific task. Currently supported tasks are: `summarization`, + `question-answering`, `text-generation`. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. Only "jsonl" is currently supported. For more details about this input config, see @@ -79,7 +78,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. machine_type: The machine type of this custom job. If not set, defaulted to - ``e2-highmem-16``. More details: + `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline @@ -89,17 +88,17 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name, as in ``myVPC``. To specify this field, you must have - already configured VPC Network Peering for Vertex AI + be peered. For example, `projects/12345/global/networks/myVPC`. Format is + of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a + network name, as in `myVPC`. To specify this field, you must have already + configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py index 7eda608857..610d041eed 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py @@ -68,21 +68,21 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -100,20 +100,19 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. 
If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -125,37 +124,36 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. 
batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -170,11 +168,11 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py index 1442977634..d069656f81 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py @@ -52,10 +52,10 @@ def detect_model_bias( location: Location for running data bias detection. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited - by ``.``. Alternatively referred to as the ground truth (or + by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. predictions_format: The file format for the batch prediction results. - ``jsonl`` is the only currently allow format. + `jsonl` is the only currently allow format. predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named @@ -65,33 +65,33 @@ def detect_model_bias( data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". bias_configs: A list of - ``google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig``. + `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. When provided, compute model bias metrics for each defined slice. Below is an example of how to format this input. 1: First, create a BiasConfig. 
- ``from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig`` + `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - ``bias_config = BiasConfig(bias_slices=SliceSpec(configs={ - 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))`` + `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ + 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` 2: Create a list to store the bias configs into. - ``bias_configs = []`` + `bias_configs = []` 3: Format each BiasConfig into a JSON or Dict. - ``bias_config_json = json_format.MessageToJson(bias_config`` or - ``bias_config_dict = json_format.MessageToDict(bias_config)`` + `bias_config_json = json_format.MessageToJson(bias_config` or + `bias_config_dict = json_format.MessageToDict(bias_config)` 4: Combine each bias_config JSON into a list. - ``bias_configs.append(bias_config_json)`` + `bias_configs.append(bias_config_json)` 5: Finally, pass bias_configs as an parameter for this component. - ``DetectModelBiasOp(bias_configs=bias_configs)`` + `DetectModelBiasOp(bias_configs=bias_configs)` thresholds: A list of float values to be used as prediction decision thresholds. encryption_spec_key_name: Customer-managed encryption key options for the Dataflow. If this is set, then all resources created by the Dataflow will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py index 1878b50432..4d770a98c2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py +++ b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py @@ -14,13 +14,13 @@ """Artifact types corresponding to Google Cloud Resources produced and consumed by GCPC components. These artifact types can be used in your custom KFP SDK components similarly to -other `KFP SDK artifacts -`_. +other [KFP SDK +artifacts](https://www.kubeflow.org/docs/components/pipelines/v2/data-types/artifacts/). If you wish to produce Google artifacts from your own components, it is -recommended that you use `Containerized Python Components -`_. +recommended that you use [Containerized Python +Components](https://www.kubeflow.org/docs/components/pipelines/v2/components/containerized-python-components/). You should assign metadata to the Google artifacts according to the artifact's -schema (provided by each artifact's ``.schema`` attribute). +schema (provided by each artifact's `.schema` attribute). 
""" @@ -39,13 +39,14 @@ import textwrap from typing import Any, Dict, Optional + from kfp import dsl _RESOURCE_NAME_KEY = 'resourceName' class VertexModel(dsl.Artifact): - """An artifact representing a Vertex AI `Model resource `_.""" + """An artifact representing a Vertex AI [Model resource](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models).""" schema_title = 'google.VertexModel' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -86,7 +87,7 @@ def create( class VertexEndpoint(dsl.Artifact): - """An artifact representing a Vertex AI `Endpoint resource `_.""" + """An artifact representing a Vertex AI [Endpoint resource](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints).""" schema_title = 'google.VertexEndpoint' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -127,7 +128,7 @@ def create( class VertexBatchPredictionJob(dsl.Artifact): - """An artifact representing a Vertex AI `BatchPredictionJob resource `_.""" + """An artifact representing a Vertex AI [BatchPredictionJob resource](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#resource:-batchpredictionjob).""" schema_title = 'google.VertexBatchPredictionJob' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -195,7 +196,7 @@ def create( class VertexDataset(dsl.Artifact): - """An artifact representing a Vertex AI `Dataset resource `_.""" + """An artifact representing a Vertex AI [Dataset resource](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets).""" schema_title = 'google.VertexDataset' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -236,7 +237,7 @@ def create( class BQMLModel(dsl.Artifact): - """An artifact representing a Google Cloud `BQML Model resource `_.""" + """An artifact representing a Google Cloud [BQML Model resource](https://cloud.google.com/bigquery/docs/reference/rest/v2/models).""" schema_title = 'google.BQMLModel' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -282,7 +283,7 @@ def create( class BQTable(dsl.Artifact): - """An artifact representing a Google Cloud `BQ Table resource `_.""" + """An artifact representing a Google Cloud [BQ Table resource](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables).""" schema_title = 'google.BQTable' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -330,7 +331,7 @@ def create( class UnmanagedContainerModel(dsl.Artifact): - """An artifact representing a Vertex AI `unmanaged container model `_.""" + """An artifact representing a Vertex AI [unmanaged container model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ModelContainerSpec).""" schema_title = 'google.UnmanagedContainerModel' schema_version = '0.0.1' schema = textwrap.dedent("""\ @@ -410,7 +411,7 @@ def create( class ClassificationMetrics(dsl.Artifact): - """An artifact representing evaluation `classification metrics `_.""" + """An artifact representing evaluation [classification metrics](https://cloud.google.com/vertex-ai/docs/tabular-data/classification-regression/evaluate-model#classification_1).""" schema_title = 'google.ClassificationMetrics' schema_version = '0.0.1' @@ -586,7 +587,7 @@ def create( class RegressionMetrics(dsl.Artifact): - """An artifact representing evaluation `regression metrics `_.""" + """An artifact representing evaluation [regression metrics](https://cloud.google.com/vertex-ai/docs/tabular-data/classification-regression/evaluate-model#regression_1).""" schema_title = 
'google.RegressionMetrics' schema_version = '0.0.1' @@ -652,7 +653,7 @@ def create( class ForecastingMetrics(dsl.Artifact): - """An artifact representing evaluation `forecasting metrics `_.""" + """An artifact representing evaluation [forecasting metrics](https://cloud.google.com/vertex-ai/docs/tabular-data/forecasting/evaluate-model#metrics).""" schema_title = 'google.ForecastingMetrics' schema_version = '0.0.1' diff --git a/components/google-cloud/google_cloud_pipeline_components/utils.py b/components/google-cloud/google_cloud_pipeline_components/utils.py index 7e7032e7e5..3ff0ba53d2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/utils.py @@ -27,7 +27,6 @@ from google.protobuf import json_format - # note: this is a slight dependency on KFP SDK implementation details # other code should not similarly depend on the stability of kfp.placeholders DOCS_INTEGRATED_OUTPUT_RENAMING_PREFIX = "output__" @@ -46,7 +45,7 @@ def build_serverless_customjob_container_spec( project: Project to run the job in. location: Location to run the job in. custom_job_payload: Payload to pass to the custom job. This dictionary is - serialized and passed as the custom job ``--payload``. + serialized and passed as the custom job `--payload`. gcp_resources: GCP resources that can be used to track the job. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml index 843c8412d1..3ccdd129b2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -901,7 +901,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -909,25 +909,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -953,7 +953,7 @@ components: provided encryption key. 
Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1030,37 +1030,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml index a89f24fe42..aff359fcc6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -1863,7 +1863,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml index b251e1779e..f9b15715d9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -5223,7 +5223,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5231,25 +5231,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5275,7 +5275,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5352,37 +5352,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. 
If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5685,7 +5685,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5693,25 +5693,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5737,7 +5737,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5814,37 +5814,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6147,7 +6147,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6155,25 +6155,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6199,7 +6199,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6276,37 +6276,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6609,7 +6609,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6617,25 +6617,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6661,7 +6661,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6738,37 +6738,37 @@ components: to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7071,7 +7071,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7079,25 +7079,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has ``google.rpc.Status` - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7123,7 +7123,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -7200,37 +7200,37 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py index 73fff4032e..3b00d88901 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Create `Vertex AI AutoML training jobs `_ for image, text, video, and forecasting.""" +"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide) for image, text, video, and forecasting.""" from google_cloud_pipeline_components.v1.automl.training_job.automl_forecasting_training_job.component import automl_forecasting_training_job as AutoMLForecastingTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_image_training_job.component import automl_image_training_job as AutoMLImageTrainingJobRunOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py index 003a672ac4..d214a3a255 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py @@ -81,14 +81,14 @@ def automl_forecasting_training_job( If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + Any of `training_fraction_split`, `validation_fraction_split` and + `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. + If using predefined splits, `predefined_split_column_name` must be provided. Supported only for tabular Datasets. Timestamp splits: Assigns input data to training, validation, and test sets @@ -122,8 +122,8 @@ def automl_forecasting_training_job( The amount of time into the future for which forecasted values for the target are returned. Expressed in number of units defined by the [data_granularity_unit] and [data_granularity_count] field. Inclusive. - data_granularity_unit: The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_unit: The data granularity unit. Accepted values are `minute`, + `hour`, `day`, `week`, `month`, `year`. data_granularity_count: The number of data granularity units between data points in the training data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other values of [data_granularity_unit], must be 1. @@ -135,8 +135,8 @@ def automl_forecasting_training_job( the Model. This is ignored if Dataset is not provided. predefined_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or - value in the column) must be one of {``TRAIN``, - ``VALIDATE``, ``TEST``}, and it defines to which set the + value in the column) must be one of {`TRAIN`, + `VALIDATE`, `TEST`}, and it defines to which set the given piece of data is assigned. 
If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. @@ -167,10 +167,10 @@ def automl_forecasting_training_job( If False, then the export is not performed. export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions. Expected format: - ``bq://::`` + `bq://::
`` + `bq://::
` If not specified, then results are exported to the following auto-created BigQuery table: - ``:export_evaluated_examples__.evaluated_examples`` + `:export_evaluated_examples__.evaluated_examples` Applies only if [export_evaluated_data_items] is True. export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], if the table exists, for exported test set predictions. If False, and the @@ -232,10 +232,10 @@ def automl_forecasting_training_job( model_version_description: The description of the model version being uploaded by this job. hierarchy_group_columns: A list of time series attribute column names that define the time series hierarchy. Only one level of hierarchy is - supported, ex. ``region`` for a hierarchy of stores or - ``department`` for a hierarchy of products. If multiple columns + supported, ex. `region` for a hierarchy of stores or + `department` for a hierarchy of products. If multiple columns are specified, time series will be grouped by their combined - values, ex. (``blue``, ``large``) for ``color`` and ``size``, up + values, ex. (`blue`, `large`) for `color` and `size`, up to 5 columns are accepted. If no group columns are specified, all time series are considered to be part of the same group. hierarchy_group_total_weight: The weight of the loss for predictions aggregated over @@ -249,14 +249,14 @@ def automl_forecasting_training_job( booleans; if the value of the row is True, generate a sliding window from that row. window_stride_length: Step length used to generate input examples. Every - ``window_stride_length`` rows will be used to generate a sliding + `window_stride_length` rows will be used to generate a sliding window. window_max_count: Number of rows that should be used to generate input examples. If the total row count is larger than this number, the input data will be randomly sampled to hit the count. holiday_regions: The geographical regions to use when creating holiday features. This option is only allowed when data_granularity_unit - is ``day``. Acceptable values can come from any of the following + is `day`. Acceptable values can come from any of the following levels: Top level: GLOBAL Second level: continental regions @@ -312,17 +312,17 @@ def automl_forecasting_training_job( training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. + by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. 
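For context, the forecasting component documented above is exported as `AutoMLForecastingTrainingJobRunOp` in the `__init__.py` hunk shown earlier. Below is a minimal, hedged sketch of wiring it into a KFP pipeline: `project`, `display_name`, `target_column`, `time_column`, `time_series_identifier_column`, and `forecast_horizon` are assumed names that do not appear in this hunk, and required inputs not documented here (such as the training dataset) are omitted, so treat it as a shape rather than a working pipeline.

from kfp import dsl
from google_cloud_pipeline_components.v1.automl.training_job import (
    AutoMLForecastingTrainingJobRunOp,
)


@dsl.pipeline(name='forecasting-training-sketch')
def forecasting_pipeline():
    # Illustrative sketch only: parameters marked "assumed" are not shown in
    # the hunk above, and required inputs such as the Vertex dataset are
    # omitted here.
    AutoMLForecastingTrainingJobRunOp(
        project='my-project',                      # assumed
        display_name='demand-forecast',            # assumed
        target_column='sales',                     # assumed
        time_column='date',                        # assumed
        time_series_identifier_column='store_id',  # assumed
        forecast_horizon=30,                       # assumed
        # Parameters documented in the hunk above:
        data_granularity_unit='day',   # minute | hour | day | week | month | year
        data_granularity_count=1,
        training_fraction_split=0.8,   # the three fractions must sum to at most 1
        validation_fraction_split=0.1,
        test_fraction_split=0.1,
        holiday_regions=['GLOBAL'],
    )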
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py index 5ade101a9a..f68fe04169 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py @@ -54,8 +54,8 @@ def automl_image_training_job( If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + Any of `training_fraction_split`, `validation_fraction_split` and + `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. @@ -66,8 +66,8 @@ def automl_image_training_job( containing DataItems. If any of the filters in this message are to match nothing, then they can be set as '-' (the minus sign). - If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and - ``test_filter_split`` must be provided. + If using filter splits, all of `training_filter_split`, `validation_filter_split` and + `test_filter_split` must be provided. Supported only for unstructured Datasets. @@ -202,17 +202,17 @@ def automl_image_training_job( training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. + by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. 
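For context, the image-training hunk above documents the data-fraction and data-filter split parameters exposed by `AutoMLImageTrainingJobRunOp` (imported under that alias in the earlier `__init__.py` hunk). A hedged sketch follows: `project`, `display_name`, and the filter expressions are placeholders, and required inputs such as the image dataset are omitted.

from kfp import dsl
from google_cloud_pipeline_components.v1.automl.training_job import (
    AutoMLImageTrainingJobRunOp,
)


@dsl.pipeline(name='image-training-sketch')
def image_training_pipeline():
    # Illustrative sketch only: the filter splits and the encryption-key
    # format are the parameters documented above; everything marked
    # "assumed" or "placeholder" is not taken from this diff.
    AutoMLImageTrainingJobRunOp(
        project='my-project',        # assumed
        display_name='flowers-clf',  # assumed
        # All three filter splits must be provided together; '-' matches nothing.
        training_filter_split='labels.ml_use=training',      # placeholder expression
        validation_filter_split='labels.ml_use=validation',  # placeholder expression
        test_filter_split='labels.ml_use=test',              # placeholder expression
        # Key format documented in the hunk above:
        training_encryption_spec_key_name=(
            'projects/my-project/locations/my-region/'
            'keyRings/my-kr/cryptoKeys/my-key'
        ),
    )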
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py index 1b680aae56..f77d56cc92 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py @@ -65,14 +65,14 @@ def automl_tabular_training_job( If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + Any of `training_fraction_split`, `validation_fraction_split` and + `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. + If using predefined splits, `predefined_split_column_name` must be provided. Supported only for tabular Datasets. Timestamp splits: Assigns input data to training, validation, and test sets @@ -99,8 +99,8 @@ def automl_tabular_training_job( the Model. This is ignored if Dataset is not provided. predefined_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or - value in the column) must be one of {``training``, - ``validation``, ``test``}, and it defines to which set the + value in the column) must be one of {`training`, + `validation`, `test`}, and it defines to which set the given piece of data is assigned. If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. @@ -173,10 +173,10 @@ def automl_tabular_training_job( If False, then the export is not performed. export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions. Expected format: - ``bq://::
`` + `bq://::
` If not specified, then results are exported to the following auto-created BigQuery table: - ``:export_evaluated_examples__.evaluated_examples`` + `:export_evaluated_examples__.evaluated_examples` Applies only if [export_evaluated_data_items] is True. export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], if the table exists, for exported test set predictions. If False, and the @@ -253,17 +253,17 @@ def automl_tabular_training_job( training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. + by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py index ba21409c61..541ef7a1bf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py @@ -48,8 +48,8 @@ def automl_text_training_job( If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + Any of `training_fraction_split`, `validation_fraction_split` and + `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. @@ -123,17 +123,17 @@ def automl_text_training_job( training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. 
+ by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py index 62c8484f20..4e14de8df0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py @@ -46,7 +46,7 @@ def automl_video_training_job( If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: - ``training_fraction_split``, and ``test_fraction_split`` may optionally + `training_fraction_split`, and `test_fraction_split` may optionally be provided, they must sum to up to 1. If none of the fractions are set, by default roughly 80% of data will be used for training, and 20% for test. Data filter splits: @@ -126,17 +126,17 @@ def automl_video_training_job( training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. + by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py index 9cc8710051..7a836012ad 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Serve batch predictions from your models using `Vertex AI Batch Predictions `_.""" +"""Serve batch predictions from your models using [Vertex AI Batch Predictions](https://cloud.google.com/vertex-ai/docs/predictions/overview?_ga=2.161419069.-1686833729.1684288907#batch_predictions).""" from google_cloud_pipeline_components.v1.batch_predict_job.component import model_batch_predict as ModelBatchPredictOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py index 78fd2027a7..0f3166a431 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py @@ -65,130 +65,130 @@ def model_batch_predict( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a Google Cloud Vertex `BatchPredictionJob `_ and waits for it to complete. + """Creates a Google Cloud Vertex [BatchPredictionJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs) and waits for it to complete. - For more details, see `BatchPredictionJob.Create `_. + For more details, see [BatchPredictionJob.Create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/create). Args: job_display_name: The user-defined name of this BatchPredictionJob. location: Location for creating the BatchPredictionJob. instances_format: The format in which instances are - given, must be one of the `Model `_'s supportedInputStorageFormats. + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)'s supportedInputStorageFormats. For more details about this input config, see - `InputConfig `_ + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.) predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. - For more details about this output config, see `OutputConfig `_. + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig). model: The Model used to get predictions via this job. Must share the same ancestor Location. Starting this job has no impact on any existing deployments of the Model and their resources. Either this or - ``unmanaged_container_model`` must be specified. + `unmanaged_container_model` must be specified. unmanaged_container_model: The unmanaged container model used to get predictions via this job. This should be used for models that are not uploaded to Vertex. Either this or model must be specified. gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction - on. They must match ``instances_format``. May contain wildcards. For more - information on wildcards, see `WildcardNames `_. - For more details about this input config, see `InputConfig `_. + on. They must match `instances_format`. May contain wildcards. For more + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). 
bigquery_source_input_uri: BigQuery URI to a table, up to 2000 characters long. For example: - ``projectId.bqDatasetId.bqTableId`` For more details about this input + `projectId.bqDatasetId.bqTableId` For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. model_parameters: The parameters that govern the predictions. The schema of the parameters instance_type: The format of the instance that the Model accepts. Vertex AI will convert compatible - `InstancesFormat `_ + [InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) to the specified format. Supported values are: - ``object``: Each input is converted to JSON object format. - * For ``bigquery``, each row is converted to an object. - * For ``jsonl``, each line of the JSONL input must be an object. - * Does not apply to ``csv``, ``file-list``, ``tf-record``, or ``tf-record-gzip``. - ``array``: Each input is converted to JSON array format. - * For ``bigquery``, each row is converted to an array. The order + `object`: Each input is converted to JSON object format. + * For `bigquery`, each row is converted to an object. + * For `jsonl`, each line of the JSONL input must be an object. + * Does not apply to `csv`, `file-list`, `tf-record`, or `tf-record-gzip`. + `array`: Each input is converted to JSON array format. + * For `bigquery`, each row is converted to an array. The order of columns is determined by the BigQuery column order, unless - `included_fields `_ is populated. - ``included_fields`` must be populated for specifying field orders. - * For ``jsonl``, if each line of the JSONL input is an object, - ``included_fields`` must be populated for specifying field orders. - * Does not apply to `csv`, ``file-list``, ``tf-record``, or - ``tf-record-gzip``. + [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) is populated. + `included_fields` must be populated for specifying field orders. + * For `jsonl`, if each line of the JSONL input is an object, + `included_fields` must be populated for specifying field orders. + * Does not apply to `csv`, `file-list`, `tf-record`, or + `tf-record-gzip`. If not specified, Vertex AI converts the batch prediction input as follows: - * For ``bigquery`` and ``csv``, the behavior is the same as ``array`. The + * For `bigquery` and `csv`, the behavior is the same as `array`. The order of columns is the same as defined in the file or table, unless included_fields is populated. - * For ``jsonl``, the prediction instance format is determined by + * For `jsonl`, the prediction instance format is determined by each line of the input. - * For ``tf-record``/``tf-record-gzip``, each record will be converted to - an object in the format of ``{"b64": }``, where ```` is + * For `tf-record`/`tf-record-gzip`, each record will be converted to + an object in the format of `{"b64": }`, where `` is the Base64-encoded string of the content of the record. - * For ``file-list``, each file in the list will be converted to an - object in the format of ``{"b64": }``, where ```` is + * For `file-list`, each file in the list will be converted to an + object in the format of `{"b64": }`, where `` is the Base64-encoded string of the content of the file. key_field: The name of the field that is considered as a key. 
The values identified by the key field is not included in the transformed instances that is sent to the Model. This is similar to - specifying this name of the field in `excluded_fields `_. In addition, + specifying this name of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). In addition, the batch prediction output will not include the instances. Instead the output will only include the value of the key field, in a field named - ``key`` in the output: - * For ``jsonl`` output format, the output will have a ``key`` field - instead of the ``instance`` field. - * For ``csv``/``bigquery`` output format, the output will have have a ``key`` + `key` in the output: + * For `jsonl` output format, the output will have a `key` field + instead of the `instance` field. + * For `csv`/`bigquery` output format, the output will have have a `key` column instead of the instance feature columns. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. included_fields: Fields that will be included in the prediction instance that is sent to the Model. - If ``instance_type`` is ``array``, the order of field names in - ``included_fields`` also determines the order of the values in the array. - When ``included_fields`` is populated, ``excluded_fields`` must be empty. + If `instance_type` is `array`, the order of field names in + `included_fields` also determines the order of the values in the array. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. excluded_fields: Fields that will be excluded in the prediction instance that is sent to the Model. Excluded will be attached to the batch prediction output if key_field is not specified. - When ``excluded_fields`` is populated, ``included_fields`` must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. - may be specified via the Model's ``parameters_schema_uri``. + may be specified via the Model's `parameters_schema_uri`. gcs_destination_output_uri_prefix: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, - ..., ``predictions_N.`` are created where ```` - depends on chosen ``predictions_format``, and N may equal 0001 and + `predictions_0001.`, `predictions_0002.`, + ..., `predictions_N.` are created where `` + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as - value has ``google.rpc.Status`` containing only ``code`` and - ``message`` fields. For more details about this output config, see + per their schema, followed by an additional `error` field which as + value has `google.rpc.Status` containing only `code` and + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` - and ``prediction`` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + `predictions`, and `errors`. If the Model has both `instance` + and `prediction` schemata defined then the tables have columns as + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table + Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status `_ - represented as a STRUCT, and containing only ``code`` and - ``message``. For more details about this output config, see + column, which as values has [google.rpc.Status](Status) + represented as a STRUCT, and containing only `code` and + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports @@ -200,19 +200,19 @@ def model_batch_predict( For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``accelerator_count``. Only used if - ``machine_type`` is set. For more details about the machine spec, see + attached to the machine as per `accelerator_count`. Only used if + `machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec accelerator_count: The number of accelerators to attach - to the ``machine_type``. Only used if ``machine_type`` is set. For more + to the `machine_type`. Only used if `machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI - decides starting number, not greater than ``max_replica_count``. Only - used if ``machine_type`` is set. + decides starting number, not greater than `max_replica_count`. Only + used if `machine_type` is set. 
max_replica_count: The maximum number of machine replicas the batch operation may be scaled - to. Only used if ``machine_type`` is set. + to. Only used if `machine_type` is set. manual_batch_tuning_parameters_batch_size: The number of the records (e.g. instances) of the operation given in each batch to a machine replica. Machine type, and size of a single record should be @@ -222,31 +222,31 @@ def model_batch_predict( fail. generate_explanation: Generate explanation along with the batch prediction results. This will cause the batch prediction - output to include explanations based on the ``prediction_format``: - - ``bigquery``: output includes a column named ``explanation``. The value is + output to include explanations based on the `prediction_format`: - + `bigquery`: output includes a column named `explanation`. The value is a struct that conforms to the [aiplatform.gapic.Explanation] object. - - ``jsonl``: The JSON objects on each line include an additional entry - keyed ``explanation``. The value of the entry is a JSON object that - conforms to the [aiplatform.gapic.Explanation] object. - ``csv``: + `jsonl`: The JSON objects on each line include an additional entry + keyed `explanation`. The value of the entry is a JSON object that + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: Generating explanations for CSV format is not supported. If this field is set to true, either the Model.explanation_spec or explanation_metadata and explanation_parameters must be populated. explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if - ``generate_explanation`` is set to `True`. This value overrides the - value of ``Model.explanation_metadata``. All fields of - ``explanation_metadata`` are optional in the request. If a field of the - ``explanation_metadata`` object is not populated, the corresponding + `generate_explanation` is set to `True`. This value overrides the + value of `Model.explanation_metadata`. All fields of + `explanation_metadata` are optional in the request. If a field of the + `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if - ``generate_explanation`` is set to `True`. This value overrides the + `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of - ``explanation_parameters`` are optional in the request. If a field of - the ``explanation_parameters`` object is not populated, the - corresponding field of the ``Model.explanation_parameters`` object is + `explanation_parameters` are optional in the request. If a field of + the `explanation_parameters` object is not populated, the + corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. labels: The labels with user-defined metadata to @@ -259,7 +259,7 @@ def model_batch_predict( key options for a BatchPredictionJob. If this is set, then all resources created by the BatchPredictionJob will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
+ `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to create the BatchPredictionJob. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py index a40784d810..156ea9fc19 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create and execute machine learning models via SQL using `Google Cloud BigQuery ML `_.""" +"""Create and execute machine learning models via SQL using [Google Cloud BigQuery ML](https://cloud.google.com/bigquery/docs/bqml-introduction).""" from google_cloud_pipeline_components.v1.bigquery.create_model.component import bigquery_create_model_job as BigqueryCreateModelJobOp from google_cloud_pipeline_components.v1.bigquery.detect_anomalies_model.component import bigquery_detect_anomalies_job as BigqueryDetectAnomaliesModelJobOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py index 6fa091caf6..397119290d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py @@ -11,13 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Run KFP components as `Vertex AI Custom Training Jobs `_ with customized worker and cloud configurations.""" +"""Run KFP components as [Vertex AI Custom Training Jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) with customized worker and cloud configurations.""" from google_cloud_pipeline_components.v1.custom_job.component import custom_training_job as CustomTrainingJobOp -from google_cloud_pipeline_components.v1.custom_job.utils import ( - create_custom_training_job_from_component, - create_custom_training_job_op_from_component, -) +from google_cloud_pipeline_components.v1.custom_job.utils import create_custom_training_job_from_component +from google_cloud_pipeline_components.v1.custom_job.utils import create_custom_training_job_op_from_component __all__ = [ 'CustomTrainingJobOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py index 737d7c1bc7..62bafbe6b5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py @@ -39,10 +39,10 @@ def custom_training_job( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Launch a Vertex AI `custom training job `_ using the `CustomJob `_ API. 
+ """Launch a Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - See `Create custom training jobs - `_ for + See [Create custom training jobs + ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: @@ -52,7 +52,7 @@ def custom_training_job( worker_pool_specs: Serialized json spec of the worker pools including machine type and Docker image. All worker pools except the first one are optional and can be skipped by providing an - empty value. See `more information `_. + empty value. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#WorkerPoolSpec). timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". @@ -60,23 +60,23 @@ def custom_training_job( gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. service_account: Sets the default service account for workload run-as - account. The `service account - `_ + account. The [service account + ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this - run-as account. If unspecified, the Vertex AI Custom Code `Service Agent - `_ + run-as account. If unspecified, the Vertex AI Custom Code [Service Agent + ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. tensorboard: The name of a Vertex AI Tensorboard resource to which this CustomJob will upload Tensorboard logs. - enable_web_access: Whether you want Vertex AI to enable `interactive shell + enable_web_access: Whether you want Vertex AI to enable [interactive shell access - `_ - to training containers. If ``True``, you can access interactive shells at + ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) + to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a + be peered. For example, `projects/12345/global/networks/myVPC`. Format + is of the form `projects/{project}/global/networks/{network}`. Where + `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC @@ -84,17 +84,17 @@ def custom_training_job( within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. base_output_directory: The Cloud Storage location to store the output of - this CustomJob or HyperparameterTuningJob. See `more information - `_. + this CustomJob or HyperparameterTuningJob. See [more information + ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. 
See - `more information `_. + [more information](https://goo.gl/xmQnxf). encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the CustomJob. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. """ # fmt: on return utils.build_serverless_customjob_container_spec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index 9182c85e80..cea7f018a8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -70,97 +70,97 @@ def create_custom_training_job_from_component( base_output_directory: str = '', labels: Optional[Dict[str, str]] = None, ) -> Callable: - """Convert a KFP component into Vertex AI `custom training job `_ using the `CustomJob `_ API. - - This utility converts a `KFP component - `_ - provided to ``component_spec`` into ``CustomTrainingJobOp`` component. Your - components inputs, outputs, and logic are carried over, with additional - `CustomJob - `_ - parameters exposed. - - Note that this utility constructs a ClusterSpec where the master and all the - workers use the same spec, meaning all disk/machine spec related parameters - will apply to all replicas. This is suitable for uses cases such as executing - a training component over multiple replicas with `MultiWorkerMirroredStrategy - `_ - or `MirroredStrategy - `_. - - See `Create custom training jobs - `_ for - more information. - - Args: - component_spec: A KFP component. - display_name: The name of the CustomJob. If not provided the component's - name will be used instead. - replica_count: The count of instances in the cluster. One replica always - counts towards the master in worker_pool_spec[0] and the remaining - replicas will be allocated in worker_pool_spec[1]. See `more information. - `_ - machine_type: The type of the machine to run the CustomJob. The default - value is "n1-standard-4". See `more information - `_. - accelerator_type: The type of accelerator(s) that may be attached to the - machine per ``accelerator_count``. See `more information - `_. - accelerator_count: The number of accelerators to attach to the machine. - Defaults to 1 if ``accelerator_type`` is set. - boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: - "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent - Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot - be changed as a pipeline parameter. - boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). - ``boot_disk_size_gb`` is set as a static value and cannot be changed as a - pipeline parameter. - timeout: The maximum job running time. The default is 7 days. A duration in - seconds with up to nine fractional digits, terminated by 's', for example: - "3.5s". - restart_job_on_worker_restart: Restarts the entire CustomJob if a worker - gets restarted. 
This feature can be used by distributed training jobs that - are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as - account. The `service account - `_ - running the pipeline submitting jobs must have act-as permission on this - run-as account. If unspecified, the Vertex AI Custom Code `Service Agent - `_ - for the CustomJob's project. - network: The full name of the Compute Engine network to which the job should - be peered. For example, ``projects/12345/global/networks/myVPC``. Format - is of the form ``projects/{project}/global/networks/{network}``. Where - ``{project}`` is a project number, as in ``12345``, and ``{network}`` is a - network name. Private services access must already be configured for the - network. If left unspecified, the job is not peered with any network. - encryption_spec_key_name: Customer-managed encryption key options for the - CustomJob. If this is set, then all resources created by the CustomJob - will be encrypted with the provided encryption key. - tensorboard: The name of a Vertex AI Tensorboard resource to which this - CustomJob will upload Tensorboard logs. - enable_web_access: Whether you want Vertex AI to enable `interactive shell - access - `_ - to training containers. If ``True``, you can access interactive shells at - the URIs given by [CustomJob.web_access_uris][]. - reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC - network that can be used for this job. If set, we will deploy the job - within the provided IP ranges. Otherwise, the job will be deployed to any - IP ranges under the provided VPC network. - nfs_mounts: A list of `NfsMount - `_ - resource specs in Json dict format. For more details about mounting NFS - for CustomJob, see `Mount an NFS share for custom training - `_. - base_output_directory: The Cloud Storage location to store the output of - this CustomJob or HyperparameterTuningJob. See `more information - `_. - labels: The labels with user-defined metadata to organize the CustomJob. See - `more information `_. - - Returns: - A KFP component with CustomJob specification applied. + """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. + + This utility converts a [KFP component + ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) + provided to `component_spec` into `CustomTrainingJobOp` component. Your + components inputs, outputs, and logic are carried over, with additional + [CustomJob + ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) + parameters exposed. + + Note that this utility constructs a ClusterSpec where the master and all the + workers use the same spec, meaning all disk/machine spec related parameters + will apply to all replicas. This is suitable for uses cases such as executing + a training component over multiple replicas with [MultiWorkerMirroredStrategy + ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) + or [MirroredStrategy + ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). + + See [Create custom training jobs + ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for + more information. + + Args: + component_spec: A KFP component. + display_name: The name of the CustomJob. 
If not provided the component's + name will be used instead. + replica_count: The count of instances in the cluster. One replica always + counts towards the master in worker_pool_spec[0] and the remaining + replicas will be allocated in worker_pool_spec[1]. See [more information. + ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default + value is "n1-standard-4". See [more information + ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the + machine per `accelerator_count`. See [more information + ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + accelerator_count: The number of accelerators to attach to the machine. + Defaults to 1 if `accelerator_type` is set. + boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: + "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent + Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot + be changed as a pipeline parameter. + boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). + `boot_disk_size_gb` is set as a static value and cannot be changed as a + pipeline parameter. + timeout: The maximum job running time. The default is 7 days. A duration in + seconds with up to nine fractional digits, terminated by 's', for + example: "3.5s". + restart_job_on_worker_restart: Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by distributed training jobs + that are not resilient to workers leaving and joining a job. + service_account: Sets the default service account for workload run-as + account. The [service account + ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + running the pipeline submitting jobs must have act-as permission on this + run-as account. If unspecified, the Vertex AI Custom Code [Service Agent + ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, `projects/12345/global/networks/myVPC`. + Format is of the form `projects/{project}/global/networks/{network}`. + Where `{project}` is a project number, as in `12345`, and `{network}` is + a network name. Private services access must already be configured for + the network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + tensorboard: The name of a Vertex AI TensorBoard resource to which this + CustomJob will upload TensorBoard logs. + enable_web_access: Whether you want Vertex AI to enable [interactive shell + access + ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) + to training containers. If `True`, you can access interactive shells at + the URIs given by [CustomJob.web_access_uris][]. + reserved_ip_ranges: A list of names for the reserved IP ranges under the + VPC network that can be used for this job. If set, we will deploy the job + within the provided IP ranges. Otherwise, the job will be deployed to any + IP ranges under the provided VPC network. 
+ nfs_mounts: A list of [NfsMount + ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) + resource specs in Json dict format. For more details about mounting NFS + for CustomJob, see [Mount an NFS share for custom training + ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of + this CustomJob or HyperparameterTuningJob. See [more information + ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + labels: The labels with user-defined metadata to organize the CustomJob. + See [more information](https://goo.gl/xmQnxf). + + Returns: + A KFP component with CustomJob specification applied. """ # This function constructs a Custom Job component based on the input # component, by performing a 3-way merge of the inputs/outputs of the diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py index d223ac39d0..9b766731b5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create `Google Cloud Dataflow `_ jobs from within Vertex AI Pipelines.""" +"""Create [Google Cloud Dataflow](https://cloud.google.com/dataflow) jobs from within Vertex AI Pipelines.""" from google_cloud_pipeline_components.v1.dataflow.python_job.component import dataflow_python as DataflowPythonJobOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py index 01a077aa9e..40d213cb2f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py @@ -36,7 +36,7 @@ def dataflow_python( Args: location: Location of the Dataflow job. If not set, defaults to - ``'us-central1'``. + `'us-central1'`. python_module_path: The GCS path to the Python file to run. temp_location: A GCS path for Dataflow to stage temporary job files created during the execution of the pipeline. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py index 89d3f2c3f9..c23660af80 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Create `Google Cloud Dataproc `_ jobs from within Vertex AI Pipelines.""" +"""Create [Google Cloud Dataproc](https://cloud.google.com/dataproc) jobs from within Vertex AI Pipelines.""" from google_cloud_pipeline_components.v1.dataproc.create_pyspark_batch.component import dataproc_create_pyspark_batch as DataprocPySparkBatchOp from google_cloud_pipeline_components.v1.dataproc.create_spark_batch.component import dataproc_create_spark_batch as DataprocSparkBatchOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py index 850631a77e..5276ea785c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py @@ -51,18 +51,18 @@ def dataproc_create_pyspark_batch( Args: location: Location of the Dataproc batch workload. If - not set, defaults to ``"us-central1"``. + not set, defaults to `"us-central1"`. batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``. + value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of ``"key": - value`` pairs. - Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. + be associated with a batch. An object containing a list of `"key": + value` pairs. + Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. @@ -79,20 +79,20 @@ def dataproc_create_pyspark_batch( spark_history_dataproc_cluster: The Spark History Server configuration for the workload. main_python_file_uri: The HCFS URI of the main Python - file to use as the Spark driver. Must be a ``.py`` file. + file to use as the Spark driver. Must be a `.py` file. python_file_uris: HCFS file URIs of Python files to - pass to the PySpark framework. Supported file types: ``.py``, ``.egg``, - and ``.zip``. + pass to the PySpark framework. Supported file types: `.py`, `.egg`, + and `.zip`. jar_file_uris: HCFS URIs of jar files to add to the classpath of the Spark driver and tasks. file_uris: HCFS URIs of files to be placed in the working directory of each executor. archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: - ``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``. + `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. args: The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as - ``--conf``, since a collision can occur that causes an incorrect batch + `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. 
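A minimal usage sketch for the PySpark batch component documented above, with placeholder GCS paths, project ID, and label values; only parameters that appear in the docstring are passed, so batch_id and location fall back to their documented defaults:

from kfp import dsl
from google_cloud_pipeline_components.v1.dataproc import DataprocPySparkBatchOp


@dsl.pipeline(name='dataproc-pyspark-batch-example')
def pyspark_batch_pipeline():
    # Submit a PySpark batch workload; batch_id is omitted so a default name
    # is generated, and location falls back to the documented 'us-central1'.
    DataprocPySparkBatchOp(
        project='my-project',  # placeholder project ID
        main_python_file_uri='gs://my-bucket/jobs/wordcount.py',  # placeholder path
        args=['--input', 'gs://my-bucket/data/input.txt'],  # placeholder args
        labels={'team': 'data-eng'},  # placeholder label
    )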
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py index ed8e3136e3..ab6a860969 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py @@ -51,18 +51,18 @@ def dataproc_create_spark_batch( Args: location: Location of the Dataproc batch workload. If - not set, defaults to ``"us-central1"``. + not set, defaults to `"us-central1"`. batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``. + value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of ``"key": - value`` pairs. - Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. + be associated with a batch. An object containing a list of `"key": + value` pairs. + Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. @@ -89,10 +89,10 @@ def dataproc_create_spark_batch( each executor. archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: - ``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``. + `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. args: The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as - ``--conf``, since a collision can occur that causes an incorrect batch + `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py index 9e0923d072..811ba5cc8e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py @@ -49,18 +49,18 @@ def dataproc_create_spark_r_batch( Args: location: Location of the Dataproc batch workload. If not set, defaults to - ``"us-central1"``. + `"us-central1"`. batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``. + value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. 
Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of ``"key": - value`` pairs. - Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. + be associated with a batch. An object containing a list of `"key": + value` pairs. + Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. @@ -76,15 +76,15 @@ def dataproc_create_spark_r_batch( spark_history_dataproc_cluster: The Spark History Server configuration for the workload. main_r_file_uri: The HCFS URI of the main R file to use as the driver. - Must be a ``.R`` or ``.r`` file. + Must be a `.R` or `.r` file. file_uris: HCFS URIs of files to be placed in the working directory of each executor. archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: - ``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``. + `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. args: The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as - ``--conf``, since a collision can occur that causes an incorrect batch + `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py index ed2d615ec8..6a9120e024 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py @@ -47,7 +47,7 @@ def dataproc_create_spark_sql_batch( Args: location: Location of the Dataproc batch workload. If - not set, defaults to ``"us-central1"``. + not set, defaults to `"us-central1"`. batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This @@ -56,9 +56,9 @@ def dataproc_create_spark_sql_batch( keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of ``"key": - value`` pairs. - Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. + be associated with a batch. An object containing a list of `"key": + value` pairs. + Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. @@ -76,11 +76,11 @@ def dataproc_create_spark_sql_batch( query_file_uri: The HCFS URI of the script that contains Spark SQL queries to execute. query_variables: Mapping of query variable names to values (equivalent to - the Spark SQL command: ``SET name="value";``). An object containing a - list of ``"key": value`` pairs. - Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``. 
+ the Spark SQL command: `SET name="value";`). An object containing a + list of `"key": value` pairs. + Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. jar_file_uris: HCFS URIs of jar files to be added to the Spark - ``CLASSPATH``. + `CLASSPATH`. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py index 641ed5ec04..852ad1bb81 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Manage datasets via `Vertex AI Datasets `_.""" +"""Manage datasets via [Vertex AI Datasets](https://cloud.google.com/vertex-ai/docs/training/using-managed-datasets).""" from google_cloud_pipeline_components.v1.dataset.create_image_dataset.component import image_dataset_create as ImageDatasetCreateOp from google_cloud_pipeline_components.v1.dataset.create_tabular_dataset.component import tabular_dataset_create as TabularDatasetCreateOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py index 780d09448d..fcce18eb33 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py @@ -34,8 +34,8 @@ def image_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new image `Dataset `_ and optionally imports data into Dataset when - ``source`` and ``import_schema_uri`` are passed. + """Creates a new image [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when + `source` and `import_schema_uri` are passed. Args: display_name: The user-defined name of the Dataset. @@ -46,11 +46,11 @@ def image_dataset_create( input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. + For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema Object `_. + [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -63,7 +63,7 @@ def image_dataset_create( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, e.g. jsonl file. + `import_schema_uri`, e.g. jsonl file. 
location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -77,11 +77,11 @@ def image_dataset_create( encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py index 45ea84b29c..c5e901ba69 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py @@ -34,7 +34,7 @@ def tabular_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new tabular `Dataset `_. + """Creates a new tabular [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: display_name: The user-defined name of the Dataset. @@ -45,7 +45,7 @@ def tabular_dataset_create( input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. + For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. bq_source: BigQuery URI to the input table. For example, "bq://project.dataset.table_name". location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. @@ -60,11 +60,11 @@ def tabular_dataset_create( encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py index 6c5417370b..a466396a28 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py @@ -35,8 +35,8 @@ def text_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new text `Dataset `_ and optionally imports data into Dataset when - ``source`` and ``import_schema_uri`` are passed. + """Creates a new text [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when + `source` and `import_schema_uri` are passed. Args: display_name: The user-defined name of the Dataset. @@ -47,11 +47,11 @@ def text_dataset_create( input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. + For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema Object `_. + [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -64,7 +64,7 @@ def text_dataset_create( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, e.g. jsonl file. + `import_schema_uri`, e.g. jsonl file. location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -78,11 +78,11 @@ def text_dataset_create( encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py index 4119729f66..2e93a41c15 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py @@ -34,7 +34,7 @@ def time_series_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new time series `Dataset `_. + """Creates a new time series [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: display_name: The user-defined name of the Dataset. @@ -45,7 +45,7 @@ def time_series_dataset_create( input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. + For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. bq_source: BigQuery URI to the input table. For example, bq://project.dataset.table_name". location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. @@ -60,11 +60,11 @@ def time_series_dataset_create( encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py index 51a4b29f8c..78c10227d4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py @@ -35,8 +35,8 @@ def video_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new video `Dataset `_ and optionally imports data into Dataset when - ``source`` and ``import_schema_uri`` are passed. + """Creates a new video [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when + `source` and `import_schema_uri` are passed. Args: display_name: The user-defined name of the Dataset. @@ -47,12 +47,12 @@ def video_dataset_create( input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, ``"gs://bucket/file.csv"`` or ``["gs://bucket/file1.csv", "gs://bucket/file2.csv"]``. 
+ For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema - Object `_. + [OpenAPI 3.0.2 Schema + Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -65,7 +65,7 @@ def video_dataset_create( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, + `import_schema_uri`, location: Optional location to retrieve Dataset from. labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters @@ -79,11 +79,11 @@ def video_dataset_create( encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides ``encryption_spec_key_name`` set in ``aiplatform.init``. + Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py index 5351e3b6e4..f109013786 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py @@ -31,13 +31,13 @@ def image_dataset_export( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Exports `Dataset `_ to a GCS output directory. + """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: - ``export-data--`` + `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py index f10358c4d6..68ec1b5bcd 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py @@ -31,13 +31,13 @@ def tabular_dataset_export( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Exports `Dataset `_ to a GCS output directory. 
+ """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: - ``export-data--`` + `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py index 7450cedd5c..0f78b4bbb8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py @@ -31,13 +31,13 @@ def text_dataset_export( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Exports `Dataset `_ to a GCS output directory. + """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: - ``export-data--`` + `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py index 10aa1cf34e..fd74cf451b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py @@ -31,13 +31,13 @@ def time_series_dataset_export( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Exports `Dataset `_ to a GCS output directory. + """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: - ``export-data--`` + `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py index 83c27efeb0..abbd43daf3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py @@ -31,13 +31,13 @@ def video_dataset_export( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Exports `Dataset `_ to a GCS output directory. 
+ """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: - ``export-data--`` + `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/get_vertex_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/get_vertex_dataset/component.py index 801ec50133..4686a1162b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/get_vertex_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/get_vertex_dataset/component.py @@ -25,13 +25,13 @@ def get_vertex_dataset( gcp_resources: dsl.OutputPath(str), ): # fmt: off - """Gets a `Dataset `_ artifact as a Vertex Dataset artifact. + """Gets a [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) artifact as a Vertex Dataset artifact. Args: - dataset_resource_name: Vertex Dataset resource name in the format of ``projects/{project}/locations/{location}/datasets/{dataset}``. + dataset_resource_name: Vertex Dataset resource name in the format of `projects/{project}/locations/{location}/datasets/{dataset}`. Returns: - dataset: Vertex Dataset artifact with a ``resourceName`` metadata field in the format of ``projects/{project}/locations/{location}/datasets/{dataset}``. + dataset: Vertex Dataset artifact with a `resourceName` metadata field in the format of `projects/{project}/locations/{location}/datasets/{dataset}`. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py index 2d0727f97f..61dec5950f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py @@ -36,7 +36,7 @@ def image_dataset_import( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Uploads data to an existing managed `Dataset `_. + """Uploads data to an existing managed [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: location: Optional location to retrieve Dataset from. @@ -50,7 +50,7 @@ def image_dataset_import( import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema Object `_. + [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -63,7 +63,7 @@ def image_dataset_import( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, e.g. jsonl file. + `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. 
Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py index 2528b6dd40..fe7ea37320 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py @@ -35,7 +35,7 @@ def text_dataset_import( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Uploads data to an existing managed `Dataset `_. + """Uploads data to an existing managed [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: location: Optional location to retrieve Datasetfrom. @@ -49,8 +49,8 @@ def text_dataset_import( import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema - Object `_. + [OpenAPI 3.0.2 Schema + Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -63,7 +63,7 @@ def text_dataset_import( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, + `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py index fbbaf05aec..fb6c275590 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py @@ -36,7 +36,7 @@ def video_dataset_import( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Uploads data to an existing managed `Dataset `_. + """Uploads data to an existing managed [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: location: Optional location to retrieve Dataset from. @@ -50,8 +50,8 @@ def video_dataset_import( import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an - `OpenAPI 3.0.2 Schema - Object `_. + [OpenAPI 3.0.2 Schema + Object](https://tinyurl.com/y538mdwt). data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these @@ -64,7 +64,7 @@ def video_dataset_import( if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by - ``import_schema_uri``, + `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
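A short wiring sketch for the dataset components documented above; the `dataset` and `gcs_source` argument names of the import component are assumptions (they are not visible in these hunks), and all GCS URIs are placeholders:

from kfp import dsl
from google_cloud_pipeline_components.v1.dataset.get_vertex_dataset.component import get_vertex_dataset
from google_cloud_pipeline_components.v1.dataset.import_image_dataset.component import image_dataset_import


@dsl.pipeline(name='dataset-import-example')
def dataset_import_pipeline(dataset_resource_name: str):
    # Resolve an existing managed Dataset into a Vertex Dataset artifact
    # (dataset_resource_name is the only documented input).
    get_dataset_task = get_vertex_dataset(
        dataset_resource_name=dataset_resource_name)

    # Import new items into the resolved Dataset. The `dataset` and
    # `gcs_source` argument names are assumptions; import_schema_uri is
    # documented above, and both URIs here are placeholders.
    image_dataset_import(
        dataset=get_dataset_task.outputs['dataset'],
        gcs_source='gs://my-bucket/import/images.jsonl',
        import_schema_uri='gs://my-bucket/schema/import_schema.yaml',
    )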
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py index 18716819c8..93a41c6bf1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Manage model serving endpoints via `Vertex AI Endpoints `_.""" +"""Manage model serving endpoints via [Vertex AI Endpoints](https://cloud.google.com/vertex-ai/docs/predictions/overview?_ga=2.161419069.-1686833729.1684288907#model_deployment).""" from google_cloud_pipeline_components.v1.endpoint.create_endpoint.component import endpoint_create as EndpointCreateOp from google_cloud_pipeline_components.v1.endpoint.delete_endpoint.component import endpoint_delete as EndpointDeleteOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py index 9fb29d1380..7a827e4c4a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py @@ -37,10 +37,10 @@ def endpoint_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """`Creates `_ a Google Cloud Vertex `Endpoint `_ and waits for it to be ready. + """[Creates](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) and waits for it to be ready. - See the `Endpoint create `_ method for more information. + See the [Endpoint create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) method for more information. Args: location: Location to create the Endpoint. If not set, @@ -58,7 +58,7 @@ def endpoint_create( encryption_spec_key_name: Customer-managed encryption key spec for an Endpoint. If set, this Endpoint and all of this Endoint's sub-resources will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key. @@ -66,14 +66,14 @@ def endpoint_create( network to which the Endpoint should be peered. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. - `Format `_: - ``projects/{project}/global/networks/{network}``. Where ``{project}`` is a - project number, as in ``'12345'``, and ``{network}`` is network name. + [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): + `projects/{project}/global/networks/{network}`. Where `{project}` is a + project number, as in `'12345'`, and `{network}` is network name. project: Project to create the Endpoint. Defaults to the project in which the PipelineJob is run. 
Returns: endpoint: Artifact tracking the created Endpoint. - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the create Endpoint's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the create Endpoint's long-running operation. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py index d8bf307f7e..ca05eea9a0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py @@ -25,15 +25,15 @@ def endpoint_delete( gcp_resources: dsl.OutputPath(str), ): # fmt: off - """`Deletes `_ a Google Cloud Vertex `Endpoint `_. + """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints). - See the `Endpoint delete `_ method for more information. + See the [Endpoint delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) method for more information. Args: endpoint: The Endpoint to be deleted. Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the delete Endpoint's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the delete Endpoint's long-running operation. """ # fmt: on return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py index 9136fdb589..773ff9fe20 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py @@ -45,10 +45,10 @@ def model_deploy( explanation_parameters: Dict[str, str] = {}, ): # fmt: off - """`Deploys `_ a Google Cloud Vertex Model to an `Endpoint `_ creating a - `DeployedModel `_ within it. + """[Deploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) a Google Cloud Vertex Model to an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) creating a + [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within it. - See the `deploy Model `_ method for more information. + See the [deploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) method for more information. Args: model: The model to be deployed. @@ -68,10 +68,10 @@ def model_deploy( field is empty, then the Endpoint's trafficSplit is not updated. dedicated_resources_machine_type: The specification of a single machine used by the prediction. This field is required if - ``automatic_resources_min_replica_count`` is not specified. See `more information `_. 
+ `automatic_resources_min_replica_count` is not specified. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#dedicatedresources). dedicated_resources_accelerator_type: Hardware - accelerator type. Must also set accelerator_count if used. See `available options `_. This field is required if - ``dedicated_resources_machine_type`` is specified. + accelerator type. Must also set accelerator_count if used. See [available options](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType). This field is required if + `dedicated_resources_machine_type` is specified. dedicated_resources_accelerator_count: The number of accelerators to attach to a worker replica. dedicated_resources_min_replica_count: The minimum @@ -91,14 +91,14 @@ def model_deploy( guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, - will use ``dedicated_resources_min_replica_count`` as the default value. + will use `dedicated_resources_min_replica_count` as the default value. automatic_resources_min_replica_count: The minimum number of replicas this DeployedModel will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more - replicas up to ``automatic_resources_max_replica_count``, and as traffic + replicas up to `automatic_resources_max_replica_count`, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error. This field is required - if ``dedicated_resources_machine_type`` is not specified. + if `dedicated_resources_machine_type` is not specified. automatic_resources_max_replica_count: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the @@ -114,7 +114,7 @@ def model_deploy( service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the - ``iam.serviceAccounts.actAs`` permission on this service account. + `iam.serviceAccounts.actAs` permission on this service account. disable_container_logging: For custom-trained Models and AutoML Tabular Models, the container of the DeployedModel instances will send stderr and stdout streams to Stackdriver Logging @@ -128,12 +128,12 @@ def model_deploy( high queries per second rate (QPS). Estimate your costs before enabling this option. explanation_metadata: Metadata describing the Model's - input and output for explanation. See `more information `_. + input and output for explanation. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata). explanation_parameters: Parameters that configure - explaining information of the Model's predictions. See `more information `_. + explaining information of the Model's predictions. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata). Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the deploy Model's long-running operation. 
+ gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the deploy Model's long-running operation. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py index ee28438485..1461a4fda3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py @@ -30,9 +30,9 @@ def model_undeploy( traffic_split: Dict[str, str] = {}, ): # fmt: off - """`Undeploys `_ a Google Cloud Vertex `DeployedModel `_ within an `Endpoint `_. + """[Undeploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) a Google Cloud Vertex [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints). - See the `undeploy Model `_ method for more information. + See the [undeploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) method for more information. Args: @@ -46,7 +46,7 @@ def model_undeploy( assigned to it when this method executes, or if this field unassigns any traffic to it. Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the undeploy Model's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the undeploy Model's long-running operation. """ # fmt: on return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py index fddf25bf11..f7e39641ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Compose `tabular data forecasting `_ pipelines.""" +"""Compose [tabular data forecasting](https://cloud.google.com/vertex-ai/docs/tabular-data/forecasting/overview) pipelines.""" from google_cloud_pipeline_components.v1.forecasting.prepare_data_for_train.component import prepare_data_for_train as ForecastingPrepareDataForTrainOp from google_cloud_pipeline_components.v1.forecasting.preprocess.component import forecasting_preprocessing as ForecastingPreprocessingOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py index 2098bb8442..49fd217295 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py @@ -11,14 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create `hyperparameter tuning jobs `_ via a `Vertex AI Custom Training Job `_.""" +"""Create [hyperparameter tuning jobs](https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning) via a [Vertex AI Custom Training Job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job).""" from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.component import hyperparameter_tuning_job as HyperparameterTuningJobRunOp -from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import ( - serialize_metrics, - serialize_parameters, -) +from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import serialize_metrics +from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import serialize_parameters __all__ = [ 'HyperparameterTuningJobRunOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py index 3de88c0aa6..511bc5ccd6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py @@ -45,7 +45,7 @@ def hyperparameter_tuning_job( """Creates a Vertex AI hyperparameter tuning job and waits for it to complete. - See `more information. `_ + See [more information](https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning). Args: @@ -56,7 +56,7 @@ def hyperparameter_tuning_job( store the output of this HyperparameterTuningJob. The base_output_directory of each child CustomJob backing a Trial is set to a subdirectory with name as the trial id under its parent - HyperparameterTuningJob's ``base_output_directory``. The following Vertex + HyperparameterTuningJob's `base_output_directory`. The following Vertex AI environment variables will be passed to containers or Python modules when this field is set: * AIP_MODEL_DIR = `\/\/model\/` @@ -67,8 +67,8 @@ def hyperparameter_tuning_job( first one are optional and can be skipped by providing an empty value. study_spec_metrics: List serialized from dictionary representing the metrics to optimize. 
The dictionary key is the metric_id, which is reported by your training job, and the - dictionary value is the optimization goal of the metric (``'minimize'`` or - ``'maximize'``). + dictionary value is the optimization goal of the metric (`'minimize'` or + `'maximize'`). Example: :: @@ -97,8 +97,8 @@ def hyperparameter_tuning_job( 'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear') }) - Parameters specs should be subclasses of `_ParameterSpec `_. Supported subclasses include: ``DoubleParameterSpec``, - ``IntegerParameterSpec``, ``CategoricalParameterSpace``, ``DiscreteParameterSpec``. + Parameters specs should be subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, + `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. max_trial_count: The desired total number of Trials. parallel_trial_count: The desired number of Trials to run in parallel. @@ -107,31 +107,31 @@ def hyperparameter_tuning_job( 0, Vertex AI decides how many Trials must fail before the whole job fails. location: Location to run the HyperparameterTuningJob - in, defaults to ``'us-central1'``. + in, defaults to `'us-central1'`. study_spec_algorithm: The search algorithm specified for the Study. Accepts one of the following: - * ``'ALGORITHM_UNSPECIFIED'`` - If you do not specify an algorithm, your job uses the default Vertex AI algorithm. The default algorithm applies Bayesian optimization to arrive at the optimal solution with a more effective search over the parameter space. - * ``'GRID_SEARCH'`` - A simple grid search within the feasible space. This option is particularly useful if you want to specify a quantity of trials that is greater than the number of points in the feasible space. In such cases, if you do not specify a grid search, the Vertex AI default algorithm may generate duplicate suggestions. To use grid search, all parameter specs must be of type ``IntegerParameterSpec``, ``CategoricalParameterSpace``, or ``DiscreteParameterSpec``. - * ``'RANDOM_SEARCH'`` - A simple random search within the feasible space. + * `'ALGORITHM_UNSPECIFIED'` - If you do not specify an algorithm, your job uses the default Vertex AI algorithm. The default algorithm applies Bayesian optimization to arrive at the optimal solution with a more effective search over the parameter space. + * `'GRID_SEARCH'` - A simple grid search within the feasible space. This option is particularly useful if you want to specify a quantity of trials that is greater than the number of points in the feasible space. In such cases, if you do not specify a grid search, the Vertex AI default algorithm may generate duplicate suggestions. To use grid search, all parameter specs must be of type `IntegerParameterSpec`, `CategoricalParameterSpace`, or `DiscreteParameterSpec`. + * `'RANDOM_SEARCH'` - A simple random search within the feasible space. study_spec_measurement_selection_type: This indicates which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. - Accepts: ``'BEST_MEASUREMENT'`` or ``'LAST_MEASUREMENT'``. Choose this based on + Accepts: `'BEST_MEASUREMENT'` or `'LAST_MEASUREMENT'`. Choose this based on two considerations: A) Do you expect your measurements to - monotonically improve? If so, choose ``'LAST_MEASUREMENT'``. 
On the + monotonically improve? If so, choose `'LAST_MEASUREMENT'`. On the other hand, if you're in a situation where your system can "over-train" and you expect the performance to get better for a - while but then start declining, choose ``'BEST_MEASUREMENT'``. B) Are + while but then start declining, choose `'BEST_MEASUREMENT'`. B) Are your measurements significantly noisy and/or irreproducible? If - so, ``'BEST_MEASUREMENT'`` will tend to be over-optimistic, and it may - be better to choose ``'LAST_MEASUREMENT'``. If both or neither of (A) + so, `'BEST_MEASUREMENT'` will tend to be over-optimistic, and it may + be better to choose `'LAST_MEASUREMENT'`. If both or neither of (A) and (B) apply, it doesn't matter which selection type is chosen. encryption_spec_key_name: Customer-managed encryption key options for a HyperparameterTuningJob. If this is set, then all resources created by the HyperparameterTuningJob will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. service_account: Specifies the service account for @@ -139,13 +139,13 @@ def hyperparameter_tuning_job( permission on this run-as account. network: The full name of the Compute Engine network to which the job should be peered. For example, - ``projects/12345/global/networks/myVPC``. Private services access must + `projects/12345/global/networks/myVPC`. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. project: Project to run the HyperparameterTuningJob in. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which contains the GCP resource ID of the Hyperparameter Tuning job. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which contains the GCP resource ID of the Hyperparameter Tuning job. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py index 2ce7d5d6eb..8a503fcb22 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py @@ -14,6 +14,7 @@ """Module for supporting Google Vertex AI Hyperparameter Tuning Job Op.""" from typing import Any, Dict, List + from google.cloud.aiplatform import hyperparameter_tuning from google.cloud.aiplatform_v1.types import study @@ -22,12 +23,12 @@ def serialize_parameters( parameters: Dict[str, hyperparameter_tuning._ParameterSpec] ) -> List[Dict[str, Any]]: # fmt: off - """Utility for converting a hyperparameter tuning `ParameterSpec `_ into a list of dictionaries. + """Utility for converting a hyperparameter tuning [ParameterSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/StudySpec#ParameterSpec) into a list of dictionaries. Args: parameters (Dict[str, hyperparameter_tuning._ParameterSpec]): Dictionary - of paramater ids to subclasses of `_ParameterSpec `_. 
Supported subclasses include: ``DoubleParameterSpec``, - ``IntegerParameterSpec``, ``CategoricalParameterSpace``, ``DiscreteParameterSpec``. + of parameter ids to subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, + `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. :Example: :: @@ -45,7 +46,7 @@ def serialize_parameters( } Returns: - List of ``ParameterSpec`` dictionaries. + List of `ParameterSpec` dictionaries. """ # fmt: on # the to_dict function is used here instead of the to_json function for compatibility with GAPIC @@ -59,13 +60,13 @@ def serialize_parameters( def serialize_metrics(metric_spec: Dict[str, str]) -> List[Dict[str, Any]]: # fmt: off - """Utility for converting a hyperparameter tuning `MetricSpec `_ into a list of dictionaries. + """Utility for converting a hyperparameter tuning [MetricSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/StudySpec#metricspec) into a list of dictionaries. Args: metric_spec (Dict[str, str]): Dictionary representing metrics to optimize. The dictionary key is the metric_id, which is reported by your training job, and the dictionary value is the optimization goal of the - metric (``'minimize'`` or ``'maximize'``). + metric (`'minimize'` or `'maximize'`). :Example: :: @@ -73,7 +74,7 @@ def serialize_metrics(metric_spec: Dict[str, str]) -> List[Dict[str, Any]]: metrics = {'loss': 'minimize', 'accuracy': 'maximize'} Returns: - List of ``MetricSpec`` dictionaries. + List of `MetricSpec` dictionaries. """ # fmt: on return [ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py index 0ff8cf50df..3aecbeb2c9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Manage models via `Vertex AI Model Registry `_.""" +"""Manage models via [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction).""" from google_cloud_pipeline_components.v1.model.delete_model.component import model_delete as ModelDeleteOp from google_cloud_pipeline_components.v1.model.export_model.component import model_export as ModelExportOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py index 30df2efa52..5f4e98078d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py @@ -21,17 +21,17 @@ @dsl.container_component def model_delete(model: Input[VertexModel], gcp_resources: dsl.OutputPath(str)): # fmt: off - """`Deletes `_ a Google Cloud Vertex `Model `_. + """[Deletes](https://cloud.google.com/vertex- + ai/docs/reference/rest/v1/projects.locations.models/delete) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models). 
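Stepping outside the diff hunks for a moment: the `serialize_parameters` and `serialize_metrics` utilities patched above are the usual way to build the `study_spec_parameters` and `study_spec_metrics` inputs of `HyperparameterTuningJobRunOp`. A minimal sketch follows, reusing the docstring's own example values; only the import paths and signatures visible in this patch are relied on.

# Build study_spec_metrics / study_spec_parameters with the utilities above.
# Values mirror the docstring examples; nothing here goes beyond the
# documented signatures.
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import serialize_metrics
from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import serialize_parameters

# Metric ids reported by the training job, mapped to 'minimize' or 'maximize'.
metrics_spec = serialize_metrics({'loss': 'minimize', 'accuracy': 'maximize'})

# Parameter specs built from google.cloud.aiplatform.hyperparameter_tuning.
parameters_spec = serialize_parameters({
    'learning_rate': hpt.DoubleParameterSpec(min=0.001, max=1, scale='log'),
    'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear'),
})

# Both results are plain lists of dicts and can be passed directly to the
# study_spec_metrics / study_spec_parameters inputs of
# HyperparameterTuningJobRunOp inside a pipeline definition.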
- See the `Model delete `_ method for more information. + See the [Model delete](https://cloud.google.com/vertex- + ai/docs/reference/rest/v1/projects.locations.models/delete) method for more information. Args: - model: The name of the Model resource to be deleted. Format: ``projects/{project}/locations/{location}/models/{model}``. `More information. `_ + model: The name of the Model resource to be deleted. Format: `projects/{project}/locations/{location}/models/{model}`. [More information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete#path-parameters). Returns: - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the delete Model's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the delete Model's long-running operation. """ # fmt: on return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py index 7cfe4538e4..1baa950b32 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py @@ -33,47 +33,47 @@ def model_export( image_destination: str = '', ): # fmt: off - """`Exports `_ a Google Cloud Vertex `Model `_ to a user-specified location. + """[Exports](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) to a user-specified location. The Model must be exportable. A Model is considered to be exportable if it has at least one supported export format. - See the `Model export `_ method for more information. + See the [Model export](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) method for more information. Args: model: The Model to export. export_format_id: The ID of the format in which the Model must be exported. Each Model lists the export formats it supports. If no value is provided here, then the first from the list of the Model's - supported formats is used by default. `More information. `_ + supported formats is used by default. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) artifact_destination: The Cloud Storage location where the Model artifact is to be written to. Under the directory given as the destination a new one with name - ``"model-export--"``, + `"model-export--"`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format, will be created. Inside, the Model and any of its supporting files will be written. This field should only be set when, in [Model.supported_export_formats], the value for the key given in - ``export_format_id`` contains ``ARTIFACT``. `More information. `_ + `export_format_id` contains `ARTIFACT`. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) image_destination: The Google Container Registry or Artifact Registry URI where the Model container image will be copied - to. `More information. `_ + to. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) Accepted forms: - - Google Container Registry path. For example: ``gcr.io/projectId/imageName:tag``. + - Google Container Registry path. For example: `gcr.io/projectId/imageName:tag`. - Artifact Registry path. For example: - ``us-central1-docker.pkg.dev/projectId/repoName/imageName:tag``. + `us-central1-docker.pkg.dev/projectId/repoName/imageName:tag`. - This field should only be set when, in [Model.supported_export_formats], the value for the key given in ``export_format_id`` contains ``IMAGE``. + This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains `IMAGE`. Returns: output_info: Details of the completed export with output destination paths to the artifacts or container image. - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the export Model's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the export Model's long-running operation. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py index b4c321c4be..030a47dc9d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py @@ -44,20 +44,20 @@ def model_upload( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """`Uploads `_ a Google Cloud Vertex `Model `_ and returns a Model artifact representing the uploaded Model + """[Uploads](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) and returns a Model artifact representing the uploaded Model resource. - See `Model upload `_ method for more information. + See [Model upload](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) method for more information. Args: location: Optional location to upload this Model to. If - not set, defaults to ``us-central1``. + not set, defaults to `us-central1`. display_name: The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 - characters. `More information. `_ - description: The description of the Model. `More information. `_ - parent_model: An artifact of a model which to upload a new version to. Only specify this field when uploading a new version. `More information. `_ - unmanaged_container_model: The unmanaged container model to be uploaded. The Model can be passed from an upstream step or imported via a KFP ``dsl.importer``. + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) + parent_model: An artifact of a model which to upload a new version to. Only specify this field when uploading a new version. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) + unmanaged_container_model: The unmanaged container model to be uploaded. The Model can be passed from an upstream step or imported via a KFP `dsl.importer`. :Examples: :: @@ -75,13 +75,13 @@ def model_upload( }) explanation_metadata: Metadata describing the Model's - input and output for explanation. Both ``explanation_metadata`` and ``explanation_parameters`` must be passed together when used. `More information. `_ + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata) explanation_parameters: Parameters to configure - explaining for Model's predictions. `More information. `_ + explaining for Model's predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters) encryption_spec_key_name: Customer-managed encryption key spec for a Model. If set, this Model and all sub-resources of this Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. labels: The labels with user-defined metadata to @@ -94,7 +94,7 @@ def model_upload( Returns: model: Artifact tracking the created Model. - gcp_resources: Serialized JSON of ``gcp_resources`` `proto `_ which tracks the upload Model's long-running operation. + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the upload Model's long-running operation. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py index d38c1a1dd6..5e424a9689 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py @@ -54,7 +54,7 @@ def model_evaluation_classification( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Computes a ``google.ClassificationMetrics`` Artifact, containing evaluation + """Computes a `google.ClassificationMetrics` Artifact, containing evaluation metrics given a model's prediction results. Creates a Dataflow job with Apache Beam and TFMA to compute evaluation @@ -65,7 +65,7 @@ def model_evaluation_classification( Args: location: Location for running the evaluation. predictions_format: The file format for the batch - prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files @@ -76,8 +76,8 @@ def model_evaluation_classification( with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". 
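Looking back at the `model_upload` component above: its `unmanaged_container_model` docstring notes the model can come from an upstream step or a KFP `dsl.importer`. A minimal sketch of the importer wiring, assuming the conventional `ModelUploadOp` alias exported by `google_cloud_pipeline_components.v1.model`; the artifact URI and serving image are placeholders.

# Feed model_upload (above) from a dsl.importer, as its docstring describes.
# The ModelUploadOp alias is assumed from the v1.model package __init__;
# URI and image values are placeholders.
from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.model import ModelUploadOp


@dsl.pipeline(name='upload-unmanaged-model')
def upload_pipeline():
    # Import a model directory produced outside the pipeline.
    importer = dsl.importer(
        artifact_uri='gs://my-bucket/path/to/model',  # placeholder
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            'containerSpec': {
                # placeholder serving image
                'imageUri': 'us-docker.pkg.dev/my-project/serving/image:tag',
            }
        },
    )
    ModelUploadOp(
        display_name='my-model',
        unmanaged_container_model=importer.outputs['artifact'],
    )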
ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. ``jsonl``, - ``csv``, and ``bigquery`` are the allowed formats. + tabular data. The file format for the ground truth files. `jsonl`, + `csv`, and `bigquery` are the allowed formats. ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction @@ -88,50 +88,50 @@ def model_evaluation_classification( Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. classification_type: The type of classification problem, - either ``multiclass`` or ``multilabel``. + either `multiclass` or `multilabel`. class_labels: The list of class names for the target_field_name, in the same order they appear in the batch predictions jobs predictions output file. For instance, if the values of - target_field_name could be either ``1`` or ``0``, and the predictions output + target_field_name could be either `1` or `0`, and the predictions output contains ["1", "0"] for the prediction_label_column, then the class_labels input will be ["1", "0"]. If not set, defaults to the classes found in the prediction_label_column in the batch prediction jobs predictions file. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, - delimited by ``.``. Alternatively referred to as the ground truth (or + delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested - columns, delimited by ``.``. + columns, delimited by `.`. prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find - nested columns, delimited by ``.``. + nested columns, delimited by `.`. slicing_specs: List of - ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. When + `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component Below is an example of how to format this input. 1: First, create a SlicingSpec. - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` - ``from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig`` + `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - ``slicing_spec = SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))})`` + `slicing_spec = SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))})` 2: Create a list to store the slicing specs into. - ``slicing_specs = []`` + `slicing_specs = []` 3: Format each SlicingSpec into a JSON or Dict. 
- ``slicing_spec_json = json_format.MessageToJson(slicing_spec)`` + `slicing_spec_json = json_format.MessageToJson(slicing_spec)` or - ``slicing_spec_dict = json_format.MessageToDict(slicing_spec)`` + `slicing_spec_dict = json_format.MessageToDict(slicing_spec)` 4: Combine each slicing_spec JSON into a list. - ``slicing_specs.append(slicing_spec_json)`` + `slicing_specs.append(slicing_spec_json)` 5: Finally, pass slicing_specs as an parameter for this component. - ``ModelEvaluationClassificationOp(slicing_specs=slicing_specs)`` + `ModelEvaluationClassificationOp(slicing_specs=slicing_specs)` For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice positive_classes: The list of class @@ -158,16 +158,16 @@ def model_evaluation_classification( encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. - force_runner_mode: Flag to choose Beam runner. Valid options are ``DirectRunner`` - and ``Dataflow``. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` + and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: evaluation_metrics: - ``google.ClassificationMetrics`` representing the classification + `google.ClassificationMetrics` representing the classification evaluation metrics in GCS. gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index d5139f6542..c1c0797f7e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -67,46 +67,46 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v location: The GCP region that runs the pipeline components. model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of - ``projects/{project}/locations/{location}/models/{model}`` or - ``projects/{project}/locations/{location}/models/{model}@{model_version_id - or model_version_alias}`` + `projects/{project}/locations/{location}/models/{model}` or + `projects/{project}/locations/{location}/models/{model}@{model_version_id + or model_version_alias}` batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. 
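For reference, the five `slicing_specs` steps listed above for `ModelEvaluationClassificationOp`, collected into one sketch. The nested proto classes are reached via attribute access because the dotted `from ... import SliceSpec` spelling in the docstring does not resolve as a module path, and the proto-plus `to_dict` helper (the same approach the utils module above uses) stands in for `json_format.MessageToDict`; the feature and label names are placeholders.

# Sketch of the slicing_specs recipe for ModelEvaluationClassificationOp.
# Nested classes are accessed as attributes of ModelEvaluationSlice, and
# 'feature_a' / 'label_a' are placeholders.
from google.cloud.aiplatform_v1.types import ModelEvaluationSlice

SliceSpec = ModelEvaluationSlice.Slice.SliceSpec
SliceConfig = SliceSpec.SliceConfig

# 1: Create a SlicingSpec that slices on one value of 'feature_a'.
slicing_spec = SliceSpec(
    configs={'feature_a': SliceConfig(value=SliceSpec.Value(string_value='label_a'))}
)

# 2-4: Convert each spec to a dict and collect the dicts in a list.
slicing_specs = [SliceSpec.to_dict(slicing_spec)]

# 5: Pass the list to the component, e.g.
# ModelEvaluationClassificationOp(..., slicing_specs=slicing_specs)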
If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. test_dataset_resource_name: A Vertex dataset resource name of the test - dataset. If ``test_dataset_storage_source_uris`` is also provided, this + dataset. If `test_dataset_storage_source_uris` is also provided, this argument will override the GCS source. test_dataset_annotation_set_name: A string of the annotation_set resource name containing the ground truth of the test datset used for evaluation. training_dataset_resource_name: A Vertex dataset resource name of the - training dataset. If ``training_dataset_storage_source_uris`` is also + training dataset. If `training_dataset_storage_source_uris` is also provided, this argument will override the GCS source. training_dataset_annotation_set_name: A string of the annotation_set resource name containing the ground truth of the test datset used for feature extraction. test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged - test datasets.``jsonl`` is currently the only allowed format. If - ``test_dataset`` is also provided, this field will be overriden by the + test datasets.`jsonl` is currently the only allowed format. If + `test_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. training_dataset_storage_source_uris: Google Cloud Storage URI(-s) to - unmanaged test datasets.``jsonl`` is currently the only allowed format. If - ``training_dataset`` is also provided, this field will be overriden by the + unmanaged test datasets.`jsonl` is currently the only allowed format. If + `training_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. 
For more details @@ -126,18 +126,18 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -152,11 +152,11 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. """ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index c41cc81715..12bff9008f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -57,36 +57,36 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value location: The GCP region that runs the pipeline components. 
model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of - ``projects/{project}/locations/{location}/models/{model}`` or - ``projects/{project}/locations/{location}/models/{model}@{model_version_id - or model_version_alias}`` + `projects/{project}/locations/{location}/models/{model}` or + `projects/{project}/locations/{location}/models/{model}@{model_version_id + or model_version_alias}` batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. test_dataset_resource_name: A Vertex dataset resource name of the test - dataset. If ``test_dataset_storage_source_uris`` is also provided, this + dataset. If `test_dataset_storage_source_uris` is also provided, this argument will override the GCS source. test_dataset_annotation_set_name: A string of the annotation_set name containing the ground truth of the test datset used for evaluation. test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged - test datasets.``jsonl`` is currently the only allowed format. If - ``test_dataset`` is also provided, this field will be overriden by the + test datasets.`jsonl` is currently the only allowed format. If + `test_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. 
For more details @@ -106,18 +106,18 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -132,11 +132,11 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. """ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index d6398dca48..f8a2e748e1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -75,7 +75,7 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. 
batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -84,21 +84,21 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -116,20 +116,19 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. 
The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -141,40 +140,39 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. 
This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. slicing_specs: List of - ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. - When provided, compute metrics for each defined slice. See sample code in + `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When + provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. @@ -192,11 +190,11 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -356,7 +354,7 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -365,21 +363,21 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -397,20 +395,19 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. 
If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -422,37 +419,36 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. 
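For the feature-attribution evaluation pipelines documented here, a hedged sketch of how such a pipeline is typically compiled and submitted (the regression variant shown). The parameter names come from the docstring above; the project, model, and GCS values are placeholders, and the submission call is left commented out.

# Compile the regression feature-attribution evaluation pipeline above and
# prepare a Vertex AI PipelineJob for it. All concrete values are placeholders.
from kfp import compiler
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.model_evaluation.evaluation_automl_tabular_feature_attribution_pipeline import (
    evaluation_automl_tabular_feature_attribution_regression_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_automl_tabular_feature_attribution_regression_pipeline,
    package_path='evaluation_feature_attribution_regression.json',
)

aiplatform.init(project='my-project', location='us-central1')  # placeholders

job = aiplatform.PipelineJob(
    display_name='eval-feature-attribution',  # placeholder
    template_path='evaluation_feature_attribution_regression.json',
    parameter_values={
        'project': 'my-project',
        'location': 'us-central1',
        'model_name': 'projects/my-project/locations/us-central1/models/123',
        'target_field_name': 'price',
        'batch_predict_instances_format': 'csv',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/eval.csv'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval-output',
    },
)
# job.submit()  # uncomment to actually run on Vertex AI Pipelines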
batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -467,11 +463,11 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. Returns: A google.RegressionMetrics artifact. @@ -625,7 +621,7 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -634,21 +630,21 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -666,20 +662,19 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. 
If the Model supports DEDICATED_RESOURCES this @@ -691,40 +686,39 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. 
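The dedicated-resources knobs above interact: accelerators are only honoured when a machine type is set, and the starting replica count may not exceed the maximum. A minimal sketch of how these might be supplied as pipeline parameter values, with purely illustrative machine and accelerator choices:

# Placeholder resource settings for the batch prediction step; only meaningful
# when batch_predict_machine_type is set, per the parameter docs above.
batch_predict_resource_params = {
    'batch_predict_machine_type': 'n1-standard-16',
    'batch_predict_starting_replica_count': 5,   # must not exceed the max below
    'batch_predict_max_replica_count': 10,
    'batch_predict_accelerator_type': 'NVIDIA_TESLA_T4',
    'batch_predict_accelerator_count': 1,
}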
slicing_specs: List of - ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. - When provided, compute metrics for each defined slice. See sample code in + `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When + provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. @@ -742,11 +736,11 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index ad596db057..77b39a5780 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -67,7 +67,7 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -76,21 +76,21 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -108,20 +108,19 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. 
If the Model supports DEDICATED_RESOURCES this @@ -133,21 +132,21 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. slicing_specs: List of - ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. - When provided, compute metrics for each defined slice. See sample code in + `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When + provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. @@ -165,11 +164,11 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -290,7 +289,7 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. 
For more details @@ -299,21 +298,21 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -331,20 +330,19 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. 
The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -356,18 +354,18 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -382,11 +380,11 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. 
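To make the parameter surface above concrete, here is a minimal, hypothetical sketch of compiling and submitting the regression evaluation pipeline with the KFP SDK and google-cloud-aiplatform. The import path, bucket, model resource name and field names are assumptions for illustration only, not values taken from this change.

from google.cloud import aiplatform
from kfp import compiler

# Assumption: the pipeline function compiles like any other KFP v2 pipeline.
from google_cloud_pipeline_components.v1.model_evaluation.evaluation_automl_tabular_pipeline import (
    evaluation_automl_tabular_regression_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_automl_tabular_regression_pipeline,
    package_path='evaluation_automl_tabular_regression_pipeline.json',
)

aiplatform.init(project='my-project', location='us-central1')  # placeholders

job = aiplatform.PipelineJob(
    display_name='automl-tabular-regression-evaluation',
    template_path='evaluation_automl_tabular_regression_pipeline.json',
    pipeline_root='gs://my-bucket/pipeline_root',
    parameter_values={
        'model_name': 'projects/my-project/locations/us-central1/models/1234567890',
        'target_field_name': 'price',
        'batch_predict_instances_format': 'jsonl',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/test_data/*.jsonl'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval_output',
        'force_runner_mode': 'DirectRunner',
        'encryption_spec_key_name': 'projects/my-project/locations/us-central1/keyRings/my-kr/cryptoKeys/my-key',
    },
)
job.submit()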
Returns: A google.RegressionMetrics artifact and imported @@ -505,7 +503,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -514,21 +512,21 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more @@ -546,20 +544,19 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. 
In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -571,21 +568,21 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. slicing_specs: List of - ``google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec``. - When provided, compute metrics for each defined slice. 
See sample code in + `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When + provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. @@ -603,11 +600,11 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index 5b14991b91..a35026914b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -72,7 +72,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -81,21 +81,21 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -114,20 +114,19 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. 
If the Model supports DEDICATED_RESOURCES this @@ -139,29 +138,29 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either ``1`` or ``0``, then the class_labels - input will be ["1", "0"]. + target_field_name could be either `1` or `0`, then the class_labels input + will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -176,11 +175,11 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -327,7 +326,7 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. target_field_name: The target field's name. 
Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -336,21 +335,21 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -369,20 +368,19 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. 
If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -394,21 +392,21 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -423,11 +421,11 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d encryption_spec_key_name: Customer-managed encryption key options. 
If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. Returns: A Tuple of google.RegressionMetrics artifact and the imported evaluation @@ -572,7 +570,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -581,21 +579,21 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. 
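The directory naming scheme described above means the evaluation outputs can be located by prefix once the batch prediction step completes. A small sketch, with placeholder bucket and prefix names, of listing those files with the google-cloud-storage client:

from google.cloud import storage

# The batch prediction step creates a timestamped sub-directory such as
#   eval_output/prediction-<model-display-name>-<timestamp>/predictions_0001.jsonl
# plus errors_000N.jsonl files when individual instances fail (see above).
client = storage.Client(project='my-project')  # placeholder project
for blob in client.list_blobs('my-bucket', prefix='eval_output/prediction-'):
    print(blob.name)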
batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -614,20 +612,19 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -639,29 +636,29 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. 
For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either ``1`` or ``0``, then the class_labels - input will be ["1", "0"]. + target_field_name could be either `1` or `0`, then the class_labels input + will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -676,11 +673,11 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 609290f0d8..8eea0e9f32 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -74,7 +74,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -83,21 +83,21 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -116,20 +116,19 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. 
If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -141,48 +140,47 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. 
batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either ``1`` or ``0``, then the class_labels - input will be ["1", "0"]. + target_field_name could be either `1` or `0`, then the class_labels input + will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -197,11 +195,11 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -387,7 +385,7 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -396,21 +394,21 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -429,20 +427,19 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. 
If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -454,40 +451,39 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. 
batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -502,11 +498,11 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. Returns: A google.RegressionMetrics artifact. @@ -689,7 +685,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul model_name: The Vertex model resource name to be imported and used for batch prediction. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details @@ -698,21 +694,21 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp is - in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, ..., - ``predictions_N.`` are created where ```` depends on - chosen ``predictions_format``, and N may equal 0001 and depends on the - total number of successfully predicted instances. If the Model has both - ``instance`` and ``prediction`` schemata defined then each such file - contains predictions as per the ``predictions_format``. 
If prediction for - any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., - ``errors_N.`` files are created (N depends on total number of + `prediction--`, where timestamp is in + YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + `predictions_0001.`, `predictions_0002.`, ..., + `predictions_N.` are created where `` depends on + chosen `predictions_format`, and N may equal 0001 and depends on the total + number of successfully predicted instances. If the Model has both + `instance` and `prediction` schemata defined then each such file contains + predictions as per the `predictions_format`. If prediction for any + instance failed (partially or completely), then an additional + `errors_0001.`, `errors_0002.`,..., + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per - their schema, followed by an additional ``error`` field which as value has - ``google.rpc.Status`` containing only ``code`` and ``message`` fields. For - more details about this output config, see + their schema, followed by an additional `error` field which as value has + `google.rpc.Status` containing only `code` and `message` fields. For more + details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also @@ -731,20 +727,19 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset - is created with name ``prediction__`` + is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, ``predictions``, and ``errors``. If the Model has - both ``instance`` and ``prediction`` schemata defined then the tables have - columns as follows: The ``predictions`` table contains instances for which - the prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The ``errors`` table contains - rows for which the prediction has failed, it has instance columns, as per - the instance schema, followed by a single "errors" column, which as values - has ````google.rpc.Status`` ``__ represented as a STRUCT, and - containing only ``code`` and ``message``. For more details about this - output config, see + tables will be created, `predictions`, and `errors`. If the Model has both + `instance` and `prediction` schemata defined then the tables have columns + as follows: The `predictions` table contains instances for which the + prediction succeeded, it has columns as per a concatenation of the Model's + instance and prediction schemata. The `errors` table contains rows for + which the prediction has failed, it has instance columns, as per the + instance schema, followed by a single "errors" column, which as values has + `google.rpc.Status` represented as a STRUCT, and containing only `code` + and `message`. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this @@ -756,48 +751,47 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than ``max_replica_count``. Only used if - ``machine_type`` is set. + number, not greater than `max_replica_count`. Only used if `machine_type` + is set. batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if ``machine_type`` is set. + batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if ``generate_explanation`` - is set to ``True``. This value overrides the value of - ``Model.explanation_metadata``. All fields of ``explanation_metadata`` are - optional in the request. If a field of the ``explanation_metadata`` object + this BatchPredictionJob. Can be specified only if `generate_explanation` + is set to `True`. This value overrides the value of + `Model.explanation_metadata`. All fields of `explanation_metadata` are + optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the - ``Model.explanation_metadata`` object is inherited. For more details, see + `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if ``generate_explanation`` is - set to ``True``. This value overrides the value of - ``Model.explanation_parameters``. All fields of ``explanation_parameters`` - are optional in the request. If a field of the ``explanation_parameters`` - object is not populated, the corresponding field of the - ``Model.explanation_parameters`` object is inherited. For more details, - see + Model's predictions. Can be specified only if `generate_explanation` is + set to `True`. This value overrides the value of + `Model.explanation_parameters`. All fields of `explanation_parameters` are + optional in the request. If a field of the `explanation_parameters` object + is not populated, the corresponding field of the + `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per ``batch_predict_accelerator_count``. Only - used if ``batch_predict_machine_type`` is set. For more details about the + attached to the machine as per `batch_predict_accelerator_count`. Only + used if `batch_predict_machine_type` is set. 
For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the - ``batch_predict_machine_type``. Only used if - ``batch_predict_machine_type`` is set. + `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is + set. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, - delimited by ``.``. + delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either ``1`` or ``0``, then the class_labels - input will be ["1", "0"]. + target_field_name could be either `1` or `0`, then the class_labels input + will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. @@ -812,11 +806,11 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Indicate the runner mode to use forcely. Valid options - are ``Dataflow`` and ``DirectRunner``. + are `Dataflow` and `DirectRunner`. """ with kfp.dsl.Condition( prediction_type == 'classification', name='classification' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py index f45c05d5e1..927ececbf4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py @@ -52,7 +52,7 @@ def model_evaluation_forecasting( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Computes a ``google.ForecastingMetrics`` Artifact, containing evaluation + """Computes a `google.ForecastingMetrics` Artifact, containing evaluation metrics given a model's prediction results. Creates a Dataflow job with Apache Beam and TFMA to compute evaluation @@ -62,7 +62,7 @@ def model_evaluation_forecasting( Args: location: Location for running the evaluation. predictions_format: The file format for the batch - prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files @@ -73,8 +73,8 @@ def model_evaluation_forecasting( with prediction or explanation data to be used for this evaluation. 
For prediction results, the table column should be named "predicted_*". ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. ``jsonl``, - ``csv``, and ``bigquery`` are the allowed formats. + tabular data. The file format for the ground truth files. `jsonl`, + `csv`, and `bigquery` are the allowed formats. ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction @@ -85,23 +85,23 @@ def model_evaluation_forecasting( Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. forecasting_type: The forecasting type being addressed by - this evaluation run. ``point`` and ``quantile`` are the supported types. + this evaluation run. `point` and `quantile` are the supported types. forecasting_quantiles: Required for a - ``quantile`` forecasting_type. The list of quantiles in the same order + `quantile` forecasting_type. The list of quantiles in the same order appeared in the quantile prediction score column. - point_evaluation_quantile: Required for a ``quantile`` + point_evaluation_quantile: Required for a `quantile` forecasting_type. A quantile in the list of forecasting_quantiles that will be used for point evaluation metrics. target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, - delimited by ``.``. Alternatively referred to as the ground truth (or + delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested - columns, delimited by ``.``. + columns, delimited by `.`. dataflow_service_account: Service account to run the Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see @@ -123,17 +123,17 @@ def model_evaluation_forecasting( encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. - force_runner_mode: Flag to choose Beam runner. Valid options are ``DirectRunner`` - and ``Dataflow``. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` + and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: evaluation_metrics: - ``google.ForecastingMetrics`` representing the forecasting + `google.ForecastingMetrics` representing the forecasting evaluation metrics in GCS. gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. 
For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py index 1502f91824..48bdc63b36 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py @@ -49,7 +49,7 @@ def model_evaluation_regression( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Computes a ``google.RegressionMetrics`` Artifact, containing evaluation + """Computes a `google.RegressionMetrics` Artifact, containing evaluation metrics given a model's prediction results. Creates a Dataflow job with Apache Beam and TFMA to compute evaluation @@ -59,7 +59,7 @@ def model_evaluation_regression( Args: location: Location for running the evaluation. predictions_format: The file format for the batch - prediction results. ``jsonl``, ``csv``, and ``bigquery`` are the allowed + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files @@ -70,8 +70,8 @@ def model_evaluation_regression( with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. ``jsonl``, - ``csv``, and ``bigquery`` are the allowed formats. + tabular data. The file format for the ground truth files. `jsonl`, + `csv`, and `bigquery` are the allowed formats. ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction @@ -82,14 +82,14 @@ def model_evaluation_regression( Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by ``.``. Prefixed with 'instance.' on the + nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested - columns, delimited by ``.``. + columns, delimited by `.`. dataflow_service_account: Service account to run the Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see @@ -111,16 +111,16 @@ def model_evaluation_regression( encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. force_runner_mode: Flag to choose Beam runner. Valid options are - ``DirectRunner`` and ``Dataflow``. 
+ `DirectRunner` and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: evaluation_metrics: - ``google.RegressionMetrics`` representing the regression + `google.RegressionMetrics` representing the regression evaluation metrics in GCS. gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py index 49f997e81f..34e78bafc2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py @@ -28,11 +28,11 @@ def vertex_pipelines_notification_email( # fmt: off """Send notification email(s) when an upstream task/DAG completes. - This component can only be used as an `ExitHandler `_'s exit task. Note that the `PipelineTaskFinalStatus `_ is provided automatically by Vertex Pipelines at runtime. You should not provide any input to this parameter when you instantiate this component as a task. + This component can only be used as an [ExitHandler](https://www.kubeflow.org/docs/components/pipelines/v2/pipelines/control-flow/#exit-handling-dslexithandler)'s exit task. Note that the [PipelineTaskFinalStatus](https://kubeflow-pipelines.readthedocs.io/en/latest/source/dsl.html#kfp.dsl.PipelineTaskFinalStatus) is provided automatically by Vertex Pipelines at runtime. You should not provide any input to this parameter when you instantiate this component as a task. This component works only on Vertex Pipelines. This component raises an exception when run on Kubeflow Pipelines. - See a `usage example `_. + See a [usage example](https://cloud.google.com/vertex-ai/docs/pipelines/email-notifications). Args: recipients: A list of email addresses to send a notification to. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py index 8c7e4f8e84..9e26933388 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py @@ -26,11 +26,11 @@ def wait_gcp_resources( # fmt: off """Waits for the completion of one or more GCP resources by polling for completion statuses. - Currently this component only supports waiting on a `DataflowJob `_ resource. + Currently this component only supports waiting on a [DataflowJob](https://cloud.google.com/config-connector/docs/reference/resource-docs/dataflow/dataflowjob) resource. - To use this component, first create a component that outputs a ``gcp_resources`` proto as JSON, then pass it to this component's ``gcp_resources`` parameter. + To use this component, first create a component that outputs a `gcp_resources` proto as JSON, then pass it to this component's `gcp_resources` parameter. - See `details `_ on how to create a ``gcp_resources`` proto as a component output. + See [details](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) on how to create a `gcp_resources` proto as a component output. 
Examples: :: @@ -44,10 +44,10 @@ def wait_gcp_resources( ) Args: - gcp_resources: Serialized JSON of ``gcp_resources`` proto, indicating the resource(s) this component should wait on. + gcp_resources: Serialized JSON of `gcp_resources` proto, indicating the resource(s) this component should wait on. Returns: - gcp_resources: The ``gcp_resource``, including any relevant error information. + gcp_resources: The `gcp_resource`, including any relevant error information. """ # fmt: on diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index 0561139b6a..dbc7d91ef9 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -67,6 +67,7 @@ "protobuf<4.0.0dev,>=3.19.0", "grpcio-status<=1.47.0", ] + [ + "commonmark==0.9.1", "autodocsumm==0.2.9", "sphinx==5.0.2", "sphinx-immaterial==0.9.0", From ef0788d98690d0c70f747d8900ed719ce1328b35 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 5 Sep 2023 12:25:31 -0500 Subject: [PATCH 132/253] feat(sdk): add logging at end of executor execution (#9895) * feat(sdk): add logging at end of executor execution * Update executor.py * Update executor.py --- sdk/python/kfp/dsl/executor.py | 28 +++++++++++++++++++++++++--- sdk/python/kfp/dsl/executor_main.py | 6 +++++- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp/dsl/executor.py index e153f42f3f..63fcbb039d 100644 --- a/sdk/python/kfp/dsl/executor.py +++ b/sdk/python/kfp/dsl/executor.py @@ -241,7 +241,19 @@ def _handle_single_return_value(self, output_name: str, f'Unknown return type: {annotation_type}. Must be one of the supported data types: https://www.kubeflow.org/docs/components/pipelines/v2/data-types/' ) - def _write_executor_output(self, func_output: Optional[Any] = None): + def _write_executor_output(self, + func_output: Optional[Any] = None + ) -> Optional[str]: + """Writes executor output containing the Python function output. The + executor output file will not be written if this code is executed from + a non-chief node in a mirrored execution strategy. + + Args: + func_output: The object returned by the function. + + Returns: + Optional[str]: Returns the location of the executor_output file as a string if the file is written. Else, None. + """ if self._output_artifacts: self._executor_output['artifacts'] = {} @@ -296,8 +308,18 @@ def _write_executor_output(self, func_output: Optional[Any] = None): os.makedirs(os.path.dirname(executor_output_path), exist_ok=True) with open(executor_output_path, 'w') as f: f.write(json.dumps(self._executor_output)) + return executor_output_path - def execute(self): + return None + + def execute(self) -> Optional[str]: + """Executes the function and writes the executor output file. The + executor output file will not be written if this code is executed from + a non-chief node in a mirrored execution strategy. + + Returns: + Optional[str]: Returns the location of the executor_output file as a string if the file is written. Else, None. + """ annotations = inspect.getfullargspec(self._func).annotations # Function arguments. 
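With this change, `Executor.execute()` surfaces whether the current process actually wrote the executor output file, which matters for mirrored execution strategies where only the chief node writes it. A minimal sketch of calling code that consumes the new `Optional[str]` contract, assuming an `executor_input` dict and a `my_component_func` Python function are already available (both names are illustrative, not part of this patch)::

    import logging

    from kfp.dsl import executor as component_executor


    def run_component(executor_input: dict, my_component_func) -> None:
        # Build the executor the same way executor_main.py does.
        ex = component_executor.Executor(
            executor_input=executor_input,
            function_to_execute=my_component_func)
        # execute() now returns the executor_output path, or None when this
        # process is a non-chief node in a mirrored execution strategy.
        output_file = ex.execute()
        if output_file is None:
            logging.info('No executor output file was written.')
        else:
            logging.info('Executor output written to %s.', output_file)
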
@@ -344,7 +366,7 @@ def execute(self): func_kwargs[k] = self._get_input_artifact_path(k) result = self._func(**func_kwargs) - self._write_executor_output(result) + return self._write_executor_output(result) def create_artifact_instance( diff --git a/sdk/python/kfp/dsl/executor_main.py b/sdk/python/kfp/dsl/executor_main.py index 1836ea5889..61e9406dd5 100644 --- a/sdk/python/kfp/dsl/executor_main.py +++ b/sdk/python/kfp/dsl/executor_main.py @@ -98,7 +98,11 @@ def executor_main(): executor = component_executor.Executor( executor_input=executor_input, function_to_execute=function_to_execute) - executor.execute() + output_file = executor.execute() + if output_file is None: + logging.info('Did not write output file.') + else: + logging.info(f'Wrote executor output file to {output_file}.') if __name__ == '__main__': From cac185653317326459ff1f4a107b86c29aedaf59 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 5 Sep 2023 10:54:15 -0700 Subject: [PATCH 133/253] feat(components): Embedding eval pipeline for experimental launch PiperOrigin-RevId: 562829441 --- .../model_evaluation/__init__.py | 2 + .../llm_embedding/__init__.py | 14 ++ .../evaluation_llm_embedding_pipeline.py | 226 ++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 075ccdd67d..fe52d91dcc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -25,6 +25,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding.evaluation_llm_embedding_pipeline import evaluation_llm_embedding_pipeline from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp @@ -36,6 +37,7 @@ __all__ = [ 'evaluation_llm_safety_bias_pipeline', + 'evaluation_llm_embedding_pipeline', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', 'ErrorAnalysisAnnotationOp', diff --git 
a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/__init__.py new file mode 100644 index 0000000000..5a34bea2c6 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Embedding Evaluation Pipeline.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py new file mode 100644 index 0000000000..d6665b2b0b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py @@ -0,0 +1,226 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""LLM embedding evaluation pipeline based on information retrieval (IR) task.""" + +from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_embedding_retrieval.component import llm_embedding_retrieval as LLMEmbeddingRetrievalOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_information_retrieval_preprocessor.component import llm_information_retrieval_preprocessor as LLMInformationRetrievalPreprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.llm_retrieval_metrics.component import llm_retrieval_metrics as LLMRetrievalMetricsOp +from google_cloud_pipeline_components.types.artifact_types import VertexModel +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp +import kfp +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +_PIPELINE_NAME = 'evaluation-llm-embedding-pipeline' + + +@kfp.dsl.pipeline(name=_PIPELINE_NAME) +def evaluation_llm_embedding_pipeline( + project: str, + location: str, + corpus_gcs_source: str, + query_gcs_source: str, + golden_docs_gcs_source: str, + model_name: str, + batch_predict_instances_format: str = 'jsonl', + batch_predict_predictions_format: str = 'jsonl', + embedding_retrieval_top_n: int = 10, + retrieval_metrics_top_k_list: str = '10', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + runner: str = 'DirectRunner', + dataflow_service_account: str = '', + dataflow_disk_size_gb: int = 50, + dataflow_machine_type: str = 'n1-standard-4', + dataflow_workers_num: int = 1, + dataflow_max_workers_num: int = 5, + dataflow_subnetwork: str = '', + dataflow_use_public_ips: bool = True, + encryption_spec_key_name: str = '', +): + """The LLM Embedding Evaluation Pipeline. + + Args: + project: Required. The GCP project that runs the pipeline components. + location: Required. The GCP region that runs the pipeline components. + corpus_gcs_source: The gcs location for json file containing corpus + documents. + query_gcs_source: The gcs location for json file containing query documents. + golden_docs_gcs_source: The gcs location for csv file containing mapping of + each query to the golden docs. + model_name: The path for model to generate embeddings. + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_instances_format: The format in which perdictions are made, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + embedding_retrieval_top_n: Top N docs will be retrieved for each query, + based on similarity. + retrieval_metrics_top_k_list: k values for retrieval metrics, for example, + precision@k, accuracy@k, etc. If more than one value, separated by comma. + e.g., "1,5,10". + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16`. 
More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Service account to run the dataflow job. If not set, + dataflow will use the default worker service account. For more details, + see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + network: Dataflow's fully qualified subnetwork name, when empty the default + subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + runner: runner for the beam pipeline. DirectRunner and DataflowRunner are + supported. + dataflow_service_account: Service account to run the dataflow job. If not + set, dataflow will use the default worker service account. For more + details, see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the + evaluation run. + dataflow_machine_type: The machine type executing the evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the evaluation + run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty + the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP + addresses. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. 
+ """ + + preprocessing_task = LLMInformationRetrievalPreprocessorOp( + project=project, + location=location, + corpus_gcs_source=corpus_gcs_source, + query_gcs_source=query_gcs_source, + golden_docs_gcs_source=golden_docs_gcs_source, + machine_type=machine_type, + service_account=service_account, + network=network, + runner=runner, + dataflow_service_account=dataflow_service_account, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_machine_type=dataflow_machine_type, + dataflow_workers_num=dataflow_workers_num, + dataflow_max_workers_num=dataflow_max_workers_num, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + + get_vertex_model_task = kfp.dsl.importer( + artifact_uri=( + f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' + ), + artifact_class=VertexModel, + metadata={'resourceName': model_name}, + ) + get_vertex_model_task.set_display_name('get-vertex-model') + + batch_predict_corpus = ModelBatchPredictOp( + project=project, + location=location, + model=get_vertex_model_task.outputs['artifact'], + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=preprocessing_task.outputs[ + 'predictions_corpus_gcs_source' + ], + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + ) + + batch_predict_query = ModelBatchPredictOp( + project=project, + location=location, + model=get_vertex_model_task.outputs['artifact'], + job_display_name='evaluation-batch-predict-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + gcs_source_uris=preprocessing_task.outputs[ + 'predictions_query_gcs_source' + ], + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + ) + + # TODO(b/290838262): Revisit if/when the concurrent jobs limit is increased/removed. 
+ batch_predict_query.after(batch_predict_corpus) + + embedding_retrieval_task = LLMEmbeddingRetrievalOp( + project=project, + location=location, + query_embedding_source_directory=batch_predict_query.outputs[ + 'gcs_output_directory' + ], + doc_embedding_source_directory=batch_predict_corpus.outputs[ + 'gcs_output_directory' + ], + embedding_retrieval_top_n=embedding_retrieval_top_n, + machine_type=machine_type, + service_account=service_account, + network=network, + runner=runner, + dataflow_service_account=dataflow_service_account, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_machine_type=dataflow_machine_type, + dataflow_workers_num=dataflow_workers_num, + dataflow_max_workers_num=dataflow_max_workers_num, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + + retrieval_metrics_task = LLMRetrievalMetricsOp( + project=project, + location=location, + golden_docs_pattern=preprocessing_task.outputs[ + 'embedding_retrieval_gcs_source' + ], + embedding_retrieval_results_pattern=embedding_retrieval_task.outputs[ + 'embedding_retrieval_results_path' + ], + retrieval_metrics_top_k_list=retrieval_metrics_top_k_list, + machine_type=machine_type, + service_account=service_account, + network=network, + runner=runner, + dataflow_service_account=dataflow_service_account, + dataflow_disk_size_gb=dataflow_disk_size_gb, + dataflow_machine_type=dataflow_machine_type, + dataflow_workers_num=dataflow_workers_num, + dataflow_max_workers_num=dataflow_max_workers_num, + dataflow_subnetwork=dataflow_subnetwork, + dataflow_use_public_ips=dataflow_use_public_ips, + encryption_spec_key_name=encryption_spec_key_name, + ) + + ModelImportEvaluationOp( + embedding_metrics=retrieval_metrics_task.outputs['retrieval_metrics'], + model=get_vertex_model_task.outputs['artifact'], + display_name=_PIPELINE_NAME, + ) From 959f54efd7f001eb7b585198384d8b489bdafb26 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 5 Sep 2023 13:32:31 -0500 Subject: [PATCH 134/253] chore(sdk): release kfp sdk 2.1.3 (#9896) * release kfp sdk 2.1.3 * update tests * update OWNERS file --- docs/OWNERS | 10 ++-- docs/conf.py | 4 +- sdk/RELEASE.md | 11 ++++ sdk/python/kfp/__init__.py | 2 +- sdk/python/kfp/dsl/component_factory_test.py | 58 +++++++++++++------- 5 files changed, 56 insertions(+), 29 deletions(-) diff --git a/docs/OWNERS b/docs/OWNERS index 7b6c1ed48a..bbea284a9d 100644 --- a/docs/OWNERS +++ b/docs/OWNERS @@ -1,8 +1,6 @@ approvers: - - Ark-kun - - gaoning777 - - hongye-sun + - chensun + - connor-mccarthy reviewers: - - Ark-kun - - gaoning777 - - hongye-sun \ No newline at end of file + - chensun + - connor-mccarthy diff --git a/docs/conf.py b/docs/conf.py index fabc7e26ae..56d6c769ae 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -134,9 +134,9 @@ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags { 'version': - 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.1/', + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.3/', 'title': - '2.1.2', + '2.1.3', 'aliases': ['stable'], }, { diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 2f77057d37..0395401544 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -1,6 +1,17 @@ # Current Version (in development) +## Features + +## Breaking changes + +## Deprecations + +## Bug fixes and other changes + +## Documentation updates +# 2.1.3 + ## Features * Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight 
Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9886](https://github.com/kubeflow/pipelines/pull/9886) diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 5bcc914a18..ce4bee8252 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.1.2' +__version__ = '2.1.3' TYPE_CHECK = True diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py index 883c406efd..aa71c11b4f 100644 --- a/sdk/python/kfp/dsl/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re from typing import List import unittest @@ -26,6 +27,13 @@ from kfp.dsl.types.type_annotations import OutputPath +def strip_kfp_version(command: List[str]) -> List[str]: + return [ + re.sub(r"'kfp==(\d+).(\d+).(\d+)(-[a-z]+.\d+)?'", 'kfp', c) + for c in command + ] + + class TestGetPackagesToInstallCommand(unittest.TestCase): def test_with_no_user_packages_to_install(self): @@ -34,10 +42,12 @@ def test_with_no_user_packages_to_install(self): command = component_factory._get_packages_to_install_command( packages_to_install=packages_to_install) - self.assertEqual(command, [ - 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' - ]) + self.assertEqual( + strip_kfp_version(command), + strip_kfp_version([ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ])) def test_with_no_user_packages_to_install_and_install_kfp_false(self): packages_to_install = [] @@ -56,10 +66,12 @@ def test_with_no_user_packages_to_install_and_kfp_package_path(self): kfp_package_path='git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python' ) - self.assertEqual(command, [ - 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n' - ]) + self.assertEqual( + strip_kfp_version(command), + strip_kfp_version([ + 'sh', '-c', + '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n' + ])) def test_with_no_user_packages_to_install_and_kfp_package_path_and_install_kfp_false( self): @@ -82,10 +94,12 @@ def test_with_user_packages_to_install_and_kfp_package_path_and_install_kfp_fals install_kfp_package=False, ) - self.assertEqual(command, [ - 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n' - ]) + self.assertEqual( + strip_kfp_version(command), + strip_kfp_version([ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n' + ])) def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image( self): @@ -116,10 +130,12 @@ def test_with_user_packages_to_install_and_no_pip_index_url(self): command = component_factory._get_packages_to_install_command( packages_to_install=packages_to_install) - self.assertEqual(command, [ - 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' - ]) + self.assertEqual( + strip_kfp_version(command), + strip_kfp_version([ + 'sh', '-c', + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ])) def test_with_packages_to_install_with_pip_index_url(self): packages_to_install = ['package1', 'package2'] @@ -130,10 +146,12 @@ def test_with_packages_to_install_with_pip_index_url(self): pip_index_urls=pip_index_urls, ) - self.assertEqual(command, [ - 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' - ]) + self.assertEqual( + strip_kfp_version(command), + strip_kfp_version([ + 'sh', '-c', + '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + ])) class TestInvalidParameterName(unittest.TestCase): From e21174f94aa75f48b6ae99f4c4b64f82d91bffd9 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 5 Sep 2023 12:40:39 -0700 Subject: [PATCH 135/253] feat(components): Add sampling_strategy parameter to bulk inferrer to support different strategy. By default, we use greedy PiperOrigin-RevId: 562860317 --- .../_implementation/llm/bulk_inferrer.py | 3 +++ .../preview/llm/infer/component.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py index 15be32859a..0bb327fbf3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/bulk_inferrer.py @@ -36,6 +36,7 @@ def BulkInferrer( # pylint: disable=invalid-name output_prediction: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation output_prediction_gcs_path: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation gcp_resources: kfp.dsl.OutputPath(str), # pytype: disable=invalid-annotation + sampling_strategy: str = 'greedy', ) -> kfp.dsl.ContainerSpec: # pylint: disable=g-doc-args """Performs bulk inference. @@ -53,6 +54,7 @@ def BulkInferrer( # pylint: disable=invalid-name input_model: Model to use for inference. large_model_reference: Predefined model used to create the ``input_model``. input_dataset_path: Path to dataset to use for inference. + sampling_strategy: The sampling strategy for inference. dataset_split: Perform inference on this split of the input dataset. Returns: @@ -76,6 +78,7 @@ def BulkInferrer( # pylint: disable=invalid-name f'--large_model_reference={large_model_reference}', f'--inputs_sequence_length={inputs_sequence_length}', f'--targets_sequence_length={targets_sequence_length}', + f'--sampling_strategy={sampling_strategy}', f'--output_prediction={output_prediction}', f'--output_prediction_gcs_path={output_prediction_gcs_path}', ], diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index 03be883b3c..2f009f9d92 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -37,6 +37,7 @@ def infer_pipeline( prompt_dataset: str, prompt_sequence_length: int = 512, target_sequence_length: int = 64, + sampling_strategy: str = 'greedy', instruction: Optional[str] = None, project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, @@ -59,6 +60,8 @@ def infer_pipeline( target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64. + sampling_strategy: This field specifies the sampling strategy. 
The valid + options are 'greedy' and 'temperature_sampling'. instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will @@ -115,6 +118,7 @@ def infer_pipeline( large_model_reference=reference_model_metadata.outputs[ 'large_model_reference' ], + sampling_strategy=sampling_strategy, accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], machine_type=machine_spec.outputs['machine_type'], From 2b05ec867fad84e24fe73ef7515e3b5849297e79 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 5 Sep 2023 15:57:31 -0500 Subject: [PATCH 136/253] chore(sdk): remove old file (#9959) --- sdk/python/install_from_source.sh | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 sdk/python/install_from_source.sh diff --git a/sdk/python/install_from_source.sh b/sdk/python/install_from_source.sh deleted file mode 100644 index 6fb0bce65e..0000000000 --- a/sdk/python/install_from_source.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -pip3 install -e sdk/python/kfp-dsl -pip3 install -e sdk/python From 3b8cea060fc3088520666fea26e6452bda2fdb15 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Thu, 7 Sep 2023 13:03:01 -0700 Subject: [PATCH 137/253] fix(components): Have RLHF importer use default image if override is falsy PiperOrigin-RevId: 563521264 --- .../_implementation/llm/private_text_importer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py index ecfd40c0fb..36d7d4986a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/private_text_importer.py @@ -21,7 +21,8 @@ def _resolve_image(default: str = '') -> str: - return os.environ.get('TEXT_IMPORTER_IMAGE_OVERRIDE', default) + return os.environ.get('TEXT_IMPORTER_IMAGE_OVERRIDE') or default + # pytype: disable=unsupported-operands @dsl.container_component From 760c1589edbe58bbd77611222a66a17b371a0d08 Mon Sep 17 00:00:00 2001 From: Diana Atanasova Date: Fri, 8 Sep 2023 02:49:33 +0300 Subject: [PATCH 138/253] feat(backend): enforce SA Token based auth b/w Persistence Agent and Pipeline API Server (#9957) * Enforece SA-Toben auth b/n Persistence agent & Pipeline server for all reqs Signed-off-by: Diana Atanasova * Fix persistence agent license file Signed-off-by: Diana Atanasova --------- Signed-off-by: Diana Atanasova --- .../persistence/client/fake_namespace.go | 85 ------------------ .../persistence/client/kubernetes_core.go | 87 ------------------- .../client/kubernetes_core_fake.go | 37 -------- .../persistence/client/pipeline_client.go | 60 ++++++------- .../client/pipeline_client_fake.go | 4 +- backend/src/agent/persistence/main.go | 5 -- .../agent/persistence/persistence_agent.go | 3 +- .../persistence/worker/metrics_reporter.go | 6 +- .../worker/metrics_reporter_test.go | 31 +++---- .../worker/persistence_worker_test.go | 23 ++--- .../persistence/worker/workflow_saver.go | 11 +-- .../persistence/worker/workflow_saver_test.go | 53 ++--------- backend/src/common/util/execution_status.go | 4 +- backend/src/common/util/workflow.go | 12 +-- 
.../persistence_agent.csv | 9 -- .../persistence-agent/cluster-role.yaml | 7 +- .../persistence-agent/deployment-patch.yaml | 7 -- .../persistence-agent/kustomization.yaml | 4 - .../multi-user/persistence-agent/params.env | 1 - ...-pipeline-persistenceagent-deployment.yaml | 4 - .../ml-pipeline-persistenceagent-role.yaml | 7 +- 21 files changed, 81 insertions(+), 379 deletions(-) delete mode 100644 backend/src/agent/persistence/client/fake_namespace.go delete mode 100644 backend/src/agent/persistence/client/kubernetes_core.go delete mode 100644 backend/src/agent/persistence/client/kubernetes_core_fake.go delete mode 100644 manifests/kustomize/base/installs/multi-user/persistence-agent/params.env diff --git a/backend/src/agent/persistence/client/fake_namespace.go b/backend/src/agent/persistence/client/fake_namespace.go deleted file mode 100644 index bbc8c8e022..0000000000 --- a/backend/src/agent/persistence/client/fake_namespace.go +++ /dev/null @@ -1,85 +0,0 @@ -package client - -import ( - "context" - "errors" - "github.com/golang/glog" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - types "k8s.io/apimachinery/pkg/types" - watch "k8s.io/apimachinery/pkg/watch" - corev1 "k8s.io/client-go/applyconfigurations/core/v1" -) - -type FakeNamespaceClient struct { - namespace string - user string -} - -func (f *FakeNamespaceClient) SetReturnValues(namespace string, user string) { - f.namespace = namespace - f.user = user -} - -func (f FakeNamespaceClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.Namespace, error) { - if f.namespace == name && len(f.user) != 0 { - ns := v1.Namespace{ObjectMeta: metav1.ObjectMeta{ - Namespace: f.namespace, - Annotations: map[string]string{ - "owner": f.user, - }, - }} - return &ns, nil - } - return nil, errors.New("failed to get namespace") -} - -func (f FakeNamespaceClient) Create(ctx context.Context, namespace *v1.Namespace, opts metav1.CreateOptions) (*v1.Namespace, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Update(ctx context.Context, namespace *v1.Namespace, opts metav1.UpdateOptions) (*v1.Namespace, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) UpdateStatus(ctx context.Context, namespace *v1.Namespace, opts metav1.UpdateOptions) (*v1.Namespace, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { - glog.Error("This fake method is not yet implemented.") - return nil -} - -func (f FakeNamespaceClient) List(ctx context.Context, opts metav1.ListOptions) (*v1.NamespaceList, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.Namespace, err error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Apply(ctx context.Context, namespace *corev1.NamespaceApplyConfiguration, opts metav1.ApplyOptions) (result *v1.Namespace, err error) { - glog.Error("This fake method is not yet 
implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) ApplyStatus(ctx context.Context, namespace *corev1.NamespaceApplyConfiguration, opts metav1.ApplyOptions) (result *v1.Namespace, err error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} - -func (f FakeNamespaceClient) Finalize(ctx context.Context, item *v1.Namespace, opts metav1.UpdateOptions) (*v1.Namespace, error) { - glog.Error("This fake method is not yet implemented.") - return nil, nil -} diff --git a/backend/src/agent/persistence/client/kubernetes_core.go b/backend/src/agent/persistence/client/kubernetes_core.go deleted file mode 100644 index 25605ba88a..0000000000 --- a/backend/src/agent/persistence/client/kubernetes_core.go +++ /dev/null @@ -1,87 +0,0 @@ -package client - -import ( - "context" - "fmt" - "os" - "time" - - "github.com/cenkalti/backoff" - "github.com/golang/glog" - "github.com/pkg/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - v1 "k8s.io/client-go/kubernetes/typed/core/v1" - "k8s.io/client-go/rest" - - "github.com/kubeflow/pipelines/backend/src/common/util" -) - -type KubernetesCoreInterface interface { - NamespaceClient() v1.NamespaceInterface - GetNamespaceOwner(namespace string) (string, error) -} - -type KubernetesCore struct { - coreV1Client v1.CoreV1Interface -} - -func (c *KubernetesCore) NamespaceClient() v1.NamespaceInterface { - return c.coreV1Client.Namespaces() -} - -func (c *KubernetesCore) GetNamespaceOwner(namespace string) (string, error) { - if os.Getenv("MULTIUSER") == "" || os.Getenv("MULTIUSER") == "false" { - return "", nil - } - ns, err := c.NamespaceClient().Get(context.Background(), namespace, metav1.GetOptions{}) - if err != nil { - return "", errors.Wrapf(err, "failed to get namespace '%v'", namespace) - } - owner, ok := ns.Annotations["owner"] - if !ok { - return "", errors.New(fmt.Sprintf("namespace '%v' has no owner in the annotations", namespace)) - } - return owner, nil -} - -func createKubernetesCore(clientParams util.ClientParameters) (KubernetesCoreInterface, error) { - clientSet, err := getKubernetesClientset(clientParams) - if err != nil { - return nil, err - } - return &KubernetesCore{clientSet.CoreV1()}, nil -} - -// CreateKubernetesCoreOrFatal creates a new client for the Kubernetes pod. -func CreateKubernetesCoreOrFatal(initConnectionTimeout time.Duration, clientParams util.ClientParameters) KubernetesCoreInterface { - var client KubernetesCoreInterface - var err error - var operation = func() error { - client, err = createKubernetesCore(clientParams) - return err - } - b := backoff.NewExponentialBackOff() - b.MaxElapsedTime = initConnectionTimeout - err = backoff.Retry(operation, b) - - if err != nil { - glog.Fatalf("Failed to create namespace client. 
Error: %v", err) - } - return client -} - -func getKubernetesClientset(clientParams util.ClientParameters) (*kubernetes.Clientset, error) { - restConfig, err := rest.InClusterConfig() - if err != nil { - return nil, errors.Wrap(err, "Failed to initialize kubernetes client.") - } - restConfig.QPS = float32(clientParams.QPS) - restConfig.Burst = clientParams.Burst - - clientSet, err := kubernetes.NewForConfig(restConfig) - if err != nil { - return nil, errors.Wrap(err, "Failed to initialize kubernetes client set.") - } - return clientSet, nil -} diff --git a/backend/src/agent/persistence/client/kubernetes_core_fake.go b/backend/src/agent/persistence/client/kubernetes_core_fake.go deleted file mode 100644 index 73fa0e34fe..0000000000 --- a/backend/src/agent/persistence/client/kubernetes_core_fake.go +++ /dev/null @@ -1,37 +0,0 @@ -package client - -import ( - "context" - "errors" - "fmt" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1 "k8s.io/client-go/kubernetes/typed/core/v1" -) - -type KubernetesCoreFake struct { - coreV1ClientFake *FakeNamespaceClient -} - -func (c *KubernetesCoreFake) NamespaceClient() v1.NamespaceInterface { - return c.coreV1ClientFake -} - -func (c *KubernetesCoreFake) GetNamespaceOwner(namespace string) (string, error) { - ns, err := c.NamespaceClient().Get(context.Background(), namespace, metav1.GetOptions{}) - if err != nil { - return "", err - } - owner, ok := ns.Annotations["owner"] - if !ok { - return "", errors.New(fmt.Sprintf("namespace '%v' has no owner in the annotations", namespace)) - } - return owner, nil -} - -func NewKubernetesCoreFake() *KubernetesCoreFake { - return &KubernetesCoreFake{&FakeNamespaceClient{}} -} -func (c *KubernetesCoreFake) Set(namespaceToReturn string, userToReturn string) { - c.coreV1ClientFake.SetReturnValues(namespaceToReturn, userToReturn) -} diff --git a/backend/src/agent/persistence/client/pipeline_client.go b/backend/src/agent/persistence/client/pipeline_client.go index e1725cc20c..2535993361 100644 --- a/backend/src/agent/persistence/client/pipeline_client.go +++ b/backend/src/agent/persistence/client/pipeline_client.go @@ -17,11 +17,9 @@ package client import ( "context" "fmt" - "os" "strings" "time" - "github.com/kubeflow/pipelines/backend/src/apiserver/common" "google.golang.org/grpc/metadata" api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" @@ -38,8 +36,8 @@ const ( type PipelineClientInterface interface { ReportWorkflow(workflow util.ExecutionSpec) error ReportScheduledWorkflow(swf *util.ScheduledWorkflow) error - ReadArtifact(request *api.ReadArtifactRequest, user string) (*api.ReadArtifactResponse, error) - ReportRunMetrics(request *api.ReportRunMetricsRequest, user string) (*api.ReportRunMetricsResponse, error) + ReadArtifact(request *api.ReadArtifactRequest) (*api.ReadArtifactResponse, error) + ReportRunMetrics(request *api.ReportRunMetricsRequest) (*api.ReportRunMetricsResponse, error) } type PipelineClient struct { @@ -173,17 +171,26 @@ func (p *PipelineClient) ReportScheduledWorkflow(swf *util.ScheduledWorkflow) er // ReadArtifact reads artifact content from run service. If the artifact is not present, returns // nil response. 
-func (p *PipelineClient) ReadArtifact(request *api.ReadArtifactRequest, user string) (*api.ReadArtifactResponse, error) { +func (p *PipelineClient) ReadArtifact(request *api.ReadArtifactRequest) (*api.ReadArtifactResponse, error) { pctx := context.Background() - if user != "" { - pctx = metadata.AppendToOutgoingContext(pctx, getKubeflowUserIDHeader(), - getKubeflowUserIDPrefix()+user) - } + pctx = metadata.AppendToOutgoingContext(pctx, "Authorization", + "Bearer "+p.tokenRefresher.GetToken()) + ctx, cancel := context.WithTimeout(pctx, time.Minute) defer cancel() response, err := p.runServiceClient.ReadArtifactV1(ctx, request) if err != nil { + statusCode, _ := status.FromError(err) + if statusCode.Code() == codes.Unauthenticated && strings.Contains(err.Error(), "service account token has expired") { + // If unauthenticated because SA token is expired, re-read/refresh the token and try again + p.tokenRefresher.RefreshToken() + return nil, util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, + "Error while reporting workflow resource (code: %v, message: %v): %v", + statusCode.Code(), + statusCode.Message(), + err.Error()) + } // TODO(hongyes): check NotFound error code before skip the error. return nil, nil } @@ -192,17 +199,26 @@ func (p *PipelineClient) ReadArtifact(request *api.ReadArtifactRequest, user str } // ReportRunMetrics reports run metrics to run service. -func (p *PipelineClient) ReportRunMetrics(request *api.ReportRunMetricsRequest, user string) (*api.ReportRunMetricsResponse, error) { +func (p *PipelineClient) ReportRunMetrics(request *api.ReportRunMetricsRequest) (*api.ReportRunMetricsResponse, error) { pctx := context.Background() - if user != "" { - pctx = metadata.AppendToOutgoingContext(pctx, getKubeflowUserIDHeader(), - getKubeflowUserIDPrefix()+user) - } + pctx = metadata.AppendToOutgoingContext(pctx, "Authorization", + "Bearer "+p.tokenRefresher.GetToken()) + ctx, cancel := context.WithTimeout(pctx, time.Minute) defer cancel() response, err := p.runServiceClient.ReportRunMetricsV1(ctx, request) if err != nil { + statusCode, _ := status.FromError(err) + if statusCode.Code() == codes.Unauthenticated && strings.Contains(err.Error(), "service account token has expired") { + // If unauthenticated because SA token is expired, re-read/refresh the token and try again + p.tokenRefresher.RefreshToken() + return nil, util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, + "Error while reporting workflow resource (code: %v, message: %v): %v", + statusCode.Code(), + statusCode.Message(), + err.Error()) + } // This call should always succeed unless the run doesn't exist or server is broken. In // either cases, the job should retry at a later time. 
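// A CUSTOM_CODE_TRANSIENT error is treated as retryable, so the persistence worker re-queues the report and tries again later.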
return nil, util.NewCustomError(err, util.CUSTOM_CODE_TRANSIENT, @@ -210,19 +226,3 @@ func (p *PipelineClient) ReportRunMetrics(request *api.ReportRunMetricsRequest, } return response, nil } - -// TODO use config file & viper and "github.com/kubeflow/pipelines/backend/src/apiserver/common.GetKubeflowUserIDHeader()" -func getKubeflowUserIDHeader() string { - if value, ok := os.LookupEnv(common.KubeflowUserIDHeader); ok { - return value - } - return common.GoogleIAPUserIdentityHeader -} - -// TODO use of viper & viper and "github.com/kubeflow/pipelines/backend/src/apiserver/common.GetKubeflowUserIDPrefix()" -func getKubeflowUserIDPrefix() string { - if value, ok := os.LookupEnv(common.KubeflowUserIDPrefix); ok { - return value - } - return common.GoogleIAPUserIdentityPrefix -} diff --git a/backend/src/agent/persistence/client/pipeline_client_fake.go b/backend/src/agent/persistence/client/pipeline_client_fake.go index 6b1ff3a03e..42e9bce25b 100644 --- a/backend/src/agent/persistence/client/pipeline_client_fake.go +++ b/backend/src/agent/persistence/client/pipeline_client_fake.go @@ -57,7 +57,7 @@ func (p *PipelineClientFake) ReportScheduledWorkflow(swf *util.ScheduledWorkflow return nil } -func (p *PipelineClientFake) ReadArtifact(request *api.ReadArtifactRequest, user string) (*api.ReadArtifactResponse, error) { +func (p *PipelineClientFake) ReadArtifact(request *api.ReadArtifactRequest) (*api.ReadArtifactResponse, error) { if p.err != nil { return nil, p.err } @@ -65,7 +65,7 @@ func (p *PipelineClientFake) ReadArtifact(request *api.ReadArtifactRequest, user return p.artifacts[request.String()], nil } -func (p *PipelineClientFake) ReportRunMetrics(request *api.ReportRunMetricsRequest, user string) (*api.ReportRunMetricsResponse, error) { +func (p *PipelineClientFake) ReportRunMetrics(request *api.ReportRunMetricsRequest) (*api.ReportRunMetricsResponse, error) { p.reportedMetricsRequest = request return p.reportMetricsResponseStub, p.reportMetricsErrorStub } diff --git a/backend/src/agent/persistence/main.go b/backend/src/agent/persistence/main.go index f8c26da385..4da32a7095 100644 --- a/backend/src/agent/persistence/main.go +++ b/backend/src/agent/persistence/main.go @@ -95,10 +95,6 @@ func main() { } else { swfInformerFactory = swfinformers.NewFilteredSharedInformerFactory(swfClient, time.Second*30, namespace, nil) } - k8sCoreClient := client.CreateKubernetesCoreOrFatal(DefaultConnectionTimeout, util.ClientParameters{ - QPS: clientQPS, - Burst: clientBurst, - }) tokenRefresher := client.NewTokenRefresher(time.Duration(saTokenRefreshIntervalInSecs)*time.Second, nil) err = tokenRefresher.StartTokenRefreshTicker() @@ -122,7 +118,6 @@ func main() { swfInformerFactory, execInformer, pipelineClient, - k8sCoreClient, util.NewRealTime()) go swfInformerFactory.Start(stopCh) diff --git a/backend/src/agent/persistence/persistence_agent.go b/backend/src/agent/persistence/persistence_agent.go index d234df09bf..d280b74dbd 100644 --- a/backend/src/agent/persistence/persistence_agent.go +++ b/backend/src/agent/persistence/persistence_agent.go @@ -46,7 +46,6 @@ func NewPersistenceAgent( swfInformerFactory swfinformers.SharedInformerFactory, execInformer util.ExecutionInformer, pipelineClient *client.PipelineClient, - k8sCoreClient client.KubernetesCoreInterface, time util.TimeInterface) *PersistenceAgent { // obtain references to shared informers swfInformer := swfInformerFactory.Scheduledworkflow().V1beta1().ScheduledWorkflows() @@ -63,7 +62,7 @@ func NewPersistenceAgent( workflowWorker := 
worker.NewPersistenceWorker(time, workflowregister.WorkflowKind, execInformer, true, - worker.NewWorkflowSaver(workflowClient, pipelineClient, k8sCoreClient, ttlSecondsAfterWorkflowFinish)) + worker.NewWorkflowSaver(workflowClient, pipelineClient, ttlSecondsAfterWorkflowFinish)) agent := &PersistenceAgent{ swfClient: swfClient, diff --git a/backend/src/agent/persistence/worker/metrics_reporter.go b/backend/src/agent/persistence/worker/metrics_reporter.go index 021ff970f5..c7a708cbf0 100644 --- a/backend/src/agent/persistence/worker/metrics_reporter.go +++ b/backend/src/agent/persistence/worker/metrics_reporter.go @@ -42,7 +42,7 @@ func NewMetricsReporter(pipelineClient client.PipelineClientInterface) *MetricsR } // ReportMetrics reports workflow metrics to pipeline server. -func (r MetricsReporter) ReportMetrics(workflow util.ExecutionSpec, user string) error { +func (r MetricsReporter) ReportMetrics(workflow util.ExecutionSpec) error { if !workflow.ExecutionStatus().HasMetrics() { return nil } @@ -52,14 +52,14 @@ func (r MetricsReporter) ReportMetrics(workflow util.ExecutionSpec, user string) // Skip reporting if the workflow doesn't have the run id label return nil } - runMetrics, partialFailures := workflow.ExecutionStatus().CollectionMetrics(r.pipelineClient.ReadArtifact, user) + runMetrics, partialFailures := workflow.ExecutionStatus().CollectionMetrics(r.pipelineClient.ReadArtifact) if len(runMetrics) == 0 { return aggregateErrors(partialFailures) } reportMetricsResponse, err := r.pipelineClient.ReportRunMetrics(&api.ReportRunMetricsRequest{ RunId: runID, Metrics: runMetrics, - }, user) + }) if err != nil { return err } diff --git a/backend/src/agent/persistence/worker/metrics_reporter_test.go b/backend/src/agent/persistence/worker/metrics_reporter_test.go index 7fa3ba000d..c2b43faf2c 100644 --- a/backend/src/agent/persistence/worker/metrics_reporter_test.go +++ b/backend/src/agent/persistence/worker/metrics_reporter_test.go @@ -32,11 +32,6 @@ import ( "k8s.io/apimachinery/pkg/types" ) -const ( - NamespaceName = "kf-namespace" - USER = "test-user@example.com" -) - func TestReportMetrics_NoCompletedNode_NoOP(t *testing.T) { pipelineFake := client.NewPipelineClientFake() @@ -57,7 +52,7 @@ func TestReportMetrics_NoCompletedNode_NoOP(t *testing.T) { }, }, }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.Nil(t, err) assert.Nil(t, pipelineFake.GetReportedMetricsRequest()) } @@ -82,7 +77,7 @@ func TestReportMetrics_NoRunID_NoOP(t *testing.T) { }, }, }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.Nil(t, err) assert.Nil(t, pipelineFake.GetReadArtifactRequest()) assert.Nil(t, pipelineFake.GetReportedMetricsRequest()) @@ -109,7 +104,7 @@ func TestReportMetrics_NoArtifact_NoOP(t *testing.T) { }, }, }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.Nil(t, err) assert.Nil(t, pipelineFake.GetReadArtifactRequest()) assert.Nil(t, pipelineFake.GetReportedMetricsRequest()) @@ -139,7 +134,7 @@ func TestReportMetrics_NoMetricsArtifact_NoOP(t *testing.T) { }, }, }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.Nil(t, err) assert.Nil(t, pipelineFake.GetReadArtifactRequest()) assert.Nil(t, pipelineFake.GetReportedMetricsRequest()) @@ -182,7 +177,7 @@ func TestReportMetrics_Succeed(t *testing.T) { Results: []*api.ReportRunMetricsResponse_ReportRunMetricResult{}, }, nil) - err1 := 
reporter.ReportMetrics(workflow, USER) + err1 := reporter.ReportMetrics(workflow) assert.Nil(t, err1) expectedMetricsRequest := &api.ReportRunMetricsRequest{ @@ -241,7 +236,7 @@ func TestReportMetrics_EmptyArchive_Fail(t *testing.T) { Data: []byte(artifactData), }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.NotNil(t, err) assert.True(t, util.HasCustomCode(err, util.CUSTOM_CODE_PERMANENT)) @@ -284,7 +279,7 @@ func TestReportMetrics_MultipleFilesInArchive_Fail(t *testing.T) { Data: []byte(artifactData), }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.NotNil(t, err) assert.True(t, util.HasCustomCode(err, util.CUSTOM_CODE_PERMANENT)) @@ -326,7 +321,7 @@ func TestReportMetrics_InvalidMetricsJSON_Fail(t *testing.T) { Data: []byte(artifactData), }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.NotNil(t, err) assert.True(t, util.HasCustomCode(err, util.CUSTOM_CODE_PERMANENT)) @@ -387,7 +382,7 @@ func TestReportMetrics_InvalidMetricsJSON_PartialFail(t *testing.T) { Data: []byte(validArtifactData), }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) // Partial failure is reported while valid metrics are reported. assert.NotNil(t, err) @@ -447,7 +442,7 @@ func TestReportMetrics_CorruptedArchiveFile_Fail(t *testing.T) { Data: []byte("invalid tgz content"), }) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.NotNil(t, err) assert.True(t, util.HasCustomCode(err, util.CUSTOM_CODE_PERMANENT)) @@ -511,7 +506,7 @@ func TestReportMetrics_MultiplMetricErrors_TransientErrowWin(t *testing.T) { }, }, nil) - err := reporter.ReportMetrics(workflow, USER) + err := reporter.ReportMetrics(workflow) assert.NotNil(t, err) assert.True(t, util.HasCustomCode(err, util.CUSTOM_CODE_TRANSIENT)) @@ -520,8 +515,6 @@ func TestReportMetrics_MultiplMetricErrors_TransientErrowWin(t *testing.T) { func TestReportMetrics_Unauthorized(t *testing.T) { pipelineFake := client.NewPipelineClientFake() reporter := NewMetricsReporter(pipelineFake) - k8sFake := client.NewKubernetesCoreFake() - k8sFake.Set(NamespaceName, USER) workflow := util.NewWorkflow(&workflowapi.Workflow{ ObjectMeta: metav1.ObjectMeta{ @@ -557,7 +550,7 @@ func TestReportMetrics_Unauthorized(t *testing.T) { Results: []*api.ReportRunMetricsResponse_ReportRunMetricResult{}, }, errors.New("failed to read artifacts")) - err1 := reporter.ReportMetrics(workflow, USER) + err1 := reporter.ReportMetrics(workflow) assert.NotNil(t, err1) assert.Contains(t, err1.Error(), "failed to read artifacts") diff --git a/backend/src/agent/persistence/worker/persistence_worker_test.go b/backend/src/agent/persistence/worker/persistence_worker_test.go index e29226d140..bde3ef7e4e 100644 --- a/backend/src/agent/persistence/worker/persistence_worker_test.go +++ b/backend/src/agent/persistence/worker/persistence_worker_test.go @@ -53,11 +53,9 @@ func TestPersistenceWorker_Success(t *testing.T) { // Set up pipeline client pipelineClient := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) // Set up peristence worker - saver := NewWorkflowSaver(workflowClient, pipelineClient, k8sClient, 100) + saver := NewWorkflowSaver(workflowClient, pipelineClient, 100) eventHandler := NewFakeEventHandler() worker := NewPersistenceWorker( util.NewFakeTimeForEpoch(), @@ -83,12 +81,11 @@ func 
TestPersistenceWorker_NotFoundError(t *testing.T) { }) workflowClient := client.NewWorkflowClientFake() - // Set up pipeline client and kubernetes client + // Set up pipeline client pipelineClient := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() // Set up peristence worker - saver := NewWorkflowSaver(workflowClient, pipelineClient, k8sClient, 100) + saver := NewWorkflowSaver(workflowClient, pipelineClient, 100) eventHandler := NewFakeEventHandler() worker := NewPersistenceWorker( util.NewFakeTimeForEpoch(), @@ -115,12 +112,11 @@ func TestPersistenceWorker_GetWorklowError(t *testing.T) { workflowClient := client.NewWorkflowClientFake() workflowClient.Put("MY_NAMESPACE", "MY_NAME", nil) - // Set up pipeline client and kubernetes client + // Set up pipeline client pipelineClient := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() // Set up peristence worker - saver := NewWorkflowSaver(workflowClient, pipelineClient, k8sClient, 100) + saver := NewWorkflowSaver(workflowClient, pipelineClient, 100) eventHandler := NewFakeEventHandler() worker := NewPersistenceWorker( util.NewFakeTimeForEpoch(), @@ -152,12 +148,9 @@ func TestPersistenceWorker_ReportWorkflowRetryableError(t *testing.T) { pipelineClient := client.NewPipelineClientFake() pipelineClient.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_TRANSIENT, "My Retriable Error")) - //Set up kubernetes client - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) // Set up peristence worker - saver := NewWorkflowSaver(workflowClient, pipelineClient, k8sClient, 100) + saver := NewWorkflowSaver(workflowClient, pipelineClient, 100) eventHandler := NewFakeEventHandler() worker := NewPersistenceWorker( util.NewFakeTimeForEpoch(), @@ -188,11 +181,9 @@ func TestPersistenceWorker_ReportWorkflowNonRetryableError(t *testing.T) { pipelineClient := client.NewPipelineClientFake() pipelineClient.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_PERMANENT, "My Permanent Error")) - // Set up kubernetes client - k8sClient := client.NewKubernetesCoreFake() // Set up peristence worker - saver := NewWorkflowSaver(workflowClient, pipelineClient, k8sClient, 100) + saver := NewWorkflowSaver(workflowClient, pipelineClient, 100) eventHandler := NewFakeEventHandler() worker := NewPersistenceWorker( util.NewFakeTimeForEpoch(), diff --git a/backend/src/agent/persistence/worker/workflow_saver.go b/backend/src/agent/persistence/worker/workflow_saver.go index 3b874273f1..5e93a60bb1 100644 --- a/backend/src/agent/persistence/worker/workflow_saver.go +++ b/backend/src/agent/persistence/worker/workflow_saver.go @@ -27,17 +27,15 @@ import ( type WorkflowSaver struct { client client.WorkflowClientInterface pipelineClient client.PipelineClientInterface - k8sClient client.KubernetesCoreInterface metricsReporter *MetricsReporter ttlSecondsAfterWorkflowFinish int64 } func NewWorkflowSaver(client client.WorkflowClientInterface, - pipelineClient client.PipelineClientInterface, k8sClient client.KubernetesCoreInterface, ttlSecondsAfterWorkflowFinish int64) *WorkflowSaver { + pipelineClient client.PipelineClientInterface, ttlSecondsAfterWorkflowFinish int64) *WorkflowSaver { return &WorkflowSaver{ client: client, pipelineClient: pipelineClient, - k8sClient: k8sClient, metricsReporter: NewMetricsReporter(pipelineClient), ttlSecondsAfterWorkflowFinish: ttlSecondsAfterWorkflowFinish, } @@ -70,11 +68,6 @@ func (s *WorkflowSaver) Save(key string, namespace string, name 
string, nowEpoch return nil } - user, err1 := s.k8sClient.GetNamespaceOwner(namespace) - if err1 != nil { - return util.Wrapf(err1, "Failed get '%v' namespace", namespace) - } - // Save this Workflow to the database. err = s.pipelineClient.ReportWorkflow(wf) retry := util.HasCustomCode(err, util.CUSTOM_CODE_TRANSIENT) @@ -94,5 +87,5 @@ func (s *WorkflowSaver) Save(key string, namespace string, name string, nowEpoch log.WithFields(log.Fields{ "Workflow": name, }).Infof("Syncing Workflow (%v): success, processing complete.", name) - return s.metricsReporter.ReportMetrics(wf, user) + return s.metricsReporter.ReportMetrics(wf) } diff --git a/backend/src/agent/persistence/worker/workflow_saver_test.go b/backend/src/agent/persistence/worker/workflow_saver_test.go index 10a16b7ccd..358f36600c 100644 --- a/backend/src/agent/persistence/worker/workflow_saver_test.go +++ b/backend/src/agent/persistence/worker/workflow_saver_test.go @@ -30,8 +30,6 @@ import ( func TestWorkflow_Save_Success(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) workflow := util.NewWorkflow(&workflowapi.Workflow{ ObjectMeta: metav1.ObjectMeta{ @@ -43,7 +41,7 @@ func TestWorkflow_Save_Success(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -54,10 +52,8 @@ func TestWorkflow_Save_Success(t *testing.T) { func TestWorkflow_Save_NotFoundDuringGet(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -69,12 +65,10 @@ func TestWorkflow_Save_NotFoundDuringGet(t *testing.T) { func TestWorkflow_Save_ErrorDuringGet(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) workflowFake.Put("MY_NAMESPACE", "MY_NAME", nil) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -86,8 +80,6 @@ func TestWorkflow_Save_ErrorDuringGet(t *testing.T) { func TestWorkflow_Save_PermanentFailureWhileReporting(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) pipelineFake.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_PERMANENT, "My Permanent Error")) @@ -102,7 +94,7 @@ func TestWorkflow_Save_PermanentFailureWhileReporting(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -114,8 +106,6 @@ func TestWorkflow_Save_PermanentFailureWhileReporting(t *testing.T) { func TestWorkflow_Save_TransientFailureWhileReporting(t *testing.T) { 
workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) pipelineFake.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_TRANSIENT, "My Transient Error")) @@ -130,7 +120,7 @@ func TestWorkflow_Save_TransientFailureWhileReporting(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -142,7 +132,6 @@ func TestWorkflow_Save_TransientFailureWhileReporting(t *testing.T) { func TestWorkflow_Save_SkippedDueToFinalStatue(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() // Add this will result in failure unless reporting is skipped pipelineFake.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_PERMANENT, @@ -161,7 +150,7 @@ func TestWorkflow_Save_SkippedDueToFinalStatue(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) @@ -172,8 +161,6 @@ func TestWorkflow_Save_SkippedDueToFinalStatue(t *testing.T) { func TestWorkflow_Save_FinalStatueNotSkippedDueToExceedTTL(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("MY_NAMESPACE", USER) // Add this will result in failure unless reporting is skipped pipelineFake.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_PERMANENT, @@ -195,7 +182,7 @@ func TestWorkflow_Save_FinalStatueNotSkippedDueToExceedTTL(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 1) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 1) // Sleep 2 seconds to make sure workflow passed TTL time.Sleep(2 * time.Second) @@ -210,7 +197,6 @@ func TestWorkflow_Save_FinalStatueNotSkippedDueToExceedTTL(t *testing.T) { func TestWorkflow_Save_SkippedDDueToMissingRunID(t *testing.T) { workflowFake := client.NewWorkflowClientFake() pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() // Add this will result in failure unless reporting is skipped pipelineFake.SetError(util.NewCustomError(fmt.Errorf("Error"), util.CUSTOM_CODE_PERMANENT, @@ -225,33 +211,10 @@ func TestWorkflow_Save_SkippedDDueToMissingRunID(t *testing.T) { workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) + saver := NewWorkflowSaver(workflowFake, pipelineFake, 100) err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) assert.Equal(t, false, util.HasCustomCode(err, util.CUSTOM_CODE_TRANSIENT)) assert.Equal(t, nil, err) } - -func TestWorkflow_Save_FailedToGetUser(t *testing.T) { - workflowFake := client.NewWorkflowClientFake() - pipelineFake := client.NewPipelineClientFake() - k8sClient := client.NewKubernetesCoreFake() - k8sClient.Set("ORIGINAL_NAMESPACE", USER) - - workflow := util.NewWorkflow(&workflowapi.Workflow{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "MY_NAMESPACE", - Name: "MY_NAME", - Labels: 
map[string]string{util.LabelKeyWorkflowRunId: "MY_UUID"}, - }, - }) - - workflowFake.Put("MY_NAMESPACE", "MY_NAME", workflow) - - saver := NewWorkflowSaver(workflowFake, pipelineFake, k8sClient, 100) - - err := saver.Save("MY_KEY", "MY_NAMESPACE", "MY_NAME", 20) - assert.NotNil(t, err) - assert.Contains(t, err.Error(), fmt.Sprintf("Failed get '%v' namespace", "MY_NAMESPACE")) -} diff --git a/backend/src/common/util/execution_status.go b/backend/src/common/util/execution_status.go index 7eff8a2064..6831e141f4 100644 --- a/backend/src/common/util/execution_status.go +++ b/backend/src/common/util/execution_status.go @@ -31,7 +31,7 @@ type NodeStatus struct { Children []string } -type RetrieveArtifact func(request *api.ReadArtifactRequest, user string) (*api.ReadArtifactResponse, error) +type RetrieveArtifact func(request *api.ReadArtifactRequest) (*api.ReadArtifactResponse, error) // Abstract interface to encapsulate the resources of the execution runtime specifically // for status information. This interface is mainly to access the status related information @@ -61,7 +61,7 @@ type ExecutionStatus interface { // This function was in metrics_reporter.go. Moved to here because it // accesses the orchestration engine specific data struct. encapsulate the // specific data struct and provide a abstract function here. - CollectionMetrics(retrieveArtifact RetrieveArtifact, user string) ([]*api.RunMetric, []error) + CollectionMetrics(retrieveArtifact RetrieveArtifact) ([]*api.RunMetric, []error) // does ExecutionStatus contain any finished node or not HasMetrics() bool diff --git a/backend/src/common/util/workflow.go b/backend/src/common/util/workflow.go index 821f69df5d..64d38dcb45 100644 --- a/backend/src/common/util/workflow.go +++ b/backend/src/common/util/workflow.go @@ -436,12 +436,12 @@ const ( maxMetricsCountLimit = 50 ) -func (w *Workflow) CollectionMetrics(retrieveArtifact RetrieveArtifact, user string) ([]*api.RunMetric, []error) { +func (w *Workflow) CollectionMetrics(retrieveArtifact RetrieveArtifact) ([]*api.RunMetric, []error) { runID := w.Labels[LabelKeyWorkflowRunId] runMetrics := make([]*api.RunMetric, 0, len(w.Status.Nodes)) partialFailures := make([]error, 0, len(w.Status.Nodes)) for _, nodeStatus := range w.Status.Nodes { - nodeMetrics, err := collectNodeMetricsOrNil(runID, &nodeStatus, retrieveArtifact, user) + nodeMetrics, err := collectNodeMetricsOrNil(runID, &nodeStatus, retrieveArtifact) if err != nil { partialFailures = append(partialFailures, err) continue @@ -460,13 +460,13 @@ func (w *Workflow) CollectionMetrics(retrieveArtifact RetrieveArtifact, user str return runMetrics, partialFailures } -func collectNodeMetricsOrNil(runID string, nodeStatus *workflowapi.NodeStatus, retrieveArtifact RetrieveArtifact, user string) ( +func collectNodeMetricsOrNil(runID string, nodeStatus *workflowapi.NodeStatus, retrieveArtifact RetrieveArtifact) ( []*api.RunMetric, error, ) { if !nodeStatus.Completed() { return nil, nil } - metricsJSON, err := readNodeMetricsJSONOrEmpty(runID, nodeStatus, retrieveArtifact, user) + metricsJSON, err := readNodeMetricsJSONOrEmpty(runID, nodeStatus, retrieveArtifact) if err != nil || metricsJSON == "" { return nil, err } @@ -499,7 +499,7 @@ func collectNodeMetricsOrNil(runID string, nodeStatus *workflowapi.NodeStatus, r } func readNodeMetricsJSONOrEmpty(runID string, nodeStatus *workflowapi.NodeStatus, - retrieveArtifact RetrieveArtifact, user string, + retrieveArtifact RetrieveArtifact, ) (string, error) { if nodeStatus.Outputs == nil || 
nodeStatus.Outputs.Artifacts == nil { return "", nil // No output artifacts, skip the reporting @@ -520,7 +520,7 @@ func readNodeMetricsJSONOrEmpty(runID string, nodeStatus *workflowapi.NodeStatus NodeId: nodeStatus.ID, ArtifactName: metricsArtifactName, } - artifactResponse, err := retrieveArtifact(artifactRequest, user) + artifactResponse, err := retrieveArtifact(artifactRequest) if err != nil { return "", err } diff --git a/backend/third_party_licenses/persistence_agent.csv b/backend/third_party_licenses/persistence_agent.csv index 102c483cbd..31defe0c67 100644 --- a/backend/third_party_licenses/persistence_agent.csv +++ b/backend/third_party_licenses/persistence_agent.csv @@ -15,7 +15,6 @@ github.com/colinmarc/hdfs,https://github.com/colinmarc/hdfs/blob/9746310a4d31/LI github.com/davecgh/go-spew/spew,https://github.com/davecgh/go-spew/blob/v1.1.1/LICENSE,ISC github.com/doublerebel/bellows,https://github.com/doublerebel/bellows/blob/f177d92a03d3/LICENSE,MIT github.com/emicklei/go-restful/v3,https://github.com/emicklei/go-restful/blob/v3.8.0/LICENSE,MIT -github.com/fsnotify/fsnotify,https://github.com/fsnotify/fsnotify/blob/v1.5.1/LICENSE,BSD-3-Clause github.com/go-logr/logr,https://github.com/go-logr/logr/blob/v1.2.2/LICENSE,Apache-2.0 github.com/go-openapi/errors,https://github.com/go-openapi/errors/blob/v0.20.2/LICENSE,Apache-2.0 github.com/go-openapi/jsonpointer,https://github.com/go-openapi/jsonpointer/blob/v0.19.5/LICENSE,Apache-2.0 @@ -34,7 +33,6 @@ github.com/google/uuid,https://github.com/google/uuid/blob/v1.3.0/LICENSE,BSD-3- github.com/gorilla/websocket,https://github.com/gorilla/websocket/blob/v1.5.0/LICENSE,BSD-2-Clause github.com/grpc-ecosystem/grpc-gateway,https://github.com/grpc-ecosystem/grpc-gateway/blob/v1.16.0/LICENSE.txt,BSD-3-Clause github.com/hashicorp/go-uuid,https://github.com/hashicorp/go-uuid/blob/v1.0.2/LICENSE,MPL-2.0 -github.com/hashicorp/hcl,https://github.com/hashicorp/hcl/blob/v1.0.0/LICENSE,MPL-2.0 github.com/huandu/xstrings,https://github.com/huandu/xstrings/blob/v1.3.2/LICENSE,MIT github.com/imdario/mergo,https://github.com/imdario/mergo/blob/v0.3.12/LICENSE,BSD-3-Clause github.com/jcmturner/gofork,https://github.com/jcmturner/gofork/blob/v1.0.0/LICENSE,BSD-3-Clause @@ -44,7 +42,6 @@ github.com/klauspost/compress/flate,https://github.com/klauspost/compress/blob/v github.com/klauspost/pgzip,https://github.com/klauspost/pgzip/blob/v1.2.5/LICENSE,MIT github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0 github.com/lestrrat-go/strftime,https://github.com/lestrrat-go/strftime/blob/v1.0.4/LICENSE,MIT -github.com/magiconair/properties,https://github.com/magiconair/properties/blob/v1.8.5/LICENSE.md,BSD-2-Clause github.com/mailru/easyjson,https://github.com/mailru/easyjson/blob/v0.7.7/LICENSE,MIT github.com/matttproud/golang_protobuf_extensions/pbutil,https://github.com/matttproud/golang_protobuf_extensions/blob/c182affec369/LICENSE,Apache-2.0 github.com/mitchellh/copystructure,https://github.com/mitchellh/copystructure/blob/v1.2.0/LICENSE,MIT @@ -56,7 +53,6 @@ github.com/modern-go/reflect2,https://github.com/modern-go/reflect2/blob/v1.0.2/ github.com/munnerz/goautoneg,https://github.com/munnerz/goautoneg/blob/a7dc8b61c822/LICENSE,BSD-3-Clause github.com/oklog/ulid,https://github.com/oklog/ulid/blob/v1.3.1/LICENSE,Apache-2.0 github.com/oliveagle/jsonpath,https://github.com/oliveagle/jsonpath/blob/2e52cf6e6852/LICENSE,MIT 
-github.com/pelletier/go-toml,https://github.com/pelletier/go-toml/blob/v1.9.4/LICENSE,Apache-2.0 github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 @@ -66,12 +62,8 @@ github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LI github.com/robfig/cron/v3,https://github.com/robfig/cron/blob/v3.0.1/LICENSE,MIT github.com/shopspring/decimal,https://github.com/shopspring/decimal/blob/v1.2.0/LICENSE,MIT github.com/sirupsen/logrus,https://github.com/sirupsen/logrus/blob/v1.8.1/LICENSE,MIT -github.com/spf13/afero,https://github.com/spf13/afero/blob/v1.8.0/LICENSE.txt,Apache-2.0 github.com/spf13/cast,https://github.com/spf13/cast/blob/v1.4.1/LICENSE,MIT -github.com/spf13/jwalterweatherman,https://github.com/spf13/jwalterweatherman/blob/v1.1.0/LICENSE,MIT github.com/spf13/pflag,https://github.com/spf13/pflag/blob/v1.0.5/LICENSE,BSD-3-Clause -github.com/spf13/viper,https://github.com/spf13/viper/blob/v1.10.1/LICENSE,MIT -github.com/subosito/gotenv,https://github.com/subosito/gotenv/blob/v1.2.0/LICENSE,MIT github.com/valyala/bytebufferpool,https://github.com/valyala/bytebufferpool/blob/v1.0.0/LICENSE,MIT github.com/valyala/fasttemplate,https://github.com/valyala/fasttemplate/blob/v1.2.1/LICENSE,MIT go.mongodb.org/mongo-driver,https://github.com/mongodb/mongo-go-driver/blob/v1.8.2/LICENSE,Apache-2.0 @@ -86,7 +78,6 @@ google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/197313 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause -gopkg.in/ini.v1,https://github.com/go-ini/ini/blob/v1.66.3/LICENSE,Apache-2.0 gopkg.in/jcmturner/aescts.v1,https://github.com/jcmturner/aescts/blob/v1.0.1/LICENSE,Apache-2.0 gopkg.in/jcmturner/dnsutils.v1,https://github.com/jcmturner/dnsutils/blob/v1.0.1/LICENSE,Apache-2.0 gopkg.in/jcmturner/gokrb5.v5,https://github.com/jcmturner/gokrb5/blob/v5.3.0/LICENSE,Apache-2.0 diff --git a/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml b/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml index bd1a0f53df..84371af208 100644 --- a/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml +++ b/manifests/kustomize/base/installs/multi-user/persistence-agent/cluster-role.yaml @@ -27,8 +27,9 @@ rules: verbs: - report - apiGroups: - - '' + - pipelines.kubeflow.org resources: - - namespaces + - runs verbs: - - get \ No newline at end of file + - reportMetrics + - readArtifact diff --git a/manifests/kustomize/base/installs/multi-user/persistence-agent/deployment-patch.yaml b/manifests/kustomize/base/installs/multi-user/persistence-agent/deployment-patch.yaml index a5e7a9fc26..1e165def42 100644 --- a/manifests/kustomize/base/installs/multi-user/persistence-agent/deployment-patch.yaml +++ b/manifests/kustomize/base/installs/multi-user/persistence-agent/deployment-patch.yaml @@ -7,14 +7,7 @@ spec: spec: containers: - name: ml-pipeline-persistenceagent - envFrom: - - configMapRef: - name: persistenceagent-config env: - name: NAMESPACE value: '' valueFrom: null - - name: KUBEFLOW_USERID_HEADER 
- value: kubeflow-userid - - name: KUBEFLOW_USERID_PREFIX - value: "" \ No newline at end of file diff --git a/manifests/kustomize/base/installs/multi-user/persistence-agent/kustomization.yaml b/manifests/kustomize/base/installs/multi-user/persistence-agent/kustomization.yaml index 560e0fc893..b1f65469e1 100644 --- a/manifests/kustomize/base/installs/multi-user/persistence-agent/kustomization.yaml +++ b/manifests/kustomize/base/installs/multi-user/persistence-agent/kustomization.yaml @@ -3,7 +3,3 @@ kind: Kustomization resources: - cluster-role.yaml - cluster-role-binding.yaml -configMapGenerator: -- name: persistenceagent-config - envs: - - params.env \ No newline at end of file diff --git a/manifests/kustomize/base/installs/multi-user/persistence-agent/params.env b/manifests/kustomize/base/installs/multi-user/persistence-agent/params.env deleted file mode 100644 index 4c3bab70f9..0000000000 --- a/manifests/kustomize/base/installs/multi-user/persistence-agent/params.env +++ /dev/null @@ -1 +0,0 @@ -MULTIUSER=true diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml index 30bea2326a..0d8b504278 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml @@ -25,10 +25,6 @@ spec: value: "86400" - name: NUM_WORKERS value: "2" - - name: KUBEFLOW_USERID_HEADER - value: kubeflow-userid - - name: KUBEFLOW_USERID_PREFIX - value: "" image: gcr.io/ml-pipeline/persistenceagent:dummy imagePullPolicy: IfNotPresent name: ml-pipeline-persistenceagent diff --git a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml index 077d556e10..63bdd03d6a 100644 --- a/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml +++ b/manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-role.yaml @@ -27,8 +27,9 @@ rules: verbs: - report - apiGroups: - - '' + - pipelines.kubeflow.org resources: - - namespaces + - runs verbs: - - get \ No newline at end of file + - reportMetrics + - readArtifact From 1791818323e9da5efc88cdc682c99fbe2ada05c9 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 11 Sep 2023 09:49:15 -0700 Subject: [PATCH 139/253] chore(components): release GCPC SDK 2.3.1 PiperOrigin-RevId: 564416401 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 5 +++++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- components/google-cloud/setup.py | 2 +- 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 89cc16dc10..d162ebe3c0 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.3.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.3.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the 
container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 0a360f2dc9..a01fb8c71f 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,10 @@ ## Upcoming release +## Release 2.3.1 +* Make LLM pipelines compatible with KFP SDK 2.1.3 +* Require KFP SDK <=2.1.3 +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) + ## Release 2.3.0 * Add `preview.llm.infer_pipeline` and `preview.llm.rlhf_pipeline` * Add `automl_tabular_tabnet_trainer` and `automl_tabular_wide_and_deep_trainer` to `preview.automl.tabular` and `v1.automl.tabular` diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index 5615ee72d8..f311cc5692 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.3.1", + "title": "2.3.1", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.3.0", "title": "2.3.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 4cfe937969..92721a9dc9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. """Google Cloud Pipeline Components version.""" -__version__ = "2.3.0" +__version__ = "2.3.1" diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index dbc7d91ef9..c861843f8d 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -82,7 +82,7 @@ # Pin google-api-core version for the bug fixing in 1.31.5 # https://github.com/googleapis/python-api-core/releases/tag/v1.31.5 "google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "kfp>=2.0.0b10,<3.0.0", + "kfp>=2.0.0b10,<=2.1.3", "google-cloud-aiplatform>=1.14.0,<2", ], project_urls={ From c6b236d1a0a2385421e823512bd4c37041f1af26 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 11 Sep 2023 13:19:35 -0700 Subject: [PATCH 140/253] feat(sdk): support dsl.If, dsl.Elif, and dsl.Else (#9894) * support if/elif/else * deprecate dsl.Condition * alter rebase * update release notes * address review feedback * change BinaryOperation to ConditionOperation --- sdk/RELEASE.md | 1 + sdk/python/kfp/compiler/compiler_test.py | 326 +++++++ sdk/python/kfp/compiler/compiler_utils.py | 33 +- .../kfp/compiler/pipeline_spec_builder.py | 72 +- sdk/python/kfp/dsl/__init__.py | 6 + sdk/python/kfp/dsl/pipeline_channel.py | 19 +- sdk/python/kfp/dsl/pipeline_context.py | 6 + sdk/python/kfp/dsl/tasks_group.py | 181 +++- sdk/python/kfp/dsl/tasks_group_test.py | 24 +- .../test_data/pipelines/if_elif_else.py | 51 + .../test_data/pipelines/if_elif_else.yaml | 280 ++++++ .../pipelines/if_elif_else_complex.py | 86 ++ .../pipelines/if_elif_else_complex.yaml | 910 ++++++++++++++++++ sdk/python/test_data/pipelines/if_else.py | 42 + sdk/python/test_data/pipelines/if_else.yaml | 202 ++++ sdk/python/test_data/test_data_config.yaml | 9 + 16 files changed, 2183 insertions(+), 65 deletions(-) create mode 100644 
sdk/python/test_data/pipelines/if_elif_else.py create mode 100644 sdk/python/test_data/pipelines/if_elif_else.yaml create mode 100644 sdk/python/test_data/pipelines/if_elif_else_complex.py create mode 100644 sdk/python/test_data/pipelines/if_elif_else_complex.yaml create mode 100644 sdk/python/test_data/pipelines/if_else.py create mode 100644 sdk/python/test_data/pipelines/if_else.yaml diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 0395401544..bc2effc705 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -2,6 +2,7 @@ ## Features +* Add support for `dsl.If`, `dsl.Elif`, and `dsl.Else` control flow context managers; deprecate `dsl.Condition` in favor of `dsl.If` [\#9894](https://github.com/kubeflow/pipelines/pull/9894) ## Breaking changes diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 92b1f6a1b7..56407cd752 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -40,6 +40,7 @@ from kfp.dsl import OutputPath from kfp.dsl import pipeline_task from kfp.dsl import PipelineTaskFinalStatus +from kfp.dsl import tasks_group from kfp.dsl import yaml_component from kfp.dsl.types import type_utils from kfp.pipeline_spec import pipeline_spec_pb2 @@ -4161,5 +4162,330 @@ def my_pipeline( 'Component output artifact.') +@dsl.component +def flip_coin() -> str: + import random + return 'heads' if random.randint(0, 1) == 0 else 'tails' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + +@dsl.component +def flip_three_sided_coin() -> str: + import random + val = random.randint(0, 2) + + if val == 0: + return 'heads' + elif val == 1: + return 'tails' + else: + return 'draw' + + +@dsl.component +def int_zero_through_three() -> int: + import random + return random.randint(0, 3) + + +class TestConditionLogic(unittest.TestCase): + + def test_if(self): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" + ) + + def test_if_else(self): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.Else(): + print_and_return(text='Got tails!') + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" + ) + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + .trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads')" + ) + + def test_if_elif_else(self): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_three_sided_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + print_and_return(text='Got tails!') + with dsl.Else(): + print_and_return(text='Draw!') + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads'" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + 
.trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails'" + ) + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-3'] + .trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails')" + ) + + def test_if_multiple_elif_else(self): + + @dsl.pipeline + def int_to_string(): + int_task = int_zero_through_three() + with dsl.If(int_task.output == 0): + print_and_return(text='Got zero!') + with dsl.Elif(int_task.output == 1): + print_and_return(text='Got one!') + with dsl.Elif(int_task.output == 2): + print_and_return(text='Got two!') + with dsl.Else(): + print_and_return(text='Got three!') + + self.assertEqual( + int_to_string.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0" + ) + self.assertEqual( + int_to_string.pipeline_spec.root.dag.tasks['condition-2'] + .trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1" + ) + self.assertEqual( + int_to_string.pipeline_spec.root.dag.tasks['condition-3'] + .trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2" + ) + self.assertEqual( + int_to_string.pipeline_spec.root.dag.tasks['condition-4'] + .trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2)" + ) + + def test_nested_if_elif_else_with_pipeline_param(self): + + @dsl.pipeline + def flip_coin_pipeline(confirm: bool): + int_task = int_zero_through_three() + heads_task = flip_coin() + + with dsl.If(heads_task.output == 'heads'): + with dsl.If(int_task.output == 0): + print_and_return(text='Got zero!') + + with dsl.Elif(int_task.output == 1): + task = print_and_return(text='Got one!') + with dsl.If(confirm == True): + print_and_return(text='Confirmed: definitely got one.') + + with dsl.Elif(int_task.output == 2): + print_and_return(text='Got two!') + + with dsl.Else(): + print_and_return(text='Got three!') + + # top level conditions + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" + ) + # second level nested conditions + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag + .tasks['condition-2'].trigger_policy.condition, + "int(inputs.parameter_values[\'pipelinechannel--int-zero-through-three-Output\']) == 0" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag + .tasks['condition-3'].trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && 
int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag + .tasks['condition-5'].trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag + .tasks['condition-6'].trigger_policy.condition, + "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2)" + ) + # third level nested conditions + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-3'].dag + .tasks['condition-4'].trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--confirm'] == true") + + def test_multiple_ifs_permitted(self): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.If(flip_coin_task.output == 'tails'): + print_and_return(text='Got tails!') + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'tails'" + ) + + def test_multiple_else_not_permitted(self): + with self.assertRaisesRegex( + tasks_group.InvalidControlFlowException, + r'Cannot use dsl\.Else following another dsl\.Else\. dsl\.Else can only be used following an upstream dsl\.If or dsl\.Elif\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.Else(): + print_and_return(text='Got tails!') + with dsl.Else(): + print_and_return(text='Got tails!') + + def test_else_no_if_not_supported(self): + with self.assertRaisesRegex( + tasks_group.InvalidControlFlowException, + r'dsl\.Else can only be used following an upstream dsl\.If or dsl\.Elif\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(): + with dsl.Else(): + print_and_return(text='Got unknown') + + def test_elif_no_if_not_supported(self): + with self.assertRaisesRegex( + tasks_group.InvalidControlFlowException, + r'dsl\.Elif can only be used following an upstream dsl\.If or dsl\.Elif\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.Elif(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + + def test_boolean_condition_has_helpful_error(self): + with self.assertRaisesRegex( + ValueError, + r'Got constant boolean True as a condition\. This is likely because the provided condition evaluated immediately\. At least one of the operands must be an output from an upstream task or a pipeline parameter\.' 
+ ): + + @dsl.pipeline + def my_pipeline(): + with dsl.Condition('foo' == 'foo'): + print_and_return(text='I will always run.') + + def test_boolean_elif_has_helpful_error(self): + with self.assertRaisesRegex( + ValueError, + r'Got constant boolean False as a condition\. This is likely because the provided condition evaluated immediately\. At least one of the operands must be an output from an upstream task or a pipeline parameter\.' + ): + + @dsl.pipeline + def my_pipeline(text: str): + with dsl.If(text == 'foo'): + print_and_return(text='I will always run.') + with dsl.Elif('foo' == 'bar'): + print_and_return(text='I will never run.') + + def test_tasks_instantiated_between_if_else_and_elif_permitted(self): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads on coin one!') + + flip_coin_task_2 = flip_coin() + + with dsl.Elif(flip_coin_task_2.output == 'tails'): + print_and_return(text='Got heads on coin two!') + + flip_coin_task_3 = flip_coin() + + with dsl.Else(): + print_and_return( + text=f'Coin three result: {flip_coin_task_3.output}') + + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + .trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'tails'" + ) + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-3'] + .trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'tails')" + ) + + def test_other_control_flow_instantiated_between_if_else_not_permitted( + self): + with self.assertRaisesRegex( + tasks_group.InvalidControlFlowException, + 'dsl\.Else can only be used following an upstream dsl\.If or dsl\.Elif\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.ParallelFor(['foo', 'bar']) as item: + print_and_return(text=item) + with dsl.Else(): + print_and_return(text='Got tails!') + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/compiler/compiler_utils.py b/sdk/python/kfp/compiler/compiler_utils.py index 79c4418bdf..ccc6730b1e 100644 --- a/sdk/python/kfp/compiler/compiler_utils.py +++ b/sdk/python/kfp/compiler/compiler_utils.py @@ -119,7 +119,18 @@ def _get_parent_groups_helper( return (tasks_to_groups, groups_to_groups) -# TODO: do we really need this? 
+def get_channels_from_condition( + operations: List[pipeline_channel.ConditionOperation], + collected_channels: list, +) -> None: + """Appends to collected_channels each pipeline channels used in each + operand of each operation in operations.""" + for operation in operations: + for operand in [operation.left_operand, operation.right_operand]: + if isinstance(operand, pipeline_channel.PipelineChannel): + collected_channels.append(operand) + + def get_condition_channels_for_tasks( root_group: tasks_group.TasksGroup, ) -> Mapping[str, Set[pipeline_channel.PipelineChannel]]: @@ -139,16 +150,13 @@ def _get_condition_channels_for_tasks_helper( current_conditions_channels, ): new_current_conditions_channels = current_conditions_channels - if isinstance(group, tasks_group.Condition): + if isinstance(group, tasks_group._ConditionBase): new_current_conditions_channels = list(current_conditions_channels) - if isinstance(group.condition.left_operand, - pipeline_channel.PipelineChannel): - new_current_conditions_channels.append( - group.condition.left_operand) - if isinstance(group.condition.right_operand, - pipeline_channel.PipelineChannel): - new_current_conditions_channels.append( - group.condition.right_operand) + get_channels_from_condition( + group.conditions, + new_current_conditions_channels, + ) + for task in group.tasks: for channel in new_current_conditions_channels: conditions[task.name].add(channel) @@ -661,8 +669,9 @@ def get_dependencies( dependent_group = group_name_to_group.get( uncommon_upstream_groups[0], None) - if isinstance(dependent_group, - (tasks_group.Condition, tasks_group.ExitHandler)): + if isinstance( + dependent_group, + (tasks_group._ConditionBase, tasks_group.ExitHandler)): raise InvalidTopologyException( f'{ILLEGAL_CROSS_DAG_ERROR_PREFIX} A downstream task cannot depend on an upstream task within a dsl.{dependent_group.__class__.__name__} context unless the downstream is within that context too. Found task {task.name} which depends on upstream task {upstream_task.name} within an uncommon dsl.{dependent_group.__class__.__name__} context.' ) diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index b276f892c1..75be4eb647 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -709,22 +709,38 @@ def _update_task_spec_for_loop_group( input_name=pipeline_task_spec.parameter_iterator.item_input) -def _resolve_condition_operands( - left_operand: Union[str, pipeline_channel.PipelineChannel], - right_operand: Union[str, pipeline_channel.PipelineChannel], -) -> Tuple[str, str]: - """Resolves values and PipelineChannels for condition operands. +def _binary_operations_to_cel_conjunctive( + operations: List[pipeline_channel.ConditionOperation]) -> str: + """Converts a list of ConditionOperation to a CEL string with placeholders. + Each ConditionOperation will be joined the others via the conjunctive (&&). Args: - left_operand: The left operand of a condition expression. - right_operand: The right operand of a condition expression. + operations: The binary operations to convert to convert and join. Returns: - A tuple of the resolved operands values: - (left_operand_value, right_operand_value). + The binary operations as a CEL string. """ + operands = [ + _single_binary_operation_to_cel_condition(operation=bin_op) + for bin_op in operations + ] + return ' && '.join(operands) - # Pre-scan the operand to get the type of constant value if there's any. 
+ +def _single_binary_operation_to_cel_condition( + operation: pipeline_channel.ConditionOperation) -> str: + """Converts a ConditionOperation to a CEL string with placeholders. + + Args: + operation: The binary operation to convert to a string. + + Returns: + The binary operation as a CEL string. + """ + left_operand = operation.left_operand + right_operand = operation.right_operand + + # cannot make comparisons involving particular types for value_or_reference in [left_operand, right_operand]: if isinstance(value_or_reference, pipeline_channel.PipelineChannel): parameter_type = type_utils.get_parameter_type( @@ -738,8 +754,10 @@ def _resolve_condition_operands( input_name = compiler_utils.additional_input_name_for_pipeline_channel( value_or_reference) raise ValueError( - f'Conditional requires scalar parameter values for comparison. Found input "{input_name}" of type {value_or_reference.channel_type} in pipeline definition instead.' + f'Conditional requires primitive parameter values for comparison. Found input "{input_name}" of type {value_or_reference.channel_type} in pipeline definition instead.' ) + + # ensure the types compared are the same or compatible parameter_types = set() for value_or_reference in [left_operand, right_operand]: if isinstance(value_or_reference, pipeline_channel.PipelineChannel): @@ -822,11 +840,16 @@ def _resolve_condition_operands( operand_values.append(operand_value) - return tuple(operand_values) + left_operand_value, right_operand_value = tuple(operand_values) + + condition_string = ( + f'{left_operand_value} {operation.operator} {right_operand_value}') + + return f'!({condition_string})' if operation.negate else condition_string def _update_task_spec_for_condition_group( - group: tasks_group.Condition, + group: tasks_group._ConditionBase, pipeline_task_spec: pipeline_spec_pb2.PipelineTaskSpec, ) -> None: """Updates PipelineTaskSpec for condition group. @@ -835,15 +858,9 @@ def _update_task_spec_for_condition_group( group: The condition group to update task spec for. pipeline_task_spec: The pipeline task spec to update in place. """ - left_operand_value, right_operand_value = _resolve_condition_operands( - group.condition.left_operand, group.condition.right_operand) - - condition_string = ( - f'{left_operand_value} {group.condition.operator} {right_operand_value}' - ) + condition = _binary_operations_to_cel_conjunctive(group.conditions) pipeline_task_spec.trigger_policy.CopyFrom( - pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy( - condition=condition_string)) + pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(condition=condition)) def build_task_spec_for_exit_task( @@ -954,7 +971,7 @@ def build_task_spec_for_group( group=group, pipeline_task_spec=pipeline_task_spec, ) - elif isinstance(group, tasks_group.Condition): + elif isinstance(group, tasks_group._ConditionBase): _update_task_spec_for_condition_group( group=group, pipeline_task_spec=pipeline_task_spec, @@ -1236,17 +1253,14 @@ def build_spec_by_group( _build_dag_outputs(subgroup_component_spec, subgroup_output_channels) - elif isinstance(subgroup, tasks_group.Condition): + elif isinstance(subgroup, tasks_group._ConditionBase): # "Punch the hole", adding inputs needed by its subgroups or # tasks. 
condition_subgroup_channels = list(subgroup_input_channels) - for operand in [ - subgroup.condition.left_operand, - subgroup.condition.right_operand, - ]: - if isinstance(operand, pipeline_channel.PipelineChannel): - condition_subgroup_channels.append(operand) + + compiler_utils.get_channels_from_condition( + subgroup.conditions, condition_subgroup_channels) subgroup_component_spec = build_component_spec_for_group( input_pipeline_channels=condition_subgroup_channels, diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index a23b640fdb..001226b02c 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -237,7 +237,10 @@ def my_pipeline(): from kfp.dsl.placeholders import IfPresentPlaceholder from kfp.dsl.structures import ContainerSpec from kfp.dsl.tasks_group import Condition + from kfp.dsl.tasks_group import Elif + from kfp.dsl.tasks_group import Else from kfp.dsl.tasks_group import ExitHandler + from kfp.dsl.tasks_group import If from kfp.dsl.tasks_group import ParallelFor __all__.extend([ 'component', @@ -246,6 +249,9 @@ def my_pipeline(): 'importer', 'ContainerSpec', 'Condition', + 'If', + 'Elif', + 'Else', 'ExitHandler', 'ParallelFor', 'Collected', diff --git a/sdk/python/kfp/dsl/pipeline_channel.py b/sdk/python/kfp/dsl/pipeline_channel.py index 66616103fb..4841928bbf 100644 --- a/sdk/python/kfp/dsl/pipeline_channel.py +++ b/sdk/python/kfp/dsl/pipeline_channel.py @@ -24,17 +24,20 @@ @dataclasses.dataclass -class ConditionOperator: - """Represents a condition expression to be used in dsl.Condition(). +class ConditionOperation: + """Represents a condition expression to be used in condition control flow + group. Attributes: operator: The operator of the condition. left_operand: The left operand. right_operand: The right operand. + negate: Whether to negate the result of the binary operation. """ operator: str left_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] right_operand: Union['PipelineParameterChannel', type_utils.PARAMETER_TYPES] + negate: bool = False # The string template used to generate the placeholder of a PipelineChannel. 
@@ -149,22 +152,22 @@ def __hash__(self) -> int: return hash(self.pattern) def __eq__(self, other): - return ConditionOperator('==', self, other) + return ConditionOperation('==', self, other) def __ne__(self, other): - return ConditionOperator('!=', self, other) + return ConditionOperation('!=', self, other) def __lt__(self, other): - return ConditionOperator('<', self, other) + return ConditionOperation('<', self, other) def __le__(self, other): - return ConditionOperator('<=', self, other) + return ConditionOperation('<=', self, other) def __gt__(self, other): - return ConditionOperator('>', self, other) + return ConditionOperation('>', self, other) def __ge__(self, other): - return ConditionOperator('>=', self, other) + return ConditionOperation('>=', self, other) class PipelineParameterChannel(PipelineChannel): diff --git a/sdk/python/kfp/dsl/pipeline_context.py b/sdk/python/kfp/dsl/pipeline_context.py index c1304c39ba..72ada197ae 100644 --- a/sdk/python/kfp/dsl/pipeline_context.py +++ b/sdk/python/kfp/dsl/pipeline_context.py @@ -189,6 +189,12 @@ def pop_tasks_group(self): """Removes the current TasksGroup from the stack.""" del self.groups[-1] + def get_last_tasks_group(self) -> Optional['tasks_group.TasksGroup']: + """Gets the last TasksGroup added to the pipeline at the current level + of the pipeline definition.""" + groups = self.groups[-1].groups + return groups[-1] if groups else None + def remove_task_from_groups(self, task: pipeline_task.PipelineTask): """Removes a task from the pipeline. diff --git a/sdk/python/kfp/dsl/tasks_group.py b/sdk/python/kfp/dsl/tasks_group.py index 42d1446a9d..6bf6b63cc0 100644 --- a/sdk/python/kfp/dsl/tasks_group.py +++ b/sdk/python/kfp/dsl/tasks_group.py @@ -13,8 +13,10 @@ # limitations under the License. """Definition for TasksGroup.""" +import copy import enum -from typing import Optional, Union +from typing import List, Optional, Union +import warnings from kfp.dsl import for_loop from kfp.dsl import pipeline_channel @@ -52,7 +54,7 @@ def __init__( group_type: TasksGroupType, name: Optional[str] = None, is_root: bool = False, - ): + ) -> None: """Create a new instance of TasksGroup. Args: @@ -117,7 +119,7 @@ def __init__( self, exit_task: pipeline_task.PipelineTask, name: Optional[str] = None, - ): + ) -> None: """Initializes a Condition task group.""" super().__init__( group_type=TasksGroupType.EXIT_HANDLER, @@ -138,9 +140,31 @@ def __init__( self.exit_task = exit_task -class Condition(TasksGroup): - """A class for creating conditional control flow within a pipeline - definition. +class _ConditionBase(TasksGroup): + """Parent class for condition control flow context managers (Condition, If, + Elif, Else). + + Args: + condition: A list of binary operations to be combined via conjunction. + name: The name of the condition group. + """ + + def __init__( + self, + conditions: List[pipeline_channel.ConditionOperation], + name: Optional[str] = None, + ) -> None: + super().__init__( + group_type=TasksGroupType.CONDITION, + name=name, + is_root=False, + ) + self.conditions: List[pipeline_channel.ConditionOperation] = conditions + + +class If(_ConditionBase): + """A class for creating a conditional control flow "if" block within a + pipeline. Args: condition: A comparative expression that evaluates to True or False. At least one of the operands must be an output from an upstream task or a pipeline parameter. @@ -150,22 +174,151 @@ class Condition(TasksGroup): :: task1 = my_component1(...) 
- with Condition(task1.output=='pizza', 'pizza-condition'): + with dsl.If(task1.output=='pizza', 'pizza-condition'): task2 = my_component2(...) """ def __init__( self, - condition: pipeline_channel.ConditionOperator, + condition, name: Optional[str] = None, - ): - """Initializes a conditional task group.""" + ) -> None: super().__init__( - group_type=TasksGroupType.CONDITION, + conditions=[condition], name=name, - is_root=False, ) - self.condition = condition + if isinstance(condition, bool): + raise ValueError( + f'Got constant boolean {condition} as a condition. This is likely because the provided condition evaluated immediately. At least one of the operands must be an output from an upstream task or a pipeline parameter.' + ) + copied_condition = copy.copy(condition) + copied_condition.negate = True + self._negated_upstream_conditions = [copied_condition] + + +class Condition(If): + """Deprecated. + + Use dsl.If instead. + """ + + def __enter__(self): + super().__enter__() + warnings.warn( + 'dsl.Condition is deprecated. Please use dsl.If instead.', + category=DeprecationWarning, + stacklevel=2) + return self + + +class Elif(_ConditionBase): + """A class for creating a conditional control flow "else if" block within a + pipeline. Can be used following an upstream dsl.If or dsl.Elif. + + Args: + condition: A comparative expression that evaluates to True or False. At least one of the operands must be an output from an upstream task or a pipeline parameter. + name: The name of the condition group. + + Example: + :: + + task1 = my_component1(...) + task2 = my_component2(...) + with dsl.If(task1.output=='pizza', 'pizza-condition'): + task3 = my_component3(...) + + with dsl.Elif(task2.output=='pasta', 'pasta-condition'): + task4 = my_component4(...) + """ + + def __init__( + self, + condition, + name: Optional[str] = None, + ) -> None: + prev_cond = pipeline_context.Pipeline.get_default_pipeline( + ).get_last_tasks_group() + if not isinstance(prev_cond, (Condition, If, Elif)): + # prefer pushing toward dsl.If rather than dsl.Condition for syntactic consistency with the if-elif-else keywords in Python + raise InvalidControlFlowException( + 'dsl.Elif can only be used following an upstream dsl.If or dsl.Elif.' + ) + + if isinstance(condition, bool): + raise ValueError( + f'Got constant boolean {condition} as a condition. This is likely because the provided condition evaluated immediately. At least one of the operands must be an output from an upstream task or a pipeline parameter.' + ) + + copied_condition = copy.copy(condition) + copied_condition.negate = True + self._negated_upstream_conditions = _shallow_copy_list_of_binary_operations( + prev_cond._negated_upstream_conditions) + [copied_condition] + + conditions = _shallow_copy_list_of_binary_operations( + prev_cond._negated_upstream_conditions) + conditions.append(condition) + + super().__init__( + conditions=conditions, + name=name, + ) + + +class Else(_ConditionBase): + """A class for creating a conditional control flow "else" block within a + pipeline. Can be used following an upstream dsl.If or dsl.Elif. + + Args: + name: The name of the condition group. + + Example: + :: + + task1 = my_component1(...) + task2 = my_component2(...) + with dsl.If(task1.output=='pizza', 'pizza-condition'): + task3 = my_component3(...) + + with dsl.Elif(task2.output=='pasta', 'pasta-condition'): + task4 = my_component4(...) + + with dsl.Else(): + my_component5(...) 
+ """ + + def __init__( + self, + name: Optional[str] = None, + ) -> None: + prev_cond = pipeline_context.Pipeline.get_default_pipeline( + ).get_last_tasks_group() + + if isinstance(prev_cond, Else): + # prefer pushing toward dsl.If rather than dsl.Condition for syntactic consistency with the if-elif-else keywords in Python + raise InvalidControlFlowException( + 'Cannot use dsl.Else following another dsl.Else. dsl.Else can only be used following an upstream dsl.If or dsl.Elif.' + ) + if not isinstance(prev_cond, (Condition, If, Elif)): + # prefer pushing toward dsl.If rather than dsl.Condition for syntactic consistency with the if-elif-else keywords in Python + raise InvalidControlFlowException( + 'dsl.Else can only be used following an upstream dsl.If or dsl.Elif.' + ) + + super().__init__( + conditions=prev_cond._negated_upstream_conditions, + name=name, + ) + + +class InvalidControlFlowException(Exception): + pass + + +def _shallow_copy_list_of_binary_operations( + operations: List[pipeline_channel.ConditionOperation] +) -> List[pipeline_channel.ConditionOperation]: + # shallow copy is sufficient to allow us to invert the negate flag of a ConditionOperation without affecting copies. deep copy not needed and would result in many copies of the full pipeline since PipelineChannels hold references to the pipeline. + return [copy.copy(operation) for operation in operations] class ParallelFor(TasksGroup): @@ -198,7 +351,7 @@ def __init__( items: Union[for_loop.ItemList, pipeline_channel.PipelineChannel], name: Optional[str] = None, parallelism: Optional[int] = None, - ): + ) -> None: """Initializes a for loop task group.""" parallelism = parallelism or 0 if parallelism < 0: diff --git a/sdk/python/kfp/dsl/tasks_group_test.py b/sdk/python/kfp/dsl/tasks_group_test.py index 09ba5cdbc3..40c68ab372 100644 --- a/sdk/python/kfp/dsl/tasks_group_test.py +++ b/sdk/python/kfp/dsl/tasks_group_test.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from absl.testing import parameterized +import unittest + +from kfp import dsl from kfp.dsl import for_loop from kfp.dsl import pipeline_context from kfp.dsl import tasks_group -class ParallelForTest(parameterized.TestCase): +class ParallelForTest(unittest.TestCase): def test_basic(self): loop_items = ['pizza', 'hotdog', 'pasta'] @@ -58,3 +60,21 @@ def test_parallelfor_invalid_parallelism(self): 'ParallelFor parallelism must be >= 0.'): with pipeline_context.Pipeline('pipeline') as p: tasks_group.ParallelFor(items=loop_items, parallelism=-1) + + +class TestConditionDeprecated(unittest.TestCase): + + def test(self): + + @dsl.component + def foo() -> str: + return 'foo' + + @dsl.pipeline + def my_pipeline(string: str): + with self.assertWarnsRegex( + DeprecationWarning, + 'dsl\.Condition is deprecated\. Please use dsl\.If instead\.' + ): + with dsl.Condition(string == 'text'): + foo() diff --git a/sdk/python/test_data/pipelines/if_elif_else.py b/sdk/python/test_data/pipelines/if_elif_else.py new file mode 100644 index 0000000000..fdaa3428f6 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_elif_else.py @@ -0,0 +1,51 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from kfp import compiler +from kfp import dsl + + +@dsl.component +def flip_three_sided_die() -> str: + import random + val = random.randint(0, 2) + + if val == 0: + return 'heads' + elif val == 1: + return 'tails' + else: + return 'draw' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + +@dsl.pipeline +def roll_die_pipeline(): + flip_coin_task = flip_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + print_and_return(text='Got tails!') + with dsl.Else(): + print_and_return(text='Draw!') + + +if __name__ == '__main__': + compiler.Compiler().compile( + pipeline_func=roll_die_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_elif_else.yaml b/sdk/python/test_data/pipelines/if_elif_else.yaml new file mode 100644 index 0000000000..3a353f79a9 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_elif_else.yaml @@ -0,0 +1,280 @@ +# PIPELINE DEFINITION +# Name: roll-die-pipeline +components: + comp-condition-1: + dag: + tasks: + print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return + inputs: + parameters: + text: + runtimeValue: + constant: Got heads! + taskInfo: + name: print-and-return + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + comp-condition-2: + dag: + tasks: + print-and-return-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-2 + inputs: + parameters: + text: + runtimeValue: + constant: Got tails! + taskInfo: + name: print-and-return-2 + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + comp-condition-3: + dag: + tasks: + print-and-return-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-3 + inputs: + parameters: + text: + runtimeValue: + constant: Draw! + taskInfo: + name: print-and-return-3 + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + comp-flip-three-sided-die: + executorLabel: exec-flip-three-sided-die + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return: + executorLabel: exec-print-and-return + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-2: + executorLabel: exec-print-and-return-2 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-3: + executorLabel: exec-print-and-return-3 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING +deploymentSpec: + executors: + exec-flip-three-sided-die: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - flip_three_sided_die + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef flip_three_sided_die() -> str:\n import random\n val =\ + \ random.randint(0, 2)\n\n if val == 0:\n return 'heads'\n \ + \ elif val == 1:\n return 'tails'\n else:\n return 'draw'\n\ + \n" + image: python:3.7 + exec-print-and-return: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 +pipelineInfo: + name: roll-die-pipeline +root: + dag: + tasks: + condition-1: + componentRef: + name: comp-condition-1 + dependentTasks: + - flip-three-sided-die + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-three-sided-die + taskInfo: + name: condition-1 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-three-sided-die-Output'] + == 'heads' + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - flip-three-sided-die + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-three-sided-die + taskInfo: + name: condition-2 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails''' + condition-3: + componentRef: + name: comp-condition-3 + dependentTasks: + - flip-three-sided-die + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-three-sided-die + taskInfo: + name: condition-3 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && !(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails'')' + flip-three-sided-die: + cachingOptions: + enableCache: true + componentRef: + name: comp-flip-three-sided-die + taskInfo: + name: flip-three-sided-die +schemaVersion: 2.1.0 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.py b/sdk/python/test_data/pipelines/if_elif_else_complex.py new file mode 100644 index 0000000000..42623cb508 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.py @@ -0,0 +1,86 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import List + +from kfp import compiler +from kfp import dsl + + +@dsl.component +def int_0_to_9999() -> int: + import random + return random.randint(0, 9999) + + +@dsl.component +def is_even_or_odd(num: int) -> str: + return 'odd' if num % 2 else 'even' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + +@dsl.component +def print_strings(strings: List[str]): + print(strings) + + +@dsl.pipeline +def lucky_number_pipeline(add_drumroll: bool = True, + repeat_if_lucky_number: bool = True, + trials: List[int] = [1, 2, 3]): + with dsl.ParallelFor(trials) as trial: + int_task = int_0_to_9999().set_caching_options(False) + with dsl.If(add_drumroll == True): + with dsl.If(trial == 3): + print_and_return(text='Adding drumroll on last trial!') + + with dsl.If(int_task.output < 5000): + + even_or_odd_task = is_even_or_odd(num=int_task.output) + + with dsl.If(even_or_odd_task.output == 'even'): + print_and_return(text='Got a low even number!') + with dsl.Else(): + print_and_return(text='Got a low odd number!') + + with dsl.Elif(int_task.output > 5000): + + even_or_odd_task = is_even_or_odd(num=int_task.output) + + with dsl.If(even_or_odd_task.output == 'even'): + print_and_return(text='Got a high even number!') + with dsl.Else(): + print_and_return(text='Got a high odd number!') + + with dsl.Else(): + print_and_return( + text='Announcing: Got the lucky number 5000! A one in 10,000 chance.' + ) + with dsl.If(repeat_if_lucky_number == True): + with dsl.ParallelFor([1, 2]) as _: + print_and_return( + text='Announcing again: Got the lucky number 5000! A one in 10,000 chance.' + ) + + print_strings(strings=dsl.Collected(even_or_odd_task.output)) + + +if __name__ == '__main__': + compiler.Compiler().compile( + pipeline_func=lucky_number_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml new file mode 100644 index 0000000000..7fe0116c46 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml @@ -0,0 +1,910 @@ +# PIPELINE DEFINITION +# Name: lucky-number-pipeline +# Inputs: +# add_drumroll: bool [Default: True] +# repeat_if_lucky_number: bool [Default: True] +# trials: list [Default: [1.0, 2.0, 3.0]] +components: + comp-condition-10: + dag: + tasks: + condition-11: + componentRef: + name: comp-condition-11 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: pipelinechannel--repeat_if_lucky_number + taskInfo: + name: condition-11 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--repeat_if_lucky_number'] + == true + print-and-return-6: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-6 + inputs: + parameters: + text: + runtimeValue: + constant: 'Announcing: Got the lucky number 5000! A one in 10,000 + chance.' 
+ taskInfo: + name: print-and-return-6 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--repeat_if_lucky_number: + parameterType: BOOLEAN + comp-condition-11: + dag: + tasks: + for-loop-13: + componentRef: + name: comp-for-loop-13 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: pipelinechannel--repeat_if_lucky_number + parameterIterator: + itemInput: pipelinechannel--loop-item-param-12 + items: + raw: '[1, 2]' + taskInfo: + name: for-loop-13 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--repeat_if_lucky_number: + parameterType: BOOLEAN + comp-condition-2: + dag: + tasks: + condition-3: + componentRef: + name: comp-condition-3 + inputs: + parameters: + pipelinechannel--add_drumroll: + componentInputParameter: pipelinechannel--add_drumroll + pipelinechannel--trials-loop-item: + componentInputParameter: pipelinechannel--trials-loop-item + taskInfo: + name: condition-3 + triggerPolicy: + condition: int(inputs.parameter_values['pipelinechannel--trials-loop-item']) + == 3 + inputDefinitions: + parameters: + pipelinechannel--add_drumroll: + parameterType: BOOLEAN + pipelinechannel--trials-loop-item: + parameterType: NUMBER_INTEGER + comp-condition-3: + dag: + tasks: + print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return + inputs: + parameters: + text: + runtimeValue: + constant: Adding drumroll on last trial! + taskInfo: + name: print-and-return + inputDefinitions: + parameters: + pipelinechannel--add_drumroll: + parameterType: BOOLEAN + pipelinechannel--trials-loop-item: + parameterType: NUMBER_INTEGER + comp-condition-4: + dag: + tasks: + condition-5: + componentRef: + name: comp-condition-5 + dependentTasks: + - is-even-or-odd + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: is-even-or-odd + taskInfo: + name: condition-5 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-Output'] + == 'even' + condition-6: + componentRef: + name: comp-condition-6 + dependentTasks: + - is-even-or-odd + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: is-even-or-odd + taskInfo: + name: condition-6 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-Output''] + == ''even'')' + is-even-or-odd: + cachingOptions: + enableCache: true + componentRef: + name: comp-is-even-or-odd + inputs: + parameters: + num: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + taskInfo: + name: is-even-or-odd + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + comp-condition-5: + dag: + tasks: + print-and-return-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-2 + inputs: + parameters: + text: + runtimeValue: + constant: Got a low even number! 
+ taskInfo: + name: print-and-return-2 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-Output: + parameterType: STRING + comp-condition-6: + dag: + tasks: + print-and-return-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-3 + inputs: + parameters: + text: + runtimeValue: + constant: Got a low odd number! + taskInfo: + name: print-and-return-3 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-Output: + parameterType: STRING + comp-condition-7: + dag: + outputs: + parameters: + pipelinechannel--is-even-or-odd-2-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: is-even-or-odd-2 + tasks: + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - is-even-or-odd-2 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: is-even-or-odd-2 + taskInfo: + name: condition-8 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-2-Output'] + == 'even' + condition-9: + componentRef: + name: comp-condition-9 + dependentTasks: + - is-even-or-odd-2 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: is-even-or-odd-2 + taskInfo: + name: condition-9 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-2-Output''] + == ''even'')' + is-even-or-odd-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-is-even-or-odd-2 + inputs: + parameters: + num: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + taskInfo: + name: is-even-or-odd-2 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + pipelinechannel--is-even-or-odd-2-Output: + parameterType: NUMBER_INTEGER + comp-condition-8: + dag: + tasks: + print-and-return-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-4 + inputs: + parameters: + text: + runtimeValue: + constant: Got a high even number! + taskInfo: + name: print-and-return-4 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-2-Output: + parameterType: STRING + comp-condition-9: + dag: + tasks: + print-and-return-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-5 + inputs: + parameters: + text: + runtimeValue: + constant: Got a high odd number! 
+ taskInfo: + name: print-and-return-5 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-2-Output: + parameterType: STRING + comp-for-loop-1: + dag: + outputs: + parameters: + pipelinechannel--is-even-or-odd-2-Output: + valueFromParameter: + outputParameterKey: pipelinechannel--is-even-or-odd-2-Output + producerSubtask: condition-7 + tasks: + condition-10: + componentRef: + name: comp-condition-10 + dependentTasks: + - int-0-to-9999 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: int-0-to-9999 + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: pipelinechannel--repeat_if_lucky_number + taskInfo: + name: condition-10 + triggerPolicy: + condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + < 5000) && !(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + > 5000)' + condition-2: + componentRef: + name: comp-condition-2 + inputs: + parameters: + pipelinechannel--add_drumroll: + componentInputParameter: pipelinechannel--add_drumroll + pipelinechannel--trials-loop-item: + componentInputParameter: pipelinechannel--trials-loop-item + taskInfo: + name: condition-2 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--add_drumroll'] == + true + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - int-0-to-9999 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: int-0-to-9999 + taskInfo: + name: condition-4 + triggerPolicy: + condition: int(inputs.parameter_values['pipelinechannel--int-0-to-9999-Output']) + < 5000 + condition-7: + componentRef: + name: comp-condition-7 + dependentTasks: + - int-0-to-9999 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: int-0-to-9999 + taskInfo: + name: condition-7 + triggerPolicy: + condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + < 5000) && int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + > 5000' + int-0-to-9999: + cachingOptions: {} + componentRef: + name: comp-int-0-to-9999 + taskInfo: + name: int-0-to-9999 + inputDefinitions: + parameters: + pipelinechannel--add_drumroll: + parameterType: BOOLEAN + pipelinechannel--repeat_if_lucky_number: + parameterType: BOOLEAN + pipelinechannel--trials: + parameterType: LIST + pipelinechannel--trials-loop-item: + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + pipelinechannel--is-even-or-odd-2-Output: + parameterType: NUMBER_INTEGER + comp-for-loop-13: + dag: + tasks: + print-and-return-7: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-7 + inputs: + parameters: + text: + runtimeValue: + constant: 'Announcing again: Got the lucky number 5000! A one in + 10,000 chance.' 
+ taskInfo: + name: print-and-return-7 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--loop-item-param-12: + parameterType: NUMBER_INTEGER + pipelinechannel--repeat_if_lucky_number: + parameterType: BOOLEAN + comp-int-0-to-9999: + executorLabel: exec-int-0-to-9999 + outputDefinitions: + parameters: + Output: + parameterType: NUMBER_INTEGER + comp-is-even-or-odd: + executorLabel: exec-is-even-or-odd + inputDefinitions: + parameters: + num: + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-is-even-or-odd-2: + executorLabel: exec-is-even-or-odd-2 + inputDefinitions: + parameters: + num: + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return: + executorLabel: exec-print-and-return + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-2: + executorLabel: exec-print-and-return-2 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-3: + executorLabel: exec-print-and-return-3 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-4: + executorLabel: exec-print-and-return-4 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-5: + executorLabel: exec-print-and-return-5 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-6: + executorLabel: exec-print-and-return-6 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-7: + executorLabel: exec-print-and-return-7 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-strings: + executorLabel: exec-print-strings + inputDefinitions: + parameters: + strings: + parameterType: LIST +deploymentSpec: + executors: + exec-int-0-to-9999: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - int_0_to_9999 + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef int_0_to_9999() -> int:\n import random\n return random.randint(0,\ + \ 9999)\n\n" + image: python:3.7 + exec-is-even-or-odd: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - is_even_or_odd + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef is_even_or_odd(num: int) -> str:\n return 'odd' if num % 2\ + \ else 'even'\n\n" + image: python:3.7 + exec-is-even-or-odd-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - is_even_or_odd + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef is_even_or_odd(num: int) -> str:\n return 'odd' if num % 2\ + \ else 'even'\n\n" + image: python:3.7 + exec-print-and-return: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-4: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-5: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-6: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-7: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-strings: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_strings + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_strings(strings: List[str]):\n print(strings)\n\n" + image: python:3.7 +pipelineInfo: + name: lucky-number-pipeline +root: + dag: + tasks: + for-loop-1: + componentRef: + name: comp-for-loop-1 + inputs: + parameters: + pipelinechannel--add_drumroll: + componentInputParameter: add_drumroll + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: repeat_if_lucky_number + pipelinechannel--trials: + componentInputParameter: trials + parameterIterator: + itemInput: pipelinechannel--trials-loop-item + items: + inputParameter: pipelinechannel--trials + taskInfo: + name: for-loop-1 + print-strings: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-strings + dependentTasks: + - for-loop-1 + inputs: + parameters: + strings: + taskOutputParameter: + outputParameterKey: pipelinechannel--is-even-or-odd-2-Output + producerTask: for-loop-1 + taskInfo: + name: print-strings + inputDefinitions: + parameters: + add_drumroll: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + repeat_if_lucky_number: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + trials: + defaultValue: + - 1.0 + - 2.0 + - 3.0 + isOptional: true + parameterType: LIST +schemaVersion: 2.1.0 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/if_else.py b/sdk/python/test_data/pipelines/if_else.py new file mode 100644 index 0000000000..1da8a074ac --- /dev/null +++ b/sdk/python/test_data/pipelines/if_else.py @@ -0,0 +1,42 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from kfp import compiler +from kfp import dsl + + +@dsl.component +def flip_coin() -> str: + import random + return 'heads' if random.randint(0, 1) == 0 else 'tails' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + +@dsl.pipeline +def flip_coin_pipeline(): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_and_return(text='Got heads!') + with dsl.Else(): + print_and_return(text='Got tails!') + + +if __name__ == '__main__': + compiler.Compiler().compile( + pipeline_func=flip_coin_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_else.yaml b/sdk/python/test_data/pipelines/if_else.yaml new file mode 100644 index 0000000000..a64ff7b87d --- /dev/null +++ b/sdk/python/test_data/pipelines/if_else.yaml @@ -0,0 +1,202 @@ +# PIPELINE DEFINITION +# Name: flip-coin-pipeline +components: + comp-condition-1: + dag: + tasks: + print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return + inputs: + parameters: + text: + runtimeValue: + constant: Got heads! + taskInfo: + name: print-and-return + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING + comp-condition-2: + dag: + tasks: + print-and-return-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-2 + inputs: + parameters: + text: + runtimeValue: + constant: Got tails! + taskInfo: + name: print-and-return-2 + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING + comp-flip-coin: + executorLabel: exec-flip-coin + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return: + executorLabel: exec-print-and-return + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-2: + executorLabel: exec-print-and-return-2 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING +deploymentSpec: + executors: + exec-flip-coin: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - flip_coin + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef flip_coin() -> str:\n import random\n return 'heads' if\ + \ random.randint(0, 1) == 0 else 'tails'\n\n" + image: python:3.7 + exec-print-and-return: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 +pipelineInfo: + name: flip-coin-pipeline +root: + dag: + tasks: + condition-1: + componentRef: + name: comp-condition-1 + dependentTasks: + - flip-coin + inputs: + parameters: + pipelinechannel--flip-coin-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-coin + taskInfo: + name: condition-1 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] + == 'heads' + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - flip-coin + inputs: + parameters: + pipelinechannel--flip-coin-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-coin + taskInfo: + name: condition-2 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] + == ''heads'')' + flip-coin: + cachingOptions: + enableCache: true + componentRef: + name: comp-flip-coin + taskInfo: + name: flip-coin +schemaVersion: 2.1.0 +sdkVersion: kfp-2.1.2 diff --git a/sdk/python/test_data/test_data_config.yaml b/sdk/python/test_data/test_data_config.yaml index 02aae9d1da..b40267f35c 100644 --- a/sdk/python/test_data/test_data_config.yaml +++ b/sdk/python/test_data/test_data_config.yaml @@ -168,6 +168,15 @@ pipelines: - module: pipeline_with_metadata_fields name: dataset_concatenator execute: false + - module: if_else + name: flip_coin_pipeline + execute: false + - module: if_elif_else + name: roll_die_pipeline + execute: false + - module: if_elif_else_complex + name: lucky_number_pipeline + execute: false components: test_data_dir: sdk/python/test_data/components read: true From 0ced6ec7d2846faefc655bad5ea549f81cfcd373 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 12 Sep 2023 11:38:40 -0700 Subject: [PATCH 141/253] feat(components): Implement chunking for embedding evaluation pipeline PiperOrigin-RevId: 564786040 --- 
.../evaluation_llm_embedding_pipeline.py | 13 +++++++++++++ .../llm_embedding_retrieval/component.py | 11 ++++++++--- .../component.py | 8 ++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py index d6665b2b0b..899c43c78d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py @@ -34,6 +34,9 @@ def evaluation_llm_embedding_pipeline( query_gcs_source: str, golden_docs_gcs_source: str, model_name: str, + embedding_chunk_size: int = 0, + embedding_chunk_overlap: int = 0, + embedding_retrieval_combination_function: str = 'max', batch_predict_instances_format: str = 'jsonl', batch_predict_predictions_format: str = 'jsonl', embedding_retrieval_top_n: int = 10, @@ -62,6 +65,13 @@ def evaluation_llm_embedding_pipeline( golden_docs_gcs_source: The gcs location for csv file containing mapping of each query to the golden docs. model_name: The path for model to generate embeddings. + embedding_chunk_size: The length of each document chunk. If 0, chunking is + not enabled. + embedding_chunk_overlap: The length of the overlap part between adjacent + chunks. Will only be used if embedding_chunk_size > 0. + embedding_retrieval_combination_function: The function to combine + query-chunk similarities to query-doc similarity. Supported functions are + avg, max, and sum. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. If not set, default to "jsonl". 
For more details about this input config, see @@ -117,6 +127,8 @@ def evaluation_llm_embedding_pipeline( service_account=service_account, network=network, runner=runner, + embedding_chunk_size=embedding_chunk_size, + embedding_chunk_overlap=embedding_chunk_overlap, dataflow_service_account=dataflow_service_account, dataflow_disk_size_gb=dataflow_disk_size_gb, dataflow_machine_type=dataflow_machine_type, @@ -181,6 +193,7 @@ def evaluation_llm_embedding_pipeline( 'gcs_output_directory' ], embedding_retrieval_top_n=embedding_retrieval_top_n, + embedding_retrieval_combination_function=embedding_retrieval_combination_function, machine_type=machine_type, service_account=service_account, network=network, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py index ba0a22dd73..da49f11904 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py @@ -34,6 +34,7 @@ def llm_embedding_retrieval( query_embedding_source_directory: Input[Artifact], doc_embedding_source_directory: Input[Artifact], embedding_retrieval_top_n: int, + embedding_retrieval_combination_function: str = 'max', display_name: str = 'llm_embedding_retrieval_component', machine_type: str = 'e2-highmem-16', service_account: str = '', @@ -53,12 +54,15 @@ def llm_embedding_retrieval( Args: project: Required. The GCP project that runs the pipeline component. location: Required. The GCP region that runs the pipeline component. - query_embedding_source_directory: Required. Directory where query embedding + query_embedding_source_directory: Required. Directory where query + embedding results are saved. + doc_embedding_source_directory: Required. Directory where doc embedding results are saved. - doc_embedding_source_directory: Required. Directory where doc embedding results - are saved. embedding_retrieval_top_n: Required. Top N docs will be retrieved for each query, based on similarity. + embedding_retrieval_combination_function: The function to combine + query-chunk similarities to query-doc similarity. Supported functions + are avg, max, and sum. display_name: The name of the Evaluation job. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. 
More details: @@ -118,6 +122,7 @@ def llm_embedding_retrieval( f'--query_embedding_source_directory={query_embedding_source_directory.path}', f'--doc_embedding_source_directory={doc_embedding_source_directory.path}', f'--embedding_retrieval_top_n={embedding_retrieval_top_n}', + f'--embedding_retrieval_combination_function={embedding_retrieval_combination_function}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--gcp_resources={gcp_resources}', f'--embedding_retrieval_results_path={embedding_retrieval_results_path}', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py index 47033772af..8653b916c2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -34,6 +34,8 @@ def llm_information_retrieval_preprocessor( corpus_gcs_source: str, query_gcs_source: str, golden_docs_gcs_source: str, + embedding_chunk_size: int = 0, + embedding_chunk_overlap: int = 0, display_name: str = 'information-retrieval-preprocessor', machine_type: str = 'e2-highmem-16', service_account: str = '', @@ -61,6 +63,10 @@ def llm_information_retrieval_preprocessor( documents. golden_docs_gcs_source: Required. The path for csv file containing mapping of each query to the golden docs. + embedding_chunk_size: The length of each document chunk. If 0, chunking is + not enabled. + embedding_chunk_overlap: The length of the overlap part between adjacent + chunks. Will only be used if embedding_chunk_size > 0. display_name: The name of the Evaluation job. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. 
More details: @@ -132,6 +138,8 @@ def llm_information_retrieval_preprocessor( f'--predictions_query_gcs_source={predictions_query_gcs_source}', f'--predictions_corpus_gcs_source={predictions_corpus_gcs_source}', f'--embedding_retrieval_gcs_source={embedding_retrieval_gcs_source}', + f'--embedding_chunk_size={embedding_chunk_size}', + f'--embedding_chunk_overlap={embedding_chunk_overlap}', f'--runner={runner}', f'--dataflow_service_account={dataflow_service_account}', f'--dataflow_disk_size={dataflow_disk_size_gb}', From abf05f48191b214bf5e993cd4cc725ff793d544c Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 12 Sep 2023 13:05:16 -0700 Subject: [PATCH 142/253] feat(components): fork a subset of `v1` `custom_job` and `gcp_launcher` container code to `preview` PiperOrigin-RevId: 564809626 --- .../container/preview/custom_job/__init__.py | 14 ++ .../container/preview/custom_job/launcher.py | 56 +++++ .../preview/custom_job/remote_runner.py | 97 ++++++++ .../preview/gcp_launcher/__init__.py | 14 ++ .../preview/gcp_launcher/job_remote_runner.py | 235 ++++++++++++++++++ .../preview/custom_job/__init__.py | 29 +++ 6 files changed, 445 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/launcher.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/job_remote_runner.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py diff --git a/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/__init__.py new file mode 100644 index 0000000000..11b29089f1 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Components - Custom Job Launcher and Remote Runner.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/launcher.py b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/launcher.py new file mode 100644 index 0000000000..d9c7237421 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/launcher.py @@ -0,0 +1,56 @@ +# Copyright 2021 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""GCP launcher for custom jobs based on the AI Platform SDK.""" + +import logging +import sys + +from google_cloud_pipeline_components.container.preview.custom_job import remote_runner +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import parser_util + + +def _parse_args(args): + """Parse command line arguments.""" + _, parsed_args = parser_util.parse_default_args(args) + return vars(parsed_args) + + +def main(argv): + """Main entry. + + Expected input args are as follows: + Project - Required. The project of which the resource will be launched. + Region - Required. The region of which the resource will be launched. + Type - Required. GCP launcher is a single container. This Enum will + specify which resource to be launched. + Request payload - Required. The full serialized json of the resource spec. + Note this can contain the Pipeline Placeholders. + gcp_resources - placeholder output for returning job_id. + + Args: + argv: A list of system arguments. + """ + parsed_args = _parse_args(argv) + job_type = parsed_args['type'] + + if job_type != 'CustomJob': + raise ValueError('Incorrect job type: ' + job_type) + + logging.info('Job started for type: ' + job_type) + + remote_runner.create_custom_job(**parsed_args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py new file mode 100644 index 0000000000..d72f35833a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/preview/custom_job/remote_runner.py @@ -0,0 +1,97 @@ +# Copyright 2022 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""GCP launcher for custom jobs based on the AI Platform SDK.""" + +import json + +from google.api_core import retry +from google_cloud_pipeline_components.container.preview.gcp_launcher import job_remote_runner +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import gcp_labels_util + +_CUSTOM_JOB_RETRY_DEADLINE_SECONDS = 10.0 * 60.0 +LABELS_PAYLOAD_KEY = 'labels' + + +def insert_system_labels_into_payload(payload): + job_spec = json.loads(payload) + job_spec[LABELS_PAYLOAD_KEY] = gcp_labels_util.attach_system_labels( + job_spec[LABELS_PAYLOAD_KEY] if LABELS_PAYLOAD_KEY in job_spec else {} + ) + return json.dumps(job_spec) + + +def create_custom_job_with_client(job_client, parent, job_spec): + create_custom_job_fn = None + try: + create_custom_job_fn = job_client.create_custom_job( + parent=parent, custom_job=job_spec + ) + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) + return create_custom_job_fn + + +def get_custom_job_with_client(job_client, job_name): + get_custom_job_fn = None + try: + get_custom_job_fn = job_client.get_custom_job( + name=job_name, + retry=retry.Retry(deadline=_CUSTOM_JOB_RETRY_DEADLINE_SECONDS), + ) + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) + return get_custom_job_fn + + +def create_custom_job( + type, + project, + location, + payload, + gcp_resources, +): + """Create and poll custom job status till it reaches a final state. + + This follows the typical launching logic: + 1. Read if the custom job already exists in gcp_resources + - If already exists, jump to step 3 and poll the job status. This happens + if the launcher container experienced unexpected termination, such as + preemption + 2. Deserialize the payload into the job spec and create the custom job + 3. Poll the custom job status every _POLLING_INTERVAL_IN_SECONDS seconds + - If the custom job is succeeded, return succeeded + - If the custom job is cancelled/paused, it's an unexpected scenario so + return failed + - If the custom job is running, continue polling the status + Also retry on ConnectionError up to + job_remote_runner._CONNECTION_ERROR_RETRY_LIMIT times during the poll. + """ + remote_runner = job_remote_runner.JobRemoteRunner( + type, project, location, gcp_resources + ) + + try: + # Create custom job if it does not exist + job_name = remote_runner.check_if_job_exists() + if job_name is None: + job_name = remote_runner.create_job( + create_custom_job_with_client, + insert_system_labels_into_payload(payload), + ) + + # Poll custom job status until "JobState.JOB_STATE_SUCCEEDED" + remote_runner.poll_job(get_custom_job_with_client, job_name) + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/__init__.py b/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/__init__.py new file mode 100644 index 0000000000..4baaf5d93f --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2021 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""GCP Launcher Modules for google cloud components.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/job_remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/job_remote_runner.py new file mode 100644 index 0000000000..1ff4e6fd8e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/preview/gcp_launcher/job_remote_runner.py @@ -0,0 +1,235 @@ +# Copyright 2021 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Common module for launching and managing the Vertex Job resources.""" + +import json +import logging +import os +from os import path +import re +import time +from typing import Optional + +from google.api_core import client_options +from google.api_core import gapic_v1 +import google.auth +import google.auth.transport.requests +from google.cloud import aiplatform_v1beta1 +from google.cloud.aiplatform_v1beta1.types import job_state as gca_job_state +from google_cloud_pipeline_components.container.utils import execution_context +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util +from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources +import requests + +from google.protobuf import json_format + +_POLLING_INTERVAL_IN_SECONDS = 20 +_CONNECTION_ERROR_RETRY_LIMIT = 5 + +_JOB_COMPLETE_STATES = ( + gca_job_state.JobState.JOB_STATE_SUCCEEDED, + gca_job_state.JobState.JOB_STATE_FAILED, + gca_job_state.JobState.JOB_STATE_CANCELLED, + gca_job_state.JobState.JOB_STATE_PAUSED, +) + +_JOB_ERROR_STATES = ( + gca_job_state.JobState.JOB_STATE_FAILED, + gca_job_state.JobState.JOB_STATE_CANCELLED, + gca_job_state.JobState.JOB_STATE_PAUSED, +) + +# Job error codes mapping can be found in: +# https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto +_JOB_USER_ERROR_CODES = ( + 3, # INVALID_ARGUMENT + 5, # NOT_FOUND + 7, # PERMISSION_DENIED + 6, # ALREADY_EXISTS + 9, # FAILED_PRECONDITION + 11, # OUT_OF_RANGE + 12, # UNIMPLEMENTED +) + + +class JobRemoteRunner: + """Common module for creating and poll jobs on the Vertex Platform.""" + + def __init__(self, job_type, project, location, gcp_resources): + """Initializes a job client and other common attributes.""" + self.job_type = job_type + self.project = project + self.location = location + self.gcp_resources = gcp_resources + self.client_options = client_options.ClientOptions( + api_endpoint=location + 
'-aiplatform.googleapis.com' + ) + self.client_info = gapic_v1.client_info.ClientInfo( + user_agent='google-cloud-pipeline-components' + ) + self.job_client = aiplatform_v1beta1.JobServiceClient( + client_options=self.client_options, client_info=self.client_info + ) + self.job_uri_prefix = f'https://{self.client_options.api_endpoint}/v1beta1/' + self.poll_job_name = '' + + def check_if_job_exists(self) -> Optional[str]: + """Check if the job already exists.""" + if ( + path.exists(self.gcp_resources) + and os.stat(self.gcp_resources).st_size != 0 + ): + with open(self.gcp_resources) as f: + serialized_gcp_resources = f.read() + job_resources = json_format.Parse( + serialized_gcp_resources, GcpResources() + ) + # Resources should only contain one item. + if len(job_resources.resources) != 1: + raise ValueError( + 'gcp_resources should contain one resource, found' + f' {len(job_resources.resources)}' + ) + + job_name_group = re.findall( + f'{self.job_uri_prefix}(.*)', + job_resources.resources[0].resource_uri, + ) + + if not job_name_group or not job_name_group[0]: + raise ValueError( + 'Job Name in gcp_resource is not formatted correctly or is empty.' + ) + job_name = job_name_group[0] + + logging.info( + '%s name already exists: %s. Continue polling the status', + self.job_type, + job_name, + ) + return job_name + else: + return None + + def create_job(self, create_job_fn, payload) -> str: + """Create a job.""" + parent = f'projects/{self.project}/locations/{self.location}' + # TODO(kevinbnaughton) remove empty fields from the spec temporarily. + job_spec = json_util.recursive_remove_empty( + json.loads(payload, strict=False) + ) + create_job_response = create_job_fn(self.job_client, parent, job_spec) + job_name = create_job_response.name + + # Write the job proto to output. + job_resources = GcpResources() + job_resource = job_resources.resources.add() + job_resource.resource_type = self.job_type + job_resource.resource_uri = f'{self.job_uri_prefix}{job_name}' + + with open(self.gcp_resources, 'w') as f: + f.write(json_format.MessageToJson(job_resources)) + + return job_name + + def poll_job(self, get_job_fn, job_name: str): + """Poll the job status.""" + with execution_context.ExecutionContext( + on_cancel=lambda: self.send_cancel_request(job_name) + ): + retry_count = 0 + while True: + try: + get_job_response = get_job_fn(self.job_client, job_name) + retry_count = 0 + # Handle transient connection error. + except ConnectionError as err: + retry_count += 1 + if retry_count < _CONNECTION_ERROR_RETRY_LIMIT: + logging.warning( + ( + 'ConnectionError (%s) encountered when polling job: %s.' + ' Trying to recreate the API client.' + ), + err, + job_name, + ) + # Recreate the Python API client. + self.job_client = aiplatform_v1beta1.JobServiceClient( + self.client_options, self.client_info + ) + logging.info( + 'Waiting for %s seconds for next poll.', + _POLLING_INTERVAL_IN_SECONDS, + ) + time.sleep(_POLLING_INTERVAL_IN_SECONDS) + continue + else: + # TODO(ruifang) propagate the error. + # Exit with an internal error code. + error_util.exit_with_internal_error( + f'Request failed after {_CONNECTION_ERROR_RETRY_LIMIT} retries.' + ) + + if get_job_response.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED: + logging.info( + 'Get%s response state =%s', self.job_type, get_job_response.state + ) + return get_job_response + elif get_job_response.state in _JOB_ERROR_STATES: + # TODO(ruifang) propagate the error. 
+ if get_job_response.error.code in _JOB_USER_ERROR_CODES: + raise ValueError( + 'Job failed with value error in error state: {}.'.format( + get_job_response.state + ) + ) + else: + raise RuntimeError( + 'Job failed with error state: {}.'.format( + get_job_response.state + ) + ) + else: + logging.info( + ( + 'Job %s is in a non-final state %s.' + ' Waiting for %s seconds for next poll.' + ), + job_name, + get_job_response.state, + _POLLING_INTERVAL_IN_SECONDS, + ) + time.sleep(_POLLING_INTERVAL_IN_SECONDS) + + def send_cancel_request(self, job_name: str): + if not job_name: + return + creds, _ = google.auth.default( + scopes=['https://www.googleapis.com/auth/cloud-platform'] + ) + if not creds.valid: + creds.refresh(google.auth.transport.requests.Request()) + headers = { + 'Content-type': 'application/json', + 'Authorization': 'Bearer ' + creds.token, + } + # Vertex AI cancel APIs: + # https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.hyperparameterTuningJobs/cancel + # https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs/cancel + # https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/cancel + requests.post( + url=f'{self.job_uri_prefix}{job_name}:cancel', data='', headers=headers + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py new file mode 100644 index 0000000000..ac8c9aeb78 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# fmt: off +"""Run KFP components as [Vertex AI Custom Training Jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) with +customized worker and cloud configurations. 
+""" +# fmt: on + +from google_cloud_pipeline_components.preview.custom_job.component import custom_training_job as CustomTrainingJobOp +from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_from_component +from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_op_from_component + +__all__ = [ + 'CustomTrainingJobOp', + 'create_custom_training_job_op_from_component', + 'create_custom_training_job_from_component', +] From 4bee3d8dc2ee9c33d87e1058bac2a94d899dd4a5 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 12 Sep 2023 16:07:25 -0700 Subject: [PATCH 143/253] chore(components): Add public-image-gcpc-v2-scan tag for latest gcpc image PiperOrigin-RevId: 564861482 --- components/google-cloud/cloudbuild.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/components/google-cloud/cloudbuild.yaml b/components/google-cloud/cloudbuild.yaml index 714c86e1ed..0b9ec701bd 100644 --- a/components/google-cloud/cloudbuild.yaml +++ b/components/google-cloud/cloudbuild.yaml @@ -2,4 +2,5 @@ steps: - name: 'gcr.io/kaniko-project/executor:latest' args: - --destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:$_IMAGE_SUFFIX + - --destination=gcr.io/$PROJECT_ID/google-cloud-pipeline-components:public-image-gcpc-v2-scan - --cache=false From 2a4460cb633fab22173341bf37b30f4b340e8040 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 13 Sep 2023 10:56:46 -0700 Subject: [PATCH 144/253] chore(components): internal PiperOrigin-RevId: 565100600 --- components/google-cloud/RELEASE.md | 2 ++ .../v1/custom_job/component.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index a01fb8c71f..2bc2b3c2a6 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,7 @@ ## Upcoming release +* Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` + ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 * Require KFP SDK <=2.1.3 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py index 62bafbe6b5..25c0b259f3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py @@ -120,9 +120,7 @@ def custom_training_job( }, }, 'labels': labels, - 'encryption_spec_key_name': { - 'kms_key_name': encryption_spec_key_name - }, + 'encryption_spec': {'kms_key_name': encryption_spec_key_name}, }, gcp_resources=gcp_resources, ) From a39980a337f1b9e872ec5e529bcefada69923553 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Wed, 13 Sep 2023 14:04:29 -0700 Subject: [PATCH 145/253] chore(frontend): Improve structure of NewExperimentFC. (#9951) * chore(frontend): Improve structure of NewExperimentFC. 
* fix condition * extend timeout for one test --- frontend/src/pages/NewRun.test.tsx | 2 +- .../functional_components/NewExperimentFC.tsx | 57 +++++++++---------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/frontend/src/pages/NewRun.test.tsx b/frontend/src/pages/NewRun.test.tsx index 60baef079d..db00f398f7 100644 --- a/frontend/src/pages/NewRun.test.tsx +++ b/frontend/src/pages/NewRun.test.tsx @@ -1699,7 +1699,7 @@ describe('NewRun', () => { await screen.findByDisplayValue('prefilled value 1'); await screen.findByLabelText('param-2'); await screen.findByDisplayValue('prefilled value 2'); - }); + }, 10000); it('trims whitespace from the pipeline params', async () => { tree = shallow(); diff --git a/frontend/src/pages/functional_components/NewExperimentFC.tsx b/frontend/src/pages/functional_components/NewExperimentFC.tsx index 4cc53e8caf..4304adfc4c 100644 --- a/frontend/src/pages/functional_components/NewExperimentFC.tsx +++ b/frontend/src/pages/functional_components/NewExperimentFC.tsx @@ -14,21 +14,21 @@ * limitations under the License. */ +import Button from '@material-ui/core/Button'; import React, { useEffect, useState } from 'react'; +import { useMutation, useQuery } from 'react-query'; +import { commonCss, fontsize, padding } from 'src/Css'; +import { V2beta1Experiment } from 'src/apisv2beta1/experiment'; +import { V2beta1PipelineVersion } from 'src/apisv2beta1/pipeline'; import BusyButton from 'src/atoms/BusyButton'; -import Button from '@material-ui/core/Button'; import Input from 'src/atoms/Input'; -import { V2beta1Experiment } from 'src/apisv2beta1/experiment'; +import { QUERY_PARAMS, RoutePage } from 'src/components/Router'; import { Apis } from 'src/lib/Apis'; -import { PageProps } from 'src/pages/Page'; -import { RoutePage, QUERY_PARAMS } from 'src/components/Router'; import { URLParser } from 'src/lib/URLParser'; -import { classes, stylesheet } from 'typestyle'; -import { commonCss, padding, fontsize } from 'src/Css'; import { errorToMessage } from 'src/lib/Utils'; import { getLatestVersion } from 'src/pages/NewRunV2'; -import { useMutation } from 'react-query'; -import { V2beta1PipelineVersion } from 'src/apisv2beta1/pipeline'; +import { PageProps } from 'src/pages/Page'; +import { classes, stylesheet } from 'typestyle'; const css = stylesheet({ errorMessage: { @@ -52,29 +52,26 @@ export function NewExperimentFC(props: NewExperimentFCProps) { const [description, setDescription] = useState(''); const [experimentName, setExperimentName] = useState(''); const [isbeingCreated, setIsBeingCreated] = useState(false); - const [errorMessage, setErrorMessage] = useState(''); - const [latestVersion, setLatestVersion] = useState(); const [experimentResponse, setExperimentResponse] = useState(); const [errMsgFromApi, setErrMsgFromApi] = useState(); const pipelineId = urlParser.get(QUERY_PARAMS.pipelineId); + const { data: latestVersion } = useQuery( + ['pipeline_versions', pipelineId], + () => getLatestVersion(pipelineId!), + { enabled: !!pipelineId }, + ); + useEffect(() => { updateToolbar({ actions: {}, breadcrumbs: [{ displayName: 'Experiments', href: RoutePage.EXPERIMENTS }], pageTitle: 'New experiment', }); + // Initialize toolbar only once during the first render. 
// eslint-disable-next-line react-hooks/exhaustive-deps }, []); - useEffect(() => { - if (pipelineId) { - (async () => { - setLatestVersion(await getLatestVersion(pipelineId)); - })(); - } - }, [pipelineId]); - // Handle the redirection work when createExperiment is succeed useEffect(() => { if (experimentResponse) { @@ -97,17 +94,11 @@ export function NewExperimentFC(props: NewExperimentFCProps) { open: true, }); } + // Only trigger this effect when search string parameters change. + // Do not rerun this effect if updateSnackbar callback has changes to avoid re-rendering. + // Do not rerun this effect if pipelineId has changes to avoid re-rendering. // eslint-disable-next-line react-hooks/exhaustive-deps - }, [experimentResponse]); - - // Handle the error when createExperiment() is failed - useEffect(() => { - if (!experimentName) { - setErrorMessage('Experiment name is required'); - } else { - setErrorMessage(''); - } - }, [experimentName]); + }, [experimentResponse, latestVersion]); useEffect(() => { if (errMsgFromApi) { @@ -118,8 +109,9 @@ export function NewExperimentFC(props: NewExperimentFCProps) { title: 'Experiment creation failed', }); } + // Do not rerun this effect if updateDialog callback has changes to avoid re-rendering. // eslint-disable-next-line react-hooks/exhaustive-deps - }, [errMsgFromApi]); + }, [errMsgFromApi, updateDialog]); const newExperimentMutation = useMutation((experiment: V2beta1Experiment) => { return Apis.experimentServiceApiV2.createExperiment(experiment); @@ -136,6 +128,7 @@ export function NewExperimentFC(props: NewExperimentFCProps) { newExperimentMutation.mutate(newExperiment, { onSuccess: response => { setExperimentResponse(response); + setErrMsgFromApi(undefined); }, onError: async err => { setErrMsgFromApi(await errorToMessage(err)); @@ -174,7 +167,7 @@ export function NewExperimentFC(props: NewExperimentFCProps) {
            Cancel
          </Button>
-          <div className={css.errorMessage}>{errorMessage}</div>
+          <div className={css.errorMessage}>
+            {experimentName ? '' : 'Experiment name is required'}
+          </div>
From 510f23a05252032dd7f69f6f09eeed8453d31007 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 13 Sep 2023 15:24:02 -0700 Subject: [PATCH 146/253] chore(components): remove error __init__.py file PiperOrigin-RevId: 565177719 --- .../preview/custom_job/__init__.py | 29 ------------------- 1 file changed, 29 deletions(-) delete mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py deleted file mode 100644 index ac8c9aeb78..0000000000 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2023 The Kubeflow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# fmt: off -"""Run KFP components as [Vertex AI Custom Training Jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) with -customized worker and cloud configurations. -""" -# fmt: on - -from google_cloud_pipeline_components.preview.custom_job.component import custom_training_job as CustomTrainingJobOp -from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_from_component -from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_op_from_component - -__all__ = [ - 'CustomTrainingJobOp', - 'create_custom_training_job_op_from_component', - 'create_custom_training_job_from_component', -] From cc2cd5891822ff841d4447dfd097764d26a2dda5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 13 Sep 2023 15:31:02 -0700 Subject: [PATCH 147/253] fix(sdk): fix --no-deps flag usage (#9982) * update --no-deps flag usage * update runtime test code --- sdk/python/kfp/dsl/component_factory.py | 56 +++++++++++++------ sdk/python/kfp/dsl/component_factory_test.py | 11 ++-- .../test_data/components/add_numbers.yaml | 4 +- .../component_with_metadata_fields.yaml | 4 +- .../component_with_pip_install.yaml | 11 ++-- .../component_with_task_final_status.yaml | 4 +- .../test_data/components/concat_message.yaml | 4 +- .../test_data/components/container_io.yaml | 2 +- .../components/container_no_input.yaml | 2 +- .../container_with_artifact_output.yaml | 2 +- .../container_with_concat_placeholder.yaml | 2 +- .../container_with_if_placeholder.yaml | 2 +- ...container_with_placeholder_in_fstring.yaml | 2 +- .../containerized_python_component.yaml | 2 +- .../test_data/components/dict_input.yaml | 4 +- sdk/python/test_data/components/identity.yaml | 4 +- .../test_data/components/input_artifact.yaml | 4 +- .../test_data/components/nested_return.yaml | 4 +- .../test_data/components/output_metrics.yaml | 4 +- .../test_data/components/preprocess.yaml | 4 +- .../component_with_optional_inputs.yaml | 4 +- .../component_with_pip_index_urls.yaml | 11 ++-- .../components_with_optional_artifacts.yaml | 6 +- 
.../test_data/pipelines/if_elif_else.yaml | 8 +-- .../pipelines/if_elif_else_complex.yaml | 22 ++++---- sdk/python/test_data/pipelines/if_else.yaml | 8 +-- ...lightweight_python_functions_pipeline.yaml | 6 +- ...tweight_python_functions_with_outputs.yaml | 10 ++-- .../parallelfor_fan_in/artifacts_complex.yaml | 12 ++-- .../parallelfor_fan_in/artifacts_simple.yaml | 6 +- .../conditional_producer_and_consumers.yaml | 6 +- .../nested_with_parameters.yaml | 10 ++-- .../parameters_complex.yaml | 16 +++--- .../parallelfor_fan_in/parameters_simple.yaml | 6 +- .../pipeline_producer_consumer.yaml | 10 ++-- .../pipelines/pipeline_as_exit_task.yaml | 10 ++-- .../pipelines/pipeline_in_pipeline.yaml | 6 +- .../pipeline_in_pipeline_complex.yaml | 6 +- ...pipeline_in_pipeline_loaded_from_yaml.yaml | 8 +-- .../pipelines/pipeline_with_condition.yaml | 12 ++-- ...peline_with_dynamic_importer_metadata.yaml | 4 +- .../pipelines/pipeline_with_env.yaml | 4 +- .../pipelines/pipeline_with_exit_handler.yaml | 8 +-- .../pipeline_with_google_artifact_type.yaml | 16 +++--- .../pipelines/pipeline_with_importer.yaml | 6 +- .../pipelines/pipeline_with_loops.yaml | 18 +++--- .../pipeline_with_loops_and_conditions.yaml | 28 +++++----- .../pipeline_with_metadata_fields.yaml | 6 +- .../pipeline_with_metrics_outputs.yaml | 6 +- .../pipeline_with_multiple_exit_handlers.yaml | 16 +++--- .../pipeline_with_nested_conditions.yaml | 18 +++--- .../pipelines/pipeline_with_nested_loops.yaml | 8 +-- .../pipelines/pipeline_with_outputs.yaml | 6 +- ...pipeline_with_parallelfor_parallelism.yaml | 14 ++--- ...ipeline_with_params_containing_format.yaml | 8 +-- .../pipelines/pipeline_with_placeholders.yaml | 12 ++-- .../pipelines/pipeline_with_retry.yaml | 4 +- .../pipeline_with_task_final_status.yaml | 8 +-- ...th_task_using_ignore_upstream_failure.yaml | 6 +- .../pipeline_with_task_final_status.yaml | 8 +-- 60 files changed, 271 insertions(+), 248 deletions(-) diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index cb43340b1c..b3547d980e 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -99,13 +99,21 @@ def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str: return ' '.join(options) + ' ' +def make_pip_install_command( + install_parts: List[str], + index_url_options: str, +) -> str: + concat_package_list = ' '.join( + [repr(str(package)) for package in install_parts]) + return f'python3 -m pip install --quiet --no-warn-script-location {index_url_options}{concat_package_list}' + + _install_python_packages_script_template = ''' if ! 
[ -x "$(command -v pip)" ]; then python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip fi -PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \ - --no-warn-script-location {index_url_options}{concat_package_list} && "$0" "$@" +PIP_DISABLE_PIP_VERSION_CHECK=1 {pip_install_commands} && "$0" "$@" ''' @@ -122,30 +130,42 @@ def _get_packages_to_install_command( # container component, and they haven't already specified a KFP dep # themselves, we install KFP for them inject_kfp_install = install_kfp_package and target_image is None and not kfp_in_user_pkgs + if not inject_kfp_install and not packages_to_install: + return [] + pip_install_strings = [] + index_url_options = make_index_url_options(pip_index_urls) + if inject_kfp_install: if kfp_package_path: - packages_to_install.append(kfp_package_path) + kfp_pip_install_command = make_pip_install_command( + install_parts=[kfp_package_path], + index_url_options=index_url_options, + ) else: - packages_to_install.extend(_get_injected_kfp_imports()) + kfp_pip_install_command = make_pip_install_command( + install_parts=[ + f'kfp=={kfp.__version__}', + '--no-deps', + 'typing-extensions>=3.7.4,<5; python_version<"3.9"', + ], + index_url_options=index_url_options, + ) + pip_install_strings.append(kfp_pip_install_command) - if packages_to_install: - concat_package_list = ' '.join( - [repr(str(package)) for package in packages_to_install]) - index_url_options = make_index_url_options(pip_index_urls) + if packages_to_install: + pip_install_strings.append(' && ') - install_python_packages_script = _install_python_packages_script_template.format( + if packages_to_install: + user_packages_pip_install_command = make_pip_install_command( + install_parts=packages_to_install, index_url_options=index_url_options, - concat_package_list=concat_package_list) - return ['sh', '-c', install_python_packages_script] - - return [] - + ) + pip_install_strings.append(user_packages_pip_install_command) -def _get_injected_kfp_imports() -> List[str]: return [ - f'kfp=={kfp.__version__}', - '--no-deps', - 'typing-extensions>=3.7.4,<5; python_version<"3.9"', + 'sh', '-c', + _install_python_packages_script_template.format( + pip_install_commands=' '.join(pip_install_strings)) ] diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py index aa71c11b4f..1b3f388e7f 100644 --- a/sdk/python/kfp/dsl/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -41,12 +41,11 @@ def test_with_no_user_packages_to_install(self): command = component_factory._get_packages_to_install_command( packages_to_install=packages_to_install) - self.assertEqual( strip_kfp_version(command), strip_kfp_version([ 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' ])) def test_with_no_user_packages_to_install_and_install_kfp_false(self): @@ -70,7 +69,7 @@ def test_with_no_user_packages_to_install_and_kfp_package_path(self): strip_kfp_version(command), strip_kfp_version([ 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n' + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'git+https://github.com/kubeflow/pipelines.git@master#subdirectory=sdk/python\' && "$0" "$@"\n' ])) def test_with_no_user_packages_to_install_and_kfp_package_path_and_install_kfp_false( @@ -98,7 +97,7 @@ def test_with_user_packages_to_install_and_kfp_package_path_and_install_kfp_fals strip_kfp_version(command), strip_kfp_version([ 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n' + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'sklearn\' && "$0" "$@"\n' ])) def test_with_no_user_packages_to_install_and_kfp_package_path_and_target_image( @@ -134,7 +133,7 @@ def test_with_user_packages_to_install_and_no_pip_index_url(self): strip_kfp_version(command), strip_kfp_version([ 'sh', '-c', - '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.1.3\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && python3 -m pip install --quiet --no-warn-script-location \'package1\' \'package2\' && "$0" "$@"\n' ])) def test_with_packages_to_install_with_pip_index_url(self): @@ -150,7 +149,7 @@ def test_with_packages_to_install_with_pip_index_url(self): strip_kfp_version(command), strip_kfp_version([ 'sh', '-c', - '\nif ! 
[ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' \'kfp==2.1.2\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && "$0" "$@"\n' + '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'kfp==2.1.3\' \'--no-deps\' \'typing-extensions>=3.7.4,<5; python_version<"3.9"\' && python3 -m pip install --quiet --no-warn-script-location --index-url https://myurl.org/simple --trusted-host https://myurl.org/simple \'package1\' \'package2\' && "$0" "$@"\n' ])) diff --git a/sdk/python/test_data/components/add_numbers.yaml b/sdk/python/test_data/components/add_numbers.yaml index 9831bb3943..bee5dff8f1 100644 --- a/sdk/python/test_data/components/add_numbers.yaml +++ b/sdk/python/test_data/components/add_numbers.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -83,4 +83,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/component_with_metadata_fields.yaml b/sdk/python/test_data/components/component_with_metadata_fields.yaml index d83c24412d..0429b3fe7f 100644 --- a/sdk/python/test_data/components/component_with_metadata_fields.yaml +++ b/sdk/python/test_data/components/component_with_metadata_fields.yaml @@ -48,7 +48,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -126,4 +126,4 @@ root: description: The concatenated string. parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/component_with_pip_install.yaml b/sdk/python/test_data/components/component_with_pip_install.yaml index 5a867befd1..d1ae009cfa 100644 --- a/sdk/python/test_data/components/component_with_pip_install.yaml +++ b/sdk/python/test_data/components/component_with_pip_install.yaml @@ -17,10 +17,11 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location --index-url\ - \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ - 3.9\"' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location --index-url https://pypi.org/simple\ + \ --trusted-host https://pypi.org/simple 'kfp==2.1.3' '--no-deps' 'typing-extensions>=3.7.4,<5;\ + \ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\ + \ --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple\ + \ 'yapf' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -48,4 +49,4 @@ root: taskInfo: name: component-with-pip-install schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/component_with_task_final_status.yaml b/sdk/python/test_data/components/component_with_task_final_status.yaml index 2f8f36a303..b88209fca6 100644 --- a/sdk/python/test_data/components/component_with_task_final_status.yaml +++ b/sdk/python/test_data/components/component_with_task_final_status.yaml @@ -24,7 +24,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -63,4 +63,4 @@ root: isOptional: true parameterType: TASK_FINAL_STATUS schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/concat_message.yaml b/sdk/python/test_data/components/concat_message.yaml index 978f67b5d5..2f518f3ce7 100644 --- a/sdk/python/test_data/components/concat_message.yaml +++ b/sdk/python/test_data/components/concat_message.yaml @@ -32,7 +32,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -84,4 +84,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_io.yaml b/sdk/python/test_data/components/container_io.yaml index 19812e3046..e575a3fa30 100644 --- a/sdk/python/test_data/components/container_io.yaml +++ b/sdk/python/test_data/components/container_io.yaml @@ -57,4 +57,4 @@ root: output_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_no_input.yaml b/sdk/python/test_data/components/container_no_input.yaml index a9f3bcad81..cc373a5599 100644 --- a/sdk/python/test_data/components/container_no_input.yaml +++ b/sdk/python/test_data/components/container_no_input.yaml @@ -24,4 +24,4 @@ root: taskInfo: name: container-no-input schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_with_artifact_output.yaml b/sdk/python/test_data/components/container_with_artifact_output.yaml index ba35de0d70..baf5d5e257 100644 --- a/sdk/python/test_data/components/container_with_artifact_output.yaml +++ b/sdk/python/test_data/components/container_with_artifact_output.yaml @@ -79,4 +79,4 @@ root: model_config_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_with_concat_placeholder.yaml b/sdk/python/test_data/components/container_with_concat_placeholder.yaml index f234d6477b..95d486495a 100644 --- a/sdk/python/test_data/components/container_with_concat_placeholder.yaml +++ b/sdk/python/test_data/components/container_with_concat_placeholder.yaml @@ -73,4 +73,4 @@ root: output_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_with_if_placeholder.yaml b/sdk/python/test_data/components/container_with_if_placeholder.yaml index 165fafcde0..e81b17aeda 100644 --- a/sdk/python/test_data/components/container_with_if_placeholder.yaml +++ b/sdk/python/test_data/components/container_with_if_placeholder.yaml @@ -81,4 +81,4 @@ root: output_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/container_with_placeholder_in_fstring.yaml b/sdk/python/test_data/components/container_with_placeholder_in_fstring.yaml index 2dde86e657..cdf6fc7b1f 100644 --- a/sdk/python/test_data/components/container_with_placeholder_in_fstring.yaml +++ b/sdk/python/test_data/components/container_with_placeholder_in_fstring.yaml @@ -63,4 +63,4 @@ root: schemaTitle: system.Artifact schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.16 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/containerized_python_component.yaml b/sdk/python/test_data/components/containerized_python_component.yaml index 17c146a193..2e3423e2d7 100644 --- a/sdk/python/test_data/components/containerized_python_component.yaml +++ 
b/sdk/python/test_data/components/containerized_python_component.yaml @@ -67,4 +67,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/dict_input.yaml b/sdk/python/test_data/components/dict_input.yaml index a3acf422be..d0b73f3b50 100644 --- a/sdk/python/test_data/components/dict_input.yaml +++ b/sdk/python/test_data/components/dict_input.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -60,4 +60,4 @@ root: struct: parameterType: STRUCT schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/identity.yaml b/sdk/python/test_data/components/identity.yaml index afb45e1bf4..97fc9c3835 100644 --- a/sdk/python/test_data/components/identity.yaml +++ b/sdk/python/test_data/components/identity.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -76,4 +76,4 @@ root: Output: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/input_artifact.yaml b/sdk/python/test_data/components/input_artifact.yaml index 935ccf999f..3aef4ef579 100644 --- a/sdk/python/test_data/components/input_artifact.yaml +++ b/sdk/python/test_data/components/input_artifact.yaml @@ -25,7 +25,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -65,4 +65,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/nested_return.yaml b/sdk/python/test_data/components/nested_return.yaml index db89274404..337e20c6ba 100644 --- a/sdk/python/test_data/components/nested_return.yaml +++ b/sdk/python/test_data/components/nested_return.yaml @@ -23,7 +23,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -63,4 +63,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/output_metrics.yaml b/sdk/python/test_data/components/output_metrics.yaml index 59ff838903..abc3e39fdb 100644 --- a/sdk/python/test_data/components/output_metrics.yaml +++ b/sdk/python/test_data/components/output_metrics.yaml @@ -27,7 +27,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -79,4 +79,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/components/preprocess.yaml b/sdk/python/test_data/components/preprocess.yaml index 8b117f75d2..003cfec408 100644 --- a/sdk/python/test_data/components/preprocess.yaml +++ b/sdk/python/test_data/components/preprocess.yaml @@ -56,7 +56,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -173,4 +173,4 @@ root: output_parameter_path: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml index c17a2dda7b..bbf7062cf1 100644 --- a/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml +++ b/sdk/python/test_data/pipelines/component_with_optional_inputs.yaml @@ -29,7 +29,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -70,4 +70,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml index 069b56c836..9b0afa75ac 100644 --- a/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml +++ b/sdk/python/test_data/pipelines/component_with_pip_index_urls.yaml @@ -17,10 +17,11 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location --index-url\ - \ https://pypi.org/simple --trusted-host https://pypi.org/simple 'yapf'\ - \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ - 3.9\"' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location --index-url https://pypi.org/simple\ + \ --trusted-host https://pypi.org/simple 'kfp==2.1.3' '--no-deps' 'typing-extensions>=3.7.4,<5;\ + \ python_version<\"3.9\"' && python3 -m pip install --quiet --no-warn-script-location\ + \ --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple\ + \ 'yapf' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -47,4 +48,4 @@ root: taskInfo: name: component-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml index be6e3b8456..3b034d8542 100644 --- a/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml +++ b/sdk/python/test_data/pipelines/components_with_optional_artifacts.yaml @@ -126,7 +126,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -157,7 +157,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -241,4 +241,4 @@ root: schemaVersion: 0.0.1 isOptional: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/if_elif_else.yaml b/sdk/python/test_data/pipelines/if_elif_else.yaml index 3a353f79a9..a222a43d73 100644 --- a/sdk/python/test_data/pipelines/if_elif_else.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else.yaml @@ -108,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -139,7 +139,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -168,7 +168,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -197,7 +197,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml index 7fe0116c46..ca7f09b1a1 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml @@ -546,7 +546,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -575,7 +575,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -604,7 +604,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -633,7 +633,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -662,7 +662,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -691,7 +691,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -720,7 +720,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -749,7 +749,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -778,7 +778,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -807,7 +807,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -836,7 +836,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh diff --git a/sdk/python/test_data/pipelines/if_else.yaml b/sdk/python/test_data/pipelines/if_else.yaml index a64ff7b87d..02232f779f 100644 --- a/sdk/python/test_data/pipelines/if_else.yaml +++ b/sdk/python/test_data/pipelines/if_else.yaml @@ -79,7 +79,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -108,7 +108,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -137,7 +137,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -199,4 +199,4 @@ root: taskInfo: name: flip-coin schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml index 86942c1035..8b28ec4cd0 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_pipeline.yaml @@ -78,7 +78,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -242,4 +242,4 @@ root: message: parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml index 34a2d445eb..ac8c2d12bb 100644 --- a/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/lightweight_python_functions_with_outputs.yaml @@ -81,7 +81,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -110,7 +110,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -139,7 +139,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -168,7 +168,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -281,4 +281,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml index efaf520b65..e85d521966 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_complex.yaml @@ -285,7 +285,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -317,7 +317,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -349,7 +349,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -381,7 +381,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -411,7 +411,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -494,4 +494,4 @@ root: schemaTitle: system.Dataset schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml index ebfe1626dc..5e38c9cb30 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/artifacts_simple.yaml @@ -90,7 +90,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -138,7 +138,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -213,4 +213,4 @@ root: schemaVersion: 0.0.1 isArtifactList: true schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index 920854731b..bf110a3192 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -160,7 +160,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -229,4 +229,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index 9d605894d6..e8a4ff9021 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -150,7 +150,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,7 +207,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -235,7 +235,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -291,4 +291,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index 1c3ac78cff..efade68e71 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -253,7 +253,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -281,7 +281,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -338,7 +338,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -367,7 +367,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -395,7 +395,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -491,4 +491,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index 1775baf68f..e537b147b8 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -75,7 +75,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -113,7 +113,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -184,4 +184,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index 84703103ae..dc9c8b9ada 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -235,7 +235,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -263,7 +263,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -292,7 +292,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -364,4 +364,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml index acee25db35..0280af0066 100644 --- a/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml +++ b/sdk/python/test_data/pipelines/pipeline_as_exit_task.yaml @@ -129,7 +129,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -158,7 +158,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -187,7 +187,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -216,7 +216,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -270,4 +270,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml index b5ccf82dc6..4c587ca06f 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -103,7 +103,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -156,4 +156,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml index 89b94ee481..4f40dc08a5 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_complex.yaml @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -190,7 +190,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -245,4 +245,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml index 299f167fca..9d84ea8c08 100644 --- a/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml +++ b/sdk/python/test_data/pipelines/pipeline_in_pipeline_loaded_from_yaml.yaml @@ -152,7 +152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -181,7 +181,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -210,7 +210,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -270,4 +270,4 @@ root: taskInfo: name: print-op1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml index fb3b2a18bf..10bc356118 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_condition.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_condition.yaml @@ -88,7 +88,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -118,7 +118,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -148,7 +148,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -177,7 +177,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -274,4 +274,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml index 881e90e849..ecc8bdf199 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_dynamic_importer_metadata.yaml @@ -94,7 +94,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -183,4 +183,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_env.yaml b/sdk/python/test_data/pipelines/pipeline_with_env.yaml index 190dcddb41..25d2f7cea8 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_env.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_env.yaml @@ -41,7 +41,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -81,4 +81,4 @@ root: taskInfo: name: print-env-op schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml index 77b304058a..fdb340b1e7 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_exit_handler.yaml @@ -65,7 +65,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -94,7 +94,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -123,7 +123,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -177,4 +177,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml index ca47a62006..63acac5901 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.yaml @@ -56,9 +56,10 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ - 3.9\"' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -91,9 +92,10 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ - \ 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"\ - 3.9\"' && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\ + \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) @@ -154,4 +156,4 @@ root: taskInfo: name: model-producer schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml index 7cbd1febcc..ceb685de40 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_importer.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_importer.yaml @@ -127,7 +127,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -161,7 +161,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -239,4 +239,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml index 4ece667f08..61f902553a 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops.yaml @@ -171,7 +171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -200,7 +200,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -228,7 +228,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -256,7 +256,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -284,7 +284,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -312,7 +312,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -340,7 +340,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -368,7 +368,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -440,4 +440,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml index 2ee2812445..ae099dcd54 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_loops_and_conditions.yaml @@ -602,7 +602,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -633,7 +633,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -664,7 +664,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -694,7 +694,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -722,7 +722,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -751,7 +751,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -780,7 +780,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -809,7 +809,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -838,7 +838,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -867,7 +867,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -896,7 +896,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -925,7 +925,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -954,7 +954,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -1048,4 +1048,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml index 66c29bd1f8..8b2b1a8f87 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metadata_fields.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -97,7 +97,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -176,4 +176,4 @@ root: schemaVersion: 0.0.1 description: The final concatenated dataset. schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml index c77082feb7..665041c65c 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_metrics_outputs.yaml @@ -60,7 +60,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -91,7 +91,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -152,4 +152,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml index f8f7a3a20b..2207f0f022 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_multiple_exit_handlers.yaml @@ -125,7 +125,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -154,7 +154,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -183,7 +183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -212,7 +212,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -241,7 +241,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -270,7 +270,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -299,7 +299,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -403,4 +403,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml index 0acc74c83b..817c7c76c2 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_conditions.yaml @@ -147,7 +147,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -177,7 +177,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,7 +207,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -237,7 +237,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -267,7 +267,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -296,7 +296,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -325,7 +325,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -354,7 +354,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -442,4 +442,4 @@ root: taskInfo: name: print-op-2 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml index 32f83fc03b..279c732088 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_nested_loops.yaml @@ -145,7 +145,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -174,7 +174,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -203,7 +203,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -262,4 +262,4 @@ root: isOptional: true parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml index 478e3b776b..1815071bb5 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_outputs.yaml @@ -104,7 +104,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -133,7 +133,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,4 +207,4 @@ root: schemaTitle: system.Artifact schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml index 940b9e3673..eaac51c057 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_parallelfor_parallelism.yaml @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,7 +207,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -235,7 +235,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -263,7 +263,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -291,7 +291,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -319,7 +319,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -369,4 +369,4 @@ root: loop_parameter: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml index e00a15a3f0..60b1e7536a 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_params_containing_format.yaml @@ -74,7 +74,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -103,7 +103,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,4 +207,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml index df2aa2cfa3..21ef1aafec 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_placeholders.yaml @@ -55,7 +55,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -83,7 +83,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -111,7 +111,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -139,7 +139,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -167,7 +167,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -264,4 +264,4 @@ root: taskInfo: name: print-op-5 schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml index 137162068c..8e81c93350 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_retry.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_retry.yaml @@ -30,7 +30,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -80,4 +80,4 @@ root: isOptional: true parameterType: NUMBER_DOUBLE schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml index b95c0cebf4..18ae5fa44d 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -130,7 +130,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -186,4 +186,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml index da4c224ed7..0ab5a9632e 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml +++ b/sdk/python/test_data/pipelines/pipeline_with_task_using_ignore_upstream_failure.yaml @@ -35,7 +35,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -64,7 +64,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -121,4 +121,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 diff --git a/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml index b95c0cebf4..18ae5fa44d 100644 --- a/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml +++ b/sdk/runtime_tests/test_data/pipeline_with_task_final_status.yaml @@ -68,7 +68,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -101,7 +101,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -130,7 +130,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -186,4 +186,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.2 +sdkVersion: kfp-2.1.3 From cfe671c485d4ee8514290ee81ca2785e8bda5c9b Mon Sep 17 00:00:00 2001 From: Yifan Xiao Date: Wed, 13 Sep 2023 15:43:47 -0700 Subject: [PATCH 148/253] chore(components): Update the pipelines for the Tabular Workflow feat(components): Expose feature selection pipeline PiperOrigin-RevId: 565182978 --- components/google-cloud/RELEASE.md | 1 + .../forecasting/forecasting_ensemble.py | 96 +- .../forecasting/forecasting_stage_1_tuner.py | 4 +- .../forecasting/forecasting_stage_2_tuner.py | 4 +- .../learn_to_learn_forecasting_pipeline.yaml | 271 +- ...ence_to_sequence_forecasting_pipeline.yaml | 271 +- ...sion_transformer_forecasting_pipeline.yaml | 271 +- ...es_dense_encoder_forecasting_pipeline.yaml | 271 +- .../preview/automl/tabular/__init__.py | 4 + .../tabular/auto_feature_engineering.py | 91 + ...ml_tabular_feature_selection_pipeline.yaml | 252 +- .../tabular/automl_tabular_v2_pipeline.yaml | 7153 +++++++++++++---- ...illation_stage_feature_transform_engine.py | 246 + .../automl/tabular/feature_selection.py | 4 +- .../tabular/feature_selection_pipeline.yaml | 1638 ++++ .../tabular/feature_transform_engine.py | 82 +- .../tabnet_hyperparameter_tuning_job.py | 4 +- ...et_hyperparameter_tuning_job_pipeline.yaml | 149 +- .../preview/automl/tabular/tabnet_trainer.py | 4 +- .../tabular/tabnet_trainer_pipeline.yaml | 155 +- .../preview/automl/tabular/utils.py | 157 +- ...wide_and_deep_hyperparameter_tuning_job.py | 4 +- ...ep_hyperparameter_tuning_job_pipeline.yaml | 147 +- .../automl/tabular/wide_and_deep_trainer.py | 4 +- .../wide_and_deep_trainer_pipeline.yaml | 155 +- ...st_hyperparameter_tuning_job_pipeline.yaml | 145 +- .../tabular/xgboost_trainer_pipeline.yaml | 143 +- .../bqml_arima_predict_pipeline.yaml | 20 +- .../bqml_arima_train_pipeline.yaml | 127 +- .../forecasting/prophet_predict_pipeline.yaml | 66 +- .../v1/automl/forecasting/prophet_trainer.py | 6 +- .../forecasting/prophet_trainer_pipeline.yaml | 95 +- .../tabular/automl_tabular_pipeline.yaml | 275 +- .../v1/automl/tabular/cv_trainer.py | 4 +- .../v1/automl/tabular/ensemble.py | 4 +- 
.../v1/automl/tabular/finalizer.py | 2 +- .../v1/automl/tabular/infra_validator.py | 2 +- .../automl/tabular/split_materialized_data.py | 2 +- .../v1/automl/tabular/stage_1_tuner.py | 4 +- .../automl/tabular/stats_and_example_gen.py | 4 +- .../training_configurator_and_validator.py | 2 +- .../v1/automl/tabular/transform.py | 4 +- .../v1/automl/tabular/utils.py | 3 + 43 files changed, 9806 insertions(+), 2540 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 2bc2b3c2a6..91c5d1e8ba 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,6 +1,7 @@ ## Upcoming release * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` +* Add feature_selection_pipeline to preview.automl.tabular. ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py index 662ec172a7..4cf088feaf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -16,6 +16,7 @@ from typing import Optional +from google_cloud_pipeline_components import utils from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel from kfp import dsl from kfp.dsl import Artifact @@ -37,6 +38,7 @@ def automl_forecasting_ensemble( prediction_image_uri: str, gcp_resources: dsl.OutputPath(str), model_architecture: Output[Artifact], + example_instance: Output[Artifact], unmanaged_container_model: Output[UnmanagedContainerModel], explanation_metadata: dsl.OutputPath(dict), explanation_metadata_artifact: Output[Artifact], @@ -71,8 +73,57 @@ def automl_forecasting_ensemble( explanation_metadata: The explanation metadata used by Vertex online and batch explanations. explanation_metadata_artifact: The explanation metadata used by Vertex online and batch explanations in the format of a KFP Artifact. explanation_parameters: The explanation parameters used by Vertex online and batch explanations. + example_instance: An example instance which may be used as an input for predictions. 
""" # fmt: on + job_id = dsl.PIPELINE_JOB_ID_PLACEHOLDER + task_id = dsl.PIPELINE_TASK_ID_PLACEHOLDER + image_uri = 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325' + display_name = f'automl-forecasting-ensemble-{job_id}-{task_id}' + + error_file_path = f'{root_dir}/{job_id}/{task_id}/error.pb' + model_relative_path = f'{job_id}/{task_id}/model' + explanation_metadata_paths = ( + f'{explanation_metadata},{explanation_metadata_artifact.uri}' + ) + + job_args = [ + 'forecasting_mp_ensemble', + f'--transform_output_path={transform_output.uri}', + f'--error_file_path={error_file_path}', + f'--metadata_path={metadata.uri}', + f'--tuning_result_input_path={tuning_result_input.uri}', + f'--instance_baseline_path={instance_baseline.uri}', + f'--instance_schema_path={instance_schema_path.uri}', + f'--prediction_docker_uri={prediction_image_uri}', + f'--model_relative_output_path={model_relative_path}', + f'--explanation_metadata_path={explanation_metadata_paths}', + f'--explanation_parameters_path={explanation_parameters}', + f'--model_architecture_path={model_architecture.uri}', + f'--example_instance_path={example_instance.uri}', + '--use_json=true', + '--executor_input={{$.json_escape[1]}}', + ] + + payload = { + 'display_name': display_name, + 'encryption_spec': { + 'kms_key_name': encryption_spec_key_name, + }, + 'job_spec': { + 'worker_pool_specs': [{ + 'replica_count': 1, + 'machine_spec': { + 'machine_type': 'n1-highmem-8', + }, + 'container_spec': { + 'image_uri': f'{image_uri}', + 'args': job_args, + }, + }] + }, + } + return dsl.ContainerSpec( image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', command=[ @@ -91,49 +142,6 @@ def automl_forecasting_ensemble( '--gcp_resources', gcp_resources, '--payload', - dsl.ConcatPlaceholder( - items=[ - ( - '{"display_name":' - f' "automl-tabular-ensemble-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' - ' "encryption_spec": {"kms_key_name":"' - ), - encryption_spec_key_name, - ( - '"}, "job_spec": {"worker_pool_specs": [{"replica_count":' - ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' - ' "container_spec": {"image_uri":"' - ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', - '", "args": ["forecasting_mp_ensemble', - '", "--transform_output_path=', - transform_output.uri, - '", "--error_file_path=', - root_dir, - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb', - '", "--metadata_path=', - metadata.uri, - '", "--tuning_result_input_path=', - tuning_result_input.uri, - '", "--instance_baseline_path=', - instance_baseline.uri, - '", "--instance_schema_path=', - instance_schema_path.uri, - '", "--prediction_docker_uri=', - prediction_image_uri, - '", "--model_relative_output_path=', - f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/model', - '", "--explanation_metadata_path=', - explanation_metadata, - ',', - explanation_metadata_artifact.uri, - '", "--explanation_parameters_path=', - explanation_parameters, - '", "--model_architecture_path=', - model_architecture.uri, - '", "--use_json=true', - '", "--executor_input={{$.json_escape[1]}}"]}}]}}', - ] - ), + utils.container_component_dumps(payload), ], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py index 
d1acbae54c..cd39d4d6e6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -110,14 +110,14 @@ def automl_forecasting_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', '", "args": ["forecasting_mp_l2l_stage_1_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', '", "--reduce_search_space_mode=', reduce_search_space_mode, f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py index 9c8aab1566..ff96d9215f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -107,14 +107,14 @@ def automl_forecasting_stage_2_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', '", "args": ["forecasting_mp_l2l_stage_2_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', '", "--training_base_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml index 03944e1674..a18391bf96 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -129,6 +129,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -218,6 +223,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. 
explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -2996,6 +3006,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -3252,7 +3317,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -4037,7 +4102,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4045,25 +4110,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. 
The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4089,7 +4154,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4166,37 +4231,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4499,7 +4564,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4507,25 +4572,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. 
If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4551,7 +4616,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4628,37 +4693,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -5819,25 +5884,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ 
-5856,25 +5918,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5897,11 +5956,11 @@ 
deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5940,11 +5999,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5983,7 +6042,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6303,8 +6362,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - 
--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6320,7 +6379,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6490,10 +6557,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in 
images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6526,10 +6593,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6562,7 +6629,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-predictions-column-2: container: args: @@ -6591,7 +6658,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-importer: importer: artifactUri: @@ -7121,7 +7188,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -7167,7 +7234,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-string-not-empty: container: args: @@ -7233,7 +7300,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n 
['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -7269,7 +7336,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -7314,7 +7381,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The AutoML Forecasting pipeline. name: learn-to-learn-forecasting diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml index 469ebc3139..244c0d16d1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -127,6 +127,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -216,6 +221,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -2978,6 +2988,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -3234,7 +3299,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -4019,7 +4084,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4027,25 +4092,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4071,7 +4136,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4148,37 +4213,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4481,7 +4546,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4489,25 +4554,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4533,7 +4598,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4610,37 +4675,37 @@ components: to. In the given directory a new directory is created. 
Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5801,25 +5866,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", 
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5838,25 +5900,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", 
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + "--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5879,11 +5938,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5922,11 +5981,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", 
"{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5965,7 +6024,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6285,8 +6344,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6302,7 +6361,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": 
["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6472,10 +6539,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6508,10 +6575,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6544,7 +6611,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-predictions-column-2: container: args: @@ -6573,7 +6640,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-importer: importer: artifactUri: @@ -7103,7 +7170,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -7149,7 +7216,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-string-not-empty: container: args: @@ -7215,7 +7282,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -7251,7 +7318,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -7296,7 +7363,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. 
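The escaped exec-get-prediction-image-uri source embedded above unescapes to roughly the following plain Python, shown only as a readability sketch: the image map and error handling are taken from the component body in this diff (tags as bumped to 20230910_1325), while the KFP lightweight-component wrapper and the exact function name, which is defined earlier in the file, are assumed.

def _get_prediction_image_uri(model_type: str) -> str:
  """Returns the prediction image corresponding to the given model type."""
  # Keys come from AutoMlTimeSeriesForecastingTrainSpec.
  # The URIs must be hardcoded without any breaks in the code so string
  # replacement will work correctly.
  images = {
      'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',
      'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',
      'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',
      'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',
  }
  if model_type not in images:
    raise ValueError(
        f'Invalid forecasting model type: {model_type}. Valid options are: '
        f'{images.keys()}.'
    )
  return images[model_type]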
name: sequence-to-sequence-forecasting diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml index eb1ab81b2a..15da388493 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -126,6 +126,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -215,6 +220,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -2971,6 +2981,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -3227,7 +3292,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' 
isOptional: true @@ -4012,7 +4077,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4020,25 +4085,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4064,7 +4129,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4141,37 +4206,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4474,7 +4539,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4482,25 +4547,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4526,7 +4591,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4603,37 +4668,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5794,25 +5859,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + 
"--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5831,25 +5893,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + 
"--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5872,11 +5931,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5915,11 +5974,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5958,7 +6017,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6278,8 +6337,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6295,7 +6354,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6465,10 +6532,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 
'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6501,10 +6568,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6537,7 +6604,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-predictions-column-2: container: args: @@ -6566,7 +6633,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-importer: importer: artifactUri: @@ -7096,7 +7163,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -7142,7 +7209,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-string-not-empty: container: args: @@ -7208,7 +7275,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -7244,7 +7311,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -7289,7 +7356,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
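As in the Seq2Seq pipeline above, the --payload for the TFT forecasting ensemble jobs switches from a '{"Concat": [...]}' placeholder list of escaped JSON fragments to a single literal JSON document whose {{$...}} references sit inside string values, so the string already parses as JSON before the backend substitutes them. A minimal sketch under that assumption (abbreviated payload and a hypothetical CMEK name; this is not the GCPC launcher implementation):

import json

# Abbreviated new-style payload; the full worker_pool_specs appear in the
# diff above.  {{$...}} references are resolved by the pipeline backend
# before the custom-job launcher receives the string.
payload_template = (
    '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}",'
    ' "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[\'encryption_spec_key_name\']}}"}}'
)

# Parses as-is, because every placeholder is embedded in a JSON string value.
assert json.loads(payload_template)['encryption_spec']

# Illustrative substitution with a hypothetical key of the
# projects/.../keyRings/.../cryptoKeys/... form described earlier in the
# component docs.
resolved = payload_template.replace(
    "{{$.inputs.parameters['encryption_spec_key_name']}}",
    'projects/my-project/locations/us-central1/keyRings/my-kr/cryptoKeys/my-key')
print(json.loads(resolved)['encryption_spec']['kms_key_name'])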
name: temporal-fusion-transformer-forecasting diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml index 1280f38b7d..954d4f5ef5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -129,6 +129,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -218,6 +223,11 @@ components: parameterType: STRING outputDefinitions: artifacts: + example_instance: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: An example instance which may be used as an input for predictions. explanation_metadata_artifact: artifactType: schemaTitle: system.Artifact @@ -2996,6 +3006,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -3252,7 +3317,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' 
isOptional: true @@ -4037,7 +4102,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4045,25 +4110,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4089,7 +4154,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4166,37 +4231,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4499,7 +4564,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4507,25 +4572,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4551,7 +4616,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4628,37 +4693,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5819,25 +5884,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + 
"--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5856,25 +5918,22 @@ deploymentSpec: - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --payload - - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", - \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": - {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", - "\", \"args\": [\"forecasting_mp_ensemble", "\", \"--transform_output_path=", - "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--error_file_path=", - "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", - "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", - \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", - "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", - "\", \"--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema_path''].uri}}", - "\", \"--prediction_docker_uri=", "{{$.inputs.parameters[''prediction_image_uri'']}}", - "\", \"--model_relative_output_path=", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", - "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", - ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", "\", - \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", - "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", - "\", \"--use_json=true", "\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", + "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, + "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", + "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", + "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", + "--instance_baseline_path={{$.inputs.artifacts[''instance_baseline''].uri}}", + "--instance_schema_path={{$.inputs.artifacts[''instance_schema_path''].uri}}", + "--prediction_docker_uri={{$.inputs.parameters[''prediction_image_uri'']}}", + 
"--model_relative_output_path={{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model", + "--explanation_metadata_path={{$.outputs.parameters[''explanation_metadata''].output_file}},{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "--explanation_parameters_path={{$.outputs.parameters[''explanation_parameters''].output_file}}", + "--model_architecture_path={{$.outputs.artifacts[''model_architecture''].uri}}", + "--example_instance_path={{$.outputs.artifacts[''example_instance''].uri}}", + "--use_json=true", "--executor_input={{$.json_escape[1]}}"]}}]}}' command: - python3 - -u @@ -5897,11 +5956,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5940,11 +5999,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230817_0125", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -5983,7 +6042,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6303,8 +6362,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6320,7 +6379,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6490,10 +6557,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 
'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6526,10 +6593,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230817_0125',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230817_0125',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230817_0125',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230817_0125',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. 
Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6562,7 +6629,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-predictions-column-2: container: args: @@ -6591,7 +6658,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-importer: importer: artifactUri: @@ -7121,7 +7188,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -7167,7 +7234,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-string-not-empty: container: args: @@ -7233,7 +7300,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -7269,7 +7336,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -7314,7 +7381,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. 
name: time-series-dense-encoder-forecasting diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py index 02da0b3e29..4268da69ff 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -16,6 +16,8 @@ import os +from google_cloud_pipeline_components.preview.automl.tabular.auto_feature_engineering import automated_feature_engineering as AutoFeatureEngineeringOp +from google_cloud_pipeline_components.preview.automl.tabular.distillation_stage_feature_transform_engine import distillation_stage_feature_transform_engine as DistillationStageFeatureTransformEngineOp from google_cloud_pipeline_components.preview.automl.tabular.feature_selection import tabular_feature_ranking_and_selection as FeatureSelectionOp from google_cloud_pipeline_components.preview.automl.tabular.feature_transform_engine import feature_transform_engine as FeatureTransformEngineOp from google_cloud_pipeline_components.preview.automl.tabular.tabnet_hyperparameter_tuning_job import tabnet_hyperparameter_tuning_job as TabNetHyperparameterTuningJobOp @@ -27,12 +29,14 @@ from kfp import components __all__ = [ + 'AutoFeatureEngineeringOp', 'FeatureSelectionOp', 'WideAndDeepHyperparameterTuningJobOp', 'WideAndDeepTrainerOp', 'TabNetHyperparameterTuningJobOp', 'TabNetTrainerOp', 'FeatureTransformEngineOp', + 'DistillationStageFeatureTransformEngineOp', 'XGBoostHyperparameterTuningJobOp', 'XGBoostTrainerOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py new file mode 100644 index 0000000000..2ac6fed1b0 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py @@ -0,0 +1,91 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Auto Feature Engineering component spec.""" + +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def automated_feature_engineering( + root_dir: str, + project: str, + location: str, + gcp_resources: dsl.OutputPath(str), + materialized_data: dsl.Output[dsl.Dataset], + feature_ranking: dsl.Output[dsl.Artifact], + target_column: Optional[str] = '', + weight_column: Optional[str] = '', + data_source_csv_filenames: Optional[str] = '', + data_source_bigquery_table_path: Optional[str] = '', + bigquery_staging_full_dataset_id: Optional[str] = '', + materialized_examples_format: Optional[str] = 'tfrecords_gzip', +): + """find the top features from the dataset.""" + # fmt: off + return dsl.ContainerSpec( + image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + '--payload', + dsl.ConcatPlaceholder( + items=[ + ( + '{"display_name":' + f' "auto-feature-engineering-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' + ), + ( + '"job_spec": {"worker_pool_specs": [{"replica_count":' + ' 1, "machine_spec": {"machine_type": "n1-standard-16"},' + ' "container_spec": {"image_uri":"' + ), + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + '", "args": ["feature_engineering", "--project=', project, + '", "--location=', location, '", "--data_source_bigquery_table_path=', + data_source_bigquery_table_path, + '", "--target_column=', + target_column, + '", "--weight_column=', + weight_column, + '", "--bigquery_staging_full_dataset_id=', + bigquery_staging_full_dataset_id, + '", "--materialized_data_path=', + root_dir, f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized_data", ', + ' "--materialized_examples_path=', + root_dir, f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized", ' + ' "--error_file_path=', + root_dir, f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.pb", ' + ' "--materialized_data_artifact_path=', + materialized_data.uri, + '", "--feature_ranking_path=', + feature_ranking.uri, '"]}}]}}' + ] + ), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml index 2ff954e4ac..134953ae7c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -5346,7 +5346,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5354,25 +5354,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. 
If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5398,7 +5398,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -5475,37 +5475,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5808,7 +5808,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5816,25 +5816,25 @@ components: "based on ISO-8601" format. 
In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5860,7 +5860,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -5937,37 +5937,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -6270,7 +6270,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6278,25 +6278,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6322,7 +6322,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -6399,37 +6399,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6732,7 +6732,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6740,25 +6740,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6784,7 +6784,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -6861,37 +6861,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7194,7 +7194,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7202,25 +7202,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7246,7 +7246,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -7323,37 +7323,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. 
If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -8811,9 +8811,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8854,9 +8854,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8897,7 +8897,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", 
"{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8909,7 +8909,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8938,7 +8938,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8950,7 +8950,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8979,7 +8979,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8991,7 +8991,7 @@ deploymentSpec: 
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -9020,7 +9020,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -9035,7 +9035,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9044,7 +9044,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9053,7 +9053,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9073,9 +9073,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9120,9 +9120,9 @@ deploymentSpec: 
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9167,7 +9167,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9188,7 +9188,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -9219,7 +9219,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9240,7 +9240,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -10717,7 +10717,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", @@ -10730,7 +10730,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", @@ -10763,7 +10763,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": @@ -10796,7 +10796,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", 
"{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml index c2ac05de8e..aa56e2832e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -13,6 +13,9 @@ # dataset_level_custom_transformation_definitions: list # dataset_level_transformations: list # disable_early_stopping: bool [Default: False] +# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] +# distill_batch_predict_max_replica_count: int [Default: 40.0] +# distill_batch_predict_starting_replica_count: int [Default: 25.0] # enable_probabilistic_inference: bool [Default: False] # encryption_spec_key_name: str [Default: ''] # evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] @@ -33,6 +36,7 @@ # feature_transform_engine_dataflow_max_num_workers: int [Default: 25.0] # legacy_transformations_path: str [Default: ''] # location: str +# materialized_examples_format: str [Default: 'tfrecords_gzip'] # max_selected_features: int [Default: 1000.0] # model_description: str [Default: ''] # model_display_name: str [Default: ''] @@ -60,6 +64,7 @@ # test_fraction: float [Default: -1.0] # tf_auto_transform_features: dict # tf_custom_transformation_definitions: list +# tf_transform_execution_engine: str [Default: ''] # tf_transformations_path: str [Default: ''] # train_budget_milli_node_hours: float # training_fraction: float [Default: -1.0] @@ -68,8 +73,10 @@ # weight_column: str [Default: ''] # Outputs: # feature-attribution-2-feature_attributions: system.Metrics +# feature-attribution-3-feature_attributions: system.Metrics # feature-attribution-feature_attributions: system.Metrics # model-evaluation-2-evaluation_metrics: system.Metrics +# model-evaluation-3-evaluation_metrics: system.Metrics # model-evaluation-evaluation_metrics: system.Metrics components: comp-automl-tabular-cv-trainer: @@ -458,6 +465,112 @@ components: description: 'GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-automl-tabular-ensemble-3: + executorLabel: exec-automl-tabular-ensemble-3 + inputDefinitions: + artifacts: + dataset_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The schema of the dataset. + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The instance baseline + + used to calculate explanations.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. 
+ transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + tuning_result_input: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'AutoML Tabular tuning + + result.' + warmup_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: 'The warm up data. Ensemble component will save the + + warm up data together with the model artifact, used to warm up the model + + when prediction server starts.' + isOptional: true + parameters: + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + export_additional_model_without_custom_ops: + defaultValue: false + description: 'True if export + + an additional model without custom TF operators to the + + `model_without_custom_ops` output.' + isOptional: true + parameterType: BOOLEAN + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + outputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model. + model_architecture: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The architecture of the output model. + model_without_custom_ops: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The output model without custom TF operators, this output will + be empty unless `export_additional_model_without_custom_ops` is set. + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + parameters: + explanation_metadata: + description: The explanation parameters used by Vertex online and batch + explanations. + parameterType: STRUCT + explanation_parameters: + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. For more details, + see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING comp-automl-tabular-finalizer: @@ -508,6 +621,17 @@ components: description: 'google.UnmanagedContainerModel for model to be validated.' + comp-automl-tabular-infra-validator-3: + executorLabel: exec-automl-tabular-infra-validator-3 + inputDefinitions: + artifacts: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'google.UnmanagedContainerModel for model + + to be validated.' comp-automl-tabular-stage-1-tuner: executorLabel: exec-automl-tabular-stage-1-tuner inputDefinitions: @@ -637,6 +761,137 @@ components: description: 'GCP resources created by this component. For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-automl-tabular-stage-1-tuner-2: + executorLabel: exec-automl-tabular-stage-1-tuner-2 + inputDefinitions: + artifacts: + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + materialized_eval_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The materialized eval split. + materialized_train_split: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The materialized train + + split.' + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + deadline_hours: + description: 'Number of hours the cross-validation trainer + + should run.' + parameterType: NUMBER_DOUBLE + disable_early_stopping: + defaultValue: false + description: 'True if disable early stopping. Default + + value is false.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for running the Cross-validation trainer. + parameterType: STRING + num_parallel_trials: + description: Number of parallel training trials. + parameterType: NUMBER_INTEGER + num_selected_features: + defaultValue: 0.0 + description: 'Number of selected features. The number of + + features to learn in the NN models.' + isOptional: true + parameterType: NUMBER_INTEGER + num_selected_trials: + description: 'Number of selected trials. The number of weak + + learners in the final model is 5 * num_selected_trials.' + parameterType: NUMBER_INTEGER + project: + description: Project to run Cross-validation trainer. + parameterType: STRING + reduce_search_space_mode: + defaultValue: regular + description: 'The reduce search space mode. Possible + + values: "regular" (default), "minimal", "full".' + isOptional: true + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distillation: + defaultValue: false + description: 'True if in distillation mode. The default value + + is false.' + isOptional: true + parameterType: BOOLEAN + single_run_max_secs: + description: Max number of seconds each training trial runs. + parameterType: NUMBER_INTEGER + study_spec_parameters_override: + defaultValue: [] + description: 'JSON study spec. E.g., + + [{"parameter_id": "model_type","categorical_value_spec": {"values": + + ["nn"]}}]' + isOptional: true + parameterType: LIST + tune_feature_selection_rate: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + worker_pool_specs_override_json: + defaultValue: [] + description: 'JSON worker pool specs. E.g., + + [{"machine_spec": {"machine_type": + + "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": + + "n1-standard-16"}}]' + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + tuning_result_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The trained model and architectures. + parameters: + execution_metrics: + description: Core metrics in dictionary of component execution. + parameterType: STRUCT + gcp_resources: + description: 'GCP resources created by this component. 
For more details, + see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING comp-bool-identity: @@ -1510,10 +1765,18 @@ components: artifactSelectors: - outputArtifactKey: feature-attribution-2-feature_attributions producerSubtask: condition-5 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-7 model-evaluation-2-evaluation_metrics: artifactSelectors: - outputArtifactKey: model-evaluation-2-evaluation_metrics producerSubtask: condition-5 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-7 tasks: automl-tabular-cv-trainer-2: cachingOptions: @@ -1699,8 +1962,7 @@ components: inputs: parameters: value: - runtimeValue: - constant: 0.0 + componentInputParameter: pipelinechannel--run_distillation taskInfo: name: check-if-is-distillation calculate-training-parameters-2: @@ -1810,16 +2072,183 @@ components: triggerPolicy: condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] == 'false' - training-configurator-and-validator-2: - cachingOptions: - enableCache: true + condition-7: componentRef: - name: comp-training-configurator-and-validator-2 + name: comp-condition-7 dependentTasks: + - automl-tabular-ensemble-2 + - bool-identity-2 + - bool-identity-3 - calculate-training-parameters-2 + - get-bigquery-destination-output-uri + - get-bigquery-destination-output-uri-2 + - training-configurator-and-validator-2 inputs: artifacts: - dataset_stats: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-2 + pipelinechannel--feature-transform-engine-instance_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + pipelinechannel--feature-transform-engine-transform_output: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output + pipelinechannel--training-configurator-and-validator-2-instance_baseline: + taskOutputArtifact: + outputArtifactKey: instance_baseline + producerTask: training-configurator-and-validator-2 + pipelinechannel--training-configurator-and-validator-2-metadata: + taskOutputArtifact: + outputArtifactKey: metadata + producerTask: training-configurator-and-validator-2 + parameters: + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + pipelinechannel--bool-identity-2-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-2 + pipelinechannel--bool-identity-3-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: bool-identity-3 + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + taskOutputParameter: + outputParameterKey: distill_stage_1_deadline_hours + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + taskOutputParameter: + outputParameterKey: reduce_search_space_mode + producerTask: calculate-training-parameters-2 + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + taskOutputParameter: + outputParameterKey: stage_1_single_run_max_secs + producerTask: calculate-training-parameters-2 + 
pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + pipelinechannel--feature-transform-engine-bigquery_train_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_train_split_uri + pipelinechannel--feature-transform-engine-bigquery_validation_split_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_validation_split_uri + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: 
pipelinechannel--feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + pipelinechannel--get-bigquery-destination-output-uri-2-bigquery_destination_output_uri: + taskOutputParameter: + outputParameterKey: bigquery_destination_output_uri + producerTask: get-bigquery-destination-output-uri-2 + pipelinechannel--get-bigquery-destination-output-uri-bigquery_destination_output_uri: + taskOutputParameter: + outputParameterKey: bigquery_destination_output_uri + producerTask: get-bigquery-destination-output-uri + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--string-not-empty-Output: + componentInputParameter: pipelinechannel--string-not-empty-Output + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: distill + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] + == 'true' + get-bigquery-destination-output-uri: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-bigquery-destination-output-uri + inputs: + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_train_split_uri + model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + table_prefix: + runtimeValue: + constant: train + taskInfo: + name: get-bigquery-destination-output-uri + get-bigquery-destination-output-uri-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-bigquery-destination-output-uri-2 + inputs: + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_validation_split_uri + model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + table_prefix: + runtimeValue: + constant: validation + taskInfo: + name: get-bigquery-destination-output-uri-2 + training-configurator-and-validator-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator-2 + dependentTasks: + - calculate-training-parameters-2 + inputs: + artifacts: + dataset_stats: componentInputArtifact: pipelinechannel--feature-transform-engine-dataset_stats instance_schema: componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema @@ -1899,6 +2328,8 @@ components: parameters: pipelinechannel--apply_feature_selection_tuning: parameterType: BOOLEAN + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING pipelinechannel--cv_trainer_worker_pool_specs_override: parameterType: LIST pipelinechannel--dataflow_service_account: @@ -1909,6 +2340,12 @@ components: parameterType: BOOLEAN pipelinechannel--disable_early_stopping: 
parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER pipelinechannel--enable_probabilistic_inference: parameterType: BOOLEAN pipelinechannel--encryption_spec_key_name: @@ -1941,8 +2378,18 @@ components: parameterType: STRING pipelinechannel--feature-transform-engine-bigquery_test_split_uri: parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_train_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_validation_split_uri: + parameterType: STRING pipelinechannel--feature-transform-engine-split_example_counts: parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER pipelinechannel--location: parameterType: STRING pipelinechannel--model_description: @@ -1993,10 +2440,18 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 model-evaluation-2-evaluation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 comp-condition-5: dag: outputs: @@ -2483,213 +2938,159 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 - comp-exit-handler-1: + comp-condition-7: dag: outputs: artifacts: - feature-attribution-2-feature_attributions: + feature-attribution-3-feature_attributions: artifactSelectors: - - outputArtifactKey: feature-attribution-2-feature_attributions - producerSubtask: condition-4 - feature-attribution-feature_attributions: - artifactSelectors: - - outputArtifactKey: feature-attribution-feature_attributions - producerSubtask: condition-2 - model-evaluation-2-evaluation_metrics: - artifactSelectors: - - outputArtifactKey: model-evaluation-2-evaluation_metrics - producerSubtask: condition-4 - model-evaluation-evaluation_metrics: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-8 + model-evaluation-3-evaluation_metrics: artifactSelectors: - - outputArtifactKey: model-evaluation-evaluation_metrics - producerSubtask: condition-2 + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-8 tasks: - condition-2: + automl-tabular-ensemble-3: + cachingOptions: + enableCache: true componentRef: - name: comp-condition-2 + name: comp-automl-tabular-ensemble-3 dependentTasks: - - feature-transform-engine - - merge-materialized-splits - - string-not-empty + - automl-tabular-stage-1-tuner-2 + - distillation-stage-feature-transform-engine inputs: artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-instance_schema: - taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-training_schema: - taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: 
feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: + dataset_schema: + componentInputArtifact: pipelinechannel--feature-transform-engine-instance_schema + instance_baseline: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-2-instance_baseline + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-2-metadata + transform_output: taskOutputArtifact: outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--merge-materialized-splits-splits: + producerTask: distillation-stage-feature-transform-engine + tuning_result_input: taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model + outputArtifactKey: tuning_result_output + producerTask: automl-tabular-stage-1-tuner-2 parameters: - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override - pipelinechannel--dataflow_service_account: - componentInputParameter: pipelinechannel--dataflow_service_account - pipelinechannel--dataflow_subnetwork: - componentInputParameter: pipelinechannel--dataflow_subnetwork - pipelinechannel--dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference - pipelinechannel--encryption_spec_key_name: + encryption_spec_key_name: componentInputParameter: pipelinechannel--encryption_spec_key_name - pipelinechannel--evaluation_batch_explain_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type - pipelinechannel--evaluation_batch_explain_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count - pipelinechannel--evaluation_batch_explain_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count - pipelinechannel--evaluation_batch_predict_machine_type: - componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type - pipelinechannel--evaluation_batch_predict_max_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count - pipelinechannel--evaluation_batch_predict_starting_replica_count: - componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count - pipelinechannel--evaluation_dataflow_disk_size_gb: - componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb - pipelinechannel--evaluation_dataflow_machine_type: - componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type - pipelinechannel--evaluation_dataflow_max_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers - pipelinechannel--evaluation_dataflow_starting_num_workers: - componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: + export_additional_model_without_custom_ops: componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing - pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: 
bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine - pipelinechannel--location: + location: componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value - pipelinechannel--prediction_type: - componentInputParameter: pipelinechannel--prediction_type - pipelinechannel--project: + project: componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - componentInputParameter: pipelinechannel--quantiles - pipelinechannel--root_dir: + root_dir: componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuning_result_artifact_uri: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials - pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--target_column: - componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--weight_column: - componentInputParameter: pipelinechannel--weight_column taskInfo: - name: stage_1_tuning_result_artifact_uri_not_empty - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'true' - condition-4: + name: automl-tabular-ensemble-3 + automl-tabular-infra-validator-3: + cachingOptions: + enableCache: true componentRef: - name: comp-condition-4 + name: comp-automl-tabular-infra-validator-3 dependentTasks: - - feature-transform-engine - - merge-materialized-splits - - split-materialized-data - - string-not-empty + - automl-tabular-ensemble-3 inputs: artifacts: - pipelinechannel--feature-transform-engine-dataset_stats: - taskOutputArtifact: - outputArtifactKey: dataset_stats - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-feature_ranking: + unmanaged_container_model: taskOutputArtifact: - outputArtifactKey: feature_ranking - producerTask: feature-transform-engine - 
pipelinechannel--feature-transform-engine-instance_schema: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + taskInfo: + name: automl-tabular-infra-validator-3 + automl-tabular-stage-1-tuner-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-automl-tabular-stage-1-tuner-2 + dependentTasks: + - distillation-stage-feature-transform-engine + - split-materialized-data-2 + inputs: + artifacts: + materialized_eval_split: taskOutputArtifact: - outputArtifactKey: instance_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-training_schema: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data-2 + materialized_train_split: taskOutputArtifact: - outputArtifactKey: training_schema - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-transform_output: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data-2 + metadata: + componentInputArtifact: pipelinechannel--training-configurator-and-validator-2-metadata + transform_output: taskOutputArtifact: outputArtifactKey: transform_output - producerTask: feature-transform-engine - pipelinechannel--merge-materialized-splits-splits: + producerTask: distillation-stage-feature-transform-engine + parameters: + deadline_hours: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours + disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + num_selected_trials: + runtimeValue: + constant: 1.0 + project: + componentInputParameter: pipelinechannel--project + reduce_search_space_mode: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_distillation: + runtimeValue: + constant: 1.0 + single_run_max_secs: + componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs + worker_pool_specs_override_json: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + taskInfo: + name: automl-tabular-stage-1-tuner-2 + condition-8: + componentRef: + name: comp-condition-8 + dependentTasks: + - automl-tabular-ensemble-3 + - model-upload-3 + inputs: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: taskOutputArtifact: - outputArtifactKey: splits - producerTask: merge-materialized-splits - pipelinechannel--parent_model: - componentInputArtifact: pipelinechannel--parent_model - pipelinechannel--split-materialized-data-materialized_eval_split: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data - pipelinechannel--split-materialized-data-materialized_train_split: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + pipelinechannel--model-upload-3-model: taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data + outputArtifactKey: model 
+ producerTask: model-upload-3 parameters: - pipelinechannel--apply_feature_selection_tuning: - componentInputParameter: pipelinechannel--apply_feature_selection_tuning - pipelinechannel--cv_trainer_worker_pool_specs_override: - componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + pipelinechannel--bool-identity-2-Output: + componentInputParameter: pipelinechannel--bool-identity-2-Output + pipelinechannel--bool-identity-3-Output: + componentInputParameter: pipelinechannel--bool-identity-3-Output pipelinechannel--dataflow_service_account: componentInputParameter: pipelinechannel--dataflow_service_account pipelinechannel--dataflow_subnetwork: componentInputParameter: pipelinechannel--dataflow_subnetwork pipelinechannel--dataflow_use_public_ips: componentInputParameter: pipelinechannel--dataflow_use_public_ips - pipelinechannel--disable_early_stopping: - componentInputParameter: pipelinechannel--disable_early_stopping - pipelinechannel--enable_probabilistic_inference: - componentInputParameter: pipelinechannel--enable_probabilistic_inference pipelinechannel--encryption_spec_key_name: componentInputParameter: pipelinechannel--encryption_spec_key_name pipelinechannel--evaluation_batch_explain_machine_type: @@ -2712,86 +3113,48 @@ components: componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers pipelinechannel--evaluation_dataflow_starting_num_workers: componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers - pipelinechannel--export_additional_model_without_custom_ops: - componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops - pipelinechannel--fast_testing: - componentInputParameter: pipelinechannel--fast_testing pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_downsampled_test_split_uri - producerTask: feature-transform-engine + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri pipelinechannel--feature-transform-engine-bigquery_test_split_uri: - taskOutputParameter: - outputParameterKey: bigquery_test_split_uri - producerTask: feature-transform-engine - pipelinechannel--feature-transform-engine-split_example_counts: - taskOutputParameter: - outputParameterKey: split_example_counts - producerTask: feature-transform-engine + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri pipelinechannel--location: componentInputParameter: pipelinechannel--location - pipelinechannel--model_description: - componentInputParameter: pipelinechannel--model_description - pipelinechannel--num_selected_features: - componentInputParameter: pipelinechannel--num_selected_features - pipelinechannel--optimization_objective: - componentInputParameter: pipelinechannel--optimization_objective - pipelinechannel--optimization_objective_precision_value: - componentInputParameter: pipelinechannel--optimization_objective_precision_value - pipelinechannel--optimization_objective_recall_value: - componentInputParameter: pipelinechannel--optimization_objective_recall_value pipelinechannel--prediction_type: componentInputParameter: pipelinechannel--prediction_type pipelinechannel--project: componentInputParameter: pipelinechannel--project - pipelinechannel--quantiles: - 
componentInputParameter: pipelinechannel--quantiles pipelinechannel--root_dir: componentInputParameter: pipelinechannel--root_dir - pipelinechannel--run_distillation: - componentInputParameter: pipelinechannel--run_distillation - pipelinechannel--run_evaluation: - componentInputParameter: pipelinechannel--run_evaluation - pipelinechannel--set-optional-inputs-model_display_name: - componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name - pipelinechannel--stage_1_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_1_num_parallel_trials - pipelinechannel--stage_1_tuner_worker_pool_specs_override: - componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override - pipelinechannel--stage_2_num_parallel_trials: - componentInputParameter: pipelinechannel--stage_2_num_parallel_trials - pipelinechannel--stage_2_num_selected_trials: - componentInputParameter: pipelinechannel--stage_2_num_selected_trials pipelinechannel--string-not-empty-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: string-not-empty - pipelinechannel--study_spec_parameters_override: - componentInputParameter: pipelinechannel--study_spec_parameters_override + componentInputParameter: pipelinechannel--string-not-empty-Output pipelinechannel--target_column: componentInputParameter: pipelinechannel--target_column - pipelinechannel--train_budget_milli_node_hours: - componentInputParameter: pipelinechannel--train_budget_milli_node_hours - pipelinechannel--weight_column: - componentInputParameter: pipelinechannel--weight_column taskInfo: - name: stage_1_tuning_result_artifact_uri_empty + name: is-evaluation triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] - == 'false' - feature-transform-engine: + condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] + == 'true' + distillation-stage-feature-transform-engine: cachingOptions: enableCache: true componentRef: - name: comp-feature-transform-engine + name: comp-distillation-stage-feature-transform-engine + dependentTasks: + - get-bp-bq-output-table + - get-bp-bq-output-table-2 + - get-transform-config-path inputs: parameters: bigquery_staging_full_dataset_id: componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id - data_source_bigquery_table_path: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path - data_source_csv_filenames: - componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + bigquery_train_full_table_uri: + taskOutputParameter: + outputParameterKey: bq_output_table_uri + producerTask: get-bp-bq-output-table + bigquery_validate_full_table_uri: + taskOutputParameter: + outputParameterKey: bq_output_table_uri + producerTask: get-bp-bq-output-table-2 dataflow_disk_size_gb: componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb dataflow_machine_type: @@ -2802,126 +3165,239 @@ components: componentInputParameter: pipelinechannel--dataflow_service_account dataflow_subnetwork: componentInputParameter: pipelinechannel--dataflow_subnetwork - dataflow_use_public_ips: - componentInputParameter: pipelinechannel--dataflow_use_public_ips - dataset_level_custom_transformation_definitions: - componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions - dataset_level_transformations: - componentInputParameter: pipelinechannel--dataset_level_transformations encryption_spec_key_name: 
componentInputParameter: pipelinechannel--encryption_spec_key_name - feature_selection_algorithm: - componentInputParameter: pipelinechannel--feature_selection_algorithm - legacy_transformations_path: - componentInputParameter: pipelinechannel--legacy_transformations_path location: componentInputParameter: pipelinechannel--location - max_selected_features: - componentInputParameter: pipelinechannel--max_selected_features - predefined_split_key: - componentInputParameter: pipelinechannel--predefined_split_key prediction_type: componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project root_dir: componentInputParameter: pipelinechannel--root_dir - run_distill: - componentInputParameter: pipelinechannel--run_distillation - run_feature_selection: - componentInputParameter: pipelinechannel--run_feature_selection - stratified_split_key: - componentInputParameter: pipelinechannel--stratified_split_key target_column: componentInputParameter: pipelinechannel--target_column - test_fraction: - componentInputParameter: pipelinechannel--test_fraction - tf_auto_transform_features: - componentInputParameter: pipelinechannel--tf_auto_transform_features - tf_custom_transformation_definitions: - componentInputParameter: pipelinechannel--tf_custom_transformation_definitions - tf_transformations_path: - componentInputParameter: pipelinechannel--tf_transformations_path - training_fraction: - componentInputParameter: pipelinechannel--training_fraction - validation_fraction: - componentInputParameter: pipelinechannel--validation_fraction + transform_config_path: + taskOutputParameter: + outputParameterKey: transform_config_path + producerTask: get-transform-config-path weight_column: componentInputParameter: pipelinechannel--weight_column taskInfo: - name: feature-transform-engine - merge-materialized-splits: + name: distillation-stage-feature-transform-engine + get-bp-bq-output-table: cachingOptions: enableCache: true componentRef: - name: comp-merge-materialized-splits + name: comp-get-bp-bq-output-table dependentTasks: - - split-materialized-data + - model-batch-predict-3 inputs: artifacts: - split_0: - taskOutputArtifact: - outputArtifactKey: materialized_train_split - producerTask: split-materialized-data - split_1: + bp_job: taskOutputArtifact: - outputArtifactKey: materialized_eval_split - producerTask: split-materialized-data + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-3 taskInfo: - name: merge-materialized-splits - split-materialized-data: + name: get-bp-bq-output-table + get-bp-bq-output-table-2: cachingOptions: enableCache: true componentRef: - name: comp-split-materialized-data + name: comp-get-bp-bq-output-table-2 dependentTasks: - - feature-transform-engine + - model-batch-predict-4 inputs: artifacts: - materialized_data: + bp_job: taskOutputArtifact: - outputArtifactKey: materialized_data - producerTask: feature-transform-engine + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-4 taskInfo: - name: split-materialized-data - string-not-empty: + name: get-bp-bq-output-table-2 + get-transform-config-path: cachingOptions: enableCache: true componentRef: - name: comp-string-not-empty + name: comp-get-transform-config-path inputs: - parameters: - value: - componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + artifacts: + transform_output_dir_artifact: + componentInputArtifact: pipelinechannel--feature-transform-engine-transform_output taskInfo: - name: 
check-if-is-stage-1-tuning-result-artifact-uri-not-empty - inputDefinitions: - artifacts: - pipelinechannel--parent_model: - artifactType: + name: get-transform-config-path + model-batch-predict-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-3 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--get-bigquery-destination-output-uri-bigquery_destination_output_uri + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_train_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-3 + model-batch-predict-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-4 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model + parameters: + bigquery_destination_output_uri: + componentInputParameter: pipelinechannel--get-bigquery-destination-output-uri-2-bigquery_destination_output_uri + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_validation_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: bigquery + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-4 + model-upload-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-upload-3 + dependentTasks: + - automl-tabular-ensemble-3 + - automl-tabular-infra-validator-3 + inputs: + artifacts: + explanation_metadata_artifact: + taskOutputArtifact: + outputArtifactKey: explanation_metadata_artifact + producerTask: automl-tabular-ensemble-3 + unmanaged_container_model: + taskOutputArtifact: + outputArtifactKey: unmanaged_container_model + producerTask: automl-tabular-ensemble-3 + parameters: + display_name: + runtimeValue: + constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + 
encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + taskOutputParameter: + outputParameterKey: explanation_parameters + producerTask: automl-tabular-ensemble-3 + location: + componentInputParameter: pipelinechannel--location + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: model-upload-3 + split-materialized-data-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data-2 + dependentTasks: + - distillation-stage-feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: distillation-stage-feature-transform-engine + taskInfo: + name: split-materialized-data-2 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--feature-transform-engine-transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-2-instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--training-configurator-and-validator-2-metadata: + artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 parameters: - pipelinechannel--apply_feature_selection_tuning: - parameterType: BOOLEAN pipelinechannel--bigquery_staging_full_dataset_id: parameterType: STRING - pipelinechannel--cv_trainer_worker_pool_specs_override: - parameterType: LIST + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: + parameterType: STRING + pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: + parameterType: NUMBER_INTEGER pipelinechannel--dataflow_service_account: parameterType: STRING pipelinechannel--dataflow_subnetwork: parameterType: STRING pipelinechannel--dataflow_use_public_ips: parameterType: BOOLEAN - pipelinechannel--dataset_level_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--dataset_level_transformations: - parameterType: LIST pipelinechannel--disable_early_stopping: parameterType: BOOLEAN - pipelinechannel--enable_probabilistic_inference: - parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER pipelinechannel--encryption_spec_key_name: parameterType: STRING pipelinechannel--evaluation_batch_explain_machine_type: @@ -2946,9 +3422,13 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--export_additional_model_without_custom_ops: parameterType: BOOLEAN - pipelinechannel--fast_testing: - parameterType: BOOLEAN - pipelinechannel--feature_selection_algorithm: + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: 
STRING + pipelinechannel--feature-transform-engine-bigquery_train_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_validation_split_uri: parameterType: STRING pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: parameterType: NUMBER_INTEGER @@ -2956,1302 +3436,3679 @@ components: parameterType: STRING pipelinechannel--feature_transform_engine_dataflow_max_num_workers: parameterType: NUMBER_INTEGER - pipelinechannel--legacy_transformations_path: - parameterType: STRING - pipelinechannel--location: - parameterType: STRING - pipelinechannel--max_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--model_description: + pipelinechannel--get-bigquery-destination-output-uri-2-bigquery_destination_output_uri: parameterType: STRING - pipelinechannel--num_selected_features: - parameterType: NUMBER_INTEGER - pipelinechannel--optimization_objective: + pipelinechannel--get-bigquery-destination-output-uri-bigquery_destination_output_uri: parameterType: STRING - pipelinechannel--optimization_objective_precision_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--optimization_objective_recall_value: - parameterType: NUMBER_DOUBLE - pipelinechannel--predefined_split_key: + pipelinechannel--location: parameterType: STRING pipelinechannel--prediction_type: parameterType: STRING pipelinechannel--project: parameterType: STRING - pipelinechannel--quantiles: - parameterType: LIST pipelinechannel--root_dir: parameterType: STRING - pipelinechannel--run_distillation: - parameterType: BOOLEAN - pipelinechannel--run_evaluation: - parameterType: BOOLEAN - pipelinechannel--run_feature_selection: - parameterType: BOOLEAN - pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: - parameterType: STRING - pipelinechannel--set-optional-inputs-data_source_csv_filenames: - parameterType: STRING - pipelinechannel--set-optional-inputs-model_display_name: - parameterType: STRING pipelinechannel--stage_1_num_parallel_trials: parameterType: NUMBER_INTEGER pipelinechannel--stage_1_tuner_worker_pool_specs_override: parameterType: LIST - pipelinechannel--stage_1_tuning_result_artifact_uri: - parameterType: STRING - pipelinechannel--stage_2_num_parallel_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stage_2_num_selected_trials: - parameterType: NUMBER_INTEGER - pipelinechannel--stratified_split_key: + pipelinechannel--string-not-empty-Output: parameterType: STRING - pipelinechannel--study_spec_parameters_override: - parameterType: LIST pipelinechannel--target_column: parameterType: STRING - pipelinechannel--test_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--tf_auto_transform_features: - parameterType: STRUCT - pipelinechannel--tf_custom_transformation_definitions: - parameterType: LIST - pipelinechannel--tf_transformations_path: - parameterType: STRING - pipelinechannel--train_budget_milli_node_hours: - parameterType: NUMBER_DOUBLE - pipelinechannel--training_fraction: - parameterType: NUMBER_DOUBLE - pipelinechannel--validation_fraction: - parameterType: NUMBER_DOUBLE pipelinechannel--weight_column: parameterType: STRING outputDefinitions: artifacts: - feature-attribution-2-feature_attributions: + feature-attribution-3-feature_attributions: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 - feature-attribution-feature_attributions: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 - model-evaluation-2-evaluation_metrics: - artifactType: - schemaTitle: system.Metrics - schemaVersion: 0.0.1 
- model-evaluation-evaluation_metrics: + model-evaluation-3-evaluation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 - comp-feature-attribution: - executorLabel: exec-feature-attribution - inputDefinitions: - artifacts: - predictions_bigquery_source: - artifactType: - schemaTitle: google.BQTable - schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true - parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: - defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service + comp-condition-8: + dag: + outputs: + artifacts: + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature_attributions + producerSubtask: feature-attribution-3 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: evaluation_metrics + producerSubtask: model-evaluation-3 + tasks: + feature-attribution-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-attribution-3 + dependentTasks: + - model-batch-explanation-3 + inputs: + artifacts: + predictions_gcs_source: + taskOutputArtifact: + outputArtifactKey: gcs_output_directory + producerTask: model-batch-explanation-3 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + location: + componentInputParameter: pipelinechannel--location + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + taskInfo: + name: feature-attribution-3 + model-batch-explanation-3: + cachingOptions: + 
enableCache: true + componentRef: + name: comp-model-batch-explanation-3 + inputs: + artifacts: + explanation_metadata_artifact: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + explanation_parameters: + componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + generate_explanation: + runtimeValue: + constant: 1.0 + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + taskInfo: + name: model-batch-explanation-3 + model-batch-predict-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-batch-predict-5 + inputs: + artifacts: + unmanaged_container_model: + componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model + parameters: + bigquery_source_input_uri: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + gcs_destination_output_uri_prefix: + componentInputParameter: pipelinechannel--root_dir + instances_format: + runtimeValue: + constant: bigquery + job_display_name: + runtimeValue: + constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + location: + componentInputParameter: pipelinechannel--location + machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + predictions_format: + runtimeValue: + constant: jsonl + project: + componentInputParameter: pipelinechannel--project + starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + taskInfo: + name: model-batch-predict-5 + model-evaluation-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-3 + dependentTasks: + - model-batch-predict-5 + inputs: + artifacts: + batch_prediction_job: + taskOutputArtifact: + outputArtifactKey: batchpredictionjob + producerTask: model-batch-predict-5 + parameters: + dataflow_disk_size: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + dataflow_max_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + 
dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataflow_workers_num: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + ground_truth_column: + componentInputParameter: pipelinechannel--target_column + ground_truth_format: + runtimeValue: + constant: jsonl + location: + componentInputParameter: pipelinechannel--location + prediction_label_column: + runtimeValue: + constant: '' + prediction_score_column: + runtimeValue: + constant: '' + predictions_format: + runtimeValue: + constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + taskInfo: + name: model-evaluation-3 + model-evaluation-import-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-model-evaluation-import-3 + dependentTasks: + - feature-attribution-3 + - model-evaluation-3 + inputs: + artifacts: + feature_attributions: + taskOutputArtifact: + outputArtifactKey: feature_attributions + producerTask: feature-attribution-3 + metrics: + taskOutputArtifact: + outputArtifactKey: evaluation_metrics + producerTask: model-evaluation-3 + model: + componentInputArtifact: pipelinechannel--model-upload-3-model + parameters: + dataset_path: + componentInputParameter: pipelinechannel--feature-transform-engine-bigquery_test_split_uri + dataset_type: + runtimeValue: + constant: bigquery + display_name: + runtimeValue: + constant: AutoML Tabular + problem_type: + componentInputParameter: pipelinechannel--prediction_type + taskInfo: + name: model-evaluation-import-3 + inputDefinitions: + artifacts: + pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + pipelinechannel--model-upload-3-model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + parameters: + pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: + parameterType: STRUCT + pipelinechannel--bool-identity-2-Output: + parameterType: STRING + pipelinechannel--bool-identity-3-Output: + parameterType: STRING + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + 
parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + parameterType: STRING + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--string-not-empty-Output: + parameterType: STRING + pipelinechannel--target_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-distillation-stage-feature-transform-engine: + executorLabel: exec-distillation-stage-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + ''projectId.datasetId'' format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + ''vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}''. + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + bigquery_train_full_table_uri: + description: 'BigQuery full table id for our + + train split output by pre-distillation FTE with soft target included.' + parameterType: STRING + bigquery_validate_full_table_uri: + description: 'BigQuery full table id for our + + validation split output by pre-distillation FTE with soft target + + included.' + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + prediction_type: + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + target_column: + description: Target column of input data. + parameterType: STRING + transform_config_path: + description: 'Path to the transform config output by the + + pre-distillation FTE component.' + parameterType: STRING + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + gcp_resources: + description: 'GCP resources created by this component. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
+ parameterType: STRING + comp-exit-handler-1: + dag: + outputs: + artifacts: + feature-attribution-2-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-2-feature_attributions + producerSubtask: condition-4 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: condition-4 + feature-attribution-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-feature_attributions + producerSubtask: condition-2 + model-evaluation-2-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-2-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: condition-4 + model-evaluation-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-evaluation_metrics + producerSubtask: condition-2 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model + parameters: + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + 
pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuning_result_artifact_uri: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: 
pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_not_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'true' + condition-4: + componentRef: + name: comp-condition-4 + dependentTasks: + - feature-transform-engine + - merge-materialized-splits + - split-materialized-data + - string-not-empty + inputs: + artifacts: + pipelinechannel--feature-transform-engine-dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-feature_ranking: + taskOutputArtifact: + outputArtifactKey: feature_ranking + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-transform_output: + taskOutputArtifact: + outputArtifactKey: transform_output + producerTask: feature-transform-engine + pipelinechannel--merge-materialized-splits-splits: + taskOutputArtifact: + outputArtifactKey: splits + producerTask: merge-materialized-splits + pipelinechannel--parent_model: + componentInputArtifact: pipelinechannel--parent_model + pipelinechannel--split-materialized-data-materialized_eval_split: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + pipelinechannel--split-materialized-data-materialized_train_split: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + parameters: + pipelinechannel--apply_feature_selection_tuning: + componentInputParameter: pipelinechannel--apply_feature_selection_tuning + pipelinechannel--bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + pipelinechannel--cv_trainer_worker_pool_specs_override: + componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override + pipelinechannel--dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + pipelinechannel--dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + pipelinechannel--dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + pipelinechannel--disable_early_stopping: + componentInputParameter: pipelinechannel--disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: pipelinechannel--distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count + pipelinechannel--enable_probabilistic_inference: + componentInputParameter: pipelinechannel--enable_probabilistic_inference + pipelinechannel--encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + pipelinechannel--evaluation_batch_explain_machine_type: + componentInputParameter: 
pipelinechannel--evaluation_batch_explain_machine_type + pipelinechannel--evaluation_batch_explain_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count + pipelinechannel--evaluation_batch_explain_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count + pipelinechannel--evaluation_batch_predict_machine_type: + componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type + pipelinechannel--evaluation_batch_predict_max_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count + pipelinechannel--evaluation_batch_predict_starting_replica_count: + componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count + pipelinechannel--evaluation_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb + pipelinechannel--evaluation_dataflow_machine_type: + componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type + pipelinechannel--evaluation_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers + pipelinechannel--evaluation_dataflow_starting_num_workers: + componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers + pipelinechannel--export_additional_model_without_custom_ops: + componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops + pipelinechannel--fast_testing: + componentInputParameter: pipelinechannel--fast_testing + pipelinechannel--feature-transform-engine-bigquery_downsampled_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_downsampled_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_test_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_test_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_train_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_train_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-bigquery_validation_split_uri: + taskOutputParameter: + outputParameterKey: bigquery_validation_split_uri + producerTask: feature-transform-engine + pipelinechannel--feature-transform-engine-split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + pipelinechannel--feature_transform_engine_dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + pipelinechannel--location: + componentInputParameter: pipelinechannel--location + pipelinechannel--model_description: + componentInputParameter: pipelinechannel--model_description + pipelinechannel--num_selected_features: + componentInputParameter: pipelinechannel--num_selected_features + pipelinechannel--optimization_objective: + componentInputParameter: pipelinechannel--optimization_objective + pipelinechannel--optimization_objective_precision_value: + componentInputParameter: pipelinechannel--optimization_objective_precision_value + 
pipelinechannel--optimization_objective_recall_value: + componentInputParameter: pipelinechannel--optimization_objective_recall_value + pipelinechannel--prediction_type: + componentInputParameter: pipelinechannel--prediction_type + pipelinechannel--project: + componentInputParameter: pipelinechannel--project + pipelinechannel--quantiles: + componentInputParameter: pipelinechannel--quantiles + pipelinechannel--root_dir: + componentInputParameter: pipelinechannel--root_dir + pipelinechannel--run_distillation: + componentInputParameter: pipelinechannel--run_distillation + pipelinechannel--run_evaluation: + componentInputParameter: pipelinechannel--run_evaluation + pipelinechannel--set-optional-inputs-model_display_name: + componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name + pipelinechannel--stage_1_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_1_num_parallel_trials + pipelinechannel--stage_1_tuner_worker_pool_specs_override: + componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override + pipelinechannel--stage_2_num_parallel_trials: + componentInputParameter: pipelinechannel--stage_2_num_parallel_trials + pipelinechannel--stage_2_num_selected_trials: + componentInputParameter: pipelinechannel--stage_2_num_selected_trials + pipelinechannel--string-not-empty-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: string-not-empty + pipelinechannel--study_spec_parameters_override: + componentInputParameter: pipelinechannel--study_spec_parameters_override + pipelinechannel--target_column: + componentInputParameter: pipelinechannel--target_column + pipelinechannel--train_budget_milli_node_hours: + componentInputParameter: pipelinechannel--train_budget_milli_node_hours + pipelinechannel--weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: stage_1_tuning_result_artifact_uri_empty + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] + == 'false' + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: pipelinechannel--bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: pipelinechannel--feature_transform_engine_dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: pipelinechannel--dataflow_service_account + dataflow_subnetwork: + componentInputParameter: pipelinechannel--dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: pipelinechannel--dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: pipelinechannel--dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: pipelinechannel--dataset_level_transformations + encryption_spec_key_name: + componentInputParameter: pipelinechannel--encryption_spec_key_name + 
feature_selection_algorithm: + componentInputParameter: pipelinechannel--feature_selection_algorithm + legacy_transformations_path: + componentInputParameter: pipelinechannel--legacy_transformations_path + location: + componentInputParameter: pipelinechannel--location + materialized_examples_format: + componentInputParameter: pipelinechannel--materialized_examples_format + max_selected_features: + componentInputParameter: pipelinechannel--max_selected_features + predefined_split_key: + componentInputParameter: pipelinechannel--predefined_split_key + prediction_type: + componentInputParameter: pipelinechannel--prediction_type + project: + componentInputParameter: pipelinechannel--project + root_dir: + componentInputParameter: pipelinechannel--root_dir + run_feature_selection: + componentInputParameter: pipelinechannel--run_feature_selection + stratified_split_key: + componentInputParameter: pipelinechannel--stratified_split_key + target_column: + componentInputParameter: pipelinechannel--target_column + test_fraction: + componentInputParameter: pipelinechannel--test_fraction + tf_auto_transform_features: + componentInputParameter: pipelinechannel--tf_auto_transform_features + tf_custom_transformation_definitions: + componentInputParameter: pipelinechannel--tf_custom_transformation_definitions + tf_transform_execution_engine: + componentInputParameter: pipelinechannel--tf_transform_execution_engine + tf_transformations_path: + componentInputParameter: pipelinechannel--tf_transformations_path + training_fraction: + componentInputParameter: pipelinechannel--training_fraction + validation_fraction: + componentInputParameter: pipelinechannel--validation_fraction + weight_column: + componentInputParameter: pipelinechannel--weight_column + taskInfo: + name: feature-transform-engine + merge-materialized-splits: + cachingOptions: + enableCache: true + componentRef: + name: comp-merge-materialized-splits + dependentTasks: + - split-materialized-data + inputs: + artifacts: + split_0: + taskOutputArtifact: + outputArtifactKey: materialized_train_split + producerTask: split-materialized-data + split_1: + taskOutputArtifact: + outputArtifactKey: materialized_eval_split + producerTask: split-materialized-data + taskInfo: + name: merge-materialized-splits + split-materialized-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-split-materialized-data + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + materialized_data: + taskOutputArtifact: + outputArtifactKey: materialized_data + producerTask: feature-transform-engine + taskInfo: + name: split-materialized-data + string-not-empty: + cachingOptions: + enableCache: true + componentRef: + name: comp-string-not-empty + inputs: + parameters: + value: + componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri + taskInfo: + name: check-if-is-stage-1-tuning-result-artifact-uri-not-empty + inputDefinitions: + artifacts: + pipelinechannel--parent_model: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + pipelinechannel--apply_feature_selection_tuning: + parameterType: BOOLEAN + pipelinechannel--bigquery_staging_full_dataset_id: + parameterType: STRING + pipelinechannel--cv_trainer_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--dataflow_service_account: + parameterType: STRING + pipelinechannel--dataflow_subnetwork: + parameterType: STRING + pipelinechannel--dataflow_use_public_ips: + parameterType: BOOLEAN + 
pipelinechannel--dataset_level_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--dataset_level_transformations: + parameterType: LIST + pipelinechannel--disable_early_stopping: + parameterType: BOOLEAN + pipelinechannel--distill_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--distill_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--distill_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--enable_probabilistic_inference: + parameterType: BOOLEAN + pipelinechannel--encryption_spec_key_name: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_explain_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_explain_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_machine_type: + parameterType: STRING + pipelinechannel--evaluation_batch_predict_max_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_batch_predict_starting_replica_count: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_machine_type: + parameterType: STRING + pipelinechannel--evaluation_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--evaluation_dataflow_starting_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--export_additional_model_without_custom_ops: + parameterType: BOOLEAN + pipelinechannel--fast_testing: + parameterType: BOOLEAN + pipelinechannel--feature_selection_algorithm: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_disk_size_gb: + parameterType: NUMBER_INTEGER + pipelinechannel--feature_transform_engine_dataflow_machine_type: + parameterType: STRING + pipelinechannel--feature_transform_engine_dataflow_max_num_workers: + parameterType: NUMBER_INTEGER + pipelinechannel--legacy_transformations_path: + parameterType: STRING + pipelinechannel--location: + parameterType: STRING + pipelinechannel--materialized_examples_format: + parameterType: STRING + pipelinechannel--max_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--model_description: + parameterType: STRING + pipelinechannel--num_selected_features: + parameterType: NUMBER_INTEGER + pipelinechannel--optimization_objective: + parameterType: STRING + pipelinechannel--optimization_objective_precision_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--optimization_objective_recall_value: + parameterType: NUMBER_DOUBLE + pipelinechannel--predefined_split_key: + parameterType: STRING + pipelinechannel--prediction_type: + parameterType: STRING + pipelinechannel--project: + parameterType: STRING + pipelinechannel--quantiles: + parameterType: LIST + pipelinechannel--root_dir: + parameterType: STRING + pipelinechannel--run_distillation: + parameterType: BOOLEAN + pipelinechannel--run_evaluation: + parameterType: BOOLEAN + pipelinechannel--run_feature_selection: + parameterType: BOOLEAN + pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: + parameterType: STRING + pipelinechannel--set-optional-inputs-data_source_csv_filenames: + parameterType: STRING + pipelinechannel--set-optional-inputs-model_display_name: + parameterType: STRING + pipelinechannel--stage_1_num_parallel_trials: + parameterType: NUMBER_INTEGER + 
pipelinechannel--stage_1_tuner_worker_pool_specs_override: + parameterType: LIST + pipelinechannel--stage_1_tuning_result_artifact_uri: + parameterType: STRING + pipelinechannel--stage_2_num_parallel_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stage_2_num_selected_trials: + parameterType: NUMBER_INTEGER + pipelinechannel--stratified_split_key: + parameterType: STRING + pipelinechannel--study_spec_parameters_override: + parameterType: LIST + pipelinechannel--target_column: + parameterType: STRING + pipelinechannel--test_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--tf_auto_transform_features: + parameterType: STRUCT + pipelinechannel--tf_custom_transformation_definitions: + parameterType: LIST + pipelinechannel--tf_transform_execution_engine: + parameterType: STRING + pipelinechannel--tf_transformations_path: + parameterType: STRING + pipelinechannel--train_budget_milli_node_hours: + parameterType: NUMBER_DOUBLE + pipelinechannel--training_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--validation_fraction: + parameterType: NUMBER_DOUBLE + pipelinechannel--weight_column: + parameterType: STRING + outputDefinitions: + artifacts: + feature-attribution-2-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + feature-attribution-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-2-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + model-evaluation-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + comp-feature-attribution: + executorLabel: exec-feature-attribution + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. 
For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-2: + executorLabel: exec-feature-attribution-2 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' + isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-attribution-3: + executorLabel: exec-feature-attribution-3 + inputDefinitions: + artifacts: + predictions_bigquery_source: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + description: 'BigQuery table + + with prediction or explanation data to be used for this evaluation. For + + prediction results, the table column should be named "predicted_*".' 
+ isOptional: true + predictions_gcs_source: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'An artifact with its + + URI pointing toward a GCS directory with prediction or explanation files + + to be used for this evaluation. For prediction results, the files should + + be named "prediction.results-*" or "predictions_". For explanation + + results, the files should be named "explanation.results-*".' + isOptional: true + parameters: + dataflow_disk_size: + defaultValue: 50.0 + description: 'The disk size (in GB) of the machine + + executing the evaluation run. If not set, defaulted to `50`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + description: 'The machine type executing the + + evaluation run. If not set, defaulted to `n1-standard-4`.' + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 5.0 + description: 'The max number of workers + + executing the evaluation run. If not set, defaulted to `25`.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Service account to run the + + dataflow job. If not set, dataflow will use the default worker service + + account. For more details, see + + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 1.0 + description: 'The number of workers executing the + + evaluation run. If not set, defaulted to `10`.' + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption key + + for the Dataflow job. If this is set, then all resources created by the + + Dataflow job will be encrypted with the provided encryption key.' + isOptional: true + parameterType: STRING + force_direct_runner: + defaultValue: false + description: 'Flag to use Beam DirectRunner. If set to true, + + use Apache Beam DirectRunner to execute the task locally instead of + + launching a Dataflow job.' + isOptional: true + parameterType: BOOLEAN + location: + defaultValue: us-central1 + description: 'Location running feature attribution. If not + + set, defaulted to `us-central1`.' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + description: 'The file format for the batch + + prediction results. `jsonl`, `csv`, and `bigquery` are the allowed + + formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + isOptional: true + parameterType: STRING + project: + description: Project to run feature attribution container. + parameterType: STRING + outputDefinitions: + artifacts: + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + description: 'Serialized gcp_resources proto tracking the dataflow + + job. 
For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. 
code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. 
Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. 
MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. + + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + description: '[Deprecated] A forecasting time series identifier column. + Raises an + + exception if used - use the "time_series_identifier_column" field + + instead.' + isOptional: true + parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. 
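For illustration only, and not part of this patch: a minimal Python sketch of the `dataset_level_transformations` list described above, assembled from the Join and TimeAggregate examples quoted in the parameter documentation. The BigQuery table and column names are placeholders, not values taken from these pipelines:

    # Illustrative only: mirrors the Join and TimeAggregate examples in the
    # dataset_level_transformations description above. The BigQuery table and
    # column names are placeholders.
    dataset_level_transformations = [
        {
            "transformation": "Join",
            "right_table_uri": "bq://example-project.example_dataset.example_table",
            "join_keys": [["join_key_col", "join_key_col"]],
        },
        {
            "transformation": "TimeAggregate",
            "time_difference": 40,
            "time_difference_units": "DAY",
            "time_series_identifier_columns": ["store_id"],
            "time_column": "time_col",
            "time_difference_target_column": "target_col",
            "output_column": "output_col",
        },
    ]

The same list-of-dicts shape applies to the PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum variants documented above.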
+ isOptional: true + parameterType: LIST + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: '(deprecated) Whether the distillation should be applied + + to the training.' 
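Also purely illustrative: a sketch of how the forecasting-related inputs documented above might be collected into a plain dict of parameter values before being handed to whatever wrapper invokes this component. Every value is a placeholder chosen for the example, not a recommendation from this patch:

    # Placeholder values keyed by the forecasting input names listed above.
    forecasting_params = {
        "prediction_type": "time_series",
        "forecasting_time_column": "date",
        "forecasting_time_series_identifier_columns": ["store_id"],
        "forecasting_forecast_horizon": 28,
        "forecasting_context_window": 28,
        "forecasting_holiday_regions": ["GLOBAL"],
        "forecasting_window_max_count": 1000,
        "forecasting_apply_windowing": True,
    }

Judging from the defaults above, integer inputs left at -1 appear to act as "not set" sentinels, so anything omitted from such a dict simply keeps that behavior.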
+ isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." + isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform - account. For more details, see + row-level TF transformations. Can be one of: "dataflow" (by default) or - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' - isOptional: true - parameterType: STRING - dataflow_subnetwork: - defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork + "bigquery". 
Using "bigquery" as the execution engine is experimental and - name, when empty the default subnetwork will be used. More details: + is for allowlisted customers only. In addition, executing on "bigquery" - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' - isOptional: true - parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow + only supports auto transformations (i.e., specified by - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the + tf_auto_transform_features) and will raise an error when - evaluation run. If not set, defaulted to `10`.' + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. 
code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. 
If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. 
Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: + parameterType: STRING + timestamp_split_key: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' + description: Timestamp split key. isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. isOptional: true - parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. isOptional: true parameterType: STRING - project: - description: Project to run feature attribution container. - parameterType: STRING outputDefinitions: artifacts: - feature_attributions: + dataset_stats: artifactType: - schemaTitle: system.Metrics + schemaTitle: system.Artifact schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' - parameterType: STRING - comp-feature-attribution-2: - executorLabel: exec-feature-attribution-2 - inputDefinitions: - artifacts: - predictions_bigquery_source: + description: The stats of the dataset. + feature_ranking: artifactType: - schemaTitle: google.BQTable + schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'BigQuery table + description: 'The ranking of features, all features supported in the - with prediction or explanation data to be used for this evaluation. For + dataset will be included. For "AMI" algorithm, array features won''t be - prediction results, the table column should be named "predicted_*".' - isOptional: true - predictions_gcs_source: + available in the ranking as arrays are not supported yet.' + instance_schema: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. 
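To make the transformation inputs above concrete, here is a small, purely illustrative sketch of an auto-transform mapping and a `tf_transformations_path`-style configuration, built from the ZScale, Vocabulary, and Datetime examples quoted in the descriptions. Column names and the output path are placeholders; in a real run the JSON file would be uploaded to Cloud Storage and its gs:// URI passed as `tf_transformations_path`:

    import json

    # Auto type-resolution for some columns, as in the tf_auto_transform_features
    # example above; feature names are placeholders.
    tf_auto_transform_features = {
        "auto": ["feature1"],
        "categorical": ["feature2", "feature3"],
    }

    # Explicit row-level transformations in the style documented for
    # tf_transformations_path (ZScale, Vocabulary, Datetime).
    tf_transformations = [
        {"transformation": "ZScale", "input_columns": ["feature_1"]},
        {"transformation": "Vocabulary", "input_columns": ["feature_2"], "top_k": 100},
        {
            "transformation": "Datetime",
            "input_columns": ["event_time"],
            "time_format": "%Y-%m-%d",
            "datetime_features": ["YEAR", "MONTH", "DAY_OF_WEEK"],
        },
    ]

    # Written locally here for illustration; the exact file schema expected by the
    # component is assumed to be this list, per the examples in the description.
    with open("transform_config.json", "w") as f:
        json.dump(tf_transformations, f, indent=2)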
+ parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test - URI pointing toward a GCS directory with prediction or explanation files + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the - to be used for this evaluation. For prediction results, the files should + batch prediction component during evaluation.' + parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the - be named "prediction.results-*" or "predictions_". For explanation + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to - results, the files should be named "explanation.results-*".' - isOptional: true - parameters: - dataflow_disk_size: - defaultValue: 50.0 - description: 'The disk size (in GB) of the machine + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, - executing the evaluation run. If not set, defaulted to `50`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-4 - description: 'The machine type executing the + see - evaluation run. If not set, defaulted to `n1-standard-4`.' - isOptional: true + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING - dataflow_max_workers_num: - defaultValue: 5.0 - description: 'The max number of workers + split_example_counts: + description: 'JSON string of data split example counts for train, - executing the evaluation run. If not set, defaulted to `25`.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: + validate, and test splits.' + parameterType: STRING + comp-get-bigquery-destination-output-uri: + executorLabel: exec-get-bigquery-destination-output-uri + inputDefinitions: + parameters: + bigquery_source_input_uri: + parameterType: STRING + model_display_name: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING - dataflow_subnetwork: + table_prefix: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' - isOptional: true - parameterType: BOOLEAN - dataflow_workers_num: - defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' 
- isOptional: true - parameterType: NUMBER_INTEGER - encryption_spec_key_name: + outputDefinitions: + parameters: + bigquery_destination_output_uri: + parameterType: STRING + comp-get-bigquery-destination-output-uri-2: + executorLabel: exec-get-bigquery-destination-output-uri-2 + inputDefinitions: + parameters: + bigquery_source_input_uri: + parameterType: STRING + model_display_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' - isOptional: true - parameterType: BOOLEAN - location: - defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' + table_prefix: + defaultValue: '' isOptional: true parameterType: STRING - predictions_format: - defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' - isOptional: true + outputDefinitions: + parameters: + bigquery_destination_output_uri: parameterType: STRING - project: - description: Project to run feature attribution container. + comp-get-bp-bq-output-table: + executorLabel: exec-get-bp-bq-output-table + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + parameters: + bq_output_table_uri: + parameterType: STRING + comp-get-bp-bq-output-table-2: + executorLabel: exec-get-bp-bq-output-table-2 + inputDefinitions: + artifacts: + bp_job: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The batch prediction job artifact. + outputDefinitions: + parameters: + bq_output_table_uri: + parameterType: STRING + comp-get-transform-config-path: + executorLabel: exec-get-transform-config-path + inputDefinitions: + artifacts: + transform_output_dir_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: Transform output dir str, output by FTE. + outputDefinitions: + parameters: + transform_config_path: + parameterType: STRING + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: parameterType: STRING outputDefinitions: artifacts: - feature_attributions: + artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-merge-materialized-splits: + executorLabel: exec-merge-materialized-splits + inputDefinitions: + artifacts: + split_0: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The first materialized split. + split_1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The second materialized split. + outputDefinitions: + artifacts: + splits: artifactType: - schemaTitle: system.Metrics + schemaTitle: system.Artifact schemaVersion: 0.0.1 - parameters: - gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow - - job. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' 
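Not taken from this patch, but as a sketch of how the outputs declared above are typically consumed: inside a KFP pipeline, the feature-transform-engine task's outputs are wired into downstream steps by the names in its `outputDefinitions`. The component YAML path and the downstream step below are hypothetical, and only the required `project`, `location`, and `root_dir` inputs are passed:

    from kfp import components, dsl

    # Hypothetical local path to a component definition with the interface above.
    feature_transform_engine = components.load_component_from_file(
        "feature_transform_engine_component.yaml")

    @dsl.component
    def log_split_counts(split_example_counts: str):
        # Hypothetical downstream step: prints the JSON string of per-split
        # example counts emitted by the transform step.
        print(split_example_counts)

    @dsl.pipeline(name="fte-output-wiring-sketch")
    def wiring_sketch(project: str, location: str, root_dir: str):
        fte_task = feature_transform_engine(
            project=project,
            location=location,
            root_dir=root_dir,
        )
        # Output names match the outputDefinitions shown above.
        log_split_counts(
            split_example_counts=fte_task.outputs["split_example_counts"])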
- parameterType: STRING - comp-feature-transform-engine: - executorLabel: exec-feature-transform-engine + comp-model-batch-explanation: + executorLabel: exec-model-batch-explanation inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true parameters: - autodetect_csv_schema: - defaultValue: false - description: 'If True, infers the column types - - when importing CSVs into BigQuery.' + accelerator_count: + defaultValue: 0.0 isOptional: true - parameterType: BOOLEAN - bigquery_staging_full_dataset_id: + parameterType: NUMBER_INTEGER + accelerator_type: defaultValue: '' - description: 'Dataset in - - "projectId.datasetId" format for storing intermediate-FTE BigQuery - - tables. If the specified dataset does not exist in BigQuery, FTE will - - create the dataset. If no bigquery_staging_full_dataset_id is specified, - - all intermediate tables will be stored in a dataset created under the - - provided project in the input data source''s location during FTE - - execution called - - "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". - - All tables generated by FTE will have a 30 day TTL.' isOptional: true parameterType: STRING - data_source_bigquery_table_path: + bigquery_destination_output_uri: defaultValue: '' - description: 'BigQuery input data - - source to run feature transform on.' isOptional: true parameterType: STRING - data_source_csv_filenames: + bigquery_source_input_uri: defaultValue: '' - description: 'CSV input data source to run - - feature transform on.' isOptional: true parameterType: STRING - dataflow_disk_size_gb: - defaultValue: 40.0 - description: 'The disk size, in gigabytes, to use - - on each Dataflow worker instance. If not set, default to 40.' - isOptional: true - parameterType: NUMBER_INTEGER - dataflow_machine_type: - defaultValue: n1-standard-16 - description: 'The machine type used for dataflow - - jobs. If not set, default to n1-standard-16.' + encryption_spec_key_name: + defaultValue: '' isOptional: true parameterType: STRING - dataflow_max_num_workers: - defaultValue: 25.0 - description: 'The number of workers to run the - - dataflow job. If not set, default to 25.' + explanation_metadata: + defaultValue: {} isOptional: true - parameterType: NUMBER_INTEGER - dataflow_service_account: + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: defaultValue: '' - description: 'Custom service account to run - - Dataflow jobs.' isOptional: true parameterType: STRING - dataflow_subnetwork: + gcs_source_uris: + defaultValue: [] + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. 
More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING - dataflow_use_public_ips: - defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 isOptional: true - parameterType: BOOLEAN - dataset_level_custom_transformation_definitions: - defaultValue: [] - description: "List of dataset-level custom transformation definitions. \ - \ Custom,\nbring-your-own dataset-level transform functions, where users\ - \ can define\nand import their own transform function and use it with\ - \ FTE's built-in\ntransformations. Using custom transformations is an\ - \ experimental feature\nand it is currently not supported during batch\ - \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ - : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ - ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ - \ function\n together with FTE's built-in transformations: .. code-block::\n\ - \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ - \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ - join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ - , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ - \ } ]" + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 isOptional: true - parameterType: LIST - dataset_level_transformations: - defaultValue: [] - description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ - \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ - bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ - , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ - \ supported built-in\n transformations:\n Join: Joins features from\ - \ right_table_uri. For each join key, the\n left table keys will\ - \ be included and the right table keys will\n be dropped.\n \ - \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ - \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ - ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ - \ Arguments:\n right_table_uri: Right table BigQuery\ - \ uri to join\n with input_full_table_id.\n join_keys:\ - \ Features to join on. For each\n nested list, the first\ - \ element is a left table column\n and the second is its\ - \ corresponding right table column.\n TimeAggregate: Creates a new\ - \ feature composed of values of an\n existing feature from a fixed\ - \ time period ago or in the future.\n Ex: A feature for sales by\ - \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ - :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ - time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ - : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ - :\n \"target_col\", \"output_column\": \"output_col\" }\n \ - \ Arguments:\n time_difference: Number of time_difference_units\ - \ to\n look back or into the future on our\n \ - \ time_difference_target_column.\n time_difference_units:\ - \ Units of time_difference to\n look back or into the future\ - \ on our\n time_difference_target_column. 
Must be one of\ - \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ - \ *\n 'YEAR'\n time_series_identifier_columns:\ - \ Names of the\n time series identifier columns.\n \ - \ time_column: Name of the time column.\n time_difference_target_column:\ - \ Column we wish to get\n the value of time_difference time_difference_units\ - \ in\n the past or future.\n output_column: Name\ - \ of our new time aggregate\n feature.\n is_future:\ - \ Whether we wish to look\n forward in time. Defaults to\ - \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ - \ Performs a partition by reduce operation (one of max,\n\ - \ min, avg, or sum) with a fixed historic time period. Ex:\n\ - \ Getting avg sales (the reduce column) for each store\n\ - \ (partition_by_column) over the previous 5 days\n \ - \ (time_column, time_ago_units, and time_ago).\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ - , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ - : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ - \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ - : \"partition_by_reduce_max_output\" }\n Arguments:\n \ - \ reduce_column: Column to apply the reduce operation\n \ - \ on. Reduce operations include the\n following: Max,\ - \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ - \ partition by.\n time_column: Time column for\ - \ the partition by\n operation's window function.\n \ - \ time_ago: Number of time_ago_units to look back on\n \ - \ our target_column, starting from time_column\n (inclusive).\n\ - \ time_ago_units: Units of time_ago to look back on\n \ - \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ - \ output_column: Name of our output feature." + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-2: + executorLabel: exec-model-batch-explanation-2 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 isOptional: true - parameterType: LIST - encryption_spec_key_name: + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: defaultValue: '' - description: Customer-managed encryption key. isOptional: true parameterType: STRING - feature_selection_algorithm: - defaultValue: AMI - description: "The algorithm of feature\nselection. 
One of \"AMI\", \"CMIM\"\ - , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ - \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ - \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ - \ Mutual Information Maximization): Reference paper: Mohamed\n \ - \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ - \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ - \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ - \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ - \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ - Feature\n selection using Joint Mutual Information Maximisation,\u201D\ - \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ - \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ - \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ - \ and Chris Ding. \"Feature selection based on mutual information\n\ - \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ - \n IEEE Transactions on pattern analysis and machine intelligence\n\ - \ 27, no.\n 8: 1226-1238." + bigquery_destination_output_uri: + defaultValue: '' isOptional: true parameterType: STRING - feature_selection_execution_engine: - defaultValue: dataflow - description: Execution engine to run feature selection, value can be dataflow, - bigquery. + bigquery_source_input_uri: + defaultValue: '' isOptional: true parameterType: STRING - forecasting_apply_windowing: - defaultValue: true - description: Whether to apply window strategy. + encryption_spec_key_name: + defaultValue: '' isOptional: true - parameterType: BOOLEAN - forecasting_available_at_forecast_columns: + parameterType: STRING + explanation_metadata: + defaultValue: {} + isOptional: true + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: defaultValue: [] - description: 'Forecasting - - available at forecast columns.' isOptional: true parameterType: LIST - forecasting_context_window: - defaultValue: -1.0 - description: Forecasting context window. + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + machine_type: + defaultValue: '' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 isOptional: true parameterType: NUMBER_INTEGER - forecasting_forecast_horizon: - defaultValue: -1.0 - description: Forecasting horizon. + max_replica_count: + defaultValue: 0.0 isOptional: true parameterType: NUMBER_INTEGER - forecasting_holiday_regions: - defaultValue: [] - description: 'The geographical region based on which the - - holiday effect is applied in modeling by adding holiday categorical - - array feature that include all holidays matching the date. This option - - only allowed when data granularity is day. By default, holiday effect - - modeling is disabled. To turn it on, specify the holiday region using - - this option. 
- - Top level: * ''GLOBAL'' - - Second level: continental regions: * ''NA'': North America - - * ''JAPAC'': Japan and Asia Pacific - - * ''EMEA'': Europe, the Middle East and Africa - - * ''LAC'': Latin America and the Caribbean - - Third level: countries from ISO 3166-1 Country codes. - - Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' - * ''AE'' - - * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' - * ''CN'' * ''CO'' - - * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' - * ''FI'' * ''FR'' - - * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' - * ''IR'' * ''IT'' - - * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' - * ''NO'' * ''NZ'' - - * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' - * ''SA'' * ''SE'' - - * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' - * ''VE'' * ''VN'' - - * ''ZA''' + model_parameters: + defaultValue: {} isOptional: true - parameterType: LIST - forecasting_predefined_window_column: + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + project: + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING + comp-model-batch-explanation-3: + executorLabel: exec-model-batch-explanation-3 + inputDefinitions: + artifacts: + explanation_metadata_artifact: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + accelerator_type: defaultValue: '' - description: Forecasting predefined window column. isOptional: true parameterType: STRING - forecasting_time_column: + bigquery_destination_output_uri: defaultValue: '' - description: Forecasting time column. isOptional: true parameterType: STRING - forecasting_time_series_attribute_columns: - defaultValue: [] - description: 'Forecasting - - time series attribute columns.' + bigquery_source_input_uri: + defaultValue: '' isOptional: true - parameterType: LIST - forecasting_time_series_identifier_column: - description: '[Deprecated] A forecasting time series identifier column. - Raises an - - exception if used - use the "time_series_identifier_column" field - - instead.' + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' isOptional: true parameterType: STRING - forecasting_time_series_identifier_columns: - defaultValue: [] - description: The list of forecasting time series identifier columns. 
+ explanation_metadata: + defaultValue: {} isOptional: true - parameterType: LIST - forecasting_unavailable_at_forecast_columns: + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + isOptional: true + parameterType: STRING + gcs_source_uris: defaultValue: [] - description: 'Forecasting - - unavailable at forecast columns.' isOptional: true parameterType: LIST - forecasting_window_max_count: - defaultValue: -1.0 - description: Forecasting window max count. + generate_explanation: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + instances_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + job_display_name: + parameterType: STRING + labels: + defaultValue: {} isOptional: true - parameterType: NUMBER_INTEGER - forecasting_window_stride_length: - defaultValue: -1.0 - description: Forecasting window stride length. + parameterType: STRUCT + location: + defaultValue: us-central1 isOptional: true - parameterType: NUMBER_INTEGER - group_columns: + parameterType: STRING + machine_type: + defaultValue: '' isOptional: true - parameterType: LIST - group_temporal_total_weight: + parameterType: STRING + manual_batch_tuning_parameters_batch_size: defaultValue: 0.0 isOptional: true - parameterType: NUMBER_DOUBLE - group_total_weight: + parameterType: NUMBER_INTEGER + max_replica_count: defaultValue: 0.0 isOptional: true - parameterType: NUMBER_DOUBLE - legacy_transformations_path: - defaultValue: '' + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl isOptional: true parameterType: STRING - location: - description: Location for the created GCP services. + project: parameterType: STRING - materialized_examples_format: - defaultValue: tfrecords_gzip - description: 'The format to use for the - - materialized examples. Should be either ''tfrecords_gzip'' (default) or - - ''parquet''.' + starting_replica_count: + defaultValue: 0.0 isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + artifacts: + batchpredictionjob: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + bigquery_output_table: + artifactType: + schemaTitle: google.BQTable + schemaVersion: 0.0.1 + gcs_output_directory: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + gcp_resources: parameterType: STRING - max_selected_features: - defaultValue: 1000.0 - description: 'Maximum number of features to + comp-model-batch-predict: + executorLabel: exec-model-batch-predict + inputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same - select. If specified, the transform config will be purged by only using + ancestor Location. Starting this job has no impact on any existing - the selected features that ranked top in the feature ranking, which has + deployments of the Model and their resources. Either this or - the ranking value for all supported features. If the number of input + unmanaged_container_model must be specified.' + isOptional: true + unmanaged_container_model: + artifactType: + schemaTitle: google.UnmanagedContainerModel + schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. 
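Again as an illustration rather than anything defined in this patch: a sketch of launching a batch prediction step with keyword names matching the batch prediction and explanation component inputs in this spec, assuming the `ModelBatchPredictOp` wrapper from `google_cloud_pipeline_components.v1.batch_predict_job` and a pre-existing Vertex model imported via `dsl.importer`. The resource names, URIs, and machine settings are placeholders:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.batch_predict_job import (
        ModelBatchPredictOp,
    )

    # Placeholder Vertex model resource name.
    MODEL_NAME = "projects/example-project/locations/us-central1/models/123"

    @dsl.pipeline(name="batch-predict-sketch")
    def batch_predict_sketch(project: str, location: str = "us-central1"):
        # Hypothetical importer for an already-trained Vertex model resource.
        model_importer = dsl.importer(
            artifact_uri=(
                "https://us-central1-aiplatform.googleapis.com/v1/" + MODEL_NAME),
            artifact_class=artifact_types.VertexModel,
            metadata={"resourceName": MODEL_NAME},
        )
        # Keyword names mirror the component inputs listed in this spec;
        # all values are placeholders.
        ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name="example-batch-predict",
            model=model_importer.output,
            instances_format="bigquery",
            predictions_format="bigquery",
            bigquery_source_input_uri="bq://example-project.example_dataset.input_table",
            bigquery_destination_output_uri="bq://example-project.example_dataset",
            machine_type="n1-standard-4",
            starting_replica_count=1,
            max_replica_count=2,
        )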
- features is smaller than max_selected_features specified, we will still + This should be used for models that are not uploaded to Vertex. Either - run the feature selection process and generate the feature ranking, no + this or model must be specified.' + isOptional: true + parameters: + accelerator_count: + defaultValue: 0.0 + description: 'The number of accelerators to attach - features will be excluded. The value will be set to 1000 by default if + to the `machine_type`. Only used if `machine_type` is set. For more - run_feature_selection is enabled.' + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: NUMBER_INTEGER - model_type: - description: 'Model type, which we wish to engineer features + accelerator_type: + defaultValue: '' + description: 'The type of accelerator(s) that may be - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, - or + attached to the machine as per `accelerator_count`. Only used if - tide. Defaults to the empty value, `None`.' + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: STRING - multimodal_image_columns: - defaultValue: [] - description: 'List of multimodal image + bigquery_destination_output_uri: + defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In - columns. Defaults to an empty list.' - isOptional: true - parameterType: LIST - multimodal_tabular_columns: - defaultValue: [] - description: 'List of multimodal tabular + the given project a new dataset is created with name - columns. Defaults to an empty list' - isOptional: true - parameterType: LIST - multimodal_text_columns: - defaultValue: [] - description: 'List of multimodal text + ``prediction__`` where is made - columns. Defaults to an empty list' - isOptional: true - parameterType: LIST - multimodal_timeseries_columns: - defaultValue: [] - description: 'List of multimodal timeseries + BigQuery-dataset-name compatible (for example, most special characters - columns. Defaults to an empty list' - isOptional: true - parameterType: LIST - predefined_split_key: - defaultValue: '' - description: Predefined split key. - isOptional: true - parameterType: STRING - prediction_type: - defaultValue: '' - description: 'Model prediction type. One of + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - "classification", "regression", "time_series".' - isOptional: true - parameterType: STRING - project: - description: Project to run feature transform engine. - parameterType: STRING - root_dir: - description: The Cloud Storage location to store the output. - parameterType: STRING - run_distill: - defaultValue: false - description: 'Whether the distillation should be applied + "based on ISO-8601" format. In the dataset two tables will be created, - to the training.' - isOptional: true - parameterType: BOOLEAN - run_feature_selection: - defaultValue: false - description: 'Whether the feature selection + ``predictions``, and ``errors``. If the Model has both ``instance`` - should be applied to the dataset.' - isOptional: true - parameterType: BOOLEAN - stats_gen_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform + and ``prediction`` schemata defined then the tables have columns as - statistics generation. 
Can be one of: "dataflow" (by default) or + follows: The ``predictions`` table contains instances for which the - "bigquery". Using "bigquery" as the execution engine is experimental.' + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING - stratified_split_key: + bigquery_source_input_uri: defaultValue: '' - description: Stratified split key. + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' isOptional: true parameterType: STRING - target_column: + encryption_spec_key_name: defaultValue: '' - description: Target column of input data. + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' isOptional: true parameterType: STRING - temporal_total_weight: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_DOUBLE - test_fraction: - defaultValue: -1.0 - description: Fraction of input data for testing. - isOptional: true - parameterType: NUMBER_DOUBLE - tf_auto_transform_features: - defaultValue: {} - description: "Dict mapping auto and/or type-resolutions to\nTF transform\ - \ features. FTE will automatically configure a set of\nbuilt-in transformations\ - \ for each feature based on its data statistics.\nIf users do not want\ - \ auto type resolution, but want the set of\ntransformations for a given\ - \ type to be automatically generated, they\nmay specify pre-resolved transformations\ - \ types. The following type hint\ndict keys are supported: * 'auto' *\ - \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ - \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ - , \"feature3\"], } Note that the target and\n weight column may not\ - \ be included as an auto transformation unless\n users are running\ - \ forecasting." - isOptional: true - parameterType: STRUCT - tf_custom_transformation_definitions: - defaultValue: [] - description: "List of\nTensorFlow-based custom transformation definitions.\ - \ Custom,\nbring-your-own transform functions, where users can define\ - \ and import\ntheir own transform function and use it with FTE's built-in\n\ - transformations.\n Example: .. 
code-block:: python [ { \"transformation\"\ - : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ - :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ - ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ - \ transform function together with FTE's built-in transformations:\ - \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ - ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ - ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ - ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ - :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ - :\n [\"feature_1_multiplied_two\"] } ]" + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' isOptional: true parameterType: LIST - tf_transform_execution_engine: - defaultValue: dataflow - description: 'Execution engine to perform + explanation_metadata: + defaultValue: {} + description: 'Explanation metadata - row-level TF transformations. Can be one of: "dataflow" (by default) or + configuration for this BatchPredictionJob. Can be specified only if - "bigquery". Using "bigquery" as the execution engine is experimental and + `generate_explanation` is set to `True`. This value overrides the - is for allowlisted customers only. In addition, executing on "bigquery" + value of `Model.explanation_metadata`. All fields of - only supports auto transformations (i.e., specified by + `explanation_metadata` are optional in the request. If a field of the - tf_auto_transform_features) and will raise an error when + `explanation_metadata` object is not populated, the corresponding - tf_custom_transformation_definitions or tf_transformations_path is set.' + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' isOptional: true - parameterType: STRING - tf_transformations_path: - defaultValue: '' - description: "Path to TensorFlow-based\ntransformation configuration. Path\ - \ to a JSON file used to specified\nFTE's TF transformation configurations.\ - \ In the following, we provide\nsome sample transform configurations\ - \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ - \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ - \ of multiple transformations on a\nsingle column is also supported. For\ - \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ - , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ - \ FTE's currently supported built-in\ntransformations:\n Datetime:\ - \ Extracts datetime featues from a column containing\n timestamp\ - \ strings.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ - :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the datetime\ - \ transformation on.\n output_columns: Names of output\n\ - \ columns, one for each datetime_features element.\n \ - \ time_format: Datetime format string. Time format is\n \ - \ a combination of Date + Time Delimiter (optional) + Time\n\ - \ (optional) directives. Valid date directives are as\n\ - \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ - \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ - \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ - \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ - \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ - \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ - \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ - \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ - \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ - \ are as follows * 'T' * ' ' Valid time directives are\ - \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ - \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ - \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ - \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ - \ List of datetime\n features to be extract. Each entry\ - \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ - \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ - \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ - \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ - \ Log: Performs the natural log on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Log\",\n \ - \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the log transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n ZScale:\ - \ Performs Z-scale normalization on a numeric column.\n Example:\ - \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ - , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform the z-scale transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n Vocabulary:\ - \ Converts strings to integers, where each unique string\n gets\ - \ a unique integer representation.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ - : [\"feature_1\"] }\n Arguments:\n input_columns:\ - \ A list with a single column to\n perform the vocabulary\ - \ transformation on.\n output_columns: A list with a single\n\ - \ output column name, corresponding to the output of our\n\ - \ transformation.\n top_k: Number of the most\ - \ frequent words\n in the vocabulary to use for generating\ - \ dictionary\n lookup indices. If not specified, all words\ - \ in the\n vocabulary will be used. Defaults to None.\n\ - \ frequency_threshold: Limit the vocabulary\n \ - \ only to words whose number of occurrences in the input\n \ - \ exceeds frequency_threshold. If not specified, all words\n \ - \ in the vocabulary will be included. If both top_k and\n\ - \ frequency_threshold are specified, a word must satisfy\n\ - \ both conditions to be included. 
Defaults to None.\n \ - \ Categorical: Transforms categorical columns to integer columns.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ - : 10 }\n Arguments:\n input_columns: A list with\ - \ a single column to\n perform the categorical transformation\ - \ on.\n output_columns: A list with a single\n \ - \ output column name, corresponding to the output of our\n \ - \ transformation.\n top_k: Number of the most frequent\ - \ words\n in the vocabulary to use for generating dictionary\n\ - \ lookup indices. If not specified, all words in the\n\ - \ vocabulary will be used.\n frequency_threshold:\ - \ Limit the vocabulary\n only to words whose number of\ - \ occurrences in the input\n exceeds frequency_threshold.\ - \ If not specified, all words\n in the vocabulary will\ - \ be included. If both top_k and\n frequency_threshold\ - \ are specified, a word must satisfy\n both conditions\ - \ to be included.\n Reduce: Given a column where each entry is a\ - \ numeric array,\n reduces arrays according to our reduce_mode.\n\ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ - :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ - \ Arguments:\n input_columns: A list with a single\ - \ column to\n perform the reduce transformation on.\n \ - \ output_columns: A list with a single\n output\ - \ column name, corresponding to the output of our\n transformation.\n\ - \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ - \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ - \ of last k elements when\n 'LAST_K' reduce mode is used.\ - \ Defaults to 1.\n SplitString: Given a column of strings, splits\ - \ strings into token\n arrays.\n Example: .. code-block::\ - \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ - : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the split string transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ separator: Separator to split input string\n into tokens.\ - \ Defaults to ' '.\n missing_token: Missing token to use\ - \ when\n no string is included. Defaults to ' _MISSING_\ - \ '.\n NGram: Given a column of strings, splits strings into token\ - \ arrays\n where each token is an integer.\n Example:\ - \ .. code-block:: python { \"transformation\": \"NGram\",\n \ - \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ - \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ - \ number and <= max_ngram_size. Defaults to\n 1.\n \ - \ max_ngram_size: Maximum n-gram size. Must\n \ - \ be a positive number and >= min_ngram_size. Defaults to\n \ - \ 2.\n top_k: Number of the most frequent words\n \ - \ in the vocabulary to use for generating dictionary\n \ - \ lookup indices. If not specified, all words in the\n \ - \ vocabulary will be used. Defaults to None.\n \ - \ frequency_threshold: Limit the\n dictionary's vocabulary\ - \ only to words whose number of\n occurrences in the input\ - \ exceeds frequency_threshold. 
If\n not specified, all\ - \ words in the vocabulary will be\n included. If both top_k\ - \ and frequency_threshold are\n specified, a word must\ - \ satisfy both conditions to be\n included. Defaults to\ - \ None.\n separator: Separator to split input string\n \ - \ into tokens. Defaults to ' '.\n missing_token:\ - \ Missing token to use when\n no string is included. Defaults\ - \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ - \ such that elements <\n min_value are assigned min_value, and\ - \ elements > max_value are\n assigned max_value.\n Example:\ - \ .. code-block:: python { \"transformation\": \"Clip\",\n \ - \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ - col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ - \ input_columns: A list with a single column to\n \ - \ perform the n-gram transformation on.\n output_columns:\ - \ A list with a single\n output column name, corresponding\ - \ to the output of our\n transformation.\n \ - \ min_value: Number where all values below\n min_value\ - \ are set to min_value. If no min_value is\n provided,\ - \ min clipping will not occur. Defaults to None.\n max_value:\ - \ Number where all values above\n max_value are set to\ - \ max_value If no max_value is\n provided, max clipping\ - \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ - \ multi-hot encoding on a categorical\n array column.\n \ - \ Example: .. code-block:: python { \"transformation\":\n \ - \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ - \ of classes is determened by the largest number included in\n\ - \ the input if it is numeric or the total number of unique\n\ - \ values of the input if it is type str. If the input is has\n\ - \ type str and an element contians separator tokens, the input\n\ - \ will be split at separator indices, and the each element\ - \ of\n the split list will be considered a seperate class.\ - \ For\n example,\n Input: .. code-block:: python\ - \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ - \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ - ], # Example 3 ]\n Output (with default separator=\"\ - \ \"): .. code-block:: python [\n [1, 1], # Example\ - \ 0 [1, 1], # Example 1\n [1, 0], # Example\ - \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ - \ input_columns: A list with a single column to\n perform\ - \ the multi-hot-encoding on.\n output_columns: A list with\ - \ a single\n output column name, corresponding to the output\ - \ of our\n transformation.\n top_k: Number\ - \ of the most frequent words\n in the vocabulary to use\ - \ for generating dictionary\n lookup indices. If not specified,\ - \ all words in the\n vocabulary will be used. Defaults\ - \ to None.\n frequency_threshold: Limit the\n \ - \ dictionary's vocabulary only to words whose number of\n \ - \ occurrences in the input exceeds frequency_threshold. If\n \ - \ not specified, all words in the vocabulary will be\n \ - \ included. If both top_k and frequency_threshold are\n \ - \ specified, a word must satisfy both conditions to be\n\ - \ included. Defaults to None.\n separator:\ - \ Separator to split input string\n into tokens. Defaults\ - \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ - \ column.\n Example: .. 
code-block:: python { \"transformation\"\ - :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ - :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ - \ input_columns: A list with a single column to\n \ - \ perform max-abs-scale on.\n output_columns: A list\ - \ with a single\n output column name, corresponding to\ - \ the output of our\n transformation.\n Custom: Transformations\ - \ defined in\n tf_custom_transformation_definitions are included\ - \ here in the\n TensorFlow-based transformation configuration.\ - \ For example,\n given the following tf_custom_transformation_definitions:\ - \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ - ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ - \ \"function_name\": \"plus_one_transform\" } ] We can include\ - \ the\n following transformation: .. code-block:: python {\n\ - \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ - \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ - \ that\n input_columns must still be included in our arguments\ - \ and\n output_columns is optional. All other arguments are those\n\ - \ defined in custom_transform_fn.py, which includes `\"x\"` in\ - \ this\n case. See tf_custom_transformation_definitions above.\n\ - \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ - \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ - \ string for legacy style transformations. Note that\n legacy_transformations_path\ - \ and tf_auto_transform_features\n cannot both be specified." + parameterType: STRUCT + explanation_parameters: + defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' + isOptional: true + parameterType: STRUCT + gcs_destination_output_uri_prefix: + defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. 
For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING - timestamp_split_key: + gcs_source_uris: + defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + isOptional: true + parameterType: LIST + generate_explanation: + defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' + isOptional: true + parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: defaultValue: '' - description: Timestamp split key. + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" isOptional: true parameterType: STRING - training_fraction: - defaultValue: -1.0 - description: Fraction of input data for training. + instances_format: + defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." isOptional: true - parameterType: NUMBER_DOUBLE - validation_fraction: - defaultValue: -1.0 - description: Fraction of input data for validation. + parameterType: STRING + job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." isOptional: true - parameterType: NUMBER_DOUBLE - weight_column: + parameterType: STRING + labels: + defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' + isOptional: true + parameterType: STRUCT + location: + defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' + isOptional: true + parameterType: STRING + machine_type: defaultValue: '' - description: Weight column of input data. + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. 
For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' + isOptional: true + parameterType: STRING + manual_batch_tuning_parameters_batch_size: + defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' + isOptional: true + parameterType: NUMBER_INTEGER + max_replica_count: + defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' + isOptional: true + parameterType: NUMBER_INTEGER + model_parameters: + defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters + isOptional: true + parameterType: STRUCT + predictions_format: + defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." isOptional: true parameterType: STRING + project: + description: Project to create the BatchPredictionJob. + parameterType: STRING + starting_replica_count: + defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' + isOptional: true + parameterType: NUMBER_INTEGER outputDefinitions: artifacts: - dataset_stats: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The stats of the dataset. - feature_ranking: + batchpredictionjob: artifactType: - schemaTitle: system.Artifact + schemaTitle: google.VertexBatchPredictionJob schemaVersion: 0.0.1 - description: 'The ranking of features, all features supported in the + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table - dataset will be included. For "AMI" algorithm, array features won''t be + instead.**] Artifact - available in the ranking as arrays are not supported yet.' - instance_schema: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - materialized_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - description: The materialized dataset. - training_schema: + representation of the created batch prediction job.' + bigquery_output_table: artifactType: - schemaTitle: system.Artifact + schemaTitle: google.BQTable schemaVersion: 0.0.1 - transform_output: + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' + gcs_output_directory: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: The transform output artifact. 
- parameters: - bigquery_downsampled_test_split_uri: - description: 'BigQuery URI for the downsampled test - - split to pass to the batch prediction component during batch explain.' - parameterType: STRING - bigquery_test_split_uri: - description: 'BigQuery URI for the test split to pass to the - - batch prediction component during evaluation.' - parameterType: STRING - bigquery_train_split_uri: - description: 'BigQuery URI for the train split to pass to the + description: 'Artifact tracking the batch prediction job output. This is + only - batch prediction component during distillation.' - parameterType: STRING - bigquery_validation_split_uri: - description: 'BigQuery URI for the validation split to + available if - pass to the batch prediction component during distillation.' - parameterType: STRING + gcs_destination_output_uri_prefix is specified.' + parameters: gcp_resources: - description: 'GCP resources created by this component. For more details, + description: 'Serialized gcp_resources proto tracking the batch prediction + job. - see + For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING - split_example_counts: - description: 'JSON string of data split example counts for train, - - validate, and test splits.' - parameterType: STRING - comp-importer: - executorLabel: exec-importer - inputDefinitions: - parameters: - uri: - parameterType: STRING - outputDefinitions: - artifacts: - artifact: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-merge-materialized-splits: - executorLabel: exec-merge-materialized-splits - inputDefinitions: - artifacts: - split_0: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The first materialized split. - split_1: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - description: The second materialized split. - outputDefinitions: - artifacts: - splits: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-model-batch-explanation: - executorLabel: exec-model-batch-explanation + comp-model-batch-predict-2: + executorLabel: exec-model-batch-predict-2 inputDefinitions: artifacts: - explanation_metadata_artifact: + model: artifactType: - schemaTitle: system.Artifact + schemaTitle: google.VertexModel schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' isOptional: true parameters: accelerator_count: defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: NUMBER_INTEGER accelerator_type: defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. 
For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: STRING bigquery_destination_output_uri: defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING bigquery_source_input_uri: defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' isOptional: true parameterType: STRING encryption_spec_key_name: defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' isOptional: true parameterType: STRING + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' + isOptional: true + parameterType: LIST explanation_metadata: defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' 
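The batch-prediction parameter descriptions added above map one-to-one onto the keyword arguments of the component as it is called from the KFP SDK. Below is a minimal sketch of such an invocation; the `google_cloud_pipeline_components.v1.batch_predict_job` import path, the importer-based `google.VertexModel` artifact, and every project/bucket/model value are illustrative assumptions, not taken from this test data.

.. code-block:: python

    # Sketch only: import paths and resource names are assumed placeholders.
    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.batch_predict_job import (
        ModelBatchPredictOp,
    )


    @dsl.pipeline(name='batch-predict-sketch')
    def batch_predict_pipeline(
        project: str = 'my-project',      # placeholder
        location: str = 'us-central1',
        model_name: str = 'projects/my-project/locations/us-central1/models/123',
    ):
        # Turn an existing Vertex Model resource into a google.VertexModel
        # artifact that satisfies the component's `model` input.
        importer = dsl.importer(
            artifact_uri=model_name,
            artifact_class=artifact_types.VertexModel,
            metadata={'resourceName': model_name},
        )

        ModelBatchPredictOp(
            project=project,
            location=location,
            job_display_name='batch-predict-sketch',
            model=importer.output,
            gcs_source_uris=['gs://my-bucket/instances/*.jsonl'],
            instances_format='jsonl',
            gcs_destination_output_uri_prefix='gs://my-bucket/predictions',
            predictions_format='jsonl',
            machine_type='n1-standard-4',
            starting_replica_count=1,
            max_replica_count=2,
        )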
isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' isOptional: true parameterType: STRUCT gcs_destination_output_uri_prefix: defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." isOptional: true parameterType: LIST generate_explanation: defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' isOptional: true parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. 
+ + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING instances_format: defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." isOptional: true parameterType: STRING job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. 
Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true parameterType: STRING labels: defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. + + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' isOptional: true parameterType: STRUCT location: defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' isOptional: true parameterType: STRING machine_type: defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: STRING manual_batch_tuning_parameters_batch_size: defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters isOptional: true parameterType: STRUCT predictions_format: defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." isOptional: true parameterType: STRING project: + description: Project to create the BatchPredictionJob. parameterType: STRING starting_replica_count: defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' 
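The BigQuery- and explanation-related parameters documented in this block follow the same calling pattern: the `bigquery_*` URIs replace the GCS URIs, and `generate_explanation` only has an effect when explanation metadata/parameters are available on the Model or supplied in the call. A hedged variant of the call from the previous sketch (same assumed imports and importer task; all URI, label, and explanation values are placeholders) could look like:

.. code-block:: python

    # Sketch only: reuses project/location/importer from the previous example;
    # every concrete value below is a placeholder, not part of this test data.
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name='batch-predict-bq-sketch',
        model=importer.output,
        bigquery_source_input_uri='bq://my-project.my_dataset.instances',
        instances_format='bigquery',
        bigquery_destination_output_uri='bq://my-project.my_output_dataset',
        predictions_format='bigquery',
        generate_explanation=True,
        # Optional override of Model.explanation_parameters; the attribution
        # config shown here is only an illustrative value.
        explanation_parameters={'sampledShapleyAttribution': {'pathCount': 10}},
        labels={'team': 'ml-platform', 'env': 'test'},
        machine_type='n1-standard-4',
        manual_batch_tuning_parameters_batch_size=16,
        max_replica_count=4,
    )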
isOptional: true parameterType: NUMBER_INTEGER outputDefinitions: @@ -4260,110 +7117,460 @@ components: artifactType: schemaTitle: google.VertexBatchPredictionJob schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' bigquery_output_table: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' gcs_output_directory: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' parameters: gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING - comp-model-batch-explanation-2: - executorLabel: exec-model-batch-explanation-2 + comp-model-batch-predict-3: + executorLabel: exec-model-batch-predict-3 inputDefinitions: artifacts: - explanation_metadata_artifact: + model: artifactType: - schemaTitle: system.Artifact + schemaTitle: google.VertexModel schemaVersion: 0.0.1 + description: 'The Model used to get predictions via this job. Must share + the same + + ancestor Location. Starting this job has no impact on any existing + + deployments of the Model and their resources. Either this or + + unmanaged_container_model must be specified.' isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 + description: 'The unmanaged container model used to get predictions via + this job. + + This should be used for models that are not uploaded to Vertex. Either + + this or model must be specified.' isOptional: true parameters: accelerator_count: defaultValue: 0.0 + description: 'The number of accelerators to attach + + to the `machine_type`. Only used if `machine_type` is set. For more + + details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: NUMBER_INTEGER accelerator_type: defaultValue: '' + description: 'The type of accelerator(s) that may be + + attached to the machine as per `accelerator_count`. Only used if + + `machine_type` is set. For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: STRING bigquery_destination_output_uri: defaultValue: '' + description: 'The BigQuery project location where the output is to be written + to. In + + the given project a new dataset is created with name + + ``prediction__`` where is made + + BigQuery-dataset-name compatible (for example, most special characters + + become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ + + "based on ISO-8601" format. In the dataset two tables will be created, + + ``predictions``, and ``errors``. If the Model has both ``instance`` + + and ``prediction`` schemata defined then the tables have columns as + + follows: The ``predictions`` table contains instances for which the + + prediction succeeded, it has columns as per a concatenation of the + + Model''s instance and prediction schemata. 
The ``errors`` table + + contains rows for which the prediction has failed, it has instance + + columns, as per the instance schema, followed by a single "errors" + + column, which as values has ```google.rpc.Status`` `__ + + represented as a STRUCT, and containing only ``code`` and + + ``message``. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING bigquery_source_input_uri: defaultValue: '' + description: 'BigQuery URI to a table, up to 2000 characters long. For example: + + `projectId.bqDatasetId.bqTableId` For more details about this input + + config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: 'Customer-managed encryption + + key options for a BatchPredictionJob. If this is set, then all + + resources created by the BatchPredictionJob will be encrypted with the + + provided encryption key. Has the form: + + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + + The key needs to be in the same region as where the compute resource + + is created.' isOptional: true parameterType: STRING - encryption_spec_key_name: - defaultValue: '' + excluded_fields: + defaultValue: [] + description: 'Fields that will be excluded in the prediction instance that + is + + sent to the Model. + + Excluded will be attached to the batch prediction output if + + [key_field][] is not specified. + + When excluded_fields is populated, [included_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord. + + may be specified via the Model''s `parameters_schema_uri`.' isOptional: true - parameterType: STRING + parameterType: LIST explanation_metadata: defaultValue: {} + description: 'Explanation metadata + + configuration for this BatchPredictionJob. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_metadata`. All fields of + + `explanation_metadata` are optional in the request. If a field of the + + `explanation_metadata` object is not populated, the corresponding + + field of the `Model.explanation_metadata` object is inherited. For + + more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} + description: 'Parameters to configure + + explaining for Model''s predictions. Can be specified only if + + `generate_explanation` is set to `True`. This value overrides the + + value of `Model.explanation_parameters`. All fields of + + `explanation_parameters` are optional in the request. If a field of + + the `explanation_parameters` object is not populated, the + + corresponding field of the `Model.explanation_parameters` object is + + inherited. For more details, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' isOptional: true parameterType: STRUCT gcs_destination_output_uri_prefix: defaultValue: '' + description: 'The Google Cloud + + Storage location of the directory where the output is to be written + + to. In the given directory a new directory is created. 
Its name is + + ``prediction--``, where timestamp + + is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files + + ``predictions_0001.``, ``predictions_0002.``, + + ..., ``predictions_N.`` are created where ```` + + depends on chosen ``predictions_format``, and N may equal 0001 and + + depends on the total number of successfully predicted instances. If + + the Model has both ``instance`` and ``prediction`` schemata defined + + then each such file contains predictions as per the + + ``predictions_format``. If prediction for any instance failed + + (partially or completely), then an additional + + ``errors_0001.``, ``errors_0002.``,..., + + ``errors_N.`` files are created (N depends on total number + + of failed predictions). These files contain the failed instances, as + + per their schema, followed by an additional ``error`` field which as + + value has ``google.rpc.Status`` containing only ``code`` and + + ``message`` fields. For more details about this output config, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] + description: "Google Cloud Storage URI(-s) to your instances to run batch\ + \ prediction\non. They must match `instances_format`. May contain wildcards.\ + \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ + For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." isOptional: true parameterType: LIST generate_explanation: defaultValue: false + description: 'Generate explanation along with + + the batch prediction results. This will cause the batch prediction + + output to include explanations based on the `prediction_format`: - + + `bigquery`: output includes a column named `explanation`. The value is + + a struct that conforms to the [aiplatform.gapic.Explanation] object. - + + `jsonl`: The JSON objects on each line include an additional entry + + keyed `explanation`. The value of the entry is a JSON object that + + conforms to the [aiplatform.gapic.Explanation] object. - `csv`: + + Generating explanations for CSV format is not supported. If this + + field is set to true, either the Model.explanation_spec or + + explanation_metadata and explanation_parameters must be populated.' isOptional: true parameterType: BOOLEAN + included_fields: + defaultValue: [] + description: 'Fields that will be included in the prediction instance that + is + + sent to the Model. + + If [instance_type][] is `array`, the order of field names in + + included_fields also determines the order of the values in the array. + + When included_fields is populated, [excluded_fields][] must be empty. + + The input must be JSONL with objects at each line, CSV, BigQuery + + or TfRecord.' + isOptional: true + parameterType: LIST + instance_type: + defaultValue: '' + description: "The format of the instance that the Model accepts. 
Vertex\ + \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ + to the specified format.\nSupported values are:\n** `object`: Each input\ + \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ + \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ + \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ + ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ + \ each row is converted to an array. The order\n of columns is determined\ + \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ + \ [included_fields][] must be populated for specifying field orders.\n\ + * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ + \ must be populated for specifying field orders.\n* Does not apply to\ + \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ + \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ + \ and `csv`, the behavior is the same as `array`. The\n order of columns\ + \ is the same as defined in the file or table, unless\n [included_fields][]\ + \ is populated.\n * For `jsonl`, the prediction instance format is determined\ + \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ + \ each record will be converted to\n an object in the format of `{\"\ + b64\": }`, where `` is\n the Base64-encoded string of\ + \ the content of the record.\n * For `file-list`, each file in the list\ + \ will be converted to an\n object in the format of `{\"b64\": }`,\ + \ where `` is\n the Base64-encoded string of the content of the\ + \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ + \ Base64 is not for this field. --)" + isOptional: true + parameterType: STRING instances_format: defaultValue: jsonl + description: "The format in which instances are\ngiven, must be one of the\ + \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ + . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." isOptional: true parameterType: STRING job_display_name: + description: The user-defined name of this BatchPredictionJob. + parameterType: STRING + key_field: + defaultValue: '' + description: "The name of the field that is considered as a key.\nThe values\ + \ identified by the key field is not included in the\ntransformed instances\ + \ that is sent to the Model. This is similar to\nspecifying this name\ + \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ + \ output will not include the instances. Instead the\noutput will only\ + \ include the value of the key field, in a field named\n`key` in the output:\n\ + \ * For `jsonl` output format, the output will have a `key` field\n \ + \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ + \ the output will have have a `key`\n column instead of the instance\ + \ feature columns.\nThe input must be JSONL with objects at each line,\ + \ CSV, BigQuery\nor TfRecord." + isOptional: true parameterType: STRING labels: defaultValue: {} + description: 'The labels with user-defined metadata to + + organize your BatchPredictionJobs. Label keys and values can be no + + longer than 64 characters (Unicode codepoints), can only contain + + lowercase letters, numeric characters, underscores and dashes. 
+ + International characters are allowed. See https://goo.gl/xmQnxf for + + more information and examples of labels.' isOptional: true parameterType: STRUCT location: defaultValue: us-central1 + description: 'Location for creating the BatchPredictionJob. + + If not set, default to us-central1.' isOptional: true parameterType: STRING machine_type: defaultValue: '' + description: 'The type of machine for running batch + + prediction on dedicated resources. If the Model supports + + DEDICATED_RESOURCES this config may be provided (and the job will use + + these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, + + this config must be provided. For more details about the + + BatchDedicatedResources, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. + + For more details about the machine spec, see + + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' isOptional: true parameterType: STRING manual_batch_tuning_parameters_batch_size: defaultValue: 0.0 + description: 'The number of + + the records (e.g. instances) of the operation given in each batch to a + + machine replica. Machine type, and size of a single record should be + + considered when setting this parameter, higher value speeds up the + + batch operation''s execution, but too high value will result in a whole + + batch not fitting in a machine''s memory, and the whole operation will + + fail. The default value is 4.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: defaultValue: 0.0 + description: 'The maximum number of machine replicas the batch operation + may be scaled + + to. Only used if `machine_type` is set. Default is 10.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: defaultValue: {} + description: The parameters that govern the predictions. The schema of the + parameters isOptional: true parameterType: STRUCT predictions_format: defaultValue: jsonl + description: "The format in which Vertex AI gives the predictions. Must\ + \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ + \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." isOptional: true parameterType: STRING project: + description: Project to create the BatchPredictionJob. parameterType: STRING starting_replica_count: defaultValue: 0.0 + description: 'The number of machine replicas + + used at the start of the batch operation. If not set, Vertex AI + + decides starting number, not greater than `max_replica_count`. Only + + used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER outputDefinitions: @@ -4372,19 +7579,42 @@ components: artifactType: schemaTitle: google.VertexBatchPredictionJob schemaVersion: 0.0.1 + description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table + + instead.**] Artifact + + representation of the created batch prediction job.' bigquery_output_table: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + bigquery_output_table is specified.' gcs_output_directory: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 + description: 'Artifact tracking the batch prediction job output. This is + only + + available if + + gcs_destination_output_uri_prefix is specified.' 
parameters: gcp_resources: + description: 'Serialized gcp_resources proto tracking the batch prediction + job. + + For more details, see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING - comp-model-batch-predict: - executorLabel: exec-model-batch-predict + comp-model-batch-predict-4: + executorLabel: exec-model-batch-predict-4 inputDefinitions: artifacts: model: @@ -4441,7 +7671,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4449,25 +7679,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4493,7 +7723,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -4570,37 +7800,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4845,8 +8075,8 @@ components: https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' parameterType: STRING - comp-model-batch-predict-2: - executorLabel: exec-model-batch-predict-2 + comp-model-batch-predict-5: + executorLabel: exec-model-batch-predict-5 inputDefinitions: artifacts: model: @@ -4903,7 +8133,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4911,25 +8141,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4955,7 +8185,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -5032,37 +8262,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5479,6 +8709,92 @@ components: parameters: gcp_resources: parameterType: STRING + comp-model-evaluation-3: + executorLabel: exec-model-evaluation-3 + inputDefinitions: + artifacts: + batch_prediction_job: + artifactType: + schemaTitle: google.VertexBatchPredictionJob + schemaVersion: 0.0.1 + parameters: + dataflow_disk_size: + defaultValue: 50.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-4 + isOptional: true + parameterType: STRING + dataflow_max_workers_num: + defaultValue: 100.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataflow_workers_num: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + example_weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING + ground_truth_column: + parameterType: STRING + ground_truth_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + location: + defaultValue: us-central1 + isOptional: true + parameterType: STRING + prediction_id_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_label_column: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_score_column: + defaultValue: '' + isOptional: true + parameterType: STRING + predictions_format: + defaultValue: jsonl + isOptional: true + parameterType: STRING + problem_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + outputDefinitions: + artifacts: + evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + parameters: + gcp_resources: + parameterType: STRING comp-model-evaluation-import: executorLabel: exec-model-evaluation-import inputDefinitions: @@ -5674,23 +8990,185 @@ components: defaultValue: '' description: The display name for the uploaded model evaluation resource. isOptional: true - parameterType: STRING - problem_type: - description: 'The problem type of the metrics being imported to the - - VertexModel. 
`classification`, `regression`, and `forecasting` are the - - currently supported problem types. Must be provided when `metrics` is - - provided.' + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + gcp_resources: + parameterType: STRING + comp-model-evaluation-import-3: + executorLabel: exec-model-evaluation-import-3 + inputDefinitions: + artifacts: + classification_metrics: + artifactType: + schemaTitle: google.ClassificationMetrics + schemaVersion: 0.0.1 + description: 'Path of classification metrics generated from the + + classification evaluation component.' + isOptional: true + explanation: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'Path for model explanation metrics generated from an evaluation + + component.' + isOptional: true + feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The feature attributions metrics artifact generated + + from the feature attribution component.' + isOptional: true + forecasting_metrics: + artifactType: + schemaTitle: google.ForecastingMetrics + schemaVersion: 0.0.1 + description: 'Path of forecasting metrics generated from the + + forecasting evaluation component.' + isOptional: true + metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: Path of metrics generated from an evaluation component. + isOptional: true + model: + artifactType: + schemaTitle: google.VertexModel + schemaVersion: 0.0.1 + description: 'Vertex model resource that will be the parent resource of + the + + uploaded evaluation.' + question_answering_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + regression_metrics: + artifactType: + schemaTitle: google.RegressionMetrics + schemaVersion: 0.0.1 + description: 'Path of regression metrics generated from the regression + + evaluation component.' + isOptional: true + summarization_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + text_generation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + isOptional: true + parameters: + dataset_path: + defaultValue: '' + isOptional: true + parameterType: STRING + dataset_paths: + defaultValue: [] + isOptional: true + parameterType: LIST + dataset_type: + defaultValue: '' + isOptional: true + parameterType: STRING + display_name: + defaultValue: '' + description: The display name for the uploaded model evaluation resource. + isOptional: true + parameterType: STRING + problem_type: + description: 'The problem type of the metrics being imported to the + + VertexModel. `classification`, `regression`, and `forecasting` are the + + currently supported problem types. Must be provided when `metrics` is + + provided.' 
+ isOptional: true
+ parameterType: STRING
+ outputDefinitions:
+ parameters:
+ gcp_resources:
+ parameterType: STRING
+ comp-model-upload:
+ executorLabel: exec-model-upload
+ inputDefinitions:
+ artifacts:
+ explanation_metadata_artifact:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ isOptional: true
+ parent_model:
+ artifactType:
+ schemaTitle: google.VertexModel
+ schemaVersion: 0.0.1
+ isOptional: true
+ unmanaged_container_model:
+ artifactType:
+ schemaTitle: google.UnmanagedContainerModel
+ schemaVersion: 0.0.1
+ isOptional: true
+ parameters:
+ description:
+ defaultValue: ''
+ isOptional: true
+ parameterType: STRING
+ display_name:
+ parameterType: STRING
+ encryption_spec_key_name:
+ defaultValue: ''
+ isOptional: true
+ parameterType: STRING
+ explanation_metadata:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ explanation_parameters:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ labels:
+ defaultValue: {}
+ isOptional: true
+ parameterType: STRUCT
+ location:
+ defaultValue: us-central1
 isOptional: true
 parameterType: STRING
+ project:
+ parameterType: STRING
 outputDefinitions:
+ artifacts:
+ model:
+ artifactType:
+ schemaTitle: google.VertexModel
+ schemaVersion: 0.0.1
 parameters:
 gcp_resources:
 parameterType: STRING
- comp-model-upload:
- executorLabel: exec-model-upload
+ comp-model-upload-2:
+ executorLabel: exec-model-upload-2
 inputDefinitions:
 artifacts:
 explanation_metadata_artifact:
@@ -5746,8 +9224,8 @@ components:
 parameters:
 gcp_resources:
 parameterType: STRING
- comp-model-upload-2:
- executorLabel: exec-model-upload-2
+ comp-model-upload-3:
+ executorLabel: exec-model-upload-3
 inputDefinitions:
 artifacts:
 explanation_metadata_artifact:
@@ -5864,6 +9342,34 @@ components:
 schemaTitle: system.Artifact
 schemaVersion: 0.0.1
 description: Path pattern to materialized train split.
+ comp-split-materialized-data-2:
+ executorLabel: exec-split-materialized-data-2
+ inputDefinitions:
+ artifacts:
+ materialized_data:
+ artifactType:
+ schemaTitle: system.Dataset
+ schemaVersion: 0.0.1
+ description: 'Materialized dataset output by the Feature
+
+ Transform Engine.'
+ outputDefinitions:
+ artifacts:
+ materialized_eval_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized eval split.
+ materialized_test_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized test split.
+ materialized_train_split:
+ artifactType:
+ schemaTitle: system.Artifact
+ schemaVersion: 0.0.1
+ description: Path pattern to materialized train split.
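For context, component definitions such as comp-model-upload and comp-split-materialized-data-2 above are entries in a compiled KFP pipeline package, and a package of this form is normally submitted to Vertex AI Pipelines with the google-cloud-aiplatform SDK rather than executed directly. The sketch below is illustrative only: the file name, project, region, bucket, and the empty parameter map are hypothetical placeholders, and the real pipeline defines many more input parameters than are shown here.

    # Minimal sketch, assuming the compiled spec is saved locally as
    # "automl_tabular_pipeline.yaml" (hypothetical name) and that real
    # project/region/bucket values replace the placeholders below.
    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")  # placeholders

    job = aiplatform.PipelineJob(
        display_name="automl-tabular-run",             # hypothetical run name
        template_path="automl_tabular_pipeline.yaml",  # the compiled spec file
        pipeline_root="gs://my-bucket/pipeline_root",  # hypothetical GCS root
        parameter_values={},                           # pipeline parameters omitted here
    )
    job.submit()  # job.run() would instead block until the run finishes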
comp-string-not-empty: executorLabel: exec-string-not-empty inputDefinitions: @@ -6365,9 +9871,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6408,9 +9914,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6451,7 +9957,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -6463,7 +9969,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", 
"{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -6492,7 +9998,48 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", + \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", + "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", + \"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", + "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", + "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", + "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", + "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", + "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", + "\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", + "\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-ensemble-3: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", 
"{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -6504,7 +10051,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -6533,7 +10080,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6548,7 +10095,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -6557,7 +10104,16 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 52.0 + exec-automl-tabular-infra-validator-3: + container: + args: + - --executor_input + - '{{$}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -6577,9 +10133,56 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", + "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", + "\", 
\"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", + "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", + "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", + "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", + \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", + "\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", + "\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", + "\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", + "\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", + "\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", + \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", + \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", + "{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", + "{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", + "{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", + "{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", + "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", + "{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", + "{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", + \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.custom_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 + exec-automl-tabular-stage-1-tuner-2: + container: + args: + - --type + - CustomJob + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --payload + - '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", + \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": + {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -6888,6 +10491,50 @@ deploymentSpec: \ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ \ reduce_search_space_mode,\n )\n\n" 
image: python:3.7 + exec-distillation-stage-feature-transform-engine: + container: + args: + - distillation_stage_feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--transform_config_path=", "{{$.inputs.parameters[''transform_config_path'']}}"]}' + - '{"Concat": ["--bigquery_train_full_table_uri=", "{{$.inputs.parameters[''bigquery_train_full_table_uri'']}}"]}' + - '{"Concat": ["--bigquery_validate_full_table_uri=", "{{$.inputs.parameters[''bigquery_validate_full_table_uri'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + resources: + cpuLimit: 8.0 + 
memoryLimit: 30.0 exec-feature-attribution: container: args: @@ -6990,6 +10637,57 @@ deploymentSpec: - python3 - /main.py image: gcr.io/ml-pipeline/model-evaluation:v0.9 + exec-feature-attribution-3: + container: + args: + - --task + - explanation + - --setup_file + - /setup.py + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --root_dir + - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", + "{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' + - '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", + {"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", + ".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' + - --dataflow_job_prefix + - evaluation-feautre-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --force_direct_runner + - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --gcs_output_path + - '{{$.outputs.artifacts[''feature_attributions''].path}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.9 exec-feature-transform-engine: container: args: @@ -7074,8 +10772,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -7091,10 +10789,201 @@ deploymentSpec: ["--temporal_total_weight=", 
"{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 + exec-get-bigquery-destination-output-uri: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_bigquery_destination_output_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_bigquery_destination_output_uri(\n bigquery_source_input_uri:\ + \ str,\n model_display_name: str = '',\n table_prefix: str = '',\n\ + ) -> NamedTuple('Outputs', [('bigquery_destination_output_uri', str),]):\n\ + \ \"\"\"Formats a bigquery output uri, including the model name and current\ + \ time in the table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n bigquery_staging_dataset_uri = '.'.join(\n bigquery_source_input_uri.split('.')[:-1]\n\ + \ )\n curr_time = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')\n\ + \ if model_display_name:\n model_display_name = f'{model_display_name}-'\n\ + \n if table_prefix:\n table_prefix = f'{table_prefix}-'\n\n return\ + \ collections.namedtuple(\n 'Outputs',\n [\n 'bigquery_destination_output_uri',\n\ + \ ],\n )(\n f'{bigquery_staging_dataset_uri}.{table_prefix}{model_display_name}{curr_time}',\n\ + \ )\n\n" + image: python:3.7 + exec-get-bigquery-destination-output-uri-2: + container: + args: + - 
--executor_input + - '{{$}}' + - --function_to_execute + - _get_bigquery_destination_output_uri + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_bigquery_destination_output_uri(\n bigquery_source_input_uri:\ + \ str,\n model_display_name: str = '',\n table_prefix: str = '',\n\ + ) -> NamedTuple('Outputs', [('bigquery_destination_output_uri', str),]):\n\ + \ \"\"\"Formats a bigquery output uri, including the model name and current\ + \ time in the table name.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n bigquery_staging_dataset_uri = '.'.join(\n bigquery_source_input_uri.split('.')[:-1]\n\ + \ )\n curr_time = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S-%f')\n\ + \ if model_display_name:\n model_display_name = f'{model_display_name}-'\n\ + \n if table_prefix:\n table_prefix = f'{table_prefix}-'\n\n return\ + \ collections.namedtuple(\n 'Outputs',\n [\n 'bigquery_destination_output_uri',\n\ + \ ],\n )(\n f'{bigquery_staging_dataset_uri}.{table_prefix}{model_display_name}{curr_time}',\n\ + \ )\n\n" + image: python:3.7 + exec-get-bp-bq-output-table: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_bp_bq_output_table + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_bp_bq_output_table(\n bp_job: dsl.Input[dsl.Artifact],\n\ + ) -> NamedTuple('Outputs', [('bq_output_table_uri', str),]):\n \"\"\"Gets\ + \ the output table uri from a batch prediction job.\n\n Args:\n bp_job:\ + \ The batch prediction job artifact.\n\n Returns:\n The uri of our output\ + \ table.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'bq_output_table_uri',\n ],\n )(\n f\"{bp_job.metadata['bigqueryOutputDataset']}.{bp_job.metadata['bigqueryOutputTable']}\"\ + ,\n )\n\n" + image: python:3.7 + exec-get-bp-bq-output-table-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_bp_bq_output_table + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_bp_bq_output_table(\n bp_job: dsl.Input[dsl.Artifact],\n\ + ) -> NamedTuple('Outputs', [('bq_output_table_uri', str),]):\n \"\"\"Gets\ + \ the output table uri from a batch prediction job.\n\n Args:\n bp_job:\ + \ The batch prediction job artifact.\n\n Returns:\n The uri of our output\ + \ table.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'bq_output_table_uri',\n ],\n )(\n f\"{bp_job.metadata['bigqueryOutputDataset']}.{bp_job.metadata['bigqueryOutputTable']}\"\ + ,\n )\n\n" + image: python:3.7 + exec-get-transform-config-path: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _get_transform_config_path + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _get_transform_config_path(\n transform_output_dir_artifact:\ + \ dsl.Input[dsl.Artifact],\n) -> NamedTuple('Outputs', [('transform_config_path',\ + \ str),]):\n \"\"\"Returns the path to the transform config file in the\ + \ transform_output_dir.\n\n Args:\n transform_output_dir_artifact: Transform\ + \ output dir str, output by FTE.\n\n Returns:\n The path of our transform\ + \ config.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import collections\n import os\n import tensorflow as tf\n # pylint:\ + \ enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(transform_output_dir_artifact.uri, 'r') as f:\n\ + \ transform_output_dir = f.read()\n\n transform_config_path = os.path.join(\n\ + \ transform_output_dir,\n 'feature_transform_engine',\n 'transform_config.json',\n\ + \ )\n\n return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'transform_config_path',\n ],\n )(\n transform_config_path,\n\ + \ )\n\n" + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-importer: importer: artifactUri: @@ -7142,11 +11031,194 @@ deploymentSpec: - BatchPredictionJob - --payload - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", 
"{{$.inputs.parameters[''instances_format'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-2: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", 
"{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-explanation-3: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", + ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + 
"\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 + exec-model-batch-predict: + container: + args: + - --type + - BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", + "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", + "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", + "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": + \"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": + \"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": + ", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": + ", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": + ", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": + {", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", + "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", + ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-2: + container: + args: + - --type + - 
BatchPredictionJob + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", "}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": @@ -7159,8 +11231,7 @@ deploymentSpec: "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}", "}"]}' - --project @@ -7175,20 +11246,28 @@ deploymentSpec: - python3 - -u - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-explanation-2: + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-3: container: args: - --type - BatchPredictionJob - --payload - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", - "\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", + "\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": + \"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, + " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", "\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", "}", ", \"bigquery_source\": {", "\"input_uri\": 
\"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", - "\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", - ", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", + "\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", + "\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" + ", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", + \"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, + {"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": + ", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": + ", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": + {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", "\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", "\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", "\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": @@ -7201,8 +11280,7 @@ deploymentSpec: "}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", - "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", - "\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": + "}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}", "}"]}' - --project @@ -7217,9 +11295,9 @@ deploymentSpec: - python3 - -u - -m - - launcher - image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 - exec-model-batch-predict: + - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + exec-model-batch-predict-4: container: args: - --type @@ -7268,7 +11346,7 @@ deploymentSpec: - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 - exec-model-batch-predict-2: + exec-model-batch-predict-5: container: args: - --type @@ -7447,6 +11525,71 @@ deploymentSpec: - python - /main.py image: gcr.io/ml-pipeline/model-evaluation:v0.4 + exec-model-evaluation-3: + container: + args: + - --setup_file + - /setup.py + - --json_mode + - 'true' + - --project_id + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' + - --batch_prediction_format + - '{{$.inputs.parameters[''predictions_format'']}}' + - --batch_prediction_gcs_source + - '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' + - --ground_truth_format + - '{{$.inputs.parameters[''ground_truth_format'']}}' + - --key_prefix_in_prediction_dataset + - instance + - --root_dir + - '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' + - --classification_type + - multiclass + - 
--ground_truth_column + - instance.{{$.inputs.parameters['ground_truth_column']}} + - --prediction_score_column + - '{{$.inputs.parameters[''prediction_score_column'']}}' + - --prediction_label_column + - '{{$.inputs.parameters[''prediction_label_column'']}}' + - --prediction_id_column + - '' + - --example_weight_column + - '' + - --generate_feature_attribution + - 'false' + - --dataflow_job_prefix + - evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - --dataflow_service_account + - '{{$.inputs.parameters[''dataflow_service_account'']}}' + - --dataflow_disk_size + - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - --dataflow_machine_type + - '{{$.inputs.parameters[''dataflow_machine_type'']}}' + - --dataflow_workers_num + - '{{$.inputs.parameters[''dataflow_workers_num'']}}' + - --dataflow_max_workers_num + - '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' + - --dataflow_subnetwork + - '{{$.inputs.parameters[''dataflow_subnetwork'']}}' + - --dataflow_use_public_ips + - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' + - --kms_key_name + - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' + - --output_metrics_gcs_path + - '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + command: + - python + - /main.py + image: gcr.io/ml-pipeline/model-evaluation:v0.4 exec-model-evaluation-import: container: args: @@ -7535,6 +11678,50 @@ deploymentSpec: - -m - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + exec-model-evaluation-import-3: + container: + args: + - '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", + "--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' + - '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", + "{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", + "{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", + "{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", + "{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", + "{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", + "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' + - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", + "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", + "{{$.inputs.parameters[''problem_type'']}}"]}}' + - --display_name + - '{{$.inputs.parameters[''display_name'']}}' + - --dataset_path + - '{{$.inputs.parameters[''dataset_path'']}}' + - --dataset_paths + - '{{$.inputs.parameters[''dataset_paths'']}}' + - --dataset_type + 
- '{{$.inputs.parameters[''dataset_type'']}}' + - --pipeline_job_id + - '{{$.pipeline_job_uuid}}' + - --pipeline_job_resource_name + - '{{$.pipeline_job_resource_name}}' + - --model_name + - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + command: + - python3 + - -u + - -m + - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 exec-model-upload: container: args: @@ -7593,6 +11780,35 @@ deploymentSpec: - -m - launcher image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 + exec-model-upload-3: + container: + args: + - --type + - UploadModel + - --payload + - '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", + "\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", + "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", + ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", + "}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", + "\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + - --project + - '{{$.inputs.parameters[''project'']}}' + - --location + - '{{$.inputs.parameters[''location'']}}' + - --gcp_resources + - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --executor_input + - '{{$}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' + command: + - python3 + - -u + - -m + - launcher + image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 exec-set-optional-inputs: container: args: @@ -7637,19 +11853,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 
'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -7695,7 +11911,53 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + exec-split-materialized-data-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - _split_materialized_data + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef _split_materialized_data(\n materialized_data: Input[Dataset],\n\ + \ materialized_train_split: OutputPath('MaterializedSplit'),\n materialized_eval_split:\ + \ OutputPath('MaterializedSplit'),\n materialized_test_split: OutputPath('MaterializedSplit')):\n\ + \ \"\"\"Splits materialized_data into materialized_data test, train, and\ + \ eval splits.\n\n Necessary adapter between FTE pipeline and trainer.\n\ + \n Args:\n materialized_data: materialized_data dataset output by FTE.\n\ + \ materialized_train_split: Path patern to materialized_train_split.\n\ + \ materialized_eval_split: Path patern to materialized_eval_split.\n\ + \ materialized_test_split: Path patern to materialized_test_split.\n\ + \ \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \ import json\n import tensorflow as tf\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ + \n with tf.io.gfile.GFile(materialized_data.path, 'r') as f:\n artifact_path\ + \ = f.read()\n\n # needed to import tf because this is a path in gs://\n\ + \ with tf.io.gfile.GFile(artifact_path, 'r') as f:\n materialized_data_json\ + \ = json.load(f)\n\n if 'tf_record_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['tf_record_data_source'][\n\ + \ 'file_patterns']\n elif 'avro_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['avro_data_source'][\n \ + \ 'file_patterns']\n elif 'parquet_data_source' in materialized_data_json:\n\ + \ file_patterns = materialized_data_json['parquet_data_source'][\n \ + \ 'file_patterns']\n else:\n raise ValueError(f'Unsupported training\ + \ data source: {materialized_data_json}')\n\n # we map indices to file\ + \ patterns based on the ordering of insertion order\n # in our transform_data\ + \ (see above in _generate_analyze_and_transform_data)\n with tf.io.gfile.GFile(materialized_train_split,\ + \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ + \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ + \ 'w') as f:\n f.write(file_patterns[2])\n\n" 
+ image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-string-not-empty: container: args: @@ -7770,7 +12032,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-training-configurator-and-validator-2: container: args: @@ -7815,7 +12077,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The AutoML Tabular pipeline v2. name: automl-tabular-v2 @@ -7827,6 +12089,10 @@ root: artifactSelectors: - outputArtifactKey: feature-attribution-2-feature_attributions producerSubtask: exit-handler-1 + feature-attribution-3-feature_attributions: + artifactSelectors: + - outputArtifactKey: feature-attribution-3-feature_attributions + producerSubtask: exit-handler-1 feature-attribution-feature_attributions: artifactSelectors: - outputArtifactKey: feature-attribution-feature_attributions @@ -7835,6 +12101,10 @@ root: artifactSelectors: - outputArtifactKey: model-evaluation-2-evaluation_metrics producerSubtask: exit-handler-1 + model-evaluation-3-evaluation_metrics: + artifactSelectors: + - outputArtifactKey: model-evaluation-3-evaluation_metrics + producerSubtask: exit-handler-1 model-evaluation-evaluation_metrics: artifactSelectors: - outputArtifactKey: model-evaluation-evaluation_metrics @@ -7887,6 +12157,12 @@ root: componentInputParameter: dataset_level_transformations pipelinechannel--disable_early_stopping: componentInputParameter: disable_early_stopping + pipelinechannel--distill_batch_predict_machine_type: + componentInputParameter: distill_batch_predict_machine_type + pipelinechannel--distill_batch_predict_max_replica_count: + componentInputParameter: distill_batch_predict_max_replica_count + pipelinechannel--distill_batch_predict_starting_replica_count: + componentInputParameter: distill_batch_predict_starting_replica_count pipelinechannel--enable_probabilistic_inference: componentInputParameter: enable_probabilistic_inference pipelinechannel--encryption_spec_key_name: @@ -7927,6 +12203,8 @@ root: componentInputParameter: legacy_transformations_path pipelinechannel--location: componentInputParameter: location + pipelinechannel--materialized_examples_format: + componentInputParameter: materialized_examples_format pipelinechannel--max_selected_features: componentInputParameter: max_selected_features pipelinechannel--model_description: @@ -7989,6 +12267,8 @@ root: componentInputParameter: tf_auto_transform_features pipelinechannel--tf_custom_transformation_definitions: componentInputParameter: tf_custom_transformation_definitions + pipelinechannel--tf_transform_execution_engine: + componentInputParameter: tf_transform_execution_engine pipelinechannel--tf_transformations_path: componentInputParameter: 
tf_transformations_path pipelinechannel--train_budget_milli_node_hours: @@ -8107,6 +12387,27 @@ root: description: If disable easly stopping. isOptional: true parameterType: BOOLEAN + distill_batch_predict_machine_type: + defaultValue: n1-standard-16 + description: 'The prediction server machine type for + + batch predict component in the model distillation.' + isOptional: true + parameterType: STRING + distill_batch_predict_max_replica_count: + defaultValue: 40.0 + description: 'The max number of prediction server + + for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER + distill_batch_predict_starting_replica_count: + defaultValue: 25.0 + description: 'The initial number of + + prediction server for batch predict component in the model distillation.' + isOptional: true + parameterType: NUMBER_INTEGER enable_probabilistic_inference: defaultValue: false description: 'If probabilistic inference is enabled, the @@ -8245,6 +12546,11 @@ root: location: description: The GCP region that runs the pipeline components. parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: The format for the materialized examples. + isOptional: true + parameterType: STRING max_selected_features: defaultValue: 1000.0 description: Maximum number of features to select. @@ -8391,6 +12697,13 @@ root: in string format.' isOptional: true parameterType: LIST + tf_transform_execution_engine: + defaultValue: '' + description: 'Execution engine to run TF-based + + transformations. Currently supports "dataflow" or "bigquery"' + isOptional: true + parameterType: STRING tf_transformations_path: defaultValue: '' description: Path to TF transformation configuration. @@ -8424,6 +12737,10 @@ root: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + feature-attribution-3-feature_attributions: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 feature-attribution-feature_attributions: artifactType: schemaTitle: system.Metrics @@ -8432,6 +12749,10 @@ root: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + model-evaluation-3-evaluation_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 model-evaluation-evaluation_metrics: artifactType: schemaTitle: system.Metrics diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py new file mode 100644 index 0000000000..574c05e4ec --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py @@ -0,0 +1,246 @@ +"""AutoML Feature Transform Engine component spec.""" + +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
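The new module below defines distillation_stage_feature_transform_engine as a dsl.container_component. As a point of reference only, here is a minimal sketch of how such a component could be wired into a pipeline, assuming it is importable from the google_cloud_pipeline_components.preview.automl.tabular package path shown in the diff header; the bucket, table, and column values are placeholders, not part of this change:

    from kfp import compiler, dsl
    from google_cloud_pipeline_components.preview.automl.tabular.distillation_stage_feature_transform_engine import (
        distillation_stage_feature_transform_engine,
    )

    @dsl.pipeline(name='distillation-stage-fte-demo')
    def demo_pipeline(project: str, location: str, root_dir: str):
        # Re-uses the transform config emitted by the pre-distillation FTE stage.
        # All URIs and column names below are illustrative placeholders.
        fte_task = distillation_stage_feature_transform_engine(
            root_dir=root_dir,
            project=project,
            location=location,
            transform_config_path='gs://my-bucket/fte/transform_config.json',
            bigquery_train_full_table_uri='bq://my-project.my_dataset.train_with_soft_targets',
            bigquery_validate_full_table_uri='bq://my-project.my_dataset.validate_with_soft_targets',
            target_column='label',
            prediction_type='classification',
        )
        # Downstream distillation trainers would consume
        # fte_task.outputs['materialized_data'] and fte_task.outputs['transform_output'].

    compiler.Compiler().compile(demo_pipeline, 'distillation_stage_fte_demo.yaml')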
+ +from typing import Optional + +from kfp import dsl + + +@dsl.container_component +def distillation_stage_feature_transform_engine( + root_dir: str, + project: str, + location: str, + transform_config_path: str, + bigquery_train_full_table_uri: str, + bigquery_validate_full_table_uri: str, + target_column: str, + prediction_type: str, + materialized_data: dsl.Output[dsl.Dataset], + transform_output: dsl.Output[dsl.Artifact], + gcp_resources: dsl.OutputPath(str), + bigquery_staging_full_dataset_id: Optional[str] = '', + weight_column: Optional[str] = '', + dataflow_machine_type: Optional[str] = 'n1-standard-16', + dataflow_max_num_workers: Optional[int] = 25, + dataflow_disk_size_gb: Optional[int] = 40, + dataflow_subnetwork: Optional[str] = '', + dataflow_use_public_ips: Optional[bool] = True, + dataflow_service_account: Optional[str] = '', + encryption_spec_key_name: Optional[str] = '', + autodetect_csv_schema: Optional[bool] = False, +): + # fmt: off + """Feature Transform Engine (FTE) component to transform raw data to engineered features during model distilation. + + The FTE transform configuration is generated as part of the FTE stage prior + to distillation. This distillation-stage FTE component re-uses this config to + transform the input datasets with predicted outputs included (soft targets). + + Args: + root_dir (str): The Cloud Storage location to store the output. + project (str): Project to run feature transform engine. + location (str): Location for the created GCP services. + transform_config_path (str): Path to the transform config output by the + pre-distillation FTE component. + bigquery_train_full_table_uri (str): BigQuery full table id for our + train split output by pre-distillation FTE with soft target included. + bigquery_validate_full_table_uri (str): BigQuery full table id for our + validation split output by pre-distillation FTE with soft target + included. + target_column (str): Target column of input data. + prediction_type (str): Model prediction type. One of + "classification", "regression", "time_series". + bigquery_staging_full_dataset_id (Optional[str]): Dataset in + 'projectId.datasetId' format for storing intermediate-FTE BigQuery + tables. If the specified dataset does not exist in BigQuery, FTE will + create the dataset. If no bigquery_staging_full_dataset_id is specified, + all intermediate tables will be stored in a dataset created under the + provided project in the input data source's location during FTE + execution called + 'vertex_feature_transform_engine_staging_{location.replace('-', '_')}'. + All tables generated by FTE will have a 30 day TTL. + weight_column (Optional[str]): Weight column of input data. + dataflow_machine_type (Optional[str]): The machine type used for dataflow + jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers (Optional[int]): The number of workers to run the + dataflow job. If not set, default to 25. + dataflow_disk_size_gb (Optional[int]): The disk size, in gigabytes, to use + on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork (Optional[str]): Dataflow's fully qualified subnetwork + name, when empty the default subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips (Optional[bool]): Specifies whether Dataflow + workers use public IP addresses. + dataflow_service_account (Optional[str]): Custom service account to run + Dataflow jobs. 
+ encryption_spec_key_name (Optional[str]): Customer-managed encryption key. + + Returns: + materialized_data (Dataset): + The materialized dataset. + transform_output (TransformOutput): + The transform output artifact. + gcp_resources (str): + GCP resources created by this component. + For more details, see + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + """ + # fmt: on + + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', + command=[], + args=[ + 'distillation_stage_feature_transform_engine', + dsl.ConcatPlaceholder(items=['--project=', project]), + dsl.ConcatPlaceholder(items=['--location=', location]), + dsl.ConcatPlaceholder( + items=[ + '--transform_config_path=', + transform_config_path, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_train_full_table_uri=', + bigquery_train_full_table_uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_validate_full_table_uri=', + bigquery_validate_full_table_uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--bigquery_staging_full_dataset_id=', + bigquery_staging_full_dataset_id, + ] + ), + dsl.ConcatPlaceholder(items=['--target_column=', target_column]), + dsl.ConcatPlaceholder(items=['--prediction_type=', prediction_type]), + dsl.ConcatPlaceholder(items=['--weight_column=', weight_column]), + dsl.ConcatPlaceholder( + items=[ + '--error_file_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/error.txt', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--transform_output_artifact_path=', + transform_output.uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--transform_output_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/transform', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_examples_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--export_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/export', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_path=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/materialized_data', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--materialized_data_artifact_path=', + materialized_data.uri, + ] + ), + f'--job_name=feature-transform-engine-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}-{dsl.PIPELINE_TASK_ID_PLACEHOLDER}', + dsl.ConcatPlaceholder(items=['--dataflow_project=', project]), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_staging_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_staging', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_tmp_dir=', + root_dir, + f'/{dsl.PIPELINE_JOB_ID_PLACEHOLDER}/{dsl.PIPELINE_TASK_ID_PLACEHOLDER}/dataflow_tmp', + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_max_num_workers=', + dataflow_max_num_workers, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_machine_type=', + dataflow_machine_type, + ] + ), + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + dsl.ConcatPlaceholder( + items=[ + '--dataflow_disk_size_gb=', + dataflow_disk_size_gb, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_subnetwork_fully_qualified=', + dataflow_subnetwork, + ] + 
), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_use_public_ips=', + dataflow_use_public_ips, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_service_account=', + dataflow_service_account, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--dataflow_kms_key=', + encryption_spec_key_name, + ] + ), + dsl.ConcatPlaceholder(items=['--gcp_resources_path=', gcp_resources]), + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py index 3655e1dcc4..0c703ac517 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -116,7 +116,7 @@ def tabular_feature_ranking_and_selection( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["feature_selection", "--data_source=', data_source.uri, '", "--target_column=', @@ -153,7 +153,7 @@ def tabular_feature_ranking_and_selection( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml new file mode 100644 index 0000000000..435d8dae31 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml @@ -0,0 +1,1638 @@ +# PIPELINE DEFINITION +# Name: feature-selection +# Description: Defines pipeline for feature transform engine component. 
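The feature_selection_pipeline.yaml being added here is a precompiled KFP IR spec, so it is normally submitted to Vertex AI Pipelines as-is rather than recompiled. A minimal submission sketch with the google-cloud-aiplatform SDK follows; the project, bucket, and parameter values are placeholders, and the parameter names mirror the Inputs declared in this pipeline definition header:

    from google.cloud import aiplatform

    aiplatform.init(project='my-project', location='us-central1')

    job = aiplatform.PipelineJob(
        display_name='feature-selection-demo',
        template_path='feature_selection_pipeline.yaml',
        pipeline_root='gs://my-bucket/pipeline_root',
        parameter_values={
            # Illustrative values; names follow the pipeline's declared Inputs,
            # and omitted parameters fall back to their declared defaults.
            'project': 'my-project',
            'location': 'us-central1',
            'root_dir': 'gs://my-bucket/feature_selection',
            'target_column': 'label',
            'prediction_type': 'classification',
            'optimization_objective': 'maximize-au-roc',
            'tf_auto_transform_features': {},
            'dataset_level_custom_transformation_definitions': [],
            'dataset_level_transformations': [],
            'data_source_bigquery_table_path': 'bq://my-project.my_dataset.training_data',
        },
    )
    job.submit()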
+# Inputs: +# bigquery_staging_full_dataset_id: str [Default: ''] +# data_source_bigquery_table_path: str [Default: ''] +# data_source_csv_filenames: str [Default: ''] +# dataflow_disk_size_gb: int [Default: 40.0] +# dataflow_machine_type: str [Default: 'n1-standard-16'] +# dataflow_max_num_workers: int [Default: 10.0] +# dataflow_service_account: str [Default: ''] +# dataflow_subnetwork: str [Default: ''] +# dataflow_use_public_ips: bool [Default: True] +# dataset_level_custom_transformation_definitions: list +# dataset_level_transformations: list +# encryption_spec_key_name: str [Default: ''] +# feature_selection_algorithm: str [Default: 'AMI'] +# feature_selection_execution_engine: str [Default: 'bigquery'] +# location: str +# max_selected_features: int [Default: -1.0] +# optimization_objective: str +# predefined_split_key: str [Default: ''] +# prediction_type: str +# project: str +# root_dir: str +# run_feature_selection: bool [Default: False] +# stage_1_deadline_hours: float [Default: -1.0] +# stage_2_deadline_hours: float [Default: -1.0] +# stratified_split_key: str [Default: ''] +# target_column: str +# test_fraction: float [Default: -1.0] +# tf_auto_transform_features: dict +# training_fraction: float [Default: -1.0] +# validation_fraction: float [Default: -1.0] +# weight_column: str [Default: ''] +components: + comp-feature-transform-engine: + executorLabel: exec-feature-transform-engine + inputDefinitions: + parameters: + autodetect_csv_schema: + defaultValue: false + description: 'If True, infers the column types + + when importing CSVs into BigQuery.' + isOptional: true + parameterType: BOOLEAN + bigquery_staging_full_dataset_id: + defaultValue: '' + description: 'Dataset in + + "projectId.datasetId" format for storing intermediate-FTE BigQuery + + tables. If the specified dataset does not exist in BigQuery, FTE will + + create the dataset. If no bigquery_staging_full_dataset_id is specified, + + all intermediate tables will be stored in a dataset created under the + + provided project in the input data source''s location during FTE + + execution called + + "vertex_feature_transform_engine_staging_{location.replace(''-'', ''_'')}". + + All tables generated by FTE will have a 30 day TTL.' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + description: 'BigQuery input data + + source to run feature transform on.' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + description: 'CSV input data source to run + + feature transform on.' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + description: 'The disk size, in gigabytes, to use + + on each Dataflow worker instance. If not set, default to 40.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + description: 'The machine type used for dataflow + + jobs. If not set, default to n1-standard-16.' + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 25.0 + description: 'The number of workers to run the + + dataflow job. If not set, default to 25.' + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + description: 'Custom service account to run + + Dataflow jobs.' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + description: 'Dataflow''s fully qualified subnetwork + + name, when empty the default subnetwork will be used. 
More details: + + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + description: 'Specifies whether Dataflow + + workers use public IP addresses.' + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + defaultValue: [] + description: "List of dataset-level custom transformation definitions. \ + \ Custom,\nbring-your-own dataset-level transform functions, where users\ + \ can define\nand import their own transform function and use it with\ + \ FTE's built-in\ntransformations. Using custom transformations is an\ + \ experimental feature\nand it is currently not supported during batch\ + \ prediction.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"ConcatCols\",\n \"module_path\": \"/path/to/custom_transform_fn_dlt.py\"\ + ,\n \"function_name\": \"concat_cols\" } ] Using custom transform\ + \ function\n together with FTE's built-in transformations: .. code-block::\n\ + \ python [ { \"transformation\": \"Join\", \"right_table_uri\":\n\ + \ \"bq://test-project.dataset_test.table\", \"join_keys\":\n [[\"\ + join_key_col\", \"join_key_col\"]] },{ \"transformation\":\n \"ConcatCols\"\ + , \"cols\": [\"feature_1\", \"feature_2\"], \"output_col\":\n \"feature_1_2\"\ + \ } ]" + isOptional: true + parameterType: LIST + dataset_level_transformations: + defaultValue: [] + description: "List of dataset-level\ntransformations.\nExample: .. code-block::\ + \ python [ { \"transformation\": \"Join\",\n \"right_table_uri\": \"\ + bq://test-project.dataset_test.table\",\n \"join_keys\": [[\"join_key_col\"\ + , \"join_key_col\"]] }, ... ] Additional\n information about FTE's currently\ + \ supported built-in\n transformations:\n Join: Joins features from\ + \ right_table_uri. For each join key, the\n left table keys will\ + \ be included and the right table keys will\n be dropped.\n \ + \ Example: .. code-block:: python { \"transformation\": \"Join\",\n\ + \ \"right_table_uri\": \"bq://test-project.dataset_test.table\"\ + ,\n \"join_keys\": [[\"join_key_col\", \"join_key_col\"]] }\n\ + \ Arguments:\n right_table_uri: Right table BigQuery\ + \ uri to join\n with input_full_table_id.\n join_keys:\ + \ Features to join on. For each\n nested list, the first\ + \ element is a left table column\n and the second is its\ + \ corresponding right table column.\n TimeAggregate: Creates a new\ + \ feature composed of values of an\n existing feature from a fixed\ + \ time period ago or in the future.\n Ex: A feature for sales by\ + \ store 1 year ago.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"TimeAggregate\", \"time_difference\": 40,\n \"\ + time_difference_units\": \"DAY\",\n \"time_series_identifier_columns\"\ + : [\"store_id\"],\n \"time_column\": \"time_col\", \"time_difference_target_column\"\ + :\n \"target_col\", \"output_column\": \"output_col\" }\n \ + \ Arguments:\n time_difference: Number of time_difference_units\ + \ to\n look back or into the future on our\n \ + \ time_difference_target_column.\n time_difference_units:\ + \ Units of time_difference to\n look back or into the future\ + \ on our\n time_difference_target_column. 
Must be one of\ + \ * 'DAY' *\n 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER'\ + \ *\n 'YEAR'\n time_series_identifier_columns:\ + \ Names of the\n time series identifier columns.\n \ + \ time_column: Name of the time column.\n time_difference_target_column:\ + \ Column we wish to get\n the value of time_difference time_difference_units\ + \ in\n the past or future.\n output_column: Name\ + \ of our new time aggregate\n feature.\n is_future:\ + \ Whether we wish to look\n forward in time. Defaults to\ + \ False.\n PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum:\n\ + \ Performs a partition by reduce operation (one of max,\n\ + \ min, avg, or sum) with a fixed historic time period. Ex:\n\ + \ Getting avg sales (the reduce column) for each store\n\ + \ (partition_by_column) over the previous 5 days\n \ + \ (time_column, time_ago_units, and time_ago).\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"PartitionByMax\"\ + , \"reduce_column\": \"sell_price\",\n \"partition_by_columns\"\ + : [\"store_id\", \"state_id\"],\n \"time_column\": \"date\",\ + \ \"time_ago\": 1, \"time_ago_units\":\n \"WEEK\", \"output_column\"\ + : \"partition_by_reduce_max_output\" }\n Arguments:\n \ + \ reduce_column: Column to apply the reduce operation\n \ + \ on. Reduce operations include the\n following: Max,\ + \ Min, Avg, Sum.\n partition_by_columns: List of columns to\n\ + \ partition by.\n time_column: Time column for\ + \ the partition by\n operation's window function.\n \ + \ time_ago: Number of time_ago_units to look back on\n \ + \ our target_column, starting from time_column\n (inclusive).\n\ + \ time_ago_units: Units of time_ago to look back on\n \ + \ our target_column. Must be one of * 'DAY' * 'WEEK'\n \ + \ output_column: Name of our output feature." + isOptional: true + parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING + encryption_spec_key_name: + defaultValue: '' + description: Customer-managed encryption key. + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + description: "The algorithm of feature\nselection. One of \"AMI\", \"CMIM\"\ + , \"JMIM\", \"MRMR\", default to be \"AMI\".\nThe algorithms available\ + \ are: AMI(Adjusted Mutual Information):\n Reference:\n https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html\n\ + \ Arrays are not yet supported in this algorithm. CMIM(Conditional\n\ + \ Mutual Information Maximization): Reference paper: Mohamed\n \ + \ Bennasar, Yulia Hicks, Rossitza Setchi, \u201CFeature selection\ + \ using\n Joint Mutual Information Maximisation,\u201D Expert Systems\ + \ with\n Applications, vol. 42, issue 22, 1 December 2015, Pages\n\ + \ 8520-8532. JMIM(Joint Mutual Information Maximization): Reference\n\ + \ paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, \u201C\ + Feature\n selection using Joint Mutual Information Maximisation,\u201D\ + \ Expert\n Systems with Applications, vol. 42, issue 22, 1 December\ + \ 2015,\n Pages 8520-8532. MRMR(MIQ Minimum-redundancy\n \ + \ Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long,\n\ + \ and Chris Ding. \"Feature selection based on mutual information\n\ + \ criteria of max-dependency, max-relevance, and min-redundancy.\"\ + \n IEEE Transactions on pattern analysis and machine intelligence\n\ + \ 27, no.\n 8: 1226-1238." + isOptional: true + parameterType: STRING + feature_selection_execution_engine: + defaultValue: dataflow + description: Execution engine to run feature selection, value can be dataflow, + bigquery. + isOptional: true + parameterType: STRING + forecasting_apply_windowing: + defaultValue: true + description: Whether to apply window strategy. + isOptional: true + parameterType: BOOLEAN + forecasting_available_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + available at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_context_window: + defaultValue: -1.0 + description: Forecasting context window. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_forecast_horizon: + defaultValue: -1.0 + description: Forecasting horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_holiday_regions: + defaultValue: [] + description: 'The geographical region based on which the + + holiday effect is applied in modeling by adding holiday categorical + + array feature that include all holidays matching the date. This option + + only allowed when data granularity is day. By default, holiday effect + + modeling is disabled. To turn it on, specify the holiday region using + + this option. + + Top level: * ''GLOBAL'' + + Second level: continental regions: * ''NA'': North America + + * ''JAPAC'': Japan and Asia Pacific + + * ''EMEA'': Europe, the Middle East and Africa + + * ''LAC'': Latin America and the Caribbean + + Third level: countries from ISO 3166-1 Country codes. 
+ + Valid regions: * ''GLOBAL'' * ''NA'' * ''JAPAC'' * ''EMEA'' * ''LAC'' + * ''AE'' + + * ''AR'' * ''AT'' * ''AU'' * ''BE'' * ''BR'' * ''CA'' * ''CH'' * ''CL'' + * ''CN'' * ''CO'' + + * ''CZ'' * ''DE'' * ''DK'' * ''DZ'' * ''EC'' * ''EE'' * ''EG'' * ''ES'' + * ''FI'' * ''FR'' + + * ''GB'' * ''GR'' * ''HK'' * ''HU'' * ''ID'' * ''IE'' * ''IL'' * ''IN'' + * ''IR'' * ''IT'' + + * ''JP'' * ''KR'' * ''LV'' * ''MA'' * ''MX'' * ''MY'' * ''NG'' * ''NL'' + * ''NO'' * ''NZ'' + + * ''PE'' * ''PH'' * ''PK'' * ''PL'' * ''PT'' * ''RO'' * ''RS'' * ''RU'' + * ''SA'' * ''SE'' + + * ''SG'' * ''SI'' * ''SK'' * ''TH'' * ''TR'' * ''TW'' * ''UA'' * ''US'' + * ''VE'' * ''VN'' + + * ''ZA''' + isOptional: true + parameterType: LIST + forecasting_predefined_window_column: + defaultValue: '' + description: Forecasting predefined window column. + isOptional: true + parameterType: STRING + forecasting_time_column: + defaultValue: '' + description: Forecasting time column. + isOptional: true + parameterType: STRING + forecasting_time_series_attribute_columns: + defaultValue: [] + description: 'Forecasting + + time series attribute columns.' + isOptional: true + parameterType: LIST + forecasting_time_series_identifier_column: + description: '[Deprecated] A forecasting time series identifier column. + Raises an + + exception if used - use the "time_series_identifier_column" field + + instead.' + isOptional: true + parameterType: STRING + forecasting_time_series_identifier_columns: + defaultValue: [] + description: The list of forecasting time series identifier columns. + isOptional: true + parameterType: LIST + forecasting_unavailable_at_forecast_columns: + defaultValue: [] + description: 'Forecasting + + unavailable at forecast columns.' + isOptional: true + parameterType: LIST + forecasting_window_max_count: + defaultValue: -1.0 + description: Forecasting window max count. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_window_stride_length: + defaultValue: -1.0 + description: Forecasting window stride length. + isOptional: true + parameterType: NUMBER_INTEGER + group_columns: + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + legacy_transformations_path: + defaultValue: '' + isOptional: true + parameterType: STRING + location: + description: Location for the created GCP services. + parameterType: STRING + materialized_examples_format: + defaultValue: tfrecords_gzip + description: 'The format to use for the + + materialized examples. Should be either ''tfrecords_gzip'' (default) or + + ''parquet''.' + isOptional: true + parameterType: STRING + max_selected_features: + defaultValue: 1000.0 + description: 'Maximum number of features to + + select. If specified, the transform config will be purged by only using + + the selected features that ranked top in the feature ranking, which has + + the ranking value for all supported features. If the number of input + + features is smaller than max_selected_features specified, we will still + + run the feature selection process and generate the feature ranking, no + + features will be excluded. The value will be set to 1000 by default if + + run_feature_selection is enabled.' + isOptional: true + parameterType: NUMBER_INTEGER + model_type: + description: 'Model type, which we wish to engineer features + + for. 
Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, + or + + tide. Defaults to the empty value, `None`.' + isOptional: true + parameterType: STRING + multimodal_image_columns: + defaultValue: [] + description: 'List of multimodal image + + columns. Defaults to an empty list.' + isOptional: true + parameterType: LIST + multimodal_tabular_columns: + defaultValue: [] + description: 'List of multimodal tabular + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_text_columns: + defaultValue: [] + description: 'List of multimodal text + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + multimodal_timeseries_columns: + defaultValue: [] + description: 'List of multimodal timeseries + + columns. Defaults to an empty list' + isOptional: true + parameterType: LIST + predefined_split_key: + defaultValue: '' + description: Predefined split key. + isOptional: true + parameterType: STRING + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of + + "classification", "regression", "time_series".' + isOptional: true + parameterType: STRING + project: + description: Project to run feature transform engine. + parameterType: STRING + root_dir: + description: The Cloud Storage location to store the output. + parameterType: STRING + run_distill: + defaultValue: false + description: '(deprecated) Whether the distillation should be applied + + to the training.' + isOptional: true + parameterType: BOOLEAN + run_feature_selection: + defaultValue: false + description: 'Whether the feature selection + + should be applied to the dataset.' + isOptional: true + parameterType: BOOLEAN + stats_gen_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + statistics generation. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental.' + isOptional: true + parameterType: STRING + stratified_split_key: + defaultValue: '' + description: Stratified split key. + isOptional: true + parameterType: STRING + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_DOUBLE + test_fraction: + defaultValue: -1.0 + description: Fraction of input data for testing. + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + defaultValue: {} + description: "Dict mapping auto and/or type-resolutions to\nTF transform\ + \ features. FTE will automatically configure a set of\nbuilt-in transformations\ + \ for each feature based on its data statistics.\nIf users do not want\ + \ auto type resolution, but want the set of\ntransformations for a given\ + \ type to be automatically generated, they\nmay specify pre-resolved transformations\ + \ types. The following type hint\ndict keys are supported: * 'auto' *\ + \ 'categorical' * 'numeric' * 'text'\n* 'timestamp'\n Example: .. code-block::\ + \ python { \"auto\": [\"feature1\"],\n \"categorical\": [\"feature2\"\ + , \"feature3\"], } Note that the target and\n weight column may not\ + \ be included as an auto transformation unless\n users are running\ + \ forecasting." 
+ isOptional: true + parameterType: STRUCT + tf_custom_transformation_definitions: + defaultValue: [] + description: "List of\nTensorFlow-based custom transformation definitions.\ + \ Custom,\nbring-your-own transform functions, where users can define\ + \ and import\ntheir own transform function and use it with FTE's built-in\n\ + transformations.\n Example: .. code-block:: python [ { \"transformation\"\ + : \"PlusOne\",\n \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"plus_one_transform\" }, { \"transformation\"\ + :\n \"MultiplyTwo\", \"module_path\": \"gs://bucket/custom_transform_fn.py\"\ + ,\n \"function_name\": \"multiply_two_transform\" } ] Using custom\n\ + \ transform function together with FTE's built-in transformations:\ + \ ..\n code-block:: python [ { \"transformation\": \"CastToFloat\"\ + ,\n \"input_columns\": [\"feature_1\"], \"output_columns\": [\"feature_1\"\ + ] },{\n \"transformation\": \"PlusOne\", \"input_columns\": [\"feature_1\"\ + ]\n \"output_columns\": [\"feature_1_plused_one\"] },{ \"transformation\"\ + :\n \"MultiplyTwo\", \"input_columns\": [\"feature_1\"] \"output_columns\"\ + :\n [\"feature_1_multiplied_two\"] } ]" + isOptional: true + parameterType: LIST + tf_transform_execution_engine: + defaultValue: dataflow + description: 'Execution engine to perform + + row-level TF transformations. Can be one of: "dataflow" (by default) or + + "bigquery". Using "bigquery" as the execution engine is experimental and + + is for allowlisted customers only. In addition, executing on "bigquery" + + only supports auto transformations (i.e., specified by + + tf_auto_transform_features) and will raise an error when + + tf_custom_transformation_definitions or tf_transformations_path is set.' + isOptional: true + parameterType: STRING + tf_transformations_path: + defaultValue: '' + description: "Path to TensorFlow-based\ntransformation configuration. Path\ + \ to a JSON file used to specified\nFTE's TF transformation configurations.\ + \ In the following, we provide\nsome sample transform configurations\ + \ to demonstrate FTE's capabilities.\nAll transformations on input columns\ + \ are explicitly specified with FTE's\nbuilt-in transformations. Chaining\ + \ of multiple transformations on a\nsingle column is also supported. For\ + \ example: .. code-block:: python [\n{ \"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }, {\n\"transformation\": \"ZScale\"\ + , \"input_columns\": [\"feature_2\"] } ]\nAdditional information about\ + \ FTE's currently supported built-in\ntransformations:\n Datetime:\ + \ Extracts datetime featues from a column containing\n timestamp\ + \ strings.\n Example: .. code-block:: python { \"transformation\"\ + :\n \"Datetime\", \"input_columns\": [\"feature_1\"], \"time_format\"\ + :\n \"%Y-%m-%d\" }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the datetime\ + \ transformation on.\n output_columns: Names of output\n\ + \ columns, one for each datetime_features element.\n \ + \ time_format: Datetime format string. Time format is\n \ + \ a combination of Date + Time Delimiter (optional) + Time\n\ + \ (optional) directives. 
Valid date directives are as\n\ + \ follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' #\n\ + \ 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' #\n\ + \ 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' #\n\ + \ 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' #\n\ + \ 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' #\n\ + \ 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y'\n\ + \ # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' #\n\ + \ 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' \ + \ #\n 11302018 * '%Y%m%d' # 20181130 Valid time delimiters\n\ + \ are as follows * 'T' * ' ' Valid time directives are\ + \ as\n follows * '%H:%M' # 23:59 * '%H:%M:%S'\ + \ #\n 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456]\ + \ *\n '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 *\n \ + \ '%H:%M:%S%z', # 23:59:58+0000\n datetime_features:\ + \ List of datetime\n features to be extract. Each entry\ + \ must be one of *\n 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK'\ + \ * 'DAY_OF_YEAR'\n * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR'\ + \ * 'MINUTE' *\n 'SECOND' Defaults to ['YEAR', 'MONTH',\ + \ 'DAY',\n 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR']\n\ + \ Log: Performs the natural log on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Log\",\n \ + \ \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the log transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n ZScale:\ + \ Performs Z-scale normalization on a numeric column.\n Example:\ + \ .. code-block:: python { \"transformation\":\n \"ZScale\"\ + , \"input_columns\": [\"feature_1\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform the z-scale transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n Vocabulary:\ + \ Converts strings to integers, where each unique string\n gets\ + \ a unique integer representation.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"Vocabulary\", \"input_columns\"\ + : [\"feature_1\"] }\n Arguments:\n input_columns:\ + \ A list with a single column to\n perform the vocabulary\ + \ transformation on.\n output_columns: A list with a single\n\ + \ output column name, corresponding to the output of our\n\ + \ transformation.\n top_k: Number of the most\ + \ frequent words\n in the vocabulary to use for generating\ + \ dictionary\n lookup indices. If not specified, all words\ + \ in the\n vocabulary will be used. Defaults to None.\n\ + \ frequency_threshold: Limit the vocabulary\n \ + \ only to words whose number of occurrences in the input\n \ + \ exceeds frequency_threshold. If not specified, all words\n \ + \ in the vocabulary will be included. If both top_k and\n\ + \ frequency_threshold are specified, a word must satisfy\n\ + \ both conditions to be included. Defaults to None.\n \ + \ Categorical: Transforms categorical columns to integer columns.\n\ + \ Example: .. 
code-block:: python { \"transformation\":\n \ + \ \"Categorical\", \"input_columns\": [\"feature_1\"], \"top_k\"\ + : 10 }\n Arguments:\n input_columns: A list with\ + \ a single column to\n perform the categorical transformation\ + \ on.\n output_columns: A list with a single\n \ + \ output column name, corresponding to the output of our\n \ + \ transformation.\n top_k: Number of the most frequent\ + \ words\n in the vocabulary to use for generating dictionary\n\ + \ lookup indices. If not specified, all words in the\n\ + \ vocabulary will be used.\n frequency_threshold:\ + \ Limit the vocabulary\n only to words whose number of\ + \ occurrences in the input\n exceeds frequency_threshold.\ + \ If not specified, all words\n in the vocabulary will\ + \ be included. If both top_k and\n frequency_threshold\ + \ are specified, a word must satisfy\n both conditions\ + \ to be included.\n Reduce: Given a column where each entry is a\ + \ numeric array,\n reduces arrays according to our reduce_mode.\n\ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"Reduce\", \"input_columns\": [\"feature_1\"], \"reduce_mode\"\ + :\n \"MEAN\", \"output_columns\": [\"feature_1_mean\"] }\n\ + \ Arguments:\n input_columns: A list with a single\ + \ column to\n perform the reduce transformation on.\n \ + \ output_columns: A list with a single\n output\ + \ column name, corresponding to the output of our\n transformation.\n\ + \ reduce_mode: One of * 'MAX' * 'MIN' *\n \ + \ 'MEAN' * 'LAST_K' Defaults to 'MEAN'.\n last_k: The number\ + \ of last k elements when\n 'LAST_K' reduce mode is used.\ + \ Defaults to 1.\n SplitString: Given a column of strings, splits\ + \ strings into token\n arrays.\n Example: .. code-block::\ + \ python { \"transformation\":\n \"SplitString\", \"input_columns\"\ + : [\"feature_1\"], \"separator\":\n \"$\" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the split string transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ separator: Separator to split input string\n into tokens.\ + \ Defaults to ' '.\n missing_token: Missing token to use\ + \ when\n no string is included. Defaults to ' _MISSING_\ + \ '.\n NGram: Given a column of strings, splits strings into token\ + \ arrays\n where each token is an integer.\n Example:\ + \ .. code-block:: python { \"transformation\": \"NGram\",\n \ + \ \"input_columns\": [\"feature_1\"], \"min_ngram_size\": 1,\n \ + \ \"max_ngram_size\": 2, \"separator\": \" \" }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_ngram_size: Minimum n-gram size. Must\n be a positive\ + \ number and <= max_ngram_size. Defaults to\n 1.\n \ + \ max_ngram_size: Maximum n-gram size. Must\n \ + \ be a positive number and >= min_ngram_size. Defaults to\n \ + \ 2.\n top_k: Number of the most frequent words\n \ + \ in the vocabulary to use for generating dictionary\n \ + \ lookup indices. If not specified, all words in the\n \ + \ vocabulary will be used. Defaults to None.\n \ + \ frequency_threshold: Limit the\n dictionary's vocabulary\ + \ only to words whose number of\n occurrences in the input\ + \ exceeds frequency_threshold. If\n not specified, all\ + \ words in the vocabulary will be\n included. 
If both top_k\ + \ and frequency_threshold are\n specified, a word must\ + \ satisfy both conditions to be\n included. Defaults to\ + \ None.\n separator: Separator to split input string\n \ + \ into tokens. Defaults to ' '.\n missing_token:\ + \ Missing token to use when\n no string is included. Defaults\ + \ to ' _MISSING_ '.\n Clip: Given a numeric column, clips elements\ + \ such that elements <\n min_value are assigned min_value, and\ + \ elements > max_value are\n assigned max_value.\n Example:\ + \ .. code-block:: python { \"transformation\": \"Clip\",\n \ + \ \"input_columns\": [\"col1\"], \"output_columns\":\n [\"\ + col1_clipped\"], \"min_value\": 1., \"max_value\": 10., }\n Arguments:\n\ + \ input_columns: A list with a single column to\n \ + \ perform the n-gram transformation on.\n output_columns:\ + \ A list with a single\n output column name, corresponding\ + \ to the output of our\n transformation.\n \ + \ min_value: Number where all values below\n min_value\ + \ are set to min_value. If no min_value is\n provided,\ + \ min clipping will not occur. Defaults to None.\n max_value:\ + \ Number where all values above\n max_value are set to\ + \ max_value If no max_value is\n provided, max clipping\ + \ will not occur. Defaults to None.\n MultiHotEncoding: Performs\ + \ multi-hot encoding on a categorical\n array column.\n \ + \ Example: .. code-block:: python { \"transformation\":\n \ + \ \"MultiHotEncoding\", \"input_columns\": [\"col1\"], } The number\n\ + \ of classes is determened by the largest number included in\n\ + \ the input if it is numeric or the total number of unique\n\ + \ values of the input if it is type str. If the input is has\n\ + \ type str and an element contians separator tokens, the input\n\ + \ will be split at separator indices, and the each element\ + \ of\n the split list will be considered a seperate class.\ + \ For\n example,\n Input: .. code-block:: python\ + \ [ [\"foo bar\"], # Example\n 0 [\"foo\", \"bar\"],\ + \ # Example 1 [\"foo\"], # Example\n 2 [\"bar\"\ + ], # Example 3 ]\n Output (with default separator=\"\ + \ \"): .. code-block:: python [\n [1, 1], # Example\ + \ 0 [1, 1], # Example 1\n [1, 0], # Example\ + \ 2 [0, 1], # Example 3 ]\n Arguments:\n \ + \ input_columns: A list with a single column to\n perform\ + \ the multi-hot-encoding on.\n output_columns: A list with\ + \ a single\n output column name, corresponding to the output\ + \ of our\n transformation.\n top_k: Number\ + \ of the most frequent words\n in the vocabulary to use\ + \ for generating dictionary\n lookup indices. If not specified,\ + \ all words in the\n vocabulary will be used. Defaults\ + \ to None.\n frequency_threshold: Limit the\n \ + \ dictionary's vocabulary only to words whose number of\n \ + \ occurrences in the input exceeds frequency_threshold. If\n \ + \ not specified, all words in the vocabulary will be\n \ + \ included. If both top_k and frequency_threshold are\n \ + \ specified, a word must satisfy both conditions to be\n\ + \ included. Defaults to None.\n separator:\ + \ Separator to split input string\n into tokens. Defaults\ + \ to ' '.\n MaxAbsScale: Performs maximum absolute scaling on a numeric\n\ + \ column.\n Example: .. 
code-block:: python { \"transformation\"\ + :\n \"MaxAbsScale\", \"input_columns\": [\"col1\"], \"output_columns\"\ + :\n [\"col1_max_abs_scaled\"] }\n Arguments:\n \ + \ input_columns: A list with a single column to\n \ + \ perform max-abs-scale on.\n output_columns: A list\ + \ with a single\n output column name, corresponding to\ + \ the output of our\n transformation.\n Custom: Transformations\ + \ defined in\n tf_custom_transformation_definitions are included\ + \ here in the\n TensorFlow-based transformation configuration.\ + \ For example,\n given the following tf_custom_transformation_definitions:\ + \ ..\n code-block:: python [ { \"transformation\": \"PlusX\"\ + ,\n \"module_path\": \"gs://bucket/custom_transform_fn.py\",\n\ + \ \"function_name\": \"plus_one_transform\" } ] We can include\ + \ the\n following transformation: .. code-block:: python {\n\ + \ \"transformation\": \"PlusX\", \"input_columns\": [\"col1\"],\n\ + \ \"output_columns\": [\"col1_max_abs_scaled\"] \"x\": 5 } Note\ + \ that\n input_columns must still be included in our arguments\ + \ and\n output_columns is optional. All other arguments are those\n\ + \ defined in custom_transform_fn.py, which includes `\"x\"` in\ + \ this\n case. See tf_custom_transformation_definitions above.\n\ + \ legacy_transformations_path (Optional[str]) Deprecated. Prefer\n\ + \ tf_auto_transform_features. Path to a GCS file containing JSON\n\ + \ string for legacy style transformations. Note that\n legacy_transformations_path\ + \ and tf_auto_transform_features\n cannot both be specified." + isOptional: true + parameterType: STRING + timestamp_split_key: + defaultValue: '' + description: Timestamp split key. + isOptional: true + parameterType: STRING + training_fraction: + defaultValue: -1.0 + description: Fraction of input data for training. + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + description: Fraction of input data for validation. + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The stats of the dataset. + feature_ranking: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'The ranking of features, all features supported in the + + dataset will be included. For "AMI" algorithm, array features won''t be + + available in the ranking as arrays are not supported yet.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + materialized_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + description: The materialized dataset. + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + transform_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The transform output artifact. + parameters: + bigquery_downsampled_test_split_uri: + description: 'BigQuery URI for the downsampled test + + split to pass to the batch prediction component during batch explain.' + parameterType: STRING + bigquery_test_split_uri: + description: 'BigQuery URI for the test split to pass to the + + batch prediction component during evaluation.' 
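The tf_auto_transform_features and tf_transformations_path inputs documented above carry the two transformation-config shapes this component accepts: a dict mapping type hints to column lists, and a JSON file of explicit built-in transformations. A minimal sketch of both shapes, assuming hypothetical column names (f1, f2, ts) and a local file standing in for the gs:// path a real run would use:

import json

# Hypothetical columns; only the dict/JSON shapes are taken from the
# descriptions above, the column names are made up.
tf_auto_transform_features = {
    'auto': ['f1'],        # let FTE resolve transformations from data statistics
    'numeric': ['f2'],     # request the pre-resolved numeric transformations
    'timestamp': ['ts'],
}

# Explicit built-in transformations of the kind tf_transformations_path
# points at (mirrors the ZScale / Datetime examples in the description).
tf_transformations = [
    {'transformation': 'ZScale', 'input_columns': ['f2']},
    {'transformation': 'Datetime', 'input_columns': ['ts'],
     'time_format': '%Y-%m-%d'},  # datetime_features falls back to its documented defaults
]

with open('transform_config.json', 'w') as f:
    json.dump(tf_transformations, f)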
+ parameterType: STRING + bigquery_train_split_uri: + description: 'BigQuery URI for the train split to pass to the + + batch prediction component during distillation.' + parameterType: STRING + bigquery_validation_split_uri: + description: 'BigQuery URI for the validation split to + + pass to the batch prediction component during distillation.' + parameterType: STRING + gcp_resources: + description: 'GCP resources created by this component. For more details, + + see + + https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + parameterType: STRING + split_example_counts: + description: 'JSON string of data split example counts for train, + + validate, and test splits.' + parameterType: STRING + comp-training-configurator-and-validator: + executorLabel: exec-training-configurator-and-validator + inputDefinitions: + artifacts: + dataset_stats: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Dataset stats generated by + + feature transform engine.' + instance_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: 'Schema of input data to the tf_model at + + serving time.' + training_schema: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + parameters: + available_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + available at forecast time.' + isOptional: true + parameterType: LIST + context_window: + defaultValue: -1.0 + description: The length of the context window. + isOptional: true + parameterType: NUMBER_INTEGER + enable_probabilistic_inference: + defaultValue: false + description: 'If probabilistic inference is + + enabled, the model will fit a distribution that captures the uncertainty + + of a prediction. At inference time, the predictive distribution is used + + to make a point prediction that minimizes the optimization objective. + + For example, the mean of a predictive distribution is the point + + prediction that minimizes RMSE loss. If quantiles are specified, then + + the quantiles of the distribution are also returned.' + isOptional: true + parameterType: BOOLEAN + forecast_horizon: + defaultValue: -1.0 + description: The length of the forecast horizon. + isOptional: true + parameterType: NUMBER_INTEGER + forecasting_model_type: + defaultValue: '' + description: The model types, e.g. l2l, seq2seq, tft. + isOptional: true + parameterType: STRING + forecasting_transformations: + defaultValue: {} + description: 'Dict mapping auto and/or type-resolutions to + + feature columns. The supported types are auto, categorical, numeric, + + text, and timestamp.' + isOptional: true + parameterType: STRUCT + group_columns: + description: 'A list of time series attribute column + + names that define the time series hierarchy.' + isOptional: true + parameterType: LIST + group_temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over both the horizon and time series in the same + + hierarchy group.' + isOptional: true + parameterType: NUMBER_DOUBLE + group_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over time series in the same group.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective: + defaultValue: '' + description: "Objective function the model is optimizing\ntowards. 
The training\ + \ process creates a model that maximizes/minimizes\nthe value of the objective\ + \ function over the validation set. The\nsupported optimization objectives\ + \ depend on the prediction type. If the\nfield is not set, a default objective\ + \ function is used.\n classification: \"maximize-au-roc\" (default) -\ + \ Maximize the\n area under the receiver operating characteristic (ROC)\ + \ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ + \ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ + \ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ + \ - Maximize recall for a\n specified precision value.\n classification\ + \ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ + \ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ + \ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ + \ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." + isOptional: true + parameterType: STRING + optimization_objective_precision_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-recall-at-precision". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + optimization_objective_recall_value: + defaultValue: -1.0 + description: 'Required when + + optimization_objective is "maximize-precision-at-recall". Must be + + between 0 and 1, inclusive.' + isOptional: true + parameterType: NUMBER_DOUBLE + prediction_type: + defaultValue: '' + description: 'Model prediction type. One of "classification", + + "regression", "time_series".' + isOptional: true + parameterType: STRING + quantiles: + defaultValue: [] + description: All quantiles that the model need to predict. + isOptional: true + parameterType: LIST + run_distill: + defaultValue: false + description: 'Whether the distillation should be applied to the + + training.' + isOptional: true + parameterType: BOOLEAN + run_evaluation: + defaultValue: false + description: 'Whether we are running evaluation in the training + + pipeline.' + isOptional: true + parameterType: BOOLEAN + split_example_counts: + description: 'JSON string of data split example counts for + + train, validate, and test splits.' + parameterType: STRING + stage_1_deadline_hours: + description: 'Stage 1 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + description: 'Stage 2 training budget in + + hours.' + isOptional: true + parameterType: NUMBER_DOUBLE + target_column: + defaultValue: '' + description: Target column of input data. + isOptional: true + parameterType: STRING + temporal_total_weight: + defaultValue: 0.0 + description: 'The weight of the loss for + + predictions aggregated over the horizon for a single time series.' + isOptional: true + parameterType: NUMBER_DOUBLE + time_column: + defaultValue: '' + description: 'The column that indicates the time. Used by forecasting + + only.' + isOptional: true + parameterType: STRING + time_series_attribute_columns: + defaultValue: [] + description: 'The column names of the time series + + attributes.' + isOptional: true + parameterType: LIST + time_series_identifier_column: + description: '[Deprecated] The time series identifier + + column. Used by forecasting only. Raises exception if used - + + use the "time_series_identifier_column" field instead.' 
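The optimization_objective description above enumerates which objectives are valid for each prediction type; restated as a small lookup table for quick reference (the first entry per type is the documented default):

# Restates only what the optimization_objective description above says.
SUPPORTED_OPTIMIZATION_OBJECTIVES = {
    'classification': [
        'maximize-au-roc',               # default
        'minimize-log-loss',
        'maximize-au-prc',
        'maximize-precision-at-recall',  # requires optimization_objective_recall_value
        'maximize-recall-at-precision',  # requires optimization_objective_precision_value
    ],
    'classification (multi-class)': ['minimize-log-loss'],  # default
    'regression': ['minimize-rmse', 'minimize-mae', 'minimize-rmsle'],
    'time_series': [],  # not enumerated in the description above
}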
+ isOptional: true + parameterType: STRING + time_series_identifier_columns: + defaultValue: [] + description: 'The list of time series identifier columns. + + Used by forecasting only.' + isOptional: true + parameterType: LIST + unavailable_at_forecast_columns: + defaultValue: [] + description: 'The names of the columns that are + + not available at forecast time.' + isOptional: true + parameterType: LIST + weight_column: + defaultValue: '' + description: Weight column of input data. + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + instance_baseline: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + metadata: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + description: The tabular example gen metadata. +deploymentSpec: + executors: + exec-feature-transform-engine: + container: + args: + - feature_transform_engine + - '{"Concat": ["--project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--location=", "{{$.inputs.parameters[''location'']}}"]}' + - '{"Concat": ["--dataset_level_custom_transformation_definitions=", "{{$.inputs.parameters[''dataset_level_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--dataset_level_transformations=", "{{$.inputs.parameters[''dataset_level_transformations'']}}"]}' + - '{"Concat": ["--forecasting_time_column=", "{{$.inputs.parameters[''forecasting_time_column'']}}"]}' + - '{"IfPresent": {"InputName": "forecasting_time_series_identifier_column", + "Then": {"Concat": ["--forecasting_time_series_identifier_column=", "{{$.inputs.parameters[''forecasting_time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--forecasting_time_series_identifier_columns=", "{{$.inputs.parameters[''forecasting_time_series_identifier_columns'']}}"]}' + - '{"Concat": ["--forecasting_time_series_attribute_columns=", "{{$.inputs.parameters[''forecasting_time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--forecasting_unavailable_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_unavailable_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_available_at_forecast_columns=", "{{$.inputs.parameters[''forecasting_available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--forecasting_forecast_horizon=", "{{$.inputs.parameters[''forecasting_forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_context_window=", "{{$.inputs.parameters[''forecasting_context_window'']}}"]}' + - '{"Concat": ["--forecasting_predefined_window_column=", "{{$.inputs.parameters[''forecasting_predefined_window_column'']}}"]}' + - '{"Concat": ["--forecasting_window_stride_length=", "{{$.inputs.parameters[''forecasting_window_stride_length'']}}"]}' + - '{"Concat": ["--forecasting_window_max_count=", "{{$.inputs.parameters[''forecasting_window_max_count'']}}"]}' + - '{"Concat": ["--forecasting_holiday_regions=", "{{$.inputs.parameters[''forecasting_holiday_regions'']}}"]}' + - '{"Concat": ["--forecasting_apply_windowing=", "{{$.inputs.parameters[''forecasting_apply_windowing'']}}"]}' + - '{"Concat": ["--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}"]}' + - '{"Concat": ["--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}"]}' + - '{"Concat": ["--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}"]}' + - '{"Concat": ["--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}"]}' + - '{"Concat": ["--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}"]}' + - '{"Concat": 
["--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}"]}' + - '{"Concat": ["--stats_gen_execution_engine=", "{{$.inputs.parameters[''stats_gen_execution_engine'']}}"]}' + - '{"Concat": ["--tf_transform_execution_engine=", "{{$.inputs.parameters[''tf_transform_execution_engine'']}}"]}' + - '{"IfPresent": {"InputName": "tf_auto_transform_features", "Then": {"Concat": + ["--tf_auto_transform_features=", "{{$.inputs.parameters[''tf_auto_transform_features'']}}"]}}}' + - '{"Concat": ["--tf_custom_transformation_definitions=", "{{$.inputs.parameters[''tf_custom_transformation_definitions'']}}"]}' + - '{"Concat": ["--tf_transformations_path=", "{{$.inputs.parameters[''tf_transformations_path'']}}"]}' + - '{"Concat": ["--legacy_transformations_path=", "{{$.inputs.parameters[''legacy_transformations_path'']}}"]}' + - '{"Concat": ["--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}"]}' + - '{"Concat": ["--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}"]}' + - '{"Concat": ["--bigquery_staging_full_dataset_id=", "{{$.inputs.parameters[''bigquery_staging_full_dataset_id'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"IfPresent": {"InputName": "model_type", "Then": {"Concat": ["--model_type=", + "{{$.inputs.parameters[''model_type'']}}"]}}}' + - '{"Concat": ["--multimodal_tabular_columns=", "{{$.inputs.parameters[''multimodal_tabular_columns'']}}"]}' + - '{"Concat": ["--multimodal_timeseries_columns=", "{{$.inputs.parameters[''multimodal_timeseries_columns'']}}"]}' + - '{"Concat": ["--multimodal_text_columns=", "{{$.inputs.parameters[''multimodal_text_columns'']}}"]}' + - '{"Concat": ["--multimodal_image_columns=", "{{$.inputs.parameters[''multimodal_image_columns'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--run_feature_selection=", "{{$.inputs.parameters[''run_feature_selection'']}}"]}' + - '{"Concat": ["--materialized_examples_format=", "{{$.inputs.parameters[''materialized_examples_format'']}}"]}' + - '{"Concat": ["--max_selected_features=", "{{$.inputs.parameters[''max_selected_features'']}}"]}' + - '{"Concat": ["--feature_selection_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/feature_selection_staging_dir"]}' + - '{"Concat": ["--feature_selection_algorithm=", "{{$.inputs.parameters[''feature_selection_algorithm'']}}"]}' + - '{"Concat": ["--feature_selection_execution_engine=", "{{$.inputs.parameters[''feature_selection_execution_engine'']}}"]}' + - '{"Concat": ["--feature_ranking_path=", "{{$.outputs.artifacts[''feature_ranking''].uri}}"]}' + - '{"Concat": ["--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.txt"]}' + - '{"Concat": ["--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}"]}' + - '{"Concat": ["--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform"]}' + - '{"Concat": ["--materialized_examples_path=", "{{$.inputs.parameters[''root_dir'']}}", + 
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized"]}' + - '{"Concat": ["--export_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/export"]}' + - '{"Concat": ["--materialized_data_path=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/materialized_data"]}' + - '{"Concat": ["--materialized_data_artifact_path=", "{{$.outputs.artifacts[''materialized_data''].uri}}"]}' + - '{"Concat": ["--bigquery_train_split_uri_path=", "{{$.outputs.parameters[''bigquery_train_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_validation_split_uri_path=", "{{$.outputs.parameters[''bigquery_validation_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--bigquery_downsampled_test_split_uri_path=", "{{$.outputs.parameters[''bigquery_downsampled_test_split_uri''].output_file}}"]}' + - '{"Concat": ["--split_example_counts_path=", "{{$.outputs.parameters[''split_example_counts''].output_file}}"]}' + - '{"Concat": ["--instance_schema_path=", "{{$.outputs.artifacts[''instance_schema''].path}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.outputs.artifacts[''training_schema''].path}}"]}' + - --job_name=feature-transform-engine-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} + - '{"Concat": ["--dataflow_project=", "{{$.inputs.parameters[''project'']}}"]}' + - '{"Concat": ["--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging"]}' + - '{"Concat": ["--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", + "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' + - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' + - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' + - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' + - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' + - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' + - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + - '{"Concat": ["--autodetect_csv_schema=", "{{$.inputs.parameters[''autodetect_csv_schema'']}}"]}' + - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + 
["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + resources: + cpuLimit: 8.0 + memoryLimit: 30.0 + exec-training-configurator-and-validator: + container: + args: + - training_configurator_and_validator + - '{"Concat": ["--instance_schema_path=", "{{$.inputs.artifacts[''instance_schema''].uri}}"]}' + - '{"Concat": ["--training_schema_path=", "{{$.inputs.artifacts[''training_schema''].uri}}"]}' + - '{"Concat": ["--dataset_stats_path=", "{{$.inputs.artifacts[''dataset_stats''].uri}}"]}' + - '{"Concat": ["--split_example_counts=", "{{$.inputs.parameters[''split_example_counts'']}}"]}' + - '{"Concat": ["--target_column=", "{{$.inputs.parameters[''target_column'']}}"]}' + - '{"Concat": ["--weight_column=", "{{$.inputs.parameters[''weight_column'']}}"]}' + - '{"Concat": ["--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}"]}' + - '{"Concat": ["--optimization_objective=", "{{$.inputs.parameters[''optimization_objective'']}}"]}' + - '{"Concat": ["--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}"]}' + - '{"Concat": ["--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}"]}' + - '{"Concat": ["--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}"]}' + - '{"Concat": ["--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}"]}' + - '{"Concat": ["--run_evaluation=", "{{$.inputs.parameters[''run_evaluation'']}}"]}' + - '{"Concat": ["--run_distill=", "{{$.inputs.parameters[''run_distill'']}}"]}' + - '{"Concat": ["--enable_probabilistic_inference=", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}"]}' + - '{"IfPresent": {"InputName": "time_series_identifier_column", "Then": {"Concat": + ["--time_series_identifier_column=", "{{$.inputs.parameters[''time_series_identifier_column'']}}"]}}}' + - '{"Concat": ["--time_series_identifier_columns=", "{{$.inputs.parameters[''time_series_identifier_columns'']}}"]}' + - '{"Concat": ["--time_column=", "{{$.inputs.parameters[''time_column'']}}"]}' + - '{"Concat": ["--time_series_attribute_columns=", "{{$.inputs.parameters[''time_series_attribute_columns'']}}"]}' + - '{"Concat": ["--available_at_forecast_columns=", 
"{{$.inputs.parameters[''available_at_forecast_columns'']}}"]}' + - '{"Concat": ["--unavailable_at_forecast_columns=", "{{$.inputs.parameters[''unavailable_at_forecast_columns'']}}"]}' + - '{"IfPresent": {"InputName": "quantiles", "Then": {"Concat": ["--quantiles=", + "{{$.inputs.parameters[''quantiles'']}}"]}}}' + - '{"Concat": ["--context_window=", "{{$.inputs.parameters[''context_window'']}}"]}' + - '{"Concat": ["--forecast_horizon=", "{{$.inputs.parameters[''forecast_horizon'']}}"]}' + - '{"Concat": ["--forecasting_model_type=", "{{$.inputs.parameters[''forecasting_model_type'']}}"]}' + - '{"Concat": ["--forecasting_transformations=", "{{$.inputs.parameters[''forecasting_transformations'']}}"]}' + - '{"IfPresent": {"InputName": "stage_1_deadline_hours", "Then": {"Concat": + ["--stage_1_deadline_hours=", "{{$.inputs.parameters[''stage_1_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "stage_2_deadline_hours", "Then": {"Concat": + ["--stage_2_deadline_hours=", "{{$.inputs.parameters[''stage_2_deadline_hours'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_columns", "Then": {"Concat": ["--group_columns=", + "{{$.inputs.parameters[''group_columns'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_total_weight", "Then": {"Concat": ["--group_total_weight=", + "{{$.inputs.parameters[''group_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "temporal_total_weight", "Then": {"Concat": + ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' + - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": + ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 +pipelineInfo: + description: Defines pipeline for feature transform engine component. 
+ name: feature-selection +root: + dag: + tasks: + feature-transform-engine: + cachingOptions: + enableCache: true + componentRef: + name: comp-feature-transform-engine + inputs: + parameters: + bigquery_staging_full_dataset_id: + componentInputParameter: bigquery_staging_full_dataset_id + data_source_bigquery_table_path: + componentInputParameter: data_source_bigquery_table_path + data_source_csv_filenames: + componentInputParameter: data_source_csv_filenames + dataflow_disk_size_gb: + componentInputParameter: dataflow_disk_size_gb + dataflow_machine_type: + componentInputParameter: dataflow_machine_type + dataflow_max_num_workers: + componentInputParameter: dataflow_max_num_workers + dataflow_service_account: + componentInputParameter: dataflow_service_account + dataflow_subnetwork: + componentInputParameter: dataflow_subnetwork + dataflow_use_public_ips: + componentInputParameter: dataflow_use_public_ips + dataset_level_custom_transformation_definitions: + componentInputParameter: dataset_level_custom_transformation_definitions + dataset_level_transformations: + componentInputParameter: dataset_level_transformations + embedding_batch_prediction_accelerator_count: + runtimeValue: + constant: -1.0 + embedding_batch_prediction_accelerator_type: + runtimeValue: + constant: accelerator_type_unspecified + embedding_batch_prediction_batch_size: + runtimeValue: + constant: -1.0 + embedding_batch_prediction_machine_type: + runtimeValue: + constant: '' + embedding_batch_prediction_max_replica_count: + runtimeValue: + constant: -1.0 + embedding_batch_prediction_starting_replica_count: + runtimeValue: + constant: -1.0 + embedding_prediction_server_docker_uri: + runtimeValue: + constant: '' + encryption_spec_key_name: + componentInputParameter: encryption_spec_key_name + feature_selection_algorithm: + componentInputParameter: feature_selection_algorithm + feature_selection_execution_engine: + componentInputParameter: feature_selection_execution_engine + forecasting_available_at_forecast_columns: + runtimeValue: + constant: [] + forecasting_context_window: + runtimeValue: + constant: -1.0 + forecasting_forecast_horizon: + runtimeValue: + constant: -1.0 + forecasting_holiday_regions: + runtimeValue: + constant: [] + forecasting_predefined_window_column: + runtimeValue: + constant: '' + forecasting_time_column: + runtimeValue: + constant: '' + forecasting_time_series_attribute_columns: + runtimeValue: + constant: [] + forecasting_time_series_identifier_columns: + runtimeValue: + constant: [] + forecasting_unavailable_at_forecast_columns: + runtimeValue: + constant: [] + forecasting_window_max_count: + runtimeValue: + constant: -1.0 + forecasting_window_stride_length: + runtimeValue: + constant: -1.0 + location: + componentInputParameter: location + materialized_examples_format: + runtimeValue: + constant: tfrecords_gzip + max_selected_features: + componentInputParameter: max_selected_features + multimodal_image_columns: + runtimeValue: + constant: [] + multimodal_tabular_columns: + runtimeValue: + constant: [] + multimodal_text_columns: + runtimeValue: + constant: [] + multimodal_timeseries_columns: + runtimeValue: + constant: [] + predefined_split_key: + componentInputParameter: predefined_split_key + prediction_type: + componentInputParameter: prediction_type + project: + componentInputParameter: project + root_dir: + componentInputParameter: root_dir + run_feature_selection: + componentInputParameter: run_feature_selection + stratified_split_key: + componentInputParameter: stratified_split_key + 
target_column: + componentInputParameter: target_column + test_fraction: + componentInputParameter: test_fraction + tf_auto_transform_features: + componentInputParameter: tf_auto_transform_features + tf_custom_transformation_definitions: + runtimeValue: + constant: [] + tf_transform_execution_engine: + runtimeValue: + constant: dataflow + tf_transformations_path: + runtimeValue: + constant: '' + training_fraction: + componentInputParameter: training_fraction + validation_fraction: + componentInputParameter: validation_fraction + weight_column: + componentInputParameter: weight_column + taskInfo: + name: feature-transform-engine + training-configurator-and-validator: + cachingOptions: + enableCache: true + componentRef: + name: comp-training-configurator-and-validator + dependentTasks: + - feature-transform-engine + inputs: + artifacts: + dataset_stats: + taskOutputArtifact: + outputArtifactKey: dataset_stats + producerTask: feature-transform-engine + instance_schema: + taskOutputArtifact: + outputArtifactKey: instance_schema + producerTask: feature-transform-engine + training_schema: + taskOutputArtifact: + outputArtifactKey: training_schema + producerTask: feature-transform-engine + parameters: + available_at_forecast_columns: + runtimeValue: + constant: [] + context_window: + runtimeValue: + constant: -1.0 + enable_probabilistic_inference: + runtimeValue: + constant: 0.0 + forecast_horizon: + runtimeValue: + constant: -1.0 + optimization_objective: + componentInputParameter: optimization_objective + optimization_objective_precision_value: + runtimeValue: + constant: -1.0 + optimization_objective_recall_value: + runtimeValue: + constant: -1.0 + prediction_type: + componentInputParameter: prediction_type + run_distill: + runtimeValue: + constant: 0.0 + run_evaluation: + runtimeValue: + constant: 0.0 + split_example_counts: + taskOutputParameter: + outputParameterKey: split_example_counts + producerTask: feature-transform-engine + stage_1_deadline_hours: + componentInputParameter: stage_1_deadline_hours + stage_2_deadline_hours: + componentInputParameter: stage_2_deadline_hours + target_column: + componentInputParameter: target_column + time_column: + runtimeValue: + constant: '' + time_series_attribute_columns: + runtimeValue: + constant: [] + time_series_identifier_columns: + runtimeValue: + constant: [] + unavailable_at_forecast_columns: + runtimeValue: + constant: [] + weight_column: + componentInputParameter: weight_column + taskInfo: + name: training-configurator-and-validator + inputDefinitions: + parameters: + bigquery_staging_full_dataset_id: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_bigquery_table_path: + defaultValue: '' + isOptional: true + parameterType: STRING + data_source_csv_filenames: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_disk_size_gb: + defaultValue: 40.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_machine_type: + defaultValue: n1-standard-16 + isOptional: true + parameterType: STRING + dataflow_max_num_workers: + defaultValue: 10.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataflow_service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_subnetwork: + defaultValue: '' + isOptional: true + parameterType: STRING + dataflow_use_public_ips: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + dataset_level_custom_transformation_definitions: + isOptional: true + parameterType: LIST + dataset_level_transformations: + isOptional: true 
+ parameterType: LIST + encryption_spec_key_name: + defaultValue: '' + isOptional: true + parameterType: STRING + feature_selection_algorithm: + defaultValue: AMI + isOptional: true + parameterType: STRING + feature_selection_execution_engine: + defaultValue: bigquery + isOptional: true + parameterType: STRING + location: + parameterType: STRING + max_selected_features: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_INTEGER + optimization_objective: + parameterType: STRING + predefined_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + prediction_type: + parameterType: STRING + project: + parameterType: STRING + root_dir: + parameterType: STRING + run_feature_selection: + defaultValue: false + isOptional: true + parameterType: BOOLEAN + stage_1_deadline_hours: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + stage_2_deadline_hours: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + stratified_split_key: + defaultValue: '' + isOptional: true + parameterType: STRING + target_column: + parameterType: STRING + test_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + tf_auto_transform_features: + isOptional: true + parameterType: STRUCT + training_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + validation_fraction: + defaultValue: -1.0 + isOptional: true + parameterType: NUMBER_DOUBLE + weight_column: + defaultValue: '' + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.0.0-beta.17 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py index 634e9b4184..d75bd80748 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -92,6 +92,15 @@ def feature_transform_engine( group_total_weight: float = 0.0, temporal_total_weight: float = 0.0, group_temporal_total_weight: float = 0.0, + embedding_prediction_server_docker_uri: Optional[str] = '', + embedding_batch_prediction_machine_type: Optional[str] = '', + embedding_batch_prediction_accelerator_type: Optional[ + str + ] = 'accelerator_type_unspecified', + embedding_batch_prediction_accelerator_count: Optional[int] = -1, + embedding_batch_prediction_starting_replica_count: Optional[int] = -1, + embedding_batch_prediction_max_replica_count: Optional[int] = -1, + embedding_batch_prediction_batch_size: Optional[int] = -1, ): # fmt: off """Transforms raw data to engineered features. @@ -525,7 +534,7 @@ def feature_transform_engine( weight_column: Weight column of input data. prediction_type: Model prediction type. One of "classification", "regression", "time_series". - run_distill: Whether the distillation should be applied + run_distill: (deprecated) Whether the distillation should be applied to the training. run_feature_selection: Whether the feature selection should be applied to the dataset. @@ -602,6 +611,23 @@ def feature_transform_engine( encryption_spec_key_name: Customer-managed encryption key. autodetect_csv_schema: If True, infers the column types when importing CSVs into BigQuery. + embedding_prediction_server_docker_uri: The docker image inside which to + run the embedding models to generate embeddings. 
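The signature additions above (with docstring entries continuing below) introduce the embedding_* knobs, which are passed straight through as --embedding_* container flags later in this hunk. A rough usage sketch, assuming the module is importable at the path in the diff header and using illustrative placeholder values rather than the component's defaults:

from kfp import dsl
# Assumption: importing the module directly from the path added in this patch.
from google_cloud_pipeline_components.preview.automl.tabular import (
    feature_transform_engine,
)

@dsl.pipeline(name='fte-embedding-sketch')
def fte_embedding_sketch():
    # All values below are placeholders for illustration only.
    feature_transform_engine.feature_transform_engine(
        project='my-project',
        location='us-central1',
        root_dir='gs://my-bucket/fte',
        embedding_prediction_server_docker_uri=(
            'us-docker.pkg.dev/my-project/my-repo/embedding-server:latest'),
        embedding_batch_prediction_machine_type='n1-highmem-32',
        embedding_batch_prediction_accelerator_type='NVIDIA_TESLA_T4',
        embedding_batch_prediction_accelerator_count=1,
        embedding_batch_prediction_starting_replica_count=20,
        embedding_batch_prediction_max_replica_count=50,
        embedding_batch_prediction_batch_size=1024,
    )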
+ embedding_batch_prediction_machine_type: The machine type to be + used to run the embedding batch prediction job. If not provided, + `n1-highmem-32` will be used. For more details, see: + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types + embedding_batch_prediction_accelerator_type: The accelerator type to use to + generate embeddings. If not provided, no accelerator is used. More + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype + embedding_batch_prediction_accelerator_count: The number of accelerators to + use to generate the embeddings. Default is 0. + embedding_batch_prediction_starting_replica_count: The starting replica count + for embedding batch prediction job. Default = 20. + embedding_batch_prediction_max_replica_count: The max replica count for + embedding batch prediction job. Default = 50. + embedding_batch_prediction_batch_size: The batch size for embedding batch + prediction job. Default = 1024. Returns: dataset_stats: The stats of the dataset. @@ -640,7 +666,7 @@ def feature_transform_engine( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', command=[], args=[ 'feature_transform_engine', @@ -969,8 +995,8 @@ def feature_transform_engine( dsl.ConcatPlaceholder( items=['--dataflow_machine_type=', dataflow_machine_type] ), - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', - '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', dsl.ConcatPlaceholder( items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] ), @@ -1020,5 +1046,53 @@ def feature_transform_engine( ] ), ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_prediction_server_docker_uri=', + embedding_prediction_server_docker_uri, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_machine_type=', + embedding_batch_prediction_machine_type, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_accelerator_type=', + embedding_batch_prediction_accelerator_type, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_accelerator_count=', + embedding_batch_prediction_accelerator_count, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_starting_replica_count=', + embedding_batch_prediction_starting_replica_count, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_max_replica_count=', + embedding_batch_prediction_max_replica_count, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--embedding_batch_prediction_batch_size=', + embedding_batch_prediction_batch_size, + ] + ), + dsl.ConcatPlaceholder( + items=[ + '--encryption_spec_key_name=', + encryption_spec_key_name, + ] + ), ], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py index c749dd2f61..cb5044ed5f 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -181,7 +181,7 @@ def tabnet_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -189,7 +189,7 @@ def tabnet_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml index 10076631fb..1ff0ae13ee 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -991,6 +991,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' 
+ isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1247,7 +1302,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1768,7 +1823,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1776,25 +1831,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1820,7 +1875,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1897,37 +1952,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2410,7 +2465,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -3015,7 +3070,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3030,7 +3085,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3146,8 +3201,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3163,7 +3218,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - 
'{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3237,7 +3300,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-tabnet-study-spec-parameters: container: args: @@ -3753,7 +3816,7 @@ deploymentSpec: \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ \ {extra_override_str} were not found in the params and '\n 'will\ \ be ignored.'\n )\n\n return study_spec_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-model-batch-predict: container: args: @@ -4039,19 +4102,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n 
)\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -4097,7 +4160,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-tabnet-hyperparameter-tuning-job: container: args: @@ -4125,11 +4188,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", @@ -4198,7 +4261,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. 
name: automl-tabular-tabnet-hyperparameter-tuning-job diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py index 53956587d9..ee2456320f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -204,7 +204,7 @@ def tabnet_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -212,7 +212,7 @@ def tabnet_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml index 9cf550cd89..5a06291db8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -1,6 +1,8 @@ # PIPELINE DEFINITION # Name: automl-tabular-tabnet-trainer -# Description: The TabNet training pipeline. +# Description: Train a model using the Tabular Workflow for TabNet pipelines. +# TabNet uses sequential attention to choose which features to reason from at +# each decision step, promoting interpretability and more efficient learning. # Inputs: # alpha_focal_loss: float [Default: 0.25] # batch_momentum: float [Default: 0.95] @@ -1018,6 +1020,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. 
For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1274,7 +1331,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1721,7 +1778,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1729,25 +1786,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1773,7 +1830,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1850,37 +1907,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2363,7 +2420,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
The key needs to be in the same region as where the compute resource @@ -3082,7 +3139,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3097,7 +3154,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3213,8 +3270,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3230,7 +3287,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", 
"{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3519,19 +3584,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -3577,7 +3642,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-tabnet-trainer: container: args: @@ -3595,11 +3660,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230817_0125", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", 
"{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -3686,9 +3751,13 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 pipelineInfo: - description: The TabNet training pipeline. + description: 'Train a model using the Tabular Workflow for TabNet pipelines. + + TabNet uses sequential attention to choose which features to reason from at + + each decision step, promoting interpretability and more efficient learning.' name: automl-tabular-tabnet-trainer root: dag: diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py index 096c5e378c..97e6b370a1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py @@ -21,6 +21,7 @@ _EVALUATION_DATAFLOW_STARTING_NUM_WORKERS = 10 _EVALUATION_DATAFLOW_MAX_NUM_WORKERS = 100 _EVALUATION_DATAFLOW_DISK_SIZE_GB = 50 +_FEATURE_SELECTION_EXECUTION_ENGINE_BIGQUERY = 'bigquery' # Needed because we reference the AutoML Tabular V1 pipeline. _GCPC_STAGING_PATH = pathlib.Path( @@ -46,8 +47,6 @@ def _generate_model_display_name() -> str: return f'tabular-workflow-model-{uuid.uuid4()}' -# TODO(b/277393122): Once we finish L2L+FTE integration, add use_fte flag -# to signify FTE usage instead of the presence of num_selected_features. def _get_default_pipeline_params( project: str, location: str, @@ -109,6 +108,7 @@ def _get_default_pipeline_params( num_selected_features: Optional[int] = None, model_display_name: str = '', model_description: str = '', + enable_fte: bool = False, ) -> Dict[str, Any]: """Get the AutoML Tabular v1 default training pipeline. @@ -223,6 +223,7 @@ def _get_default_pipeline_params( enable_probabilistic_inference and run_distillation cannot be enabled. model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. + enable_fte: Whether to enable the Feature Transform Engine. Returns: Tuple of pipeline_definition_path and parameter_values. 
@@ -326,8 +327,29 @@ def _get_default_pipeline_params( } ) - # V1 pipeline without FTE - if num_selected_features is None: + if run_distillation: + distillation_parameters = { + 'distill_batch_predict_machine_type': ( + distill_batch_predict_machine_type + ), + 'distill_batch_predict_starting_replica_count': ( + distill_batch_predict_starting_replica_count + ), + 'distill_batch_predict_max_replica_count': ( + distill_batch_predict_max_replica_count + ), + 'run_distillation': run_distillation, + } + parameter_values.update( + { + param: value + for param, value in distillation_parameters.items() + if value is not None + } + ) + + # V1 pipeline + if not enable_fte: if not additional_experiments: additional_experiments = {} @@ -362,35 +384,8 @@ def _get_default_pipeline_params( 'apply_feature_selection_tuning': apply_feature_selection_tuning, }) - if run_distillation: - distillation_parameters = { - 'distill_batch_predict_machine_type': ( - distill_batch_predict_machine_type - ), - 'distill_batch_predict_starting_replica_count': ( - distill_batch_predict_starting_replica_count - ), - 'distill_batch_predict_max_replica_count': ( - distill_batch_predict_max_replica_count - ), - 'run_distillation': run_distillation, - } - parameter_values.update( - { - param: value - for param, value in distillation_parameters.items() - if value is not None - } - ) - # V2 pipeline (with FTE) else: - if run_distillation: - raise ValueError( - 'Distillation is currently not supported' - ' when num_selected_features is specified.' - ) - parameters = { 'num_selected_features': num_selected_features, 'dataset_level_custom_transformation_definitions': [], @@ -478,6 +473,7 @@ def get_automl_tabular_pipeline_and_parameters( num_selected_features: Optional[int] = None, model_display_name: str = '', model_description: str = '', + enable_fte: bool = False, ) -> Tuple[str, Dict[str, Any]]: """Get the AutoML Tabular v1 default training pipeline. @@ -589,6 +585,7 @@ def get_automl_tabular_pipeline_and_parameters( defaults to None, in which case all features are used. model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. + enable_fte: Whether to enable the Feature Transform Engine. Returns: Tuple of pipeline_definition_path and parameter_values. 
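For reference, a minimal usage sketch of the new flag: after this change, whether the V1 pipeline or the FTE-based (V2) pipeline is selected is driven by enable_fte rather than by num_selected_features being set. The argument values below are placeholders, and the required arguments other than enable_fte are assumed to follow the existing signature; check the function definition before relying on them.

from google_cloud_pipeline_components.preview.automl.tabular import utils

# Placeholder values throughout; enable_fte is the only new argument.
template_path, parameter_values = utils.get_automl_tabular_pipeline_and_parameters(
    project='example-project',
    location='us-central1',
    root_dir='gs://example-bucket/pipeline_root',
    target_column='label',
    prediction_type='classification',
    optimization_objective='maximize-au-roc',
    transformations='gs://example-bucket/transformations.json',
    train_budget_milli_node_hours=1000,
    enable_fte=True,  # selects the FTE-based pipeline; False keeps the V1 pipeline
)
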
@@ -652,10 +649,11 @@ def get_automl_tabular_pipeline_and_parameters( num_selected_features=num_selected_features, model_display_name=model_display_name, model_description=model_description, + enable_fte=enable_fte, ) # V1 pipeline without FTE - if num_selected_features is None: + if not enable_fte: pipeline_definition_path = os.path.join( _GCPC_GA_TABULAR_PATH, 'automl_tabular_pipeline.yaml' ) @@ -3358,3 +3356,98 @@ def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( ) return pipeline_definition_path, parameter_values + + +def get_feature_selection_pipeline_and_parameters( + root_dir: str, + project: str, + location: str, + target_column: str, + prediction_type: str, + optimization_objective: str, + dataset_level_custom_transformation_definitions: Optional[ + List[Dict[str, Any]] + ] = None, + dataset_level_transformations: Optional[List[Dict[str, Any]]] = None, + run_feature_selection: Optional[bool] = None, + feature_selection_algorithm: Optional[str] = None, + feature_selection_execution_engine: Optional[ + str + ] = _FEATURE_SELECTION_EXECUTION_ENGINE_BIGQUERY, + max_selected_features: Optional[int] = None, + predefined_split_key: Optional[str] = None, + stratified_split_key: Optional[str] = None, + training_fraction: Optional[float] = None, + validation_fraction: Optional[float] = None, + test_fraction: Optional[float] = None, + tf_auto_transform_features: Optional[ + Union[List[str], Dict[str, List[str]]] + ] = None, + weight_column: Optional[str] = None, + data_source_csv_filenames: Optional[str] = None, + data_source_bigquery_table_path: Optional[str] = None, + bigquery_staging_full_dataset_id: Optional[str] = None, + dataflow_machine_type: Optional[str] = None, + dataflow_max_num_workers: Optional[int] = None, + dataflow_disk_size_gb: Optional[int] = None, + dataflow_subnetwork: Optional[str] = None, + dataflow_use_public_ips: Optional[bool] = None, + encryption_spec_key_name: Optional[str] = None, + stage_1_deadline_hours: Optional[float] = None, + stage_2_deadline_hours: Optional[float] = None, +): + """Returns feature transform engine pipeline and formatted parameters.""" + + if isinstance(tf_auto_transform_features, list): + tf_auto_transform_features = {'auto': tf_auto_transform_features} + + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), 'feature_selection_pipeline.yaml' + ) + + parameter_values = { + 'root_dir': root_dir, + 'project': project, + 'location': location, + 'target_column': target_column, + 'weight_column': weight_column, + 'prediction_type': prediction_type, + 'dataset_level_custom_transformation_definitions': ( + dataset_level_custom_transformation_definitions + if dataset_level_custom_transformation_definitions + else [] + ), + 'dataset_level_transformations': ( + dataset_level_transformations if dataset_level_transformations else [] + ), + 'run_feature_selection': run_feature_selection, + 'feature_selection_algorithm': feature_selection_algorithm, + 'feature_selection_execution_engine': feature_selection_execution_engine, + 'max_selected_features': max_selected_features, + 'predefined_split_key': predefined_split_key, + 'stratified_split_key': stratified_split_key, + 'training_fraction': training_fraction, + 'validation_fraction': validation_fraction, + 'test_fraction': test_fraction, + 'tf_auto_transform_features': tf_auto_transform_features, + 'optimization_objective': optimization_objective, + 'data_source_csv_filenames': data_source_csv_filenames, + 'data_source_bigquery_table_path': 
data_source_bigquery_table_path, + 'bigquery_staging_full_dataset_id': bigquery_staging_full_dataset_id, + 'dataflow_machine_type': dataflow_machine_type, + 'dataflow_max_num_workers': dataflow_max_num_workers, + 'dataflow_disk_size_gb': dataflow_disk_size_gb, + 'dataflow_subnetwork': dataflow_subnetwork, + 'dataflow_use_public_ips': dataflow_use_public_ips, + 'encryption_spec_key_name': encryption_spec_key_name, + 'stage_1_deadline_hours': stage_1_deadline_hours, + 'stage_2_deadline_hours': stage_2_deadline_hours, + } + + parameter_values = { + param: value + for param, value in parameter_values.items() + if value is not None + } + + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py index caa5ed2ab1..eeb22001f1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -181,7 +181,7 @@ def wide_and_deep_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -189,7 +189,7 @@ def wide_and_deep_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml index d4bc2cdf2f..65d9adf30a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -943,6 +943,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1199,7 +1254,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1706,7 +1761,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1714,25 +1769,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1758,7 +1813,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1835,37 +1890,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2243,7 +2298,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
The key needs to be in the same region as where the compute resource @@ -2848,7 +2903,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2863,7 +2918,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -2979,8 +3034,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2996,7 +3051,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", 
"{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3070,7 +3133,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-wide-and-deep-study-spec-parameters: container: args: @@ -3396,19 +3459,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -3454,7 +3517,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -3499,7 +3562,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-wide-and-deep-hyperparameter-tuning-job: container: args: @@ -3527,11 +3590,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py index bc4b5d00fb..5cdd8cf9bf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -192,7 +192,7 @@ def wide_and_deep_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -200,7 +200,7 @@ def wide_and_deep_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml index 625b0b3b19..d7638c59f7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -1,6 +1,8 @@ # PIPELINE DEFINITION # Name: automl-tabular-wide-and-deep-trainer -# Description: The Wide & Deep training pipeline. +# Description: Train a model using the Tabular Workflow for Wide & Deep pipelines. 
+# Wide & Deep jointly trains wide linear models and deep neural networks. It +# combines the benefits of memorization and generalization. # Inputs: # batch_size: int [Default: 100.0] # beta_1: float [Default: 0.9] @@ -971,6 +973,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1227,7 +1284,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1674,7 +1731,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1682,25 +1739,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. 
The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1726,7 +1783,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1803,37 +1860,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2211,7 +2268,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
The key needs to be in the same region as where the compute resource @@ -2902,7 +2959,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2917,7 +2974,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3033,8 +3090,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3050,7 +3107,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", 
"{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3295,19 +3360,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -3353,7 +3418,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -3398,7 +3463,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-wide-and-deep-trainer: container: args: @@ -3416,11 +3481,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", 
\"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230817_0125", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -3460,7 +3525,11 @@ deploymentSpec: - google_cloud_pipeline_components.container.v1.custom_job.launcher image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 pipelineInfo: - description: The Wide & Deep training pipeline. + description: 'Train a model using the Tabular Workflow for Wide & Deep pipelines. + + Wide & Deep jointly trains wide linear models and deep neural networks. It + + combines the benefits of memorization and generalization.' name: automl-tabular-wide-and-deep-trainer root: dag: diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml index 1c37b19c30..93556638b1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -929,6 +929,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1185,7 +1240,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1815,7 +1870,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1823,25 +1878,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1867,7 +1922,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1944,37 +1999,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. 
If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2352,7 +2407,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -2823,7 +2878,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2945,8 +3000,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2962,7 +3017,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": 
["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3032,7 +3095,7 @@ deploymentSpec: \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\ \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\ \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\ - \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230817_0125',\n\ + \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230910_1325',\n\ \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\ \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\ \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\ @@ -3045,7 +3108,7 @@ deploymentSpec: \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ - \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230817_0125',\n\ + \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230910_1325',\n\ \ ],\n },\n }\n\n # Add optional arguments if set\n if\ \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\ \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\ @@ -3135,7 +3198,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-prediction-type-for-xgboost: container: args: @@ -3740,19 +3803,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n 
elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -3798,7 +3861,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -3843,7 +3906,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-xgboost-hyperparameter-tuning-job: container: args: diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml index 9101e59a3d..4222aa3987 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -1029,6 +1029,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. 
If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1285,7 +1340,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -2079,7 +2134,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -2087,25 +2142,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has `google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2131,7 +2186,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
The key needs to be in the same region as where the compute resource @@ -2208,37 +2263,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -2616,7 +2671,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. 
The key needs to be in the same region as where the compute resource @@ -3026,7 +3081,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3148,8 +3203,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3165,7 +3220,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ 
-3291,10 +3354,10 @@ deploymentSpec: \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ - \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230817_0125'\n\ + \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230910_1325'\n\ \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ - \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230817_0125',\n\ + \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230910_1325',\n\ \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ @@ -3574,19 +3637,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-split-materialized-data: container: args: @@ -3632,7 +3695,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 exec-training-configurator-and-validator: container: args: @@ -3677,7 +3740,7 
@@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-xgboost-trainer: container: args: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml index 2a9cb6156a..dee5297a75 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml @@ -655,7 +655,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-create-dataset-2: container: args: @@ -690,7 +690,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -724,7 +724,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-query-job: container: args: @@ -785,7 +785,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-first-valid: container: args: @@ -815,7 +815,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-model-metadata: container: args: @@ -854,7 +854,7 @@ deploymentSpec: \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ \ options.time_series_id_column,\n options.time_series_data_column,\n\ \ options.horizon,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-table-location: container: args: @@ -890,7 +890,7 @@ deploymentSpec: \ if 
table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-load-table-from-uri: container: args: @@ -931,7 +931,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-maybe-replace-with-default: container: args: @@ -959,7 +959,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-validate-inputs: container: args: @@ -1061,7 +1061,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 pipelineInfo: description: Forecasts using a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-prediction diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml index 6c1832bafe..fd5886e218 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -2162,6 +2162,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. 
For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -2418,7 +2473,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -3521,7 +3576,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-create-dataset-2: container: args: @@ -3556,7 +3611,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-create-model-job: container: args: @@ -3616,7 +3671,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-list-rows: container: args: @@ -3654,7 +3709,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-list-rows-2: container: args: @@ -3692,7 +3747,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-query-job: container: args: @@ -3861,7 +3916,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-2: container: args: @@ -3895,7 +3950,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-3: container: args: @@ -3929,7 +3984,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-4: container: args: @@ -3963,7 +4018,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-5: container: args: @@ -3997,7 +4052,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-6: container: args: @@ -4031,7 +4086,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-serialized-query-parameters: container: args: @@ -4108,7 +4163,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-serialized-query-parameters-2: container: args: @@ -4185,7 +4240,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-serialized-query-parameters-3: container: args: @@ -4262,7 +4317,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-cond: container: args: @@ -4290,7 +4345,7 @@ deploymentSpec: \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ \ 
return true_str if predicate else false_str\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-create-metrics-artifact: container: args: @@ -4322,7 +4377,7 @@ deploymentSpec: \ 'MAPE': 'meanAbsolutePercentageError',\n }\n metrics = {metric_name_map[k]:\ \ v for k, v in dict(metrics_rows[0]).items()}\n evaluation_metrics.metadata\ \ = metrics\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-feature-transform-engine: container: args: @@ -4407,8 +4462,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -4424,7 +4479,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-get-fte-suffix: container: args: @@ -4458,7 +4521,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE 
output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-table-location: container: args: @@ -4494,7 +4557,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-value: container: args: @@ -4521,7 +4584,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-window-query-priority: container: args: @@ -4551,7 +4614,7 @@ deploymentSpec: \ depending on the window number.\"\"\"\n if int(window['window_number'])\ \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-maybe-replace-with-default: container: args: @@ -4579,7 +4642,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-query-with-retry: container: args: @@ -4633,7 +4696,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-query-with-retry-2: container: args: @@ -4687,7 +4750,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-query-with-retry-3: container: args: @@ -4741,7 +4804,7 @@ deploymentSpec: \ 'Query failed with %s. 
Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri: container: args: @@ -4777,7 +4840,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -4813,7 +4876,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-validate-inputs: container: args: @@ -4915,7 +4978,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-wrapped-in-list: container: args: @@ -4942,7 +5005,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ \ in a list.\"\"\"\n return [value]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 pipelineInfo: description: Trains a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-train diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml index 3ccdd129b2..896d227869 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -901,7 +901,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -909,25 +909,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. 
The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -953,7 +953,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -1030,37 +1030,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -1442,7 +1442,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -1476,7 +1476,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-query-job: container: args: @@ -1564,7 +1564,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-build-job-configuration-query-2: container: args: @@ -1598,7 +1598,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-first-valid: container: args: @@ -1628,7 +1628,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-table-location: container: args: @@ -1664,7 +1664,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-table-location-2: container: args: @@ -1700,7 +1700,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-load-table-from-uri: container: args: @@ -1741,7 +1741,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-make-vertex-model-artifact: container: args: @@ -1771,7 +1771,7 @@ deploymentSpec: Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ \ {'resourceName': 
model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ \ f'/v1/{model_resource_name}')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-maybe-replace-with-default: container: args: @@ -1799,7 +1799,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-model-batch-predict: container: args: @@ -1884,7 +1884,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-table-to-uri-2: container: args: @@ -1920,7 +1920,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-validate-inputs: container: args: @@ -2022,7 +2022,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 pipelineInfo: description: Creates a batch prediction using a Prophet model. 
name: prophet-predict diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py index 5961bce1cc..6b91061171 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -131,17 +131,17 @@ def prophet_trainer( '"machine_spec": {"machine_type": "n1-standard-4"}, ', ( '"container_spec":' - ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", ' + ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", ' ), '"args": ["prophet_trainer", "', ( f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "' ), ( - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", "' + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "' ), ( - '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230817_0125", "' + '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230910_1325", "' ), '--artifacts_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml index aff359fcc6..89d1ddb5f3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -935,6 +935,61 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST + embedding_batch_prediction_accelerator_count: + defaultValue: -1.0 + description: 'The number of accelerators to + + use to generate the embeddings. Default is 0.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_accelerator_type: + defaultValue: accelerator_type_unspecified + description: 'The accelerator type to use to + + generate embeddings. If not provided, no accelerator is used. More + + details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' + isOptional: true + parameterType: STRING + embedding_batch_prediction_batch_size: + defaultValue: -1.0 + description: 'The batch size for embedding batch + + prediction job. Default = 1024.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_machine_type: + defaultValue: '' + description: 'The machine type to be + + used to run the embedding batch prediction job. If not provided, + + `n1-highmem-32` will be used. For more details, see: + + https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' + isOptional: true + parameterType: STRING + embedding_batch_prediction_max_replica_count: + defaultValue: -1.0 + description: 'The max replica count for + + embedding batch prediction job. Default = 50.' + isOptional: true + parameterType: NUMBER_INTEGER + embedding_batch_prediction_starting_replica_count: + defaultValue: -1.0 + description: 'The starting replica count + + for embedding batch prediction job. Default = 20.' 
+ isOptional: true + parameterType: NUMBER_INTEGER + embedding_prediction_server_docker_uri: + defaultValue: '' + description: 'The docker image inside which to + + run the embedding models to generate embeddings.' + isOptional: true + parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1191,7 +1246,7 @@ components: parameterType: STRING run_distill: defaultValue: false - description: 'Whether the distillation should be applied + description: '(deprecated) Whether the distillation should be applied to the training.' isOptional: true @@ -1863,7 +1918,7 @@ components: Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -2198,7 +2253,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -2232,7 +2287,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-bigquery-query-job: container: args: @@ -2293,7 +2348,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-feature-transform-engine: container: args: @@ -2378,8 +2433,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2395,7 +2450,15 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: 
us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125 + - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' + - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' + - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 exec-get-fte-suffix: container: args: @@ -2429,7 +2492,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-get-table-location: container: args: @@ -2465,7 +2528,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-model-evaluation-regression: container: args: @@ -2574,10 +2637,10 @@ deploymentSpec: ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": - {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125\", + {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325\", ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", - \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125\", - \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230817_0125\", + \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325\", + \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230910_1325\", \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", @@ -2641,7 +2704,7 @@ deploymentSpec: 
\ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-validate-inputs: container: args: @@ -2743,7 +2806,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-wrapped-in-list: container: args: @@ -2770,7 +2833,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ \ in a list.\"\"\"\n return [value]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 pipelineInfo: description: Trains one Prophet model per time series. name: prophet-train diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml index f9b15715d9..583bec59ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -1,6 +1,7 @@ # PIPELINE DEFINITION # Name: automl-tabular -# Description: The AutoML Tabular pipeline v1. +# Description: Complete AutoML Tables pipeline. +# Includes feature engineering, architecture search, and hyper-parameter tuning. # Inputs: # additional_experiments: dict # cv_trainer_worker_pool_specs_override: list @@ -5223,7 +5224,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5231,25 +5232,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5275,7 +5276,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -5352,37 +5353,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5685,7 +5686,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5693,25 +5694,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. 
The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5737,7 +5738,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -5814,37 +5815,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6147,7 +6148,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6155,25 +6156,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. 
If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6199,7 +6200,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -6276,37 +6277,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6609,7 +6610,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6617,25 +6618,25 @@ components: "based on ISO-8601" format. 
In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6661,7 +6662,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -6738,37 +6739,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -7071,7 +7072,7 @@ components: the given project a new dataset is created with name - `prediction__` where is made + ``prediction__`` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7079,25 +7080,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` + ``predictions``, and ``errors``. If the Model has both ``instance`` - and `prediction` schemata defined then the tables have columns as + and ``prediction`` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the + follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The `errors` table + Model''s instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ``google.rpc.Status` + column, which as values has ```google.rpc.Status`` `__ - represented as a STRUCT, and containing only `code` and + represented as a STRUCT, and containing only ``code`` and - `message`. For more details about this output config, see + ``message``. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7123,7 +7124,7 @@ components: provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource @@ -7200,37 +7201,37 @@ components: to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp + ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, + ``predictions_0001.``, ``predictions_0002.``, - ..., `predictions_N.` are created where `` + ..., ``predictions_N.`` are created where ```` - depends on chosen `predictions_format`, and N may equal 0001 and + depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined + the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed + ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., + ``errors_0001.``, ``errors_0002.``,..., - `errors_N.` files are created (N depends on total number + ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as + per their schema, followed by an additional ``error`` field which as - value has `google.rpc.Status` containing only `code` and + value has ``google.rpc.Status`` containing only ``code`` and - `message` fields. For more details about this output config, see + ``message`` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -8581,9 +8582,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8624,9 +8625,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8667,7 +8668,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8679,7 +8680,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", 
"us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8708,7 +8709,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8720,7 +8721,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8749,7 +8750,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8761,7 +8762,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", 
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8790,7 +8791,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -8805,7 +8806,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8814,7 +8815,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8823,7 +8824,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8843,9 +8844,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8890,9 +8891,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", + 
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8937,7 +8938,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -8958,7 +8959,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -8989,7 +8990,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9010,7 +9011,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", 
\"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -10395,19 +10396,19 @@ deploymentSpec: \ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ \ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ \ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ - \ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ - \ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ - \ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ - \ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ - \ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ - \ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ - \ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ - \ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ - \ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ + \ input_config = dataset.metadata['inputConfig']\n if 'gcsSource'\ + \ in input_config:\n data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])\n\ + \ elif 'bigquerySource' in input_config:\n data_source_bigquery_table_path\ + \ = input_config['bigquerySource']['uri']\n elif data_source_csv_filenames:\n\ + \ pass\n elif data_source_bigquery_table_path:\n pass\n else:\n\ + \ raise ValueError(\n 'One of vertex_dataset, data_source_csv_filenames,'\n\ + \ ' data_source_bigquery_table_path must be specified'\n )\n\n\ + \ return collections.namedtuple(\n 'Outputs',\n [\n \ + \ 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 exec-string-not-empty: container: args: @@ -10454,7 +10455,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": @@ -10487,7 +10488,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "\", \"--dataflow_machine_type=", 
"{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", @@ -10586,7 +10587,9 @@ deploymentSpec: \n" image: python:3.7 pipelineInfo: - description: The AutoML Tabular pipeline v1. + description: 'Complete AutoML Tables pipeline. + + Includes feature engineering, architecture search, and hyper-parameter tuning.' name: automl-tabular root: dag: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py index 448f1187a9..1c17a6fc8c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -105,11 +105,11 @@ def automl_tabular_cv_trainer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["l2l_cv_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', ( f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' ' "--training_base_dir=' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py index 6e7f0eaeeb..15d1a5463a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -113,7 +113,7 @@ def automl_tabular_ensemble( ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["ensemble", "--transform_output_path=', transform_output.uri, '", "--model_output_path=', @@ -144,7 +144,7 @@ def automl_tabular_ensemble( '", "--warmup_data=', warmup_data.uri, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', '", "--model_path=', model.uri, '", "--custom_model_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py index 350da2b07b..c6ecce48be 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -73,7 +73,7 @@ def automl_tabular_finalizer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["cancel_l2l_tuner", "--error_file_path=', 
root_dir, ( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py index ca147dd5fa..fd38a7e298 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -33,7 +33,7 @@ def automl_tabular_infra_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', command=[], args=['--executor_input', '{{$}}'], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py index b0175154b6..8b8af9e135 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py @@ -52,7 +52,7 @@ def split_materialized_data( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', command=[ 'sh', '-ec', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py index 8e0c9a7c60..24a25fa05c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -122,11 +122,11 @@ def automl_tabular_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "--feature_selection_result_path=', feature_ranking.uri, '", "--disable_early_stopping=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py index 6013e0d8d1..73ee939b0d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -174,7 +174,7 @@ def tabular_stats_and_example_gen( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', '", "args": ["stats_generator",', '"--train_spec={\\"prediction_type\\": \\"', prediction_type, @@ -253,7 +253,7 @@ def 
tabular_stats_and_example_gen( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py index 7c42727ac1..62fae23365 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -144,7 +144,7 @@ def training_configurator_and_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230817_0125', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', command=[], args=[ 'training_configurator_and_validator', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py index af5542192d..0bef5c4d66 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py @@ -116,7 +116,7 @@ def automl_tabular_transform( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', ( '", "args": ["transform", "--is_mp=true",' ' "--transform_output_artifact_path=' @@ -175,7 +175,7 @@ def automl_tabular_transform( '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', '", "--dataflow_disk_size_gb=', dataflow_disk_size_gb, '", "--dataflow_subnetwork_fully_qualified=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py index 2c19976e47..6889db79ae 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py @@ -651,6 +651,9 @@ def get_automl_tabular_pipeline_and_parameters( _GCPC_PREVIEW_TABULAR_PATH, 'automl_tabular_v2_pipeline.yaml', ) + # V2 pipeline requires execution engine to be set. 
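# A minimal sketch, not part of the patch: the guarded assignment added in this
# hunk is equivalent to dict.setdefault, so a caller of
# get_automl_tabular_pipeline_and_parameters() that omits the key still gets the
# Dataflow engine. The sample parameter_values dict below is hypothetical.
example_parameter_values = {'root_dir': 'gs://bucket/root'}
example_parameter_values.setdefault('tf_transform_execution_engine', 'dataflow')
assert example_parameter_values['tf_transform_execution_engine'] == 'dataflow'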
+ if 'tf_transform_execution_engine' not in parameter_values: + parameter_values['tf_transform_execution_engine'] = 'dataflow' return pipeline_definition_path, parameter_values From 1fbdeb2abfba900a2492e58099b15fa071eebcd6 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 14 Sep 2023 09:50:24 -0700 Subject: [PATCH 149/253] chore(components): fix GCPC markdown docstrings rendering PiperOrigin-RevId: 565397565 --- components/google-cloud/docs/source/conf.py | 5 +- .../preview/automl/forecasting/__init__.py | 1 - .../forecasting/forecasting_ensemble.py | 17 +- .../forecasting/forecasting_stage_1_tuner.py | 26 +- .../forecasting/forecasting_stage_2_tuner.py | 25 +- .../preview/automl/tabular/__init__.py | 1 - .../tabular/auto_feature_engineering.py | 3 +- ...illation_stage_feature_transform_engine.py | 63 +- .../automl/tabular/feature_selection.py | 37 +- .../tabular/feature_transform_engine.py | 678 +++++------------- .../tabnet_hyperparameter_tuning_job.py | 50 +- .../preview/automl/tabular/tabnet_trainer.py | 88 +-- .../preview/automl/tabular/utils.py | 97 ++- ...wide_and_deep_hyperparameter_tuning_job.py | 50 +- .../automl/tabular/wide_and_deep_trainer.py | 76 +- .../xgboost_hyperparameter_tuning_job.py | 33 +- .../preview/automl/tabular/xgboost_trainer.py | 4 +- .../dataflow/flex_template/component.py | 123 +--- .../preview/llm/infer/component.py | 36 +- .../preview/llm/rlhf/component.py | 78 +- .../model_evaluation/data_bias_component.py | 58 +- .../evaluation_llm_classification_pipeline.py | 87 +-- ...evaluation_llm_text_generation_pipeline.py | 68 +- .../feature_attribution_component.py | 61 +- .../feature_attribution_graph_component.py | 138 +--- .../model_evaluation/model_bias_component.py | 62 +- .../preview/model_evaluation/utils.py | 8 +- .../types/artifact_types.py | 113 ++- .../v1/automl/forecasting/__init__.py | 1 - .../v1/automl/forecasting/prophet_trainer.py | 59 +- .../v1/automl/forecasting/utils.py | 12 +- .../v1/automl/tabular/__init__.py | 1 - .../v1/automl/tabular/cv_trainer.py | 15 +- .../v1/automl/tabular/ensemble.py | 18 +- .../v1/automl/tabular/finalizer.py | 4 +- .../v1/automl/tabular/infra_validator.py | 4 +- .../automl/tabular/split_materialized_data.py | 1 - .../v1/automl/tabular/stage_1_tuner.py | 31 +- .../automl/tabular/stats_and_example_gen.py | 74 +- .../training_configurator_and_validator.py | 100 +-- .../v1/automl/tabular/transform.py | 22 +- .../v1/automl/tabular/utils.py | 59 +- .../v1/automl/training_job/__init__.py | 5 +- .../component.py | 293 ++------ .../automl_image_training_job/component.py | 183 +---- .../automl_tabular_training_job/component.py | 230 +----- .../automl_text_training_job/component.py | 101 +-- .../automl_video_training_job/component.py | 107 +-- .../v1/batch_predict_job/__init__.py | 2 + .../v1/batch_predict_job/component.py | 233 +----- .../v1/bigquery/__init__.py | 2 + .../v1/bigquery/create_model/component.py | 27 +- .../detect_anomalies_model/component.py | 65 +- .../v1/bigquery/drop_model/component.py | 25 +- .../v1/bigquery/evaluate_model/component.py | 56 +- .../explain_forecast_model/component.py | 49 +- .../explain_predict_model/component.py | 70 +- .../v1/bigquery/export_model/component.py | 23 +- .../bigquery/feature_importance/component.py | 41 +- .../v1/bigquery/forecast_model/component.py | 48 +- .../v1/bigquery/global_explain/component.py | 20 +- .../bigquery/ml_advanced_weights/component.py | 33 +- .../ml_arima_coefficients/component.py | 34 +- .../bigquery/ml_arima_evaluate/component.py | 48 +- 
.../v1/bigquery/ml_centroids/component.py | 46 +- .../bigquery/ml_confusion_matrix/component.py | 44 +- .../v1/bigquery/ml_feature_info/component.py | 33 +- .../ml_principal_component_info/component.py | 43 +- .../ml_principal_components/component.py | 44 +- .../v1/bigquery/ml_recommend/component.py | 48 +- .../ml_reconstruction_loss/component.py | 51 +- .../v1/bigquery/ml_roc_curve/component.py | 44 +- .../v1/bigquery/ml_training_info/component.py | 34 +- .../v1/bigquery/ml_trial_info/component.py | 38 +- .../v1/bigquery/ml_weights/component.py | 34 +- .../v1/bigquery/predict_model/component.py | 56 +- .../v1/bigquery/query_job/component.py | 44 +- .../v1/custom_job/__init__.py | 2 + .../v1/custom_job/component.py | 62 +- .../v1/custom_job/utils.py | 118 +-- .../v1/dataflow/__init__.py | 2 + .../v1/dataflow/python_job/component.py | 12 +- .../v1/dataproc/__init__.py | 2 + .../create_pyspark_batch/component.py | 54 +- .../dataproc/create_spark_batch/component.py | 54 +- .../create_spark_r_batch/component.py | 44 +- .../create_spark_sql_batch/component.py | 40 +- .../v1/dataset/__init__.py | 2 + .../dataset/create_image_dataset/component.py | 52 +- .../create_tabular_dataset/component.py | 30 +- .../dataset/create_text_dataset/component.py | 54 +- .../create_time_series_dataset/component.py | 30 +- .../dataset/create_video_dataset/component.py | 53 +- .../dataset/export_image_dataset/component.py | 13 +- .../export_tabular_dataset/component.py | 13 +- .../dataset/export_text_dataset/component.py | 13 +- .../export_time_series_dataset/component.py | 13 +- .../dataset/export_video_dataset/component.py | 13 +- .../dataset/import_image_dataset/component.py | 26 +- .../dataset/import_text_dataset/component.py | 28 +- .../dataset/import_video_dataset/component.py | 28 +- .../v1/endpoint/__init__.py | 2 + .../v1/endpoint/create_endpoint/component.py | 35 +- .../v1/endpoint/delete_endpoint/component.py | 4 +- .../v1/endpoint/deploy_model/component.py | 99 +-- .../v1/endpoint/undeploy_model/component.py | 12 +- .../v1/forecasting/__init__.py | 2 + .../prepare_data_for_train/component.py | 38 +- .../v1/forecasting/preprocess/component.py | 3 +- .../v1/hyperparameter_tuning_job/__init__.py | 3 +- .../v1/hyperparameter_tuning_job/component.py | 114 +-- .../v1/hyperparameter_tuning_job/utils.py | 35 +- .../v1/model/__init__.py | 2 + .../v1/model/delete_model/component.py | 6 +- .../v1/model/export_model/component.py | 40 +- .../v1/model/upload_model/component.py | 50 +- .../v1/model_evaluation/__init__.py | 1 - .../classification_component.py | 134 +--- .../error_analysis_pipeline.py | 116 +-- .../evaluated_annotation_pipeline.py | 103 +-- ...ml_tabular_feature_attribution_pipeline.py | 452 +++--------- .../evaluation_automl_tabular_pipeline.py | 393 ++-------- ...uation_automl_unstructure_data_pipeline.py | 416 ++--------- ...evaluation_feature_attribution_pipeline.py | 471 +++--------- .../model_evaluation/forecasting_component.py | 96 +-- .../model_evaluation/regression_component.py | 84 +-- .../v1/vertex_notification_email/component.py | 6 +- .../v1/wait_gcp_resources/__init__.py | 2 + .../v1/wait_gcp_resources/component.py | 23 +- 129 files changed, 1731 insertions(+), 6510 deletions(-) diff --git a/components/google-cloud/docs/source/conf.py b/components/google-cloud/docs/source/conf.py index d8d574af93..3f2f7de1c4 100644 --- a/components/google-cloud/docs/source/conf.py +++ b/components/google-cloud/docs/source/conf.py @@ -328,6 +328,7 @@ def remove_after_returns_in_place(lines: List[str]) -> 
bool: return False def process_named_docstring_returns(app, what, name, obj, options, lines): + markdown_to_rst(lines) if getattr(obj, '_is_component', False): has_returns_section = remove_after_returns_in_place(lines) if has_returns_section: @@ -335,10 +336,8 @@ def process_named_docstring_returns(app, what, name, obj, options, lines): lines.extend([':returns:', '']) lines.extend(returns_section) - markdown_to_rst(app, what, name, obj, options, lines) - -def markdown_to_rst(app, what, name, obj, options, lines): +def markdown_to_rst(lines: List[str]) -> List[str]: md = '\n'.join(lines) ast = commonmark.Parser().parse(md) rst = commonmark.ReStructuredTextRenderer().render(ast) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py index befa20f9ad..e6535b039a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Experimental AutoML forecasting components.""" from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py index 4cf088feaf..8782c6d880 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Forecasting Ensemble component spec.""" from typing import Optional @@ -54,20 +53,14 @@ def automl_forecasting_ensemble( root_dir: The Cloud Storage path to store the output. transform_output: The transform output artifact. metadata: The tabular example gen metadata. - tuning_result_input: AutoML Tabular tuning - result. - instance_baseline: The instance baseline - used to calculate explanations. - instance_schema_path: The path to the instance schema, - describing the input data for the tf_model at serving time. + tuning_result_input: AutoML Tabular tuning result. + instance_baseline: The instance baseline used to calculate explanations. + instance_schema_path: The path to the instance schema, describing the input data for the tf_model at serving time. encryption_spec_key_name: Customer-managed encryption key. - prediction_image_uri: URI of the Docker image to be used as the - container for serving predictions. This URI must identify an image in - Artifact Registry or Container Registry. + prediction_image_uri: URI of the Docker image to be used as the container for serving predictions. This URI must identify an image in Artifact Registry or Container Registry. Returns: - gcp_resources: GCP resources created by this component. 
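# A minimal sketch of the Markdown-to-reStructuredText conversion performed by the
# markdown_to_rst helper in the conf.py diff above; the wrapper name and the sample
# docstring lines are hypothetical, while the commonmark calls mirror the diff.
from typing import List

import commonmark


def md_lines_to_rst_lines(lines: List[str]) -> List[str]:
    md = '\n'.join(lines)
    ast = commonmark.Parser().parse(md)
    rst = commonmark.ReStructuredTextRenderer().render(ast)
    return rst.splitlines()


print(md_lines_to_rst_lines(['Trains a model.', '', '* `project`: GCP project.']))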
For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. model_architecture: The architecture of the output model. unmanaged_container_model: Model information needed to perform batch prediction. explanation_metadata: The explanation metadata used by Vertex online and batch explanations. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py index cd39d4d6e6..ea34e5bcb6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Forecasting Stage 1 Tuner component spec.""" from typing import Optional @@ -50,32 +49,21 @@ def automl_forecasting_stage_1_tuner( project: Project to run hyperparameter tuning. location: Location for running the hyperparameter tuning. root_dir: The Cloud Storage location to store the output. - study_spec_parameters_override: JSON study spec. E.g., - [{"parameter_id": "activation","categorical_value_spec": {"values": - ["tanh"]}}] - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - reduce_search_space_mode: The reduce search space mode. Possible - values: "regular" (default), "minimal", "full". - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - deadline_hours: Number of hours the hyperparameter tuning should - run. + study_spec_parameters_override: JSON study spec. E.g., [{"parameter_id": "activation","categorical_value_spec": {"values": ["tanh"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}] + reduce_search_space_mode: The reduce search space mode. Possible values: "regular" (default), "minimal", "full". + num_selected_trials: Number of selected trials. The number of weak learners in the final model is 5 * num_selected_trials. + deadline_hours: Number of hours the hyperparameter tuning should run. num_parallel_trials: Number of parallel training trials. single_run_max_secs: Max number of seconds each training trial runs. metadata: The tabular example gen metadata. transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. + materialized_train_split: The materialized train split. materialized_eval_split: The materialized eval split. encryption_spec_key_name: Customer-managed encryption key. Returns: - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
+ gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. tuning_result_output: The trained model and architectures. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py index ff96d9215f..361dce3d06 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Forecasting Stage 2 Tuner component spec.""" from typing import Optional @@ -49,30 +48,20 @@ def automl_forecasting_stage_2_tuner( project: Project to run stage 2 tuner. location: Cloud region for running the component: us-central1). root_dir: The Cloud Storage location to store the output. - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] - num_selected_trials: Number of selected trials. The number of weak - learners in the final model. - deadline_hours: Number of hours the cross-validation trainer - should run. + worker_pool_specs_override_json: JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}] + num_selected_trials: Number of selected trials. The number of weak learners in the final model. + deadline_hours: Number of hours the cross-validation trainer should run. num_parallel_trials: Number of parallel training trials. single_run_max_secs: Max number of seconds each training trial runs. - metadata: The forecasting example gen - metadata. + metadata: The forecasting example gen metadata. transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. + materialized_train_split: The materialized train split. materialized_eval_split: The materialized eval split. encryption_spec_key_name: Customer-managed encryption key. - tuning_result_input_path: Path to the json of hyperparameter - tuning results to use when evaluating models. + tuning_result_input_path: Path to the json of hyperparameter tuning results to use when evaluating models. Returns: - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. tuning_result_output: The trained (private) model artifact paths and their hyperparameters. 
""" # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py index 4268da69ff..39db8e0e17 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Preview AutoML tabular components.""" import os diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py index 2ac6fed1b0..d23a97d116 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Auto Feature Engineering component spec.""" from typing import Optional @@ -34,7 +33,7 @@ def automated_feature_engineering( bigquery_staging_full_dataset_id: Optional[str] = '', materialized_examples_format: Optional[str] = 'tfrecords_gzip', ): - """find the top features from the dataset.""" + """Find the top features from the dataset.""" # fmt: off return dsl.ContainerSpec( image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py index 574c05e4ec..eda3503d90 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py @@ -51,53 +51,26 @@ def distillation_stage_feature_transform_engine( transform the input datasets with predicted outputs included (soft targets). Args: - root_dir (str): The Cloud Storage location to store the output. - project (str): Project to run feature transform engine. - location (str): Location for the created GCP services. - transform_config_path (str): Path to the transform config output by the - pre-distillation FTE component. - bigquery_train_full_table_uri (str): BigQuery full table id for our - train split output by pre-distillation FTE with soft target included. - bigquery_validate_full_table_uri (str): BigQuery full table id for our - validation split output by pre-distillation FTE with soft target - included. - target_column (str): Target column of input data. - prediction_type (str): Model prediction type. One of - "classification", "regression", "time_series". - bigquery_staging_full_dataset_id (Optional[str]): Dataset in - 'projectId.datasetId' format for storing intermediate-FTE BigQuery - tables. If the specified dataset does not exist in BigQuery, FTE will - create the dataset. 
If no bigquery_staging_full_dataset_id is specified, - all intermediate tables will be stored in a dataset created under the - provided project in the input data source's location during FTE - execution called - 'vertex_feature_transform_engine_staging_{location.replace('-', '_')}'. - All tables generated by FTE will have a 30 day TTL. - weight_column (Optional[str]): Weight column of input data. - dataflow_machine_type (Optional[str]): The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers (Optional[int]): The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb (Optional[int]): The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork (Optional[str]): Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips (Optional[bool]): Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account (Optional[str]): Custom service account to run - Dataflow jobs. - encryption_spec_key_name (Optional[str]): Customer-managed encryption key. + root_dir: The Cloud Storage location to store the output. + project: Project to run feature transform engine. + location: Location for the created GCP services. + transform_config_path: Path to the transform config output by the pre-distillation FTE component. + bigquery_train_full_table_uri: BigQuery full table id for our train split output by pre-distillation FTE with soft target included. + bigquery_validate_full_table_uri: BigQuery full table id for our validation split output by pre-distillation FTE with soft target included. + target_column: Target column of input data. prediction_type (str): Model prediction type. One of "classification", "regression", "time_series". + bigquery_staging_full_dataset_id: Dataset in 'projectId.datasetId' format for storing intermediate-FTE BigQuery tables. If the specified dataset does not exist in BigQuery, FTE will create the dataset. If no bigquery_staging_full_dataset_id is specified, all intermediate tables will be stored in a dataset created under the provided project in the input data source's location during FTE execution called 'vertex_feature_transform_engine_staging_{location.replace('-', '_')}'. All tables generated by FTE will have a 30 day TTL. + weight_column: Weight column of input data. + dataflow_machine_type: The machine type used for dataflow jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips (Optional[bool]): Specifies whether Dataflow workers use public IP addresses. + dataflow_service_account: Custom service account to run Dataflow jobs. + encryption_spec_key_name: Customer-managed encryption key. Returns: - materialized_data (Dataset): - The materialized dataset. - transform_output (TransformOutput): - The transform output artifact. 
- gcp_resources (str): - GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + materialized_data: The materialized dataset. + transform_output: The transform output artifact. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py index 0c703ac517..5478275e09 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Feature Ranking and Selection component spec.""" from typing import Optional @@ -51,37 +50,21 @@ def tabular_feature_ranking_and_selection( Args: project: Project to run feature selection. - location: Location for running the feature selection. If not set, - default to us-central1. + location: Location for running the feature selection. If not set, default to us-central1. root_dir: The Cloud Storage location to store the output. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. - encryption_spec_key_name: Customer-managed encryption key. - If this is set, then all resources will be encrypted with the provided - encryption key. data_source(Dataset): The input dataset artifact which - references csv, BigQuery, or TF Records. target_column_name(str): Target - column name of the input dataset. - max_selected_features: number of features to select by the - algorithm. If not set, default to 1000. + dataflow_machine_type: The machine type used for dataflow jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + dataflow_service_account: Custom service account to run dataflow jobs. 
+ encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources will be encrypted with the provided encryption key. data_source(Dataset): The input dataset artifact which references csv, BigQuery, or TF Records. target_column_name(str): Target column name of the input dataset. + max_selected_features: number of features to select by the algorithm. If not set, default to 1000. Returns: feature_ranking: the dictionary of feature names and feature ranking values. selected_features: A json array of selected feature names. - gcp_resources: GCP resources created by this component. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py index d75bd80748..48741880e4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Feature Transform Engine component spec.""" from typing import Optional @@ -113,116 +112,52 @@ def feature_transform_engine( root_dir: The Cloud Storage location to store the output. project: Project to run feature transform engine. location: Location for the created GCP services. - dataset_level_custom_transformation_definitions: List of dataset-level custom transformation definitions. Custom, - bring-your-own dataset-level transform functions, where users can define - and import their own transform function and use it with FTE's built-in - transformations. Using custom transformations is an experimental feature - and it is currently not supported during batch prediction. - Example: .. code-block:: python [ { "transformation": "ConcatCols", - "module_path": "/path/to/custom_transform_fn_dlt.py", - "function_name": "concat_cols" } ] Using custom transform function - together with FTE's built-in transformations: .. code-block:: - python [ { "transformation": "Join", "right_table_uri": - "bq://test-project.dataset_test.table", "join_keys": - [["join_key_col", "join_key_col"]] },{ "transformation": - "ConcatCols", "cols": ["feature_1", "feature_2"], "output_col": - "feature_1_2" } ] - dataset_level_transformations: List of dataset-level - transformations. - Example: .. code-block:: python [ { "transformation": "Join", - "right_table_uri": "bq://test-project.dataset_test.table", - "join_keys": [["join_key_col", "join_key_col"]] }, ... ] Additional - information about FTE's currently supported built-in + dataset_level_custom_transformation_definitions: List of dataset-level custom transformation definitions. Custom, bring-your-own dataset-level transform functions, where users can define and import their own transform function and use it with FTE's built-in transformations. 
Using custom transformations is an experimental feature and it is currently not supported during batch prediction. + + [ { "transformation": "ConcatCols", "module_path": "/path/to/custom_transform_fn_dlt.py", "function_name": "concat_cols" } ] Using custom transform function together with FTE's built-in transformations: .. code-block:: python [ { "transformation": "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": [["join_key_col", "join_key_col"]] },{ "transformation": "ConcatCols", "cols": ["feature_1", "feature_2"], "output_col": "feature_1_2" } ] + + dataset_level_transformations: List of dataset-level transformations. + + [ { "transformation": "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": [["join_key_col", "join_key_col"]] }, ... ] Additional information about FTE's currently supported built-in transformations: - Join: Joins features from right_table_uri. For each join key, the - left table keys will be included and the right table keys will - be dropped. - Example: .. code-block:: python { "transformation": "Join", - "right_table_uri": "bq://test-project.dataset_test.table", - "join_keys": [["join_key_col", "join_key_col"]] } - Arguments: - right_table_uri: Right table BigQuery uri to join - with input_full_table_id. - join_keys: Features to join on. For each - nested list, the first element is a left table column - and the second is its corresponding right table column. - TimeAggregate: Creates a new feature composed of values of an - existing feature from a fixed time period ago or in the future. - Ex: A feature for sales by store 1 year ago. - Example: .. code-block:: python { "transformation": - "TimeAggregate", "time_difference": 40, - "time_difference_units": "DAY", - "time_series_identifier_columns": ["store_id"], - "time_column": "time_col", "time_difference_target_column": - "target_col", "output_column": "output_col" } - Arguments: - time_difference: Number of time_difference_units to - look back or into the future on our - time_difference_target_column. - time_difference_units: Units of time_difference to - look back or into the future on our - time_difference_target_column. Must be one of * 'DAY' * - 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * - 'YEAR' - time_series_identifier_columns: Names of the - time series identifier columns. - time_column: Name of the time column. - time_difference_target_column: Column we wish to get - the value of time_difference time_difference_units in - the past or future. - output_column: Name of our new time aggregate - feature. - is_future: Whether we wish to look - forward in time. Defaults to False. - PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum: - Performs a partition by reduce operation (one of max, - min, avg, or sum) with a fixed historic time period. Ex: - Getting avg sales (the reduce column) for each store - (partition_by_column) over the previous 5 days - (time_column, time_ago_units, and time_ago). - Example: .. code-block:: python { "transformation": - "PartitionByMax", "reduce_column": "sell_price", - "partition_by_columns": ["store_id", "state_id"], - "time_column": "date", "time_ago": 1, "time_ago_units": - "WEEK", "output_column": "partition_by_reduce_max_output" } - Arguments: - reduce_column: Column to apply the reduce operation - on. Reduce operations include the - following: Max, Min, Avg, Sum. - partition_by_columns: List of columns to - partition by. - time_column: Time column for the partition by - operation's window function. 
- time_ago: Number of time_ago_units to look back on - our target_column, starting from time_column - (inclusive). - time_ago_units: Units of time_ago to look back on - our target_column. Must be one of * 'DAY' * 'WEEK' - output_column: Name of our output feature. + Join: Joins features from right_table_uri. For each join key, the left table keys will be included and the right table keys will be dropped. + Example: .. code-block:: python { "transformation": "Join", "right_table_uri": "bq://test-project.dataset_test.table", "join_keys": [["join_key_col", "join_key_col"]] } + Arguments: + right_table_uri: Right table BigQuery uri to join with input_full_table_id. + join_keys: Features to join on. For each nested list, the first element is a left table column and the second is its corresponding right table column. + TimeAggregate: Creates a new feature composed of values of an existing feature from a fixed time period ago or in the future. + Ex: A feature for sales by store 1 year ago. + Example: .. code-block:: python { "transformation": "TimeAggregate", "time_difference": 40, "time_difference_units": "DAY", "time_series_identifier_columns": ["store_id"], "time_column": "time_col", "time_difference_target_column": "target_col", "output_column": "output_col" } + Arguments: + time_difference: Number of time_difference_units to look back or into the future on our time_difference_target_column. + time_difference_units: Units of time_difference to look back or into the future on our time_difference_target_column. Must be one of * 'DAY' * 'WEEK' (Equivalent to 7 DAYs) * 'MONTH' * 'QUARTER' * 'YEAR' + time_series_identifier_columns: Names of the time series identifier columns. + time_column: Name of the time column. + time_difference_target_column: Column we wish to get the value of time_difference time_difference_units in the past or future. + output_column: Name of our new time aggregate feature. + is_future: Whether we wish to look forward in time. Defaults to False. PartitionByMax/PartitionByMin/PartitionByAvg/PartitionBySum: Performs a partition by reduce operation (one of max, min, avg, or sum) with a fixed historic time period. Ex: Getting avg sales (the reduce column) for each store (partition_by_column) over the previous 5 days (time_column, time_ago_units, and time_ago). + Example: .. code-block:: python { "transformation": "PartitionByMax", "reduce_column": "sell_price", "partition_by_columns": ["store_id", "state_id"], "time_column": "date", "time_ago": 1, "time_ago_units": "WEEK", "output_column": "partition_by_reduce_max_output" } + Arguments: + reduce_column: Column to apply the reduce operation on. Reduce operations include the + following: Max, Min, Avg, Sum. + partition_by_columns: List of columns to partition by. + time_column: Time column for the partition by operation's window function. + time_ago: Number of time_ago_units to look back on our target_column, starting from time_column (inclusive). + time_ago_units: Units of time_ago to look back on our target_column. Must be one of * 'DAY' * 'WEEK' + output_column: Name of our output feature. + forecasting_time_column: Forecasting time column. - forecasting_time_series_identifier_column: - [Deprecated] A forecasting time series identifier column. Raises an - exception if used - use the "time_series_identifier_column" field - instead. - forecasting_time_series_identifier_columns: - The list of forecasting time series identifier columns. - forecasting_time_series_attribute_columns: Forecasting - time series attribute columns. 
- forecasting_unavailable_at_forecast_columns: Forecasting - unavailable at forecast columns. - forecasting_available_at_forecast_columns: Forecasting - available at forecast columns. + forecasting_time_series_identifier_column: [Deprecated] A forecasting time series identifier column. Raises an exception if used - use the "time_series_identifier_column" field instead. + forecasting_time_series_identifier_columns: The list of forecasting time series identifier columns. + forecasting_time_series_attribute_columns: Forecasting time series attribute columns. + forecasting_unavailable_at_forecast_columns: Forecasting unavailable at forecast columns. + forecasting_available_at_forecast_columns: Forecasting available at forecast columns. forecasting_forecast_horizon: Forecasting horizon. forecasting_context_window: Forecasting context window. forecasting_predefined_window_column: Forecasting predefined window column. forecasting_window_stride_length: Forecasting window stride length. forecasting_window_max_count: Forecasting window max count. - forecasting_holiday_regions: The geographical region based on which the - holiday effect is applied in modeling by adding holiday categorical - array feature that include all holidays matching the date. This option - only allowed when data granularity is day. By default, holiday effect - modeling is disabled. To turn it on, specify the holiday region using - this option. + forecasting_holiday_regions: The geographical region based on which the holiday effect is applied in modeling by adding holiday categorical array feature that include all holidays matching the date. This option only allowed when data granularity is day. By default, holiday effect modeling is disabled. To turn it on, specify the holiday region using this option. Top level: * 'GLOBAL' Second level: continental regions: * 'NA': North America * 'JAPAC': Japan and Asia Pacific @@ -244,424 +179,145 @@ def feature_transform_engine( training_fraction: Fraction of input data for training. validation_fraction: Fraction of input data for validation. test_fraction: Fraction of input data for testing. - stats_gen_execution_engine: Execution engine to perform - statistics generation. Can be one of: "dataflow" (by default) or - "bigquery". Using "bigquery" as the execution engine is experimental. - tf_transform_execution_engine: Execution engine to perform - row-level TF transformations. Can be one of: "dataflow" (by default) or - "bigquery". Using "bigquery" as the execution engine is experimental and - is for allowlisted customers only. In addition, executing on "bigquery" - only supports auto transformations (i.e., specified by - tf_auto_transform_features) and will raise an error when - tf_custom_transformation_definitions or tf_transformations_path is set. - tf_auto_transform_features: Dict mapping auto and/or type-resolutions to - TF transform features. FTE will automatically configure a set of - built-in transformations for each feature based on its data statistics. - If users do not want auto type resolution, but want the set of - transformations for a given type to be automatically generated, they - may specify pre-resolved transformations types. The following type hint - dict keys are supported: * 'auto' * 'categorical' * 'numeric' * 'text' - * 'timestamp' - Example: .. code-block:: python { "auto": ["feature1"], - "categorical": ["feature2", "feature3"], } Note that the target and - weight column may not be included as an auto transformation unless - users are running forecasting. 
- tf_custom_transformation_definitions: List of - TensorFlow-based custom transformation definitions. Custom, - bring-your-own transform functions, where users can define and import - their own transform function and use it with FTE's built-in - transformations. - Example: .. code-block:: python [ { "transformation": "PlusOne", - "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" }, { "transformation": - "MultiplyTwo", "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "multiply_two_transform" } ] Using custom - transform function together with FTE's built-in transformations: .. - code-block:: python [ { "transformation": "CastToFloat", - "input_columns": ["feature_1"], "output_columns": ["feature_1"] },{ - "transformation": "PlusOne", "input_columns": ["feature_1"] - "output_columns": ["feature_1_plused_one"] },{ "transformation": - "MultiplyTwo", "input_columns": ["feature_1"] "output_columns": - ["feature_1_multiplied_two"] } ] - tf_transformations_path: Path to TensorFlow-based - transformation configuration. Path to a JSON file used to specified - FTE's TF transformation configurations. In the following, we provide - some sample transform configurations to demonstrate FTE's capabilities. - All transformations on input columns are explicitly specified with FTE's - built-in transformations. Chaining of multiple transformations on a - single column is also supported. For example: .. code-block:: python [ - { "transformation": "ZScale", "input_columns": ["feature_1"] }, { - "transformation": "ZScale", "input_columns": ["feature_2"] } ] - Additional information about FTE's currently supported built-in + stats_gen_execution_engine: Execution engine to perform statistics generation. Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the execution engine is experimental. + tf_transform_execution_engine: Execution engine to perform row-level TF transformations. Can be one of: "dataflow" (by default) or "bigquery". Using "bigquery" as the execution engine is experimental and is for allowlisted customers only. In addition, executing on "bigquery" only supports auto transformations (i.e., specified by tf_auto_transform_features) and will raise an error when tf_custom_transformation_definitions or tf_transformations_path is set. + tf_auto_transform_features: Dict mapping auto and/or type-resolutions to TF transform features. FTE will automatically configure a set of built-in transformations for each feature based on its data statistics. If users do not want auto type resolution, but want the set of transformations for a given type to be automatically generated, they may specify pre-resolved transformations types. The following type hint dict keys are supported: * 'auto' * 'categorical' * 'numeric' * 'text' * 'timestamp' Example: `{ "auto": ["feature1"], "categorical": ["feature2", "feature3"], }`. Note that the target and weight column may not be included as an auto transformation unless users are running forecasting. + tf_custom_transformation_definitions: List of TensorFlow-based custom transformation definitions. Custom, bring-your-own transform functions, where users can define and import their own transform function and use it with FTE's built-in transformations. 
`[ { "transformation": "PlusOne", "module_path": "gs://bucket/custom_transform_fn.py", "function_name": "plus_one_transform" }, { "transformation": "MultiplyTwo", "module_path": "gs://bucket/custom_transform_fn.py", "function_name": "multiply_two_transform" } ] Using custom transform function together with FTE's built-in transformations: .. code-block:: python [ { "transformation": "CastToFloat", "input_columns": ["feature_1"], "output_columns": ["feature_1"] },{ "transformation": "PlusOne", "input_columns": ["feature_1"] "output_columns": ["feature_1_plused_one"] },{ "transformation": "MultiplyTwo", "input_columns": ["feature_1"] "output_columns": ["feature_1_multiplied_two"] } ] + tf_transformations_path: Path to TensorFlow-based transformation configuration. Path to a JSON file used to specified FTE's TF transformation configurations. In the following, we provide some sample transform configurations to demonstrate FTE's capabilities. All transformations on input columns are explicitly specified with FTE's built-in transformations. Chaining of multiple transformations on a single column is also supported. For example: .. code-block:: python [ { "transformation": "ZScale", "input_columns": ["feature_1"] }, { "transformation": "ZScale", "input_columns": ["feature_2"] } ]`. Additional information about FTE's currently supported built-in transformations: - Datetime: Extracts datetime featues from a column containing - timestamp strings. - Example: .. code-block:: python { "transformation": - "Datetime", "input_columns": ["feature_1"], "time_format": - "%Y-%m-%d" } - Arguments: - input_columns: A list with a single column to - perform the datetime transformation on. - output_columns: Names of output - columns, one for each datetime_features element. - time_format: Datetime format string. Time format is - a combination of Date + Time Delimiter (optional) + Time - (optional) directives. Valid date directives are as - follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # - 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' # - 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' # - 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # - 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' # - 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y' - # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # - 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' # - 11302018 * '%Y%m%d' # 20181130 Valid time delimiters - are as follows * 'T' * ' ' Valid time directives are as - follows * '%H:%M' # 23:59 * '%H:%M:%S' # - 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * - '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 * - '%H:%M:%S%z', # 23:59:58+0000 - datetime_features: List of datetime - features to be extract. Each entry must be one of * - 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR' - * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * - 'SECOND' Defaults to ['YEAR', 'MONTH', 'DAY', - 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR'] - Log: Performs the natural log on a numeric column. - Example: .. code-block:: python { "transformation": "Log", - "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the log transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - ZScale: Performs Z-scale normalization on a numeric column. - Example: .. 
code-block:: python { "transformation": - "ZScale", "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the z-scale transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - Vocabulary: Converts strings to integers, where each unique string - gets a unique integer representation. - Example: .. code-block:: python { "transformation": - "Vocabulary", "input_columns": ["feature_1"] } - Arguments: - input_columns: A list with a single column to - perform the vocabulary transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the vocabulary - only to words whose number of occurrences in the input - exceeds frequency_threshold. If not specified, all words - in the vocabulary will be included. If both top_k and - frequency_threshold are specified, a word must satisfy - both conditions to be included. Defaults to None. - Categorical: Transforms categorical columns to integer columns. - Example: .. code-block:: python { "transformation": - "Categorical", "input_columns": ["feature_1"], "top_k": 10 } - Arguments: - input_columns: A list with a single column to - perform the categorical transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. - frequency_threshold: Limit the vocabulary - only to words whose number of occurrences in the input - exceeds frequency_threshold. If not specified, all words - in the vocabulary will be included. If both top_k and - frequency_threshold are specified, a word must satisfy - both conditions to be included. - Reduce: Given a column where each entry is a numeric array, - reduces arrays according to our reduce_mode. - Example: .. code-block:: python { "transformation": - "Reduce", "input_columns": ["feature_1"], "reduce_mode": - "MEAN", "output_columns": ["feature_1_mean"] } - Arguments: - input_columns: A list with a single column to - perform the reduce transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - reduce_mode: One of * 'MAX' * 'MIN' * - 'MEAN' * 'LAST_K' Defaults to 'MEAN'. - last_k: The number of last k elements when - 'LAST_K' reduce mode is used. Defaults to 1. - SplitString: Given a column of strings, splits strings into token - arrays. - Example: .. code-block:: python { "transformation": - "SplitString", "input_columns": ["feature_1"], "separator": - "$" } - Arguments: - input_columns: A list with a single column to - perform the split string transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - separator: Separator to split input string - into tokens. Defaults to ' '. - missing_token: Missing token to use when - no string is included. Defaults to ' _MISSING_ '. - NGram: Given a column of strings, splits strings into token arrays - where each token is an integer. - Example: .. 
code-block:: python { "transformation": "NGram", - "input_columns": ["feature_1"], "min_ngram_size": 1, - "max_ngram_size": 2, "separator": " " } - Arguments: - input_columns: A list with a single column to - perform the n-gram transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - min_ngram_size: Minimum n-gram size. Must - be a positive number and <= max_ngram_size. Defaults to - 1. - max_ngram_size: Maximum n-gram size. Must - be a positive number and >= min_ngram_size. Defaults to - 2. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the - dictionary's vocabulary only to words whose number of - occurrences in the input exceeds frequency_threshold. If - not specified, all words in the vocabulary will be - included. If both top_k and frequency_threshold are - specified, a word must satisfy both conditions to be - included. Defaults to None. - separator: Separator to split input string - into tokens. Defaults to ' '. - missing_token: Missing token to use when - no string is included. Defaults to ' _MISSING_ '. - Clip: Given a numeric column, clips elements such that elements < - min_value are assigned min_value, and elements > max_value are - assigned max_value. - Example: .. code-block:: python { "transformation": "Clip", - "input_columns": ["col1"], "output_columns": - ["col1_clipped"], "min_value": 1., "max_value": 10., } - Arguments: - input_columns: A list with a single column to - perform the n-gram transformation on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - min_value: Number where all values below - min_value are set to min_value. If no min_value is - provided, min clipping will not occur. Defaults to None. - max_value: Number where all values above - max_value are set to max_value If no max_value is - provided, max clipping will not occur. Defaults to None. - MultiHotEncoding: Performs multi-hot encoding on a categorical - array column. - Example: .. code-block:: python { "transformation": - "MultiHotEncoding", "input_columns": ["col1"], } The number - of classes is determened by the largest number included in - the input if it is numeric or the total number of unique - values of the input if it is type str. If the input is has - type str and an element contians separator tokens, the input - will be split at separator indices, and the each element of - the split list will be considered a seperate class. For - example, - Input: .. code-block:: python [ ["foo bar"], # Example - 0 ["foo", "bar"], # Example 1 ["foo"], # Example - 2 ["bar"], # Example 3 ] - Output (with default separator=" "): .. code-block:: python [ - [1, 1], # Example 0 [1, 1], # Example 1 - [1, 0], # Example 2 [0, 1], # Example 3 ] - Arguments: - input_columns: A list with a single column to - perform the multi-hot-encoding on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - top_k: Number of the most frequent words - in the vocabulary to use for generating dictionary - lookup indices. If not specified, all words in the - vocabulary will be used. Defaults to None. - frequency_threshold: Limit the - dictionary's vocabulary only to words whose number of - occurrences in the input exceeds frequency_threshold. 
If - not specified, all words in the vocabulary will be - included. If both top_k and frequency_threshold are - specified, a word must satisfy both conditions to be - included. Defaults to None. - separator: Separator to split input string - into tokens. Defaults to ' '. - MaxAbsScale: Performs maximum absolute scaling on a numeric - column. - Example: .. code-block:: python { "transformation": - "MaxAbsScale", "input_columns": ["col1"], "output_columns": - ["col1_max_abs_scaled"] } - Arguments: - input_columns: A list with a single column to - perform max-abs-scale on. - output_columns: A list with a single - output column name, corresponding to the output of our - transformation. - Custom: Transformations defined in - tf_custom_transformation_definitions are included here in the - TensorFlow-based transformation configuration. For example, - given the following tf_custom_transformation_definitions: .. - code-block:: python [ { "transformation": "PlusX", - "module_path": "gs://bucket/custom_transform_fn.py", - "function_name": "plus_one_transform" } ] We can include the - following transformation: .. code-block:: python { - "transformation": "PlusX", "input_columns": ["col1"], - "output_columns": ["col1_max_abs_scaled"] "x": 5 } Note that - input_columns must still be included in our arguments and - output_columns is optional. All other arguments are those - defined in custom_transform_fn.py, which includes `"x"` in this - case. See tf_custom_transformation_definitions above. - legacy_transformations_path (Optional[str]) Deprecated. Prefer - tf_auto_transform_features. Path to a GCS file containing JSON - string for legacy style transformations. Note that - legacy_transformations_path and tf_auto_transform_features - cannot both be specified. + Datetime: Extracts datetime featues from a column containing timestamp strings. + Example: .. code-block:: python { "transformation": "Datetime", "input_columns": ["feature_1"], "time_format": "%Y-%m-%d" } + Arguments: + input_columns: A list with a single column to perform the datetime transformation on. + output_columns: Names of output columns, one for each datetime_features element. + time_format: Datetime format string. Time format is a combination of Date + Time Delimiter (optional) + Time (optional) directives. Valid date directives are as follows * '%Y-%m-%d' # 2018-11-30 * '%Y/%m/%d' # 2018/11/30 * '%y-%m-%d' # 18-11-30 * '%y/%m/%d' # 18/11/30 * '%m-%d-%Y' # 11-30-2018 * '%m/%d/%Y' # 11/30/2018 * '%m-%d-%y' # 11-30-18 * '%m/%d/%y' # 11/30/18 * '%d-%m-%Y' # 30-11-2018 * '%d/%m/%Y' # 30/11/2018 * '%d-%B-%Y' # 30-November-2018 * '%d-%m-%y' # 30-11-18 * '%d/%m/%y' # 30/11/18 * '%d-%B-%y' # 30-November-18 * '%d%m%Y' # 30112018 * '%m%d%Y' # 11302018 * '%Y%m%d' # 20181130 Valid time delimiters are as follows * 'T' * ' ' Valid time directives are as follows * '%H:%M' # 23:59 * '%H:%M:%S' # + 23:59:58 * '%H:%M:%S.%f' # 23:59:58[.123456] * '%H:%M:%S.%f%z' # 23:59:58[.123456]+0000 * '%H:%M:%S%z', # 23:59:58+0000 + datetime_features: List of datetime features to be extract. Each entry must be one of * 'YEAR' * 'MONTH' * 'DAY' * 'DAY_OF_WEEK' * 'DAY_OF_YEAR' * 'WEEK_OF_YEAR' * 'QUARTER' * 'HOUR' * 'MINUTE' * 'SECOND' Defaults to ['YEAR', 'MONTH', 'DAY', 'DAY_OF_WEEK', 'DAY_OF_YEAR', 'WEEK_OF_YEAR'] + Log: Performs the natural log on a numeric column. + Example: .. code-block:: python { "transformation": "Log", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to perform the log transformation on. 
+ output_columns: A list with a single output column name, corresponding to the output of our transformation. + ZScale: Performs Z-scale normalization on a numeric column. + Example: .. code-block:: python { "transformation": "ZScale", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to perform the z-scale transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + Vocabulary: Converts strings to integers, where each unique string gets a unique integer representation. + Example: .. code-block:: python { "transformation": "Vocabulary", "input_columns": ["feature_1"] } + Arguments: + input_columns: A list with a single column to perform the vocabulary transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + top_k: Number of the most frequent words in the vocabulary to use for generating dictionary lookup indices. If not specified, all words in the vocabulary will be used. Defaults to None. + frequency_threshold: Limit the vocabulary only to words whose number of occurrences in the input exceeds frequency_threshold. If not specified, all words in the vocabulary will be included. If both top_k and frequency_threshold are specified, a word must satisfy both conditions to be included. Defaults to None. + Categorical: Transforms categorical columns to integer columns. + Example: .. code-block:: python { "transformation": "Categorical", "input_columns": ["feature_1"], "top_k": 10 } + Arguments: + input_columns: A list with a single column to perform the categorical transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + top_k: Number of the most frequent words in the vocabulary to use for generating dictionary lookup indices. If not specified, all words in the vocabulary will be used. + frequency_threshold: Limit the vocabulary only to words whose number of occurrences in the input exceeds frequency_threshold. If not specified, all words in the vocabulary will be included. If both top_k and frequency_threshold are specified, a word must satisfy both conditions to be included. + Reduce: Given a column where each entry is a numeric array, reduces arrays according to our reduce_mode. + Example: .. code-block:: python { "transformation": "Reduce", "input_columns": ["feature_1"], "reduce_mode": "MEAN", "output_columns": ["feature_1_mean"] } + Arguments: + input_columns: A list with a single column to perform the reduce transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + reduce_mode: One of * 'MAX' * 'MIN' * 'MEAN' * 'LAST_K' Defaults to 'MEAN'. + last_k: The number of last k elements when 'LAST_K' reduce mode is used. Defaults to 1. + SplitString: Given a column of strings, splits strings into token arrays. + Example: .. code-block:: python { "transformation": "SplitString", "input_columns": ["feature_1"], "separator": "$" } + Arguments: + input_columns: A list with a single column to perform the split string transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + separator: Separator to split input string into tokens. Defaults to ' '. + missing_token: Missing token to use when no string is included. Defaults to ' _MISSING_ '. 
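To tie the built-in transformations above together, a hedged sketch of assembling a tf_transformations_path configuration; the feature names and the local file path are placeholders, and staging the JSON file to GCS is assumed to happen separately.

.. code-block:: python

  import json

  # Chain built-in transformations using the documented dict formats.
  transformations = [
      {"transformation": "ZScale", "input_columns": ["trip_seconds"]},
      {"transformation": "Vocabulary", "input_columns": ["payment_type"], "top_k": 50},
      {"transformation": "SplitString", "input_columns": ["tags"], "separator": ","},
  ]

  # Placeholder local path; in practice the file would be staged to GCS and
  # its gs:// URI passed as tf_transformations_path.
  with open("transform_config.json", "w") as f:
      json.dump(transformations, f, indent=2)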
+ NGram: Given a column of strings, splits strings into token arrays where each token is an integer. + Example: .. code-block:: python { "transformation": "NGram", "input_columns": ["feature_1"], "min_ngram_size": 1, "max_ngram_size": 2, "separator": " " } + Arguments: + input_columns: A list with a single column to perform the n-gram transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + min_ngram_size: Minimum n-gram size. Must be a positive number and <= max_ngram_size. Defaults to 1. + max_ngram_size: Maximum n-gram size. Must be a positive number and >= min_ngram_size. Defaults to 2. + top_k: Number of the most frequent words in the vocabulary to use for generating dictionary lookup indices. If not specified, all words in the vocabulary will be used. Defaults to None. + frequency_threshold: Limit the dictionary's vocabulary only to words whose number of occurrences in the input exceeds frequency_threshold. If not specified, all words in the vocabulary will be included. If both top_k and frequency_threshold are specified, a word must satisfy both conditions to be included. Defaults to None. + separator: Separator to split input string into tokens. Defaults to ' '. + missing_token: Missing token to use when no string is included. Defaults to ' _MISSING_ '. + Clip: Given a numeric column, clips elements such that elements < min_value are assigned min_value, and elements > max_value are assigned max_value. + Example: .. code-block:: python { "transformation": "Clip", "input_columns": ["col1"], "output_columns": ["col1_clipped"], "min_value": 1., "max_value": 10., } + Arguments: + input_columns: A list with a single column to perform the clip transformation on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + min_value: Number where all values below min_value are set to min_value. If no min_value is provided, min clipping will not occur. Defaults to None. + max_value: Number where all values above max_value are set to max_value. If no max_value is provided, max clipping will not occur. Defaults to None. + MultiHotEncoding: Performs multi-hot encoding on a categorical array column. + Example: .. code-block:: python { "transformation": "MultiHotEncoding", "input_columns": ["col1"], } The number of classes is determined by the largest number included in the input if it is numeric or the total number of unique values of the input if it is type str. If the input has type str and an element contains separator tokens, the input will be split at separator indices, and each element of the split list will be considered a separate class. For example, + Input: .. code-block:: python [ ["foo bar"], # Example 0 ["foo", "bar"], # Example 1 ["foo"], # Example 2 ["bar"], # Example 3 ] Output (with default separator=" "): .. code-block:: python [ [1, 1], # Example 0 [1, 1], # Example 1 [1, 0], # Example 2 [0, 1], # Example 3 ] + Arguments: + input_columns: A list with a single column to perform the multi-hot-encoding on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + top_k: Number of the most frequent words in the vocabulary to use for generating dictionary lookup indices. If not specified, all words in the vocabulary will be used. Defaults to None. + frequency_threshold: Limit the dictionary's vocabulary only to words whose number of occurrences in the input exceeds frequency_threshold.
If not specified, all words in the vocabulary will be included. If both top_k and frequency_threshold are specified, a word must satisfy both conditions to be included. Defaults to None. + separator: Separator to split input string into tokens. Defaults to ' '. + MaxAbsScale: Performs maximum absolute scaling on a numeric column. + Example: .. code-block:: python { "transformation": "MaxAbsScale", "input_columns": ["col1"], "output_columns": ["col1_max_abs_scaled"] } + Arguments: + input_columns: A list with a single column to perform max-abs-scale on. + output_columns: A list with a single output column name, corresponding to the output of our transformation. + Custom: Transformations defined in tf_custom_transformation_definitions are included here in the TensorFlow-based transformation configuration. For example, given the following tf_custom_transformation_definitions: .. code-block:: python [ { "transformation": "PlusX", "module_path": "gs://bucket/custom_transform_fn.py", "function_name": "plus_one_transform" } ] We can include the following transformation: .. code-block:: python { "transformation": "PlusX", "input_columns": ["col1"], "output_columns": ["col1_max_abs_scaled"] "x": 5 } Note that input_columns must still be included in our arguments and output_columns is optional. All other arguments are those defined in custom_transform_fn.py, which includes `"x"` in this case. See tf_custom_transformation_definitions above. legacy_transformations_path (Optional[str]) Deprecated. Prefer tf_auto_transform_features. Path to a GCS file containing JSON string for legacy style transformations. Note that legacy_transformations_path and tf_auto_transform_features cannot both be specified. target_column: Target column of input data. weight_column: Weight column of input data. - prediction_type: Model prediction type. One of - "classification", "regression", "time_series". - run_distill: (deprecated) Whether the distillation should be applied - to the training. - run_feature_selection: Whether the feature selection - should be applied to the dataset. - feature_selection_algorithm: The algorithm of feature - selection. One of "AMI", "CMIM", "JMIM", "MRMR", default to be "AMI". - The algorithms available are: AMI(Adjusted Mutual Information): + prediction_type: Model prediction type. One of "classification", "regression", "time_series". + run_distill: (deprecated) Whether the distillation should be applied to the training. + run_feature_selection: Whether the feature selection should be applied to the dataset. + feature_selection_algorithm: The algorithm of feature selection. One of "AMI", "CMIM", "JMIM", "MRMR", default to be "AMI". The algorithms available are: AMI(Adjusted Mutual Information): + Reference: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html Arrays are not yet supported in this algorithm. CMIM(Conditional Mutual Information Maximization): Reference paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using Joint Mutual Information Maximisation,” Expert Systems with Applications, vol. 42, issue 22, 1 December 2015, Pages 8520-8532. JMIM(Joint Mutual Information Maximization Reference: - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_mutual_info_score.html - Arrays are not yet supported in this algorithm. 
CMIM(Conditional - Mutual Information Maximization): Reference paper: Mohamed - Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using - Joint Mutual Information Maximisation,” Expert Systems with - Applications, vol. 42, issue 22, 1 December 2015, Pages - 8520-8532. JMIM(Joint Mutual Information Maximization): Reference - paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature - selection using Joint Mutual Information Maximisation,” Expert - Systems with Applications, vol. 42, issue 22, 1 December 2015, - Pages 8520-8532. MRMR(MIQ Minimum-redundancy - Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long, - and Chris Ding. "Feature selection based on mutual information - criteria of max-dependency, max-relevance, and min-redundancy." - IEEE Transactions on pattern analysis and machine intelligence - 27, no. - 8: 1226-1238. + paper: Mohamed Bennasar, Yulia Hicks, Rossitza Setchi, “Feature selection using Joint Mutual Information Maximisation,” Expert Systems with Applications, vol. 42, issue 22, 1 December 2015, Pages 8520-8532. MRMR(MIQ Minimum-redundancy Maximum-relevance): Reference paper: Hanchuan Peng, Fuhui Long, and Chris Ding. "Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy." IEEE Transactions on pattern analysis and machine intelligence 27, no. + 8: 1226-1238. feature_selection_execution_engine: Execution engine to run feature selection, value can be dataflow, bigquery. - materialized_examples_format: The format to use for the - materialized examples. Should be either 'tfrecords_gzip' (default) or - 'parquet'. - max_selected_features: Maximum number of features to - select. If specified, the transform config will be purged by only using - the selected features that ranked top in the feature ranking, which has - the ranking value for all supported features. If the number of input - features is smaller than max_selected_features specified, we will still - run the feature selection process and generate the feature ranking, no - features will be excluded. The value will be set to 1000 by default if - run_feature_selection is enabled. - data_source_csv_filenames: CSV input data source to run - feature transform on. - data_source_bigquery_table_path: BigQuery input data - source to run feature transform on. - bigquery_staging_full_dataset_id: Dataset in - "projectId.datasetId" format for storing intermediate-FTE BigQuery - tables. If the specified dataset does not exist in BigQuery, FTE will - create the dataset. If no bigquery_staging_full_dataset_id is specified, - all intermediate tables will be stored in a dataset created under the - provided project in the input data source's location during FTE - execution called - "vertex_feature_transform_engine_staging_{location.replace('-', '_')}". - All tables generated by FTE will have a 30 day TTL. - model_type: Model type, which we wish to engineer features - for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or - tide. Defaults to the empty value, `None`. - multimodal_tabular_columns: List of multimodal tabular - columns. Defaults to an empty list - multimodal_timeseries_columns: List of multimodal timeseries - columns. Defaults to an empty list - multimodal_text_columns: List of multimodal text - columns. Defaults to an empty list - multimodal_image_columns: List of multimodal image - columns. Defaults to an empty list. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. 
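The "AMI" feature-selection option described above is based on adjusted mutual information; purely as a point of reference, a minimal scikit-learn sketch of the underlying score (this illustrates the metric only and is not FTE's feature-selection implementation).

.. code-block:: python

  from sklearn.metrics import adjusted_mutual_info_score

  # Toy discretized feature and target labels; adjusted mutual information
  # scores how much information the feature shares with the target,
  # corrected for chance.
  feature_bins = [0, 0, 1, 1, 2, 2, 2, 0]
  target_labels = [0, 0, 1, 1, 1, 1, 0, 0]
  print(adjusted_mutual_info_score(target_labels, feature_bins))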
- dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - Dataflow jobs. + materialized_examples_format: The format to use for the materialized examples. Should be either 'tfrecords_gzip' (default) or 'parquet'. + max_selected_features: Maximum number of features to select. If specified, the transform config will be purged by only using the selected features that ranked top in the feature ranking, which has the ranking value for all supported features. If the number of input features is smaller than max_selected_features specified, we will still run the feature selection process and generate the feature ranking, no features will be excluded. The value will be set to 1000 by default if run_feature_selection is enabled. + data_source_csv_filenames: CSV input data source to run feature transform on. + data_source_bigquery_table_path: BigQuery input data source to run feature transform on. + bigquery_staging_full_dataset_id: Dataset in "projectId.datasetId" format for storing intermediate-FTE BigQuery tables. If the specified dataset does not exist in BigQuery, FTE will create the dataset. If no bigquery_staging_full_dataset_id is specified, all intermediate tables will be stored in a dataset created under the provided project in the input data source's location during FTE execution called "vertex_feature_transform_engine_staging_{location.replace('-', '_')}". All tables generated by FTE will have a 30 day TTL. + model_type: Model type, which we wish to engineer features for. Can be one of: neural_network, boosted_trees, l2l, seq2seq, tft, or tide. Defaults to the empty value, `None`. + multimodal_tabular_columns: List of multimodal tabular columns. Defaults to an empty list + multimodal_timeseries_columns: List of multimodal timeseries columns. Defaults to an empty list + multimodal_text_columns: List of multimodal text columns. Defaults to an empty list + multimodal_image_columns: List of multimodal image columns. Defaults to an empty list. + dataflow_machine_type: The machine type used for dataflow jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + dataflow_service_account: Custom service account to run Dataflow jobs. encryption_spec_key_name: Customer-managed encryption key. - autodetect_csv_schema: If True, infers the column types - when importing CSVs into BigQuery. - embedding_prediction_server_docker_uri: The docker image inside which to - run the embedding models to generate embeddings. 
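For orientation, the Dataflow-related knobs above can be collected into a parameter-values fragment like the sketch below; the keys mirror the documented parameter names, the numeric values restate the documented defaults, and the subnetwork and service account strings are placeholders.

.. code-block:: python

  # Dataflow settings for the transform step. Numeric values restate the
  # documented defaults; subnetwork and service account are placeholders.
  dataflow_overrides = {
      "dataflow_machine_type": "n1-standard-16",  # documented default
      "dataflow_max_num_workers": 25,             # documented default
      "dataflow_disk_size_gb": 40,                # documented default
      "dataflow_subnetwork": (
          "https://www.googleapis.com/compute/v1/projects/my-project/"
          "regions/us-central1/subnetworks/my-subnet"  # placeholder
      ),
      "dataflow_use_public_ips": False,  # illustrative choice
      "dataflow_service_account": "dataflow-runner@my-project.iam.gserviceaccount.com",
  }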
- embedding_batch_prediction_machine_type: The machine type to be - used to run the embedding batch prediction job. If not provided, - `n1-highmem-32` will be used. For more details, see: - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types - embedding_batch_prediction_accelerator_type: The accelerator type to use to - generate embeddings. If not provided, no accelerator is used. More - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype - embedding_batch_prediction_accelerator_count: The number of accelerators to - use to generate the embeddings. Default is 0. - embedding_batch_prediction_starting_replica_count: The starting replica count - for embedding batch prediction job. Default = 20. - embedding_batch_prediction_max_replica_count: The max replica count for - embedding batch prediction job. Default = 50. - embedding_batch_prediction_batch_size: The batch size for embedding batch - prediction job. Default = 1024. + autodetect_csv_schema: If True, infers the column types when importing CSVs into BigQuery. + embedding_prediction_server_docker_uri: The docker image inside which to run the embedding models to generate embeddings. + embedding_batch_prediction_machine_type: The machine type to be used to run the embedding batch prediction job. If not provided, `n1-highmem-32` will be used. For more details, see: https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types + embedding_batch_prediction_accelerator_type: The accelerator type to use to generate embeddings. If not provided, no accelerator is used. More details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype + embedding_batch_prediction_accelerator_count: The number of accelerators to use to generate the embeddings. Default is 0. + embedding_batch_prediction_starting_replica_count: The starting replica count for embedding batch prediction job. Default = 20. + embedding_batch_prediction_max_replica_count: The max replica count for embedding batch prediction job. Default = 50. + embedding_batch_prediction_batch_size: The batch size for embedding batch prediction job. Default = 1024. Returns: dataset_stats: The stats of the dataset. materialized_data: The materialized dataset. transform_output: The transform output artifact. - split_example_counts: JSON string of data split example counts for train, - validate, and test splits. - bigquery_train_split_uri: BigQuery URI for the train split to pass to the - batch prediction component during distillation. - bigquery_validation_split_uri: BigQuery URI for the validation split to - pass to the batch prediction component during distillation. - bigquery_test_split_uri: BigQuery URI for the test split to pass to the - batch prediction component during evaluation. - bigquery_downsampled_test_split_uri: BigQuery URI for the downsampled test - split to pass to the batch prediction component during batch explain. - instance_schema_path: Schema of input data to the tf_model at serving - time. - training_schema_path: Schema of input data to the tf_model at training - time. - feature_ranking: The ranking of features, all features supported in the - dataset will be included. For "AMI" algorithm, array features won't be - available in the ranking as arrays are not supported yet. - gcp_resources: GCP resources created by this component. 
For more details, - see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. - group_columns: A list of time series attribute column names that define - the time series hierarchy. - group_total_weight: The weight of the loss for predictions aggregated over - time series in the same group. - temporal_total_weight: The weight of the loss for predictions aggregated - over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for predictions - aggregated over both the horizon and time series in the same hierarchy - group. + split_example_counts: JSON string of data split example counts for train, validate, and test splits. + bigquery_train_split_uri: BigQuery URI for the train split to pass to the batch prediction component during distillation. + bigquery_validation_split_uri: BigQuery URI for the validation split to pass to the batch prediction component during distillation. + bigquery_test_split_uri: BigQuery URI for the test split to pass to the batch prediction component during evaluation. + bigquery_downsampled_test_split_uri: BigQuery URI for the downsampled test split to pass to the batch prediction component during batch explain. + instance_schema_path: Schema of input data to the tf_model at serving time. + training_schema_path: Schema of input data to the tf_model at training time. + feature_ranking: The ranking of features, all features supported in the dataset will be included. For "AMI" algorithm, array features won't be available in the ranking as arrays are not supported yet. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + group_columns: A list of time series attribute column names that define the time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions aggregated over both the horizon and time series in the same hierarchy group. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py index cb5044ed5f..1ed5fdd75d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Tabnet Hyperparameter Tuning component spec.""" from typing import Optional @@ -69,45 +68,22 @@ def tabnet_hyperparameter_tuning_job( location: The GCP region that runs the pipeline components. root_dir: The root GCS directory for the pipeline components. target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". + prediction_type: The type of prediction the model is to produce. "classification" or "regression". 
weight_column: The weight column name. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is determined based on the dataset size. seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - study_spec_metric_id: Metric to optimize, possible - values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. - study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. + eval_steps: Number of steps to run evaluation for. If not specified or negative, it means run evaluation on the whole validation dataset. If set to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters to optimize. The dictionary key is the parameter_id, which is passed to training job as a command line argument, and the dictionary value is the parameter specification of the metric. max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. + parallel_trial_count: The desired number of trials to run in parallel. + max_failed_trial_count: The number of failed trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. 
One of "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine spec. See https://cloud.google.com/compute/docs/machine-types for options. training_disk_spec: The training disk spec. instance_baseline: The path to a JSON file for baseline values. metadata: Amount of time in seconds to run the trainer for. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py index ee2456320f..db64853055 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Tabnet Trainer component spec.""" from typing import Optional @@ -84,78 +83,39 @@ def tabnet_trainer( location: The GCP region that runs the pipeline components. root_dir: The root GCS directory for the pipeline components. target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". + prediction_type: The type of prediction the model is to produce. "classification" or "regression". weight_column: The weight column name. max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the - trainer for. + max_train_secs: Amount of time in seconds to run the trainer for. learning_rate: The learning rate used by the linear optimizer. - large_category_dim: Embedding dimension for categorical - feature with large number of categories. - large_category_thresh: Threshold for number of categories - to apply large_category_dim embedding dimension to. - yeo_johnson_transform: Enables trainable Yeo-Johnson - power transform. - feature_dim: Dimensionality of the hidden representation - in feature transformation block. - feature_dim_ratio: The ratio of output dimension - (dimensionality of the outputs of each decision step) to feature - dimension. + large_category_dim: Embedding dimension for categorical feature with large number of categories. + large_category_thresh: Threshold for number of categories to apply large_category_dim embedding dimension to. + yeo_johnson_transform: Enables trainable Yeo-Johnson power transform. + feature_dim: Dimensionality of the hidden representation in feature transformation block. + feature_dim_ratio: The ratio of output dimension (dimensionality of the outputs of each decision step) to feature dimension. num_decision_steps: Number of sequential decision steps. - relaxation_factor: Relaxation factor that promotes the - reuse of each feature at different decision steps. When it is 1, a - feature is enforced to be used only at one decision step and as it - increases, more flexibility is provided to use a feature at multiple - decision steps. - decay_every: Number of iterations for periodically - applying learning rate decaying. + relaxation_factor: Relaxation factor that promotes the reuse of each feature at different decision steps. When it is 1, a feature is enforced to be used only at one decision step and as it increases, more flexibility is provided to use a feature at multiple decision steps. 
+ decay_every: Number of iterations for periodically applying learning rate decaying. decay_rate: Learning rate decaying. gradient_thresh: Threshold for the norm of gradients for clipping. - sparsity_loss_weight: Weight of the loss for sparsity - regularization (increasing it will yield more sparse feature selection). + sparsity_loss_weight: Weight of the loss for sparsity regularization (increasing it will yield more sparse feature selection). batch_momentum: Momentum in ghost batch normalization. - batch_size_ratio: The ratio of virtual batch size (size - of the ghost batch normalization) to batch size. - num_transformer_layers: The number of transformer layers - for each decision step. used only at one decision step and as it - increases, more flexibility is provided to use a feature at multiple - decision steps. - num_transformer_layers_ratio: The ratio of shared - transformer layer to transformer layers. - class_weight: The class weight is used to computes a - weighted cross entropy which is helpful in classify imbalanced dataset. - Only used for classification. - loss_function_type: Loss function type. Loss function in - classification [cross_entropy, weighted_cross_entropy, focal_loss], - default is cross_entropy. Loss function in regression: [rmse, mae, mse], - default is mse. - alpha_focal_loss: Alpha value (balancing factor) in - focal_loss function. Only used for classification. - gamma_focal_loss: Gamma value (modulating factor) for - focal loss for focal loss. Only used for classification. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. + batch_size_ratio: The ratio of virtual batch size (size of the ghost batch normalization) to batch size. + num_transformer_layers: The number of transformer layers for each decision step. used only at one decision step and as it increases, more flexibility is provided to use a feature at multiple decision steps. + num_transformer_layers_ratio: The ratio of shared transformer layer to transformer layers. + class_weight: The class weight is used to computes a weighted cross entropy which is helpful in classify imbalanced dataset. Only used for classification. + loss_function_type: Loss function type. Loss function in classification [cross_entropy, weighted_cross_entropy, focal_loss], default is cross_entropy. Loss function in regression: [rmse, mae, mse], default is mse. + alpha_focal_loss: Alpha value (balancing factor) in focal_loss function. Only used for classification. + gamma_focal_loss: Gamma value (modulating factor) for focal loss for focal loss. Only used for classification. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is determined based on the dataset size. seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. + eval_steps: Number of steps to run evaluation for. If not specified or negative, it means run evaluation on the whole validation dataset. If set to 0, it means run evaluation for a fixed number of samples. batch_size: Batch size for training. 
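alpha_focal_loss and gamma_focal_loss above are the balancing and modulating factors of the standard focal loss; a minimal NumPy sketch of that formula for the binary case follows, included for reference only and not as the trainer's internal implementation.

.. code-block:: python

  import numpy as np

  def binary_focal_loss(y_true, p_pred, alpha=0.25, gamma=2.0, eps=1e-7):
      """Standard binary focal loss: FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t)."""
      p_pred = np.clip(p_pred, eps, 1.0 - eps)
      p_t = np.where(y_true == 1, p_pred, 1.0 - p_pred)
      alpha_t = np.where(y_true == 1, alpha, 1.0 - alpha)
      return float(np.mean(-alpha_t * (1.0 - p_t) ** gamma * np.log(p_t)))

  # Confident correct predictions contribute much less than errors.
  print(binary_focal_loss(np.array([1, 0, 1]), np.array([0.9, 0.2, 0.4])))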
- measurement_selection_type: Which measurement to use - if/when the service automatically selects the final measurement from - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - or "LAST_MEASUREMENT". - optimization_metric: Optimization metric used for - `measurement_selection_type`. Default is "rmse" for regression and "auc" - for classification. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. + measurement_selection_type: Which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. One of "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for `measurement_selection_type`. Default is "rmse" for regression and "auc" for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. + training_machine_spec: The training machine spec. See https://cloud.google.com/compute/docs/machine-types for options. training_disk_spec: The training disk spec. instance_baseline: The path to a JSON file for baseline values. metadata: Amount of time in seconds to run the trainer for. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py index 97e6b370a1..f9634726db 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py @@ -41,8 +41,7 @@ def _update_parameters( def _generate_model_display_name() -> str: """Automatically generates a model_display_name. - Returns: - model_display_name. + Returns: model_display_name. """ return f'tabular-workflow-model-{uuid.uuid4()}' @@ -150,10 +149,10 @@ def _get_default_pipeline_params( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -170,7 +169,7 @@ def _get_default_pipeline_params( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. 
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -225,8 +224,7 @@ def _get_default_pipeline_params( model_description: The description for the uploaded model. enable_fte: Whether to enable the Feature Transform Engine. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if not study_spec_parameters_override: study_spec_parameters_override = [] @@ -515,10 +513,10 @@ def get_automl_tabular_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -535,7 +533,7 @@ def get_automl_tabular_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -587,8 +585,7 @@ def get_automl_tabular_pipeline_and_parameters( model_description: The description for the uploaded model. enable_fte: Whether to enable the Feature Transform Engine. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = _get_default_pipeline_params( project=project, @@ -766,10 +763,10 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. 
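A hedged sketch of a worker pool override in the format referenced above (the WorkerPoolSpec message in custom_job.proto); treat the exact keys and the machine type as assumptions to check against that proto.

.. code-block:: python

  # Illustrative stage-1 tuner worker pool override; keys follow the
  # WorkerPoolSpec message referenced above and are an assumption.
  stage_1_tuner_worker_pool_specs_override = [
      {  # worker pool 0: the chief
          "machine_spec": {"machine_type": "n1-standard-16"},
          "replica_count": 1,
      }
  ]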
cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -786,7 +783,7 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -825,8 +822,7 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ model_display_name = ( model_display_name @@ -910,8 +906,7 @@ def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: Args: input_dict: The input json dictionary. - Returns: - The encoded string used for parameter. + Returns: The encoded string used for parameter. """ if not input_dict: return '' @@ -1004,7 +999,7 @@ def get_skip_architecture_search_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -1021,7 +1016,7 @@ def get_skip_architecture_search_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -1049,8 +1044,7 @@ def get_skip_architecture_search_pipeline_and_parameters( evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. 
""" return get_automl_tabular_pipeline_and_parameters( @@ -1281,7 +1275,7 @@ def get_wide_and_deep_trainer_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1300,13 +1294,12 @@ def get_wide_and_deep_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -1573,7 +1566,7 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1592,13 +1585,12 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ warnings.warn( 'This method is deprecated. Please use' @@ -1850,7 +1842,7 @@ def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. 
The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1869,13 +1861,12 @@ def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2132,7 +2123,7 @@ def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -2151,13 +2142,12 @@ def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2461,7 +2451,7 @@ def get_tabnet_trainer_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. 
run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -2480,13 +2470,12 @@ def get_tabnet_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2642,8 +2631,7 @@ def get_tabnet_study_spec_parameters_override( parameter is only used as a hint for the hyperparameter search space, unrelated to the real cost. - Returns: - List of study_spec_parameters_override. + Returns: List of study_spec_parameters_override. """ if dataset_size_bucket not in ['small', 'medium', 'large']: @@ -2687,8 +2675,7 @@ def _format_tabnet_regression_study_spec_parameters_override( parameter is only used as a hint for the hyperparameter search space, unrelated to the real cost. - Returns: - List of study_spec_parameters_override for regression. + Returns: List of study_spec_parameters_override for regression. """ # To get regression study_spec_parameters, we need to set @@ -2720,8 +2707,7 @@ def _format_tabnet_regression_study_spec_parameters_override( def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: """Get study_spec_parameters_override for a Wide & Deep hyperparameter tuning job. - Returns: - List of study_spec_parameters_override. + Returns: List of study_spec_parameters_override. """ param_path = os.path.join( pathlib.Path(__file__).parent.resolve(), @@ -2737,8 +2723,7 @@ def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: def get_xgboost_study_spec_parameters_override() -> List[Dict[str, Any]]: """Get study_spec_parameters_override for an XGBoost hyperparameter tuning job. - Returns: - List of study_spec_parameters_override. + Returns: List of study_spec_parameters_override. """ param_path = os.path.join( pathlib.Path(__file__).parent.resolve(), 'configs/xgboost_params.json' @@ -2964,13 +2949,12 @@ def get_xgboost_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. 
""" parameter_values = {} if isinstance(tf_auto_transform_features, list): @@ -3245,13 +3229,12 @@ def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = {} if isinstance(tf_auto_transform_features, list): diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py index eeb22001f1..1aba4e1c56 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Wide and Deep Hyperparameter Tuning component spec.""" from typing import Optional @@ -69,45 +68,22 @@ def wide_and_deep_hyperparameter_tuning_job( location: The GCP region that runs the pipeline components. root_dir: The root GCS directory for the pipeline components. target_column: The target column name. - prediction_type: The type of prediction the model is to - produce. "classification" or "regression". + prediction_type: The type of prediction the model is to produce. "classification" or "regression". weight_column: The weight column name. - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is determined based on the dataset size. seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - study_spec_metric_id: Metric to optimize, , possible - values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. - study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. + eval_steps: Number of steps to run evaluation for. 
If not specified or negative, it means run evaluation on the whole validation dataset. If set to 0, it means run evaluation for a fixed number of samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. + study_spec_metric_id: Metric to optimize, possible values: [ 'loss', 'average_loss', 'rmse', 'mae', 'mql', 'accuracy', 'auc', 'precision', 'recall']. + study_spec_metric_goal: Optimization goal of the metric, possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters to optimize. The dictionary key is the parameter_id, which is passed to training job as a command line argument, and the dictionary value is the parameter specification of the metric. max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. + parallel_trial_count: The desired number of trials to run in parallel. + max_failed_trial_count: The number of failed trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. One of "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + training_machine_spec: The training machine spec. See https://cloud.google.com/compute/docs/machine-types for options. training_disk_spec: The training disk spec. instance_baseline: The path to a JSON file for baseline values. metadata: Amount of time in seconds to run the trainer for. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py index 5cdd8cf9bf..5aac5303e0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Wide and Deep Trainer component spec.""" from typing import Optional @@ -82,68 +81,37 @@ def wide_and_deep_trainer( location: The GCP region that runs the pipeline components. root_dir: The root GCS directory for the pipeline components. target_column: The target column name. 
- prediction_type: The type of prediction the model is to - produce. "classification" or "regression". + prediction_type: The type of prediction the model is to produce. "classification" or "regression". weight_column: The weight column name. max_steps: Number of steps to run the trainer for. - max_train_secs: Amount of time in seconds to run the - trainer for. + max_train_secs: Amount of time in seconds to run the trainer for. learning_rate: The learning rate used by the linear optimizer. - optimizer_type: The type of optimizer to use. Choices are - "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent - Optimizers, respectively. - l1_regularization_strength: L1 regularization strength - for optimizer_type="ftrl". - l2_regularization_strength: L2 regularization strength - for optimizer_type="ftrl" - l2_shrinkage_regularization_strength: L2 shrinkage - regularization strength for optimizer_type="ftrl". + optimizer_type: The type of optimizer to use. Choices are "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively. + l1_regularization_strength: L1 regularization strength for optimizer_type="ftrl". + l2_regularization_strength: L2 regularization strength for optimizer_type="ftrl". + l2_shrinkage_regularization_strength: L2 shrinkage regularization strength for optimizer_type="ftrl". beta_1: Beta 1 value for optimizer_type="adam". beta_2: Beta 2 value for optimizer_type="adam". - hidden_units: Hidden layer sizes to use for DNN feature - columns, provided in comma-separated layers. - use_wide: If set to true, the categorical columns will be - used in the wide part of the DNN model. - embed_categories: If set to true, the categorical columns - will be used embedded and used in the deep part of the model. Embedding - size is the square root of the column cardinality. - dnn_dropout: The probability we will drop out a given - coordinate. - dnn_learning_rate: The learning rate for training the - deep part of the model. - dnn_optimizer_type: The type of optimizer to use for the - deep part of the model. Choices are "adam", "ftrl" and "sgd". for the - Adam, FTRL, and Gradient Descent Optimizers, respectively. - dnn_l1_regularization_strength: L1 regularization - strength for dnn_optimizer_type="ftrl". - dnn_l2_regularization_strength: L2 regularization - strength for dnn_optimizer_type="ftrl". - dnn_l2_shrinkage_regularization_strength: L2 shrinkage - regularization strength for dnn_optimizer_type="ftrl". + hidden_units: Hidden layer sizes to use for DNN feature columns, provided in comma-separated layers. + use_wide: If set to true, the categorical columns will be used in the wide part of the DNN model. + embed_categories: If set to true, the categorical columns will be embedded and used in the deep part of the model. Embedding size is the square root of the column cardinality. + dnn_dropout: The probability we will drop out a given coordinate. + dnn_learning_rate: The learning rate for training the deep part of the model. + dnn_optimizer_type: The type of optimizer to use for the deep part of the model. Choices are "adam", "ftrl" and "sgd" for the Adam, FTRL, and Gradient Descent Optimizers, respectively. + dnn_l1_regularization_strength: L1 regularization strength for dnn_optimizer_type="ftrl". + dnn_l2_regularization_strength: L2 regularization strength for dnn_optimizer_type="ftrl". + dnn_l2_shrinkage_regularization_strength: L2 shrinkage regularization strength for dnn_optimizer_type="ftrl".
dnn_beta_1: Beta 1 value for dnn_optimizer_type="adam". dnn_beta_2: Beta 2 value for dnn_optimizer_type="adam". - enable_profiler: Enables profiling and saves a trace - during evaluation. - cache_data: Whether to cache data or not. If set to - 'auto', caching is determined based on the dataset size. + enable_profiler: Enables profiling and saves a trace during evaluation. + cache_data: Whether to cache data or not. If set to 'auto', caching is determined based on the dataset size. seed: Seed to be used for this run. - eval_steps: Number of steps to run evaluation for. If not - specified or negative, it means run evaluation on the whole validation - dataset. If set to 0, it means run evaluation for a fixed number of - samples. + eval_steps: Number of steps to run evaluation for. If not specified or negative, it means run evaluation on the whole validation dataset. If set to 0, it means run evaluation for a fixed number of samples. batch_size: Batch size for training. - measurement_selection_type: Which measurement to use - if/when the service automatically selects the final measurement from - previously reported intermediate measurements. One of "BEST_MEASUREMENT" - or "LAST_MEASUREMENT". - optimization_metric: Optimization metric used for - `measurement_selection_type`. Default is "rmse" for regression and "auc" - for classification. - eval_frequency_secs: Frequency at which evaluation and - checkpointing will take place. - training_machine_spec: The training machine - spec. See https://cloud.google.com/compute/docs/machine-types for - options. + measurement_selection_type: Which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. One of "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + optimization_metric: Optimization metric used for `measurement_selection_type`. Default is "rmse" for regression and "auc" for classification. + eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. + training_machine_spec: The training machine spec. See https://cloud.google.com/compute/docs/machine-types for options. training_disk_spec: The training disk spec. instance_baseline: The path to a JSON file for baseline values. metadata: Amount of time in seconds to run the trainer for. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py index a96e46d984..6699703e1a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML XGBoost Hyperparameter Tuning component spec.""" from typing import Optional @@ -41,35 +40,19 @@ def xgboost_hyperparameter_tuning_job( Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - study_spec_metric_id: Metric to optimize. For options, - please look under 'eval_metric' at - https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. 
- study_spec_metric_goal: Optimization goal of the metric, - possible values: "MAXIMIZE", "MINIMIZE". - study_spec_parameters_override: List of dictionaries - representing parameters to optimize. The dictionary key is the - parameter_id, which is passed to training job as a command line - argument, and the dictionary value is the parameter specification of the - metric. + study_spec_metric_id: Metric to optimize. For options, please look under 'eval_metric' at https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters. + study_spec_metric_goal: Optimization goal of the metric, possible values: "MAXIMIZE", "MINIMIZE". + study_spec_parameters_override: List of dictionaries representing parameters to optimize. The dictionary key is the parameter_id, which is passed to training job as a command line argument, and the dictionary value is the parameter specification of the metric. max_trial_count: The desired total number of trials. - parallel_trial_count: The desired number of trials to run - in parallel. - max_failed_trial_count: The number of failed trials that - need to be seen before failing the HyperparameterTuningJob. If set to 0, - Vertex AI decides how many trials must fail before the whole job fails. - study_spec_algorithm: The search algorithm specified for - the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or - 'RANDOM_SEARCH'. - study_spec_measurement_selection_type: Which measurement - to use if/when the service automatically selects the final measurement - from previously reported intermediate measurements. One of - "BEST_MEASUREMENT" or "LAST_MEASUREMENT". + parallel_trial_count: The desired number of trials to run in parallel. + max_failed_trial_count: The number of failed trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides how many trials must fail before the whole job fails. + study_spec_algorithm: The search algorithm specified for the study. One of 'ALGORITHM_UNSPECIFIED', 'GRID_SEARCH', or 'RANDOM_SEARCH'. + study_spec_measurement_selection_type: Which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. One of "BEST_MEASUREMENT" or "LAST_MEASUREMENT". worker_pool_specs: The worker pool specs. encryption_spec_key_name: The KMS key name. Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. + gcp_resources: Serialized gcp_resources proto tracking the custom training job. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py index e03036c353..e19059a303 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML XGBoost Trainer component spec.""" from typing import Optional @@ -37,8 +36,7 @@ def xgboost_trainer( encryption_spec_key_name: The KMS key name. Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. + gcp_resources: Serialized gcp_resources proto tracking the custom training job. 
""" # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py index 677cb50021..239609c9e7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py @@ -62,106 +62,41 @@ def dataflow_flex_template( """Launch a job with a Dataflow Flex Template. Args: - location: The regional endpoint to which to direct the request. E.g., us-central1, - us-west1. Defaults to `us-central1` if not set. - job_name: The job name to use for the created job. For update job requests, the job - name should be the same as the existing running job. If none is specified, - a default name will be generated by the component. - container_spec_gcs_path: Cloud Storage path to a file with json serialized ContainerSpec as - content. - parameters: - The parameters for the flex template. Ex. {"my_template_param":"5"} - launch_options: - Launch options for this flex template job. This is a common set of options - across languages and templates. This should not be used to pass job - parameters. - num_workers: The initial number of Google Compute Engine instances for the job. If - empty or unspecified, the Dataflow service determines an appropriate - number of workers. - max_workers: The maximum number of Google Compute Engine instances to be made available - to your pipeline during execution, from 1 to 1000. If empty or - unspecified, the Dataflow service determines a default maximum number of - instances. For more details, see - https://cloud.google.com/dataflow/docs/horizontal-autoscaling. - service_account_email: The email address of the service account to run the job as. If - unspecified, the Dataflow service uses the project's Compute Engine - default service account. - temp_location: The Cloud Storage path to use for temporary files. Must be a valid Cloud - Storage URL, beginning with gs://. For more details, see - https://cloud.google.com/dataflow/docs/guides/setting-pipeline-options#setting_required_options. - machine_type: The machine type to use for the Dataflow job. Defaults to the value from - the template if not specified. + location: The regional endpoint to which to direct the request. E.g., us-central1, us-west1. Defaults to `us-central1` if not set. + job_name: The job name to use for the created job. For update job requests, the job name should be the same as the existing running job. If none is specified, a default name will be generated by the component. + container_spec_gcs_path: Cloud Storage path to a file with json serialized ContainerSpec as content. + parameters: The parameters for the flex template. Ex. {"my_template_param":"5"} + launch_options: Launch options for this flex template job. This is a common set of options across languages and templates. This should not be used to pass job parameters. + num_workers: The initial number of Google Compute Engine instances for the job. If empty or unspecified, the Dataflow service determines an appropriate number of workers. + max_workers: The maximum number of Google Compute Engine instances to be made available to your pipeline during execution, from 1 to 1000. If empty or unspecified, the Dataflow service determines a default maximum number of instances. 
For more details, see https://cloud.google.com/dataflow/docs/horizontal-autoscaling. + service_account_email: The email address of the service account to run the job as. If unspecified, the Dataflow service uses the project's Compute Engine default service account. + temp_location: The Cloud Storage path to use for temporary files. Must be a valid Cloud Storage URL, beginning with gs://. For more details, see https://cloud.google.com/dataflow/docs/guides/setting-pipeline-options#setting_required_options. + machine_type: The machine type to use for the Dataflow job. Defaults to the value from the template if not specified. additional_experiments: Additional experiment flags for the job. - network: Network to which VMs will be assigned. If empty or unspecified, the - service will use the network "default". - subnetwork: Subnetwork to which VMs will be assigned, if desired. You can specify a - subnetwork using either a complete URL or an abbreviated path. - Expected to be of the form - "https://www.googleapis.com/compute/v1/projects/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWORK" - or "regions/REGION/subnetworks/SUBNETWORK". If the subnetwork is located - in a Shared VPC network, you must use the complete URL. - additional_user_labels: - Additional user labels to be specified for the job. Keys and values must - follow the restrictions specified in the labeling restrictions page - (https://cloud.google.com/compute/docs/labeling-resources#restrictions). - An object containing a list of "key": value pairs. - Example: { "name": "wrench", "mass": "1kg", "count": "3" }. - kms_key_name: Name for the Cloud KMS key for the job. Key format is - "projects/HOST_PROJECT_ID/locations/LOCATION/keyRings/KEYRING_ID/cryptoKeys/CRYPTO_KEY_ID" + network: Network to which VMs will be assigned. If empty or unspecified, the service will use the network "default". + subnetwork: Subnetwork to which VMs will be assigned, if desired. You can specify a subnetwork using either a complete URL or an abbreviated path. Expected to be of the form "https://www.googleapis.com/compute/v1/projects/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWORK" or "regions/REGION/subnetworks/SUBNETWORK". If the subnetwork is located in a Shared VPC network, you must use the complete URL. + additional_user_labels: Additional user labels to be specified for the job. Keys and values must follow the restrictions specified in the labeling restrictions page (https://cloud.google.com/compute/docs/labeling-resources#restrictions). An object containing a list of "key": value pairs. Example: `{ "name": "wrench", "mass": "1kg", "count": "3" }`. + kms_key_name: Name for the Cloud KMS key for the job. Key format is "projects/HOST_PROJECT_ID/locations/LOCATION/keyRings/KEYRING_ID/cryptoKeys/CRYPTO_KEY_ID" ip_configuration: Configuration for VM IPs. - worker_region: The Compute Engine region - (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in - which worker processing should occur, e.g. "us-west1". Mutually exclusive - with worker_zone. If neither worker_region nor worker_zone is specified, - default to the control plane's region. - worker_zone: The Compute Engine zone - (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in - which worker processing should occur, e.g. "us-west1-a". Mutually - exclusive with workerRegion. If neither worker_region nor worker_zone is - specified, a zone in the control plane's region is chosen based on - available capacity. 
+ worker_region: The Compute Engine region (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. "us-west1". Mutually exclusive with worker_zone. If neither worker_region nor worker_zone is specified, default to the control plane's region. + worker_zone: The Compute Engine zone (https://cloud.google.com/compute/docs/regions-zones/regions-zones) in which worker processing should occur, e.g. "us-west1-a". Mutually exclusive with workerRegion. If neither worker_region nor worker_zone is specified, a zone in the control plane's region is chosen based on available capacity. enable_streaming_engine: Whether to enable Streaming Engine for the job. - flexrs_goal: Set FlexRS goal for the job. For more details, see - https://cloud.google.com/dataflow/docs/guides/flexrs. - staging_location: The Cloud Storage path for staging local files. Must be a valid Cloud - Storage URL, beginning with gs://. For more details, see - https://cloud.google.com/dataflow/docs/guides/setting-pipeline-options#setting_required_options. - sdk_container_image: Docker registry location (e.g. Artifact Registry) of the container image - to use for the worker harness. Default is the container for the version of - the SDK. Note this field is only valid for portable Dataflow pipeline - jobs. - disk_size_gb: Worker disk size, in gigabytes. If empty or unspecified, the Dataflow - service determines an appropriate disk size. - autoscaling_algorithm: The algorithm to use for autoscaling. If empty or unspecified, the - Dataflow service sets a default value. For more details, see - https://cloud.google.com/dataflow/docs/reference/pipeline-options#resource_utilization. - dump_heap_on_oom: If true, when processing time is spent almost entirely on garbage - collection (GC), saves a heap dump before ending the thread or process. - If false, ends the thread or process without saving a heap dump. Does not - save a heap dump when the Java Virtual Machine (JVM) has an out of memory - error during processing. The location of the heap file is either echoed - back to the user, or the user is given the opportunity to download the - heap file. - save_heap_dumps_to_gcs_path: Cloud Storage bucket (directory) to upload heap dumps to. Enabling this - field implies that dump_heap_on_oom is set to true. - launcher_machine_type: The machine type to use for launching the Dataflow job. The default is - n1-standard-1. + flexrs_goal: Set FlexRS goal for the job. For more details, see https://cloud.google.com/dataflow/docs/guides/flexrs. + staging_location: The Cloud Storage path for staging local files. Must be a valid Cloud Storage URL, beginning with gs://. For more details, see https://cloud.google.com/dataflow/docs/guides/setting-pipeline-options#setting_required_options. + sdk_container_image: Docker registry location (e.g. Artifact Registry) of the container image to use for the worker harness. Default is the container for the version of the SDK. Note this field is only valid for portable Dataflow pipeline jobs. + disk_size_gb: Worker disk size, in gigabytes. If empty or unspecified, the Dataflow service determines an appropriate disk size. + autoscaling_algorithm: The algorithm to use for autoscaling. If empty or unspecified, the Dataflow service sets a default value. For more details, see https://cloud.google.com/dataflow/docs/reference/pipeline-options#resource_utilization. 
+ dump_heap_on_oom: If true, when processing time is spent almost entirely on garbage collection (GC), saves a heap dump before ending the thread or process. If false, ends the thread or process without saving a heap dump. Does not save a heap dump when the Java Virtual Machine (JVM) has an out of memory error during processing. The location of the heap file is either echoed back to the user, or the user is given the opportunity to download the heap file. + save_heap_dumps_to_gcs_path: Cloud Storage bucket (directory) to upload heap dumps to. Enabling this field implies that dump_heap_on_oom is set to true. + launcher_machine_type: The machine type to use for launching the Dataflow job. The default is n1-standard-1. enable_launcher_vm_serial_port_logging: If true serial port logging will be enabled for the launcher VM. - update: Set this to true if you are sending a request to update a running - streaming job. When set, the job name should be the same as the running - job. - transform_name_mappings: - Use this to pass transformNameMappings for streaming update jobs. - Ex:{"oldTransformName":"newTransformName",...}'. For more details, see - https://cloud.google.com/dataflow/docs/guides/updating-a-pipeline#Mapping - validate_only: If true, the request is validated but not actually executed. Defaults to - false. - project: The ID of the Cloud Platform project that the job - belongs to. Defaults to the project in which the PipelineJob is run. + update: Set this to true if you are sending a request to update a running streaming job. When set, the job name should be the same as the running job. + transform_name_mappings: Use this to pass transformNameMappings for streaming update jobs. Example: `{"oldTransformName":"newTransformName",...}`. For more details, see https://cloud.google.com/dataflow/docs/guides/updating-a-pipeline#Mapping + validate_only: If true, the request is validated but not actually executed. Defaults to false. + project: The ID of the Cloud Platform project that the job belongs to. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index 2f009f9d92..8dfc65d1c4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -42,40 +42,24 @@ def infer_pipeline( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, ) -> PipelineOutput: + # fmt: off """Uses a large-language model to perform bulk inference on a prompt dataset. Args: - large_model_reference: Name of the base model. Supported values are - `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. - `text-bison@001` and `t5-small` are supported in `us-central1` and - `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in - `europe-west4`. 
+ large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`. model_checkpoint: Cloud storage path to the model checkpoint. - prompt_dataset: Cloud storage path to an unlabled prompt dataset used for - reinforcement learning. The dataset format is jsonl. Each example in the - dataset must have an `input_text` field that contains the prompt. - prompt_sequence_length: Maximum tokenized sequence length for input text. - Higher values increase memory overhead. This value should be at most 8192. - Default value is 512. - target_sequence_length: Maximum tokenized sequence length for target text. - Higher values increase memory overhead. This value should be at most 1024. - Default value is 64. - sampling_strategy: This field specifies the sampling strategy. The valid - options are 'greedy' and 'temperature_sampling'. - instruction: This field lets the model know what task it needs to perform. - Base models have been trained over a large set of varied instructions. You - can give a simple and intuitive description of the task and the model will - follow it, e.g. "Classify this movie review as positive or negative" or - "Translate this sentence to Danish". Do not specify this if your dataset - already prepends the instruction to the inputs field. - project: Project used to run custom jobs. If not specified the project used - to run the pipeline will be used. - location: Location used to run custom jobs. If not specified the location - used to run the pipeline will be used. + prompt_dataset: Cloud storage path to an unlabeled prompt dataset used for reinforcement learning. The dataset format is jsonl. Each example in the dataset must have an `input_text` field that contains the prompt. + prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512. + target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64. + sampling_strategy: This field specifies the sampling strategy. The valid options are 'greedy' and 'temperature_sampling'. + instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". Do not specify this if your dataset already prepends the instruction to the inputs field. + project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used. + location: Location used to run custom jobs. If not specified the location used to run the pipeline will be used. Returns: Cloud storage path to output predictions.
""" + # fmt: on prompt_column = 'input_text' machine_spec = function_based.resolve_machine_spec( location=location, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index 175ea233b5..eef19e6d96 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -55,70 +55,32 @@ def rlhf_pipeline( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, ) -> PipelineOutput: + # fmt: off """Performs reinforcement learning from human feedback. Args: - prompt_dataset: Cloud storage path to an unlabled prompt dataset used for - reinforcement learning. The dataset format is jsonl. Each example in the - dataset must have an `input_text` field that contains the prompt. - preference_dataset: Cloud storage path to a human preference dataset used to - train a reward model. The dataset format is jsonl. Each example in the - dataset must contain the following fields: `input_text` that contains the - prompt, `candidate_0` and `candidate_1` that contain candidate responses, - `choice` that specifies the preferred candidate. - large_model_reference: Name of the base model. Supported values are - `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. - `text-bison@001` and `t5-small` are supported in `us-central1` and - `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in - `europe-west4`. - model_display_name: Name of the fine-tuned model shown in the Model - Registry. If not provided, a default name will be created. - prompt_sequence_length: Maximum tokenized sequence length for input text. - Higher values increase memory overhead. This value should be at most 8192. - Default value is 512. - target_sequence_length: Maximum tokenized sequence length for target text. - Higher values increase memory overhead. This value should be at most 1024. - Default value is 64. - reward_model_learning_rate_multiplier: Constant used to adjust the base - learning rate used when training a reward model. Multiply by a number > 1 - to increase the magnitude of updates applied at each training step or - multiply by a number < 1 to decrease the magnitude of updates. Default - value is 1.0. - reinforcement_learning_rate_multiplier: Constant used to adjust the base - learning rate used during reinforcement learning. Multiply by a number > 1 - to increase the magnitude of updates applied at each training step or - multiply by a number < 1 to decrease the magnitude of updates. Default - value is 1.0. - reward_model_train_steps: Number of steps to use when training a reward - model. Default value is 1000. - reinforcement_learning_train_steps: Number of reinforcement learning steps - to perform when tuning a base model. Default value is 1000. - kl_coeff: Coefficient for KL penalty. This regularizes the policy model and - penalizes if it diverges from its initial distribution. If set to 0, the - reference language model is not loaded into memory. Default value is 0.1. - instruction: This field lets the model know what task it needs to perform. - Base models have been trained over a large set of varied instructions. You - can give a simple and intuitive description of the task and the model will - follow it, e.g. 
"Classify this movie review as positive or negative" or - "Translate this sentence to Danish". Do not specify this if your dataset - already prepends the instruction to the inputs field. - deploy_model: Whether to deploy the model to an endpoint in `us-central1`. - Default is True. - eval_dataset: Optional Cloud storage path to an evaluation dataset. If - provided, inference will be performed on this dataset after training. The - dataset format is jsonl. Each example in the dataset must contain a field - `input_text` that contains the prompt. - project: Project used to run custom jobs. If not specified the project used - to run the pipeline will be used. - location: Location used to run custom jobs. If not specified the location - used to run the pipeline will be used. + prompt_dataset: Cloud storage path to an unlabled prompt dataset used for reinforcement learning. The dataset format is jsonl. Each example in the dataset must have an `input_text` field that contains the prompt. + preference_dataset: Cloud storage path to a human preference dataset used to train a reward model. The dataset format is jsonl. Each example in the dataset must contain the following fields: `input_text` that contains the prompt, `candidate_0` and `candidate_1` that contain candidate responses, `choice` that specifies the preferred candidate. + large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`. + model_display_name: Name of the fine-tuned model shown in the Model Registry. If not provided, a default name will be created. + prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512. + target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64. + reward_model_learning_rate_multiplier: Constant used to adjust the base learning rate used when training a reward model. Multiply by a number > 1 to increase the magnitude of updates applied at each training step or multiply by a number < 1 to decrease the magnitude of updates. Default value is 1.0. + reinforcement_learning_rate_multiplier: Constant used to adjust the base learning rate used during reinforcement learning. Multiply by a number > 1 to increase the magnitude of updates applied at each training step or multiply by a number < 1 to decrease the magnitude of updates. Default value is 1.0. + reward_model_train_steps: Number of steps to use when training a reward model. Default value is 1000. + reinforcement_learning_train_steps: Number of reinforcement learning steps to perform when tuning a base model. Default value is 1000. + kl_coeff: Coefficient for KL penalty. This regularizes the policy model and penalizes if it diverges from its initial distribution. If set to 0, the reference language model is not loaded into memory. Default value is 0.1. + instruction: This field lets the model know what task it needs to perform. Base models have been trained over a large set of varied instructions. You can give a simple and intuitive description of the task and the model will follow it, e.g. "Classify this movie review as positive or negative" or "Translate this sentence to Danish". 
Do not specify this if your dataset already prepends the instruction to the inputs field. + deploy_model: Whether to deploy the model to an endpoint in `us-central1`. Default is True. + eval_dataset: Optional Cloud storage path to an evaluation dataset. If provided, inference will be performed on this dataset after training. The dataset format is jsonl. Each example in the dataset must contain a field `input_text` that contains the prompt. + project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used. + location: Location used to run custom jobs. If not specified the location used to run the pipeline will be used. Returns: - model_resource_name: Path to the model uploaded to the Model Registry. This - will be an empty string if the model was not deployed. - endpoint_resource_name: Path the Online Prediction Endpoint. This will be an - empty string if the model was not deployed. + model_resource_name: Path to the model uploaded to the Model Registry. This will be an empty string if the model was not deployed. + endpoint_resource_name: Path to the Online Prediction Endpoint. This will be an empty string if the model was not deployed. """ + # fmt: on policy_model_lora_dim = 1 reward_model_lora_dim = 0 batch_size = 64 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py index c3d2d6f7ae..80f813400a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/data_bias_component.py @@ -51,56 +51,24 @@ def detect_data_bias( Args: location: Location for running data bias detection. - target_field_name: The full name path of the features target field - in the predictions file. Formatted to be able to find nested columns, - delimited by `.`. Alternatively referred to as the ground truth (or - ground_truth_column) field. - bias_configs: A list of - `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. - When provided, compute data bias metrics for each defined slice. Below - is an example of how to format this input. - 1: First, create a BiasConfig. - `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` + target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. + bias_configs: A list of `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. When provided, compute data bias metrics for each defined slice. Below is an example of how to format this input. - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` + 1: First, create a BiasConfig. `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` + 2: Create a list to store the bias configs into. `bias_configs = []` + 3: Format each BiasConfig into a JSON or Dict.
`bias_config_json = json_format.MessageToJson(bias_config)` or `bias_config_dict = json_format.MessageToDict(bias_config)`. + 4: Combine each bias_config JSON into a list. `bias_configs.append(bias_config_json)` + 5: Finally, pass bias_configs as a parameter for this component. `DetectDataBiasOp(bias_configs=bias_configs)` - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - - `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ - 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` - 2: Create a list to store the bias configs into. - `bias_configs = []` - 3: Format each BiasConfig into a JSON or Dict. - `bias_config_json = json_format.MessageToJson(bias_config` or - `bias_config_dict = json_format.MessageToDict(bias_config).` - 4: Combine each bias_config JSON into a list. - `bias_configs.append(bias_config_json)` - 5: Finally, pass bias_configs as an parameter for this component. - `DetectDataBiasOp(bias_configs=bias_configs)` - dataset_format: The file format for the dataset. `jsonl` and `csv` are the - currently allowed formats. - dataset_storage_source_uris: Google Cloud - Storage URI(-s) to unmanaged test datasets.`jsonl` and `csv` is currently - allowed format. If `dataset` is also provided, this field will be - overriden by the provided Vertex Dataset. - dataset: A `google.VertexDataset` - artifact of the dataset. If `dataset_gcs_source` is also provided, this - Vertex Dataset argument will override the GCS source. - encryption_spec_key_name: Customer-managed encryption key options for the - Dataflow. If this is set, then all resources created by the Dataflow - will be encrypted with the provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. + dataset_format: The file format for the dataset. `jsonl` and `csv` are the currently allowed formats. + dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged test datasets. `jsonl` and `csv` are the currently allowed formats. If `dataset` is also provided, this field will be overridden by the provided Vertex Dataset. + dataset: A `google.VertexDataset` artifact of the dataset. If `dataset_gcs_source` is also provided, this Vertex Dataset argument will override the GCS source. + encryption_spec_key_name: Customer-managed encryption key options for the Dataflow. If this is set, then all resources created by the Dataflow will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. Returns: - data_bias_metrics: - Artifact tracking the data bias detection output. - gcp_resources: - Serialized gcp_resources proto tracking the Dataflow job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + data_bias_metrics: Artifact tracking the data bias detection output. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
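# Illustrative sketch, not part of the diff: the numbered bias_configs steps from the
# docstring above, collected into one snippet. The import statements and the
# `DetectDataBiasOp(bias_configs=...)` call are taken verbatim from that docstring;
# 'feature_a' and 'label_a' are placeholder values.
from google.protobuf import json_format
from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig
from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec
from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig

# 1: Build a BiasConfig that slices on a single feature value.
bias_config = BiasConfig(
    bias_slices=SliceSpec(
        configs={'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))}
    )
)

# 2-4: Serialize each config to JSON and collect the results in a list.
bias_configs = []
bias_configs.append(json_format.MessageToJson(bias_config))

# 5: Pass the list to the component inside a pipeline definition, e.g.
# DetectDataBiasOp(bias_configs=bias_configs)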
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index 4461332987..da5888bd42 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -53,83 +53,36 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, ): + # fmt: off """The LLM Text Classification Evaluation pipeline. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. - model_name: The Model name used to run evaluation. Must be a publisher Model - or a managed Model sharing the same ancestor location. Starting this job - has no impact on any existing deployments of the Model and their - resources. - evaluation_task: The task that the large language model will be evaluated - on. The evaluation component computes a set of metrics relevant to that - specific task. Currently supported Classification tasks is: - `text-classification`. - evaluation_class_labels: The JSON array of class names for the target_field, - in the same order they appear in the batch predictions input file. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - machine_type: The machine type of the custom jobs in this pipeline. If not - set, defaulted to `e2-highmem-16`. More details: - https://cloud.google.com/compute/docs/machine-resource - service_account: Sets the default service account for workload run-as - account. The service account running the pipeline - (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - submitting jobs must have act-as permission on this run-as account. 
If - unspecified, the Vertex AI Custom Code Service - Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - network: The full name of the Compute Engine network to which the job should - be peered. For example, `projects/12345/global/networks/myVPC`. Format is - of the form `projects/{project}/global/networks/{network}`. Where - `{project}` is a project number, as in `12345`, and `{network}` is a - network name, as in `myVPC`. To specify this field, you must have already - configured VPC Network Peering for Vertex AI - (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left - unspecified, the job is not peered with any network. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. + model_name: The Model name used to run evaluation. Must be a publisher Model or a managed Model sharing the same ancestor location. Starting this job has no impact on any existing deployments of the Model and their resources. + evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that specific task. The currently supported Classification task is `text-classification`. + evaluation_class_labels: The JSON array of class names for the target_field, in the same order they appear in the batch predictions input file. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + machine_type: The machine type of the custom jobs in this pipeline. If not set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code Service Agent (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project is used. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`.
Where `{project}` is a project number, as in `12345`, and `{network}` is a network name, as in `myVPC`. To specify this field, you must have already configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_disk_size_gb: The disk size (in GB) of the machine executing the - evaluation run. If not set, defaulted to `50`. - dataflow_max_num_workers: The max number of workers executing the evaluation - run. If not set, defaulted to `5`. + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. If not set, defaulted to `50`. + dataflow_max_num_workers: The max number of workers executing the evaluation run. If not set, defaulted to `5`. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. Returns: - evaluation_metrics: ClassificationMetrics Artifact for LLM Text - Classification. - evaluation_resource_name: If run on an user's managed VertexModel, the - imported evaluation resource name. Empty if run on a publisher model. + evaluation_metrics: ClassificationMetrics Artifact for LLM Text Classification. + evaluation_resource_name: If run on a user's managed VertexModel, the imported evaluation resource name. Empty if run on a publisher model.
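# Illustrative sketch, not part of the diff: compiling this evaluation pipeline with the
# KFP SDK. The module path below is an assumption based on the file path in this patch;
# compiler.Compiler().compile() is the standard KFP v2 entry point.
from kfp import compiler

from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_classification_pipeline import (
    evaluation_llm_classification_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_llm_classification_pipeline,
    package_path='evaluation_llm_classification_pipeline.yaml',
)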
""" + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index d93487ade3..3227664200 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -42,71 +42,31 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul ) -> NamedTuple( 'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str ): + # fmt: off """LLM Text Generation Evaluation pipeline. This pipeline supports evaluating large language models, publisher or managed - models, performing the following generative tasks: `summarization`, - `question-answering`, and `text-generation`. + models, performing the following generative tasks: `summarization`, `question-answering`, and `text-generation`. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. - model_name: The Model name used to run evaluation. Must be a publisher Model - or a managed Model sharing the same ancestor location. Starting this job - has no impact on any existing deployments of the Model and their - resources. - evaluation_task: The task that the large language model will be evaluated - on. The evaluation component computes a set of metrics relevant to that - specific task. Currently supported tasks are: `summarization`, - `question-answering`, `text-generation`. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. Only "jsonl" is - currently supported. For more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. - Only "jsonl" is currently supported. For more details about this output - config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - machine_type: The machine type of this custom job. If not set, defaulted to - `e2-highmem-16`. More details: - https://cloud.google.com/compute/docs/machine-resource - service_account: Sets the default service account for workload run-as - account. 
The service account running the pipeline - (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - submitting jobs must have act-as permission on this run-as account. If - unspecified, the Vertex AI Custom Code Service - Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - network: The full name of the Compute Engine network to which the job should - be peered. For example, `projects/12345/global/networks/myVPC`. Format is - of the form `projects/{project}/global/networks/{network}`. Where - `{project}` is a project number, as in `12345`, and `{network}` is a - network name, as in `myVPC`. To specify this field, you must have already - configured VPC Network Peering for Vertex AI - (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left - unspecified, the job is not peered with any network. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. + model_name: The Model name used to run evaluation. Must be a publisher Model or a managed Model sharing the same ancestor location. Starting this job has no impact on any existing deployments of the Model and their resources. + evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that specific task. Currently supported tasks are: `summarization`, `question-answering`, `text-generation`. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. Only "jsonl" is currently supported. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. Only "jsonl" is currently supported. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. 
If unspecified, the Vertex AI Custom Code Service Agent (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project is used. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name, as in `myVPC`. To specify this field, you must have already configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. Returns: evaluation_metrics: Metrics Artifact for LLM Text Generation. - evaluation_resource_name: If run on an user's managed VertexModel, the - imported evaluation resource name. Empty if run on a publisher model. + evaluation_resource_name: If run on a user's managed VertexModel, the imported evaluation resource name. Empty if run on a publisher model. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=Metrics, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py index e0f210f6ee..2081b7a4dc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_component.py @@ -58,53 +58,26 @@ def feature_attribution( possible, typically possible for AutoML Classification models. Args: - location: Location running feature attribution. If not - set, defaulted to `us-central1`. - problem_type: Problem type of the pipeline: one of `classification`, - `regression` and `forecasting`. - predictions_format: The file format for the batch - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`. - predictions_gcs_source: An artifact with its - URI pointing toward a GCS directory with prediction or explanation files - to be used for this evaluation. For prediction results, the files should - be named "prediction.results-*" or "predictions_". For explanation - results, the files should be named "explanation.results-*". - predictions_bigquery_source: BigQuery table - with prediction or explanation data to be used for this evaluation. For - prediction results, the table column should be named "predicted_*". - dataflow_service_account: Service account to run the - dataflow job. If not set, dataflow will use the default worker service - account. For more details, see - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account - dataflow_disk_size_gb: The disk size (in GB) of the machine - executing the evaluation run. If not set, defaulted to `50`. - dataflow_machine_type: The machine type executing the - evaluation run. If not set, defaulted to `n1-standard-4`.
- dataflow_workers_num: The number of workers executing the - evaluation run. If not set, defaulted to `10`. - dataflow_max_workers_num: The max number of workers - executing the evaluation run. If not set, defaulted to `25`. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - encryption_spec_key_name: Customer-managed encryption key - for the Dataflow job. If this is set, then all resources created by the - Dataflow job will be encrypted with the provided encryption key. - force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` - and `Dataflow`. + location: Location running feature attribution. If not set, defaulted to `us-central1`. + problem_type: Problem type of the pipeline: one of `classification`, `regression` and `forecasting`. + predictions_format: The file format for the batch prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`. + predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named "prediction.results-*" or "predictions_". For explanation results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". + dataflow_service_account: Service account to run the dataflow job. If not set, dataflow will use the default worker service account. For more details, see https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. If not set, defaulted to `50`. + dataflow_machine_type: The machine type executing the evaluation run. If not set, defaulted to `n1-standard-4`. + dataflow_workers_num: The number of workers executing the evaluation run. If not set, defaulted to `10`. + dataflow_max_workers_num: The max number of workers executing the evaluation run. If not set, defaulted to `25`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key for the Dataflow job. If this is set, then all resources created by the Dataflow job will be encrypted with the provided encryption key. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` and `Dataflow`. project: Project to run feature attribution container. Defaults to the project in which the PipelineJob is run. Returns: - gcs_output_directory: JsonArray of the downsampled dataset GCS - output. - bigquery_output_table: String of the downsampled dataset BigQuery - output. - gcp_resources: Serialized gcp_resources proto tracking the dataflow - job. 
For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcs_output_directory: JsonArray of the downsampled dataset GCS output. + bigquery_output_table: String of the downsampled dataset BigQuery output. + gcp_resources: Serialized gcp_resources proto tracking the dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py index 610d041eed..5e587d9216 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/feature_attribution_graph_component.py @@ -51,6 +51,7 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics): + # fmt: off """A pipeline to compute feature attributions by sampling data for batch explanations. This pipeline guarantees support for AutoML Tabular models that contain a @@ -58,127 +59,36 @@ def feature_attribution_graph_component( # pylint: disable=dangerous-default-va Args: location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification", "regression", or "forecasting". + prediction_type: The type of prediction the model is to produce. "classification", "regression", or "forecasting". vertex_model: The Vertex model artifact used for batch explanation. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. 
- batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. 
If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. 
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. 
If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to force. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. Returns: A system.Metrics artifact with feature attributions. """ + # fmt: on outputs = NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics) # Sample the input dataset for a quicker batch explanation.
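# Illustrative sketch, not part of the diff: the pattern this patch applies throughout.
# Wrapping a docstring in `# fmt: off` / `# fmt: on` asks formatters that honor these
# markers to leave the region alone, so each Args entry can stay on a single line for
# downstream doc tooling. `example_component` and its arguments are hypothetical.
def example_component(project: str, location: str) -> str:
  # fmt: off
  """A hypothetical component used only to show the docstring layout.

  Args:
    project: Project used to run the job. If not specified the project used to run the pipeline will be used.
    location: Location used to run the job. If not specified the location used to run the pipeline will be used.

  Returns:
    A fully qualified location resource name.
  """
  # fmt: on
  return f'projects/{project}/locations/{location}'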
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py index d069656f81..5489e91cc1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/model_bias_component.py @@ -50,59 +50,25 @@ def detect_model_bias( Args: location: Location for running data bias detection. - target_field_name: The full name path of the features target field in the - predictions file. Formatted to be able to find nested columns, delimited - by `.`. Alternatively referred to as the ground truth (or - ground_truth_column) field. - predictions_format: The file format for the batch prediction results. - `jsonl` is the only currently allow format. - predictions_gcs_source: An artifact with its URI pointing toward a GCS - directory with prediction or explanation files to be used for this - evaluation. For prediction results, the files should be named - "prediction.results-*". For explanation results, the files should be - named "explanation.results-*". - predictions_bigquery_source: BigQuery table with prediction or explanation - data to be used for this evaluation. For prediction results, the table - column should be named "predicted_*". - bias_configs: A list of - `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. - When provided, compute model bias metrics for each defined slice. Below - is an example of how to format this input. - 1: First, create a BiasConfig. - `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` + target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. + predictions_format: The file format for the batch prediction results. `jsonl` is the only currently allowed format. + predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named "prediction.results-*". For explanation results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". + bias_configs: A list of `google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig`. When provided, compute model bias metrics for each defined slice. Below is an example of how to format this input. - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` + 1: First, create a BiasConfig. `from google.cloud.aiplatform_v1beta1.types.ModelEvaluation import BiasConfig` `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` + 2: Create a list to store the bias configs into. `bias_configs = []` + 3: Format each BiasConfig into a JSON or Dict.
`bias_config_json = json_format.MessageToJson(bias_config)` or `bias_config_dict = json_format.MessageToDict(bias_config)` + 4: Combine each bias_config JSON into a list. `bias_configs.append(bias_config_json)` + 5: Finally, pass bias_configs as a parameter for this component. `DetectModelBiasOp(bias_configs=bias_configs)` - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - - `bias_config = BiasConfig(bias_slices=SliceSpec(configs={ - 'feature_a': SliceConfig(SliceSpec.Value(string_value= 'label_a') ) }))` - 2: Create a list to store the bias configs into. - `bias_configs = []` - 3: Format each BiasConfig into a JSON or Dict. - `bias_config_json = json_format.MessageToJson(bias_config` or - `bias_config_dict = json_format.MessageToDict(bias_config)` - 4: Combine each bias_config JSON into a list. - `bias_configs.append(bias_config_json)` - 5: Finally, pass bias_configs as an parameter for this component. - `DetectModelBiasOp(bias_configs=bias_configs)` - thresholds: A list of float values to be used as prediction decision - thresholds. - encryption_spec_key_name: Customer-managed encryption key options for the - Dataflow. If this is set, then all resources created by the Dataflow - will be encrypted with the provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. + thresholds: A list of float values to be used as prediction decision thresholds. + encryption_spec_key_name: Customer-managed encryption key options for the Dataflow. If this is set, then all resources created by the Dataflow will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to run data bias detection. Defaults to the project in which the PipelineJob is run. Returns: - bias_model_metrics: - Artifact tracking the model bias detection output. - gcp_resources: - Serialized gcp_resources proto tracking the Dataflow job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + bias_model_metrics: Artifact tracking the model bias detection output. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py index af413ef3ec..d235543250 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/utils.py @@ -34,8 +34,7 @@ def create_slice_specs_list( ModelEvaluationSlice.Slice.SliceSpec.Value; a bool for `all_values` or a list for ModelEvaluationSlice.Slice.SliceSpec.Range. - Returns: - A list of ModelEvaluationSlice.Slice.SliceSpec proto. + Returns: A list of ModelEvaluationSlice.Slice.SliceSpec proto. Raises: ValueError: if the format of a feature's value is invalid.
@@ -97,12 +96,11 @@ def create_bias_configs_list( slice_b: `list_of_slice_a_and_slice_b = [[{'education': 'low'}, {'education': 'high'}]]`. - Returns: - A list of BiasConfig. + Returns: A list of BiasConfig. Raises: ValueError: if a feature's value is `all_values` or the format of the - feature's value is invalid. + feature's value is invalid. """ bias_configs_list = [] for slice_a_and_slice_b in list_of_slice_a_and_slice_b: diff --git a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py index 4d770a98c2..9aef1f37d6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py +++ b/components/google-cloud/google_cloud_pipeline_components/types/artifact_types.py @@ -63,22 +63,18 @@ def create( uri: str, model_resource_name: str, ) -> 'VertexModel': + # fmt: off """Create a VertexModel artifact instance. Args: name: The artifact name. - uri: the Vertex Model resource uri, in a form of - https://{service-endpoint}/v1/projects/{project}/locations/{location}/models/{model}, - where {service-endpoint} is one of the supported service endpoints at - https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints - model_resource_name: The name of the Model resource, in a form of - projects/{project}/locations/{location}/models/{model}. For more - details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/get + uri: the Vertex Model resource uri, in a form of https://{service-endpoint}/v1/projects/{project}/locations/{location}/models/{model}, where {service-endpoint} is one of the supported service endpoints at https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints + model_resource_name: The name of the Model resource, in a form of projects/{project}/locations/{location}/models/{model}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/get Returns: VertexModel instance. """ + # fmt: on return cls( name=name, uri=uri, @@ -104,22 +100,18 @@ def create( uri: str, endpoint_resource_name: str, ) -> 'VertexEndpoint': + # fmt: off """Create a VertexEndpoint artifact instance. Args: name: The artifact name. - uri: the Vertex Endpoint resource uri, in a form of - https://{service-endpoint}/v1/projects/{project}/locations/{location}/endpoints/{endpoint}, - where {service-endpoint} is one of the supported service endpoints at - https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints - endpoint_resource_name: The name of the Endpoint resource, in a form of - projects/{project}/locations/{location}/endpoints/{endpoint}. For more - details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/get + uri: the Vertex Endpoint resource uri, in a form of https://{service-endpoint}/v1/projects/{project}/locations/{location}/endpoints/{endpoint}, where {service-endpoint} is one of the supported service endpoints at https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints + endpoint_resource_name: The name of the Endpoint resource, in a form of projects/{project}/locations/{location}/endpoints/{endpoint}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/get Returns: VertexEndpoint instance. 
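# Illustrative sketch, not part of the diff: calling the create() factories documented
# above. The class names, argument names, and URI/resource-name formats come from this
# file; the project, location, and numeric IDs are placeholder values.
from google_cloud_pipeline_components.types.artifact_types import VertexEndpoint, VertexModel

model = VertexModel.create(
    name='my-model-artifact',
    uri='https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/models/123',
    model_resource_name='projects/my-project/locations/us-central1/models/123',
)

endpoint = VertexEndpoint.create(
    name='my-endpoint-artifact',
    uri='https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/endpoints/456',
    endpoint_resource_name='projects/my-project/locations/us-central1/endpoints/456',
)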
""" + # fmt: on return cls( name=name, uri=uri, @@ -154,35 +146,21 @@ def create( bigquery_output_dataset: Optional[str] = None, gcs_output_directory: Optional[str] = None, ) -> 'VertexBatchPredictionJob': + # fmt: off """Create a VertexBatchPredictionJob artifact instance. Args: name: The artifact name. - uri: the Vertex Batch Prediction resource uri, in a form of - https://{service-endpoint}/v1/projects/{project}/locations/{location}/batchPredictionJobs/{batchPredictionJob}, - where {service-endpoint} is one of the supported service endpoints at - https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints - job_resource_name: The name of the batch prediction job resource, in a - form of - projects/{project}/locations/{location}/batchPredictionJobs/{batchPredictionJob}. - For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/get - bigquery_output_table: The name of the BigQuery table created, in - predictions_ format, into which the prediction output is - written. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo - bigquery_output_dataset: The path of the BigQuery dataset created, in - bq://projectId.bqDatasetId format, into which the prediction output is - written. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo - gcs_output_directory: The full path of the Cloud Storage directory - created, into which the prediction output is written. For more details, - see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo + uri: the Vertex Batch Prediction resource uri, in a form of https://{service-endpoint}/v1/projects/{project}/locations/{location}/batchPredictionJobs/{batchPredictionJob}, where {service-endpoint} is one of the supported service endpoints at https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints + job_resource_name: The name of the batch prediction job resource, in a form of projects/{project}/locations/{location}/batchPredictionJobs/{batchPredictionJob}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/get + bigquery_output_table: The name of the BigQuery table created, in predictions_ format, into which the prediction output is written. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo + bigquery_output_dataset: The path of the BigQuery dataset created, in bq://projectId.bqDatasetId format, into which the prediction output is written. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo + gcs_output_directory: The full path of the Cloud Storage directory created, into which the prediction output is written. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#outputinfo Returns: VertexBatchPredictionJob instance. """ + # fmt: on return cls( name=name, uri=uri, @@ -213,18 +191,13 @@ def create( uri: str, dataset_resource_name: str, ) -> 'VertexDataset': + # fmt: off """Create a VertexDataset artifact instance. Args: name: The artifact name. 
- uri: the Vertex Dataset resource uri, in a form of - https://{service-endpoint}/v1/projects/{project}/locations/{location}/datasets/{datasets_name}, - where {service-endpoint} is one of the supported service endpoints at - https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints - dataset_resource_name: The name of the Dataset resource, in a form of - projects/{project}/locations/{location}/datasets/{datasets_name}. For - more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets/get + uri: the Vertex Dataset resource uri, in a form of https://{service-endpoint}/v1/projects/{project}/locations/{location}/datasets/{datasets_name}, where {service-endpoint} is one of the supported service endpoints at https://cloud.google.com/vertex-ai/docs/reference/rest#rest_endpoints + dataset_resource_name: The name of the Dataset resource, in a form of projects/{project}/locations/{location}/datasets/{datasets_name}. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets/get Returns: VertexDataset instance. @@ -259,18 +232,19 @@ def create( dataset_id: str, model_id: str, ) -> 'BQMLModel': + # fmt: off """Create a BQMLModel artifact instance. Args: name: The artifact name. project_id: The ID of the project containing this model. dataset_id: The ID of the dataset containing this model. - model_id: The ID of the model. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/models#ModelReference + model_id: The ID of the model. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/models#ModelReference Returns: BQMLModel instance. """ + # fmt: on return cls( name=name, uri=f'https://www.googleapis.com/bigquery/v2/projects/{project_id}/datasets/{dataset_id}/models/{model_id}', @@ -307,18 +281,19 @@ def create( dataset_id: str, table_id: str, ) -> 'BQTable': + # fmt: off """Create a BQTable artifact instance. Args: name: The artifact name. project_id: The ID of the project containing this table. dataset_id: The ID of the dataset containing this table. - table_id: The ID of the table. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/TableReference + table_id: The ID of the table. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/TableReference Returns: BQTable instance. """ + # fmt: on return cls( name=name, uri=f'https://www.googleapis.com/bigquery/v2/projects/{project_id}/datasets/{dataset_id}/tables/{table_id}', @@ -387,21 +362,17 @@ def create( predict_schemata: Dict[str, str], container_spec: Dict[str, Any], ) -> 'UnmanagedContainerModel': + # fmt: off """Create a UnmanagedContainerModel artifact instance. Args: - predict_schemata: Contains the schemata used in Model's predictions and - explanations via PredictionService.Predict, PredictionService.Explain - and BatchPredictionJob. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/PredictSchemata - container_spec: Specification of a container for serving predictions. Some - fields in this message correspond to fields in the Kubernetes Container - v1 core specification. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ModelContainerSpec + predict_schemata: Contains the schemata used in Model's predictions and explanations via PredictionService.Predict, PredictionService.Explain and BatchPredictionJob. 
For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/PredictSchemata
+ container_spec: Specification of a container for serving predictions. Some fields in this message correspond to fields in the Kubernetes Container v1 core specification. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ModelContainerSpec
Returns: UnmanagedContainerModel instance. """
+ # fmt: on
return cls( metadata={ 'predictSchemata': predict_schemata,
@@ -421,10 +392,7 @@ class ClassificationMetrics(dsl.Artifact): properties: aggregationType: type: string
- enum:
- - AGGREGATION_TYPE_UNSPECIFIED
- - MACRO_AVERAGE
- - MICRO_AVERAGE
+ enum: [AGGREGATION_TYPE_UNSPECIFIED, MACRO_AVERAGE, MICRO_AVERAGE]
aggregationThreshold: type: number format: float
@@ -550,6 +518,7 @@ def create( au_roc: Optional[float] = None, log_loss: Optional[float] = None, ) -> 'ClassificationMetrics':
+ # fmt: off
"""Create a ClassificationMetrics artifact instance. Args:
@@ -565,6 +534,7 @@ def create( Returns: ClassificationMetrics instance. """
+ # fmt: on
metadata = {} if recall is not None: metadata['recall'] = recall
@@ -621,6 +591,7 @@ def create( r_squared: Optional[float] = None, root_mean_squared_log_error: Optional[float] = None, ) -> 'RegressionMetrics':
+ # fmt: off
"""Create a RegressionMetrics artifact instance. Args:
@@ -628,13 +599,13 @@ def create( root_mean_squared_error: Root Mean Squared Error (RMSE). mean_absolute_error: Mean Absolute Error (MAE). mean_absolute_percentage_error: Mean absolute percentage error.
- r_squared: Coefficient of determination as Pearson correlation - coefficient.
+ r_squared: Coefficient of determination as Pearson correlation coefficient.
root_mean_squared_log_error: Root mean squared log error. Returns: RegressionMetrics instance. """
+ # fmt: on
metadata = {} if root_mean_squared_error is not None: metadata['rootMeanSquaredError'] = root_mean_squared_error
@@ -713,6 +684,7 @@ def create( root_mean_squared_percentage_error: Optional[float] = None, symmetric_mean_absolute_percentage_error: Optional[float] = None, ) -> 'ForecastingMetrics':
+ # fmt: off
"""Create a ForecastingMetrics artifact instance. Args:
@@ -720,21 +692,16 @@ def create( root_mean_squared_error: Root Mean Squared Error (RMSE). mean_absolute_error: Mean Absolute Error (MAE). mean_absolute_percentage_error: Mean absolute percentage error.
- r_squared: Coefficient of determination as Pearson correlation - coefficient.
+ r_squared: Coefficient of determination as Pearson correlation coefficient.
root_mean_squared_log_error: Root mean squared log error.
- weighted_absolute_percentage_error: Weighted Absolute Percentage Error. - Does not use weights, this is just what the metric is called. Undefined - if actual values sum to zero. Will be very large if actual values sum to - a very small number.
- root_mean_squared_percentage_error: Root Mean Square Percentage Error. - Square root of MSPE. Undefined/imaginary when MSPE is negative.
- symmetric_mean_absolute_percentage_error: Symmetric Mean Absolute - Percentage Error.
+ weighted_absolute_percentage_error: Weighted Absolute Percentage Error. Does not use weights, this is just what the metric is called. Undefined if actual values sum to zero. Will be very large if actual values sum to a very small number.
+ root_mean_squared_percentage_error: Root Mean Square Percentage Error. Square root of MSPE. Undefined/imaginary when MSPE is negative.
+ symmetric_mean_absolute_percentage_error: Symmetric Mean Absolute Percentage Error.
Returns: ForecastingMetrics instance. """ + # fmt: on metadata = {} if root_mean_squared_error is not None: metadata['rootMeanSquaredError'] = root_mean_squared_error diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py index 6dbcd85caf..09583f8b5b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """GA AutoML forecasting components.""" from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py index 6b91061171..3844900ff8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -11,12 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Prophet trainer component spec.""" from typing import Optional -from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel +from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel from kfp import dsl from kfp.dsl import Artifact from kfp.dsl import Output @@ -56,48 +55,26 @@ def prophet_trainer( project: The GCP project that runs the pipeline components. location: The GCP region for Vertex AI. root_dir: The Cloud Storage location to store the output. - time_column: Name of the column that identifies time order in the - time series. - time_series_identifier_column: Name of the column that identifies - the time series. - target_column: Name of the column that the model is to predict - values for. - forecast_horizon: The number of time periods into the future for - which forecasts will be created. Future periods start after the latest - timestamp for each time series. - optimization_objective: Optimization objective for tuning. Supported - metrics come from Prophet's performance_metrics function. These are mse, - rmse, mae, mape, mdape, smape, and coverage. - data_granularity_unit: String representing the units of time for the - time column. - predefined_split_column: The predefined_split column name. A string - that represents a list of comma separated CSV filenames. - source_bigquery_uri: The BigQuery table path of format - bq (str)://bq_project.bq_dataset.bq_table - window_column: Name of the column that should be used to filter - input rows. The column should contain either booleans or string - booleans; if the value of the row is True, generate a sliding window - from that row. - max_num_trials: Maximum number of tuning trials to perform - per time series. There are up to 100 possible combinations to explore - for each time series. Recommended values to try are 3, 6, and 24. 
+ time_column: Name of the column that identifies time order in the time series. + time_series_identifier_column: Name of the column that identifies the time series. + target_column: Name of the column that the model is to predict values for. + forecast_horizon: The number of time periods into the future for which forecasts will be created. Future periods start after the latest timestamp for each time series. + optimization_objective: Optimization objective for tuning. Supported metrics come from Prophet's performance_metrics function. These are mse, rmse, mae, mape, mdape, smape, and coverage. + data_granularity_unit: String representing the units of time for the time column. + predefined_split_column: The predefined_split column name. A string that represents a list of comma separated CSV filenames. + source_bigquery_uri: The BigQuery table path of format bq (str)://bq_project.bq_dataset.bq_table + window_column: Name of the column that should be used to filter input rows. The column should contain either booleans or string booleans; if the value of the row is True, generate a sliding window from that row. + max_num_trials: Maximum number of tuning trials to perform per time series. There are up to 100 possible combinations to explore for each time series. Recommended values to try are 3, 6, and 24. encryption_spec_key_name: Customer-managed encryption key. - dataflow_machine_type: The dataflow machine type used for - training. - dataflow_max_num_workers: The max number of Dataflow - workers used for training. - dataflow_disk_size_gb: Dataflow worker's disk size in GB - during training. - dataflow_service_account: Custom service account to run - dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. + dataflow_machine_type: The dataflow machine type used for training. + dataflow_max_num_workers: The max number of Dataflow workers used for training. + dataflow_disk_size_gb: Dataflow worker's disk size in GB during training. + dataflow_service_account: Custom service account to run dataflow jobs. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. Returns: - gcp_resources: Serialized gcp_resources proto tracking the custom training - job. + gcp_resources: Serialized gcp_resources proto tracking the custom training job. unmanaged_container_model: The UnmanagedContainerModel artifact. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py index b69d5430a5..0e025ff059 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py @@ -76,8 +76,7 @@ def get_bqml_arima_train_pipeline_and_parameters( but also the longest training runtime. run_evaluation: Whether to run evaluation steps during training. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -134,8 +133,7 @@ def get_bqml_arima_predict_pipeline_and_parameters( results. 
This will cause the batch prediction output to include explanations. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -237,8 +235,7 @@ def get_prophet_train_pipeline_and_parameters( addresses. run_evaluation: Whether to run evaluation steps during training. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -319,8 +316,7 @@ def get_prophet_prediction_pipeline_and_parameters( machine_type: The machine type used for batch prediction. max_num_workers: The max number of workers used for batch prediction. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py index 840cd055f5..0bda943e91 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """GA AutoML tabular components.""" import os diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py index 1c17a6fc8c..ffb9afb4a7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Tabular Cross Validation Trainer component spec.""" from typing import Optional @@ -49,17 +48,12 @@ def automl_tabular_cv_trainer( project: Project to run Cross-validation trainer. location: Location for running the Cross-validation trainer. root_dir: The Cloud Storage location to store the output. - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}] deadline_hours: Number of hours the cross-validation trainer should run. num_parallel_trials: Number of parallel training trials. single_run_max_secs: Max number of seconds each training trial runs. - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - num_selected_features: Number of selected features. The number of - features to learn in the NN models. + num_selected_trials: Number of selected trials. The number of weak learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. 
The number of features to learn in the NN models. transform_output: The transform output artifact. metadata: The tabular example gen metadata. materialized_cv_splits: The materialized cross-validation splits. @@ -68,8 +62,7 @@ def automl_tabular_cv_trainer( Returns: tuning_result_output: The trained model and architectures. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. execution_metrics: Core metrics in dictionary of component execution. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py index 15d1a5463a..5bcd942ee9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Tabular Ensemble component spec.""" from typing import Optional @@ -56,21 +55,14 @@ def automl_tabular_ensemble( transform_output: The transform output artifact. metadata: The tabular example gen metadata. dataset_schema: The schema of the dataset. - tuning_result_input: AutoML Tabular tuning - result. - instance_baseline: The instance baseline - used to calculate explanations. - warmup_data: The warm up data. Ensemble component will save the - warm up data together with the model artifact, used to warm up the model - when prediction server starts. + tuning_result_input: AutoML Tabular tuning result. + instance_baseline: The instance baseline used to calculate explanations. + warmup_data: The warm up data. Ensemble component will save the warm up data together with the model artifact, used to warm up the model when prediction server starts. encryption_spec_key_name: Customer-managed encryption key. - export_additional_model_without_custom_ops: True if export - an additional model without custom TF operators to the - `model_without_custom_ops` output. + export_additional_model_without_custom_ops: True if export an additional model without custom TF operators to the `model_without_custom_ops` output. Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. model_architecture: The architecture of the output model. model: The output model. model_without_custom_ops: The output model without custom TF operators, this output will be empty unless `export_additional_model_without_custom_ops` is set. 
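The hunks above and below all apply the same convention: the component docstring is wrapped in the formatter's `# fmt: off` / `# fmt: on` markers and each argument description is collapsed onto a single long line so the formatter will not rewrap it. A minimal sketch of that convention, using a hypothetical `example_component` that is not part of this patch:

from kfp import dsl


@dsl.component
def example_component(project: str, location: str) -> str:
  # fmt: off
  """Runs a hypothetical step.

  Args:
    project: The GCP project that runs the pipeline components.
    location: The GCP region for Vertex AI.

  Returns: The resource name that the step would operate on.
  """
  # fmt: on
  # The markers above exclude the docstring from automatic formatting, so the
  # single-line argument descriptions are preserved verbatim for the docs
  # generator.
  return f'projects/{project}/locations/{location}'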
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py index c6ecce48be..6bd420b1a0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Pipeline Finalizer component spec.""" from typing import Optional @@ -37,8 +36,7 @@ def automl_tabular_finalizer( encryption_spec_key_name: Customer-managed encryption key. Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py index fd38a7e298..fd2fd96b10 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Infra Validator component spec.""" from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel @@ -27,8 +26,7 @@ def automl_tabular_infra_validator( """Validates the trained AutoML Tabular model is a valid model. Args: - unmanaged_container_model: google.UnmanagedContainerModel for model - to be validated. + unmanaged_container_model: google.UnmanagedContainerModel for model to be validated. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py index 8b8af9e135..8f9b8dca8c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- """AutoML Split Materialized Data component spec.""" from kfp import dsl diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py index 24a25fa05c..1664e75443 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Tabular Stage 1 Tuner component spec.""" from typing import Optional @@ -55,37 +54,25 @@ def automl_tabular_stage_1_tuner( project: Project to run Cross-validation trainer. location: Location for running the Cross-validation trainer. root_dir: The Cloud Storage location to store the output. - study_spec_parameters_override: JSON study spec. E.g., - [{"parameter_id": "model_type","categorical_value_spec": {"values": - ["nn"]}}] - worker_pool_specs_override_json: JSON worker pool specs. E.g., - [{"machine_spec": {"machine_type": - "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": - "n1-standard-16"}}] + study_spec_parameters_override: JSON study spec. E.g., [{"parameter_id": "model_type","categorical_value_spec": {"values": ["nn"]}}] + worker_pool_specs_override_json: JSON worker pool specs. E.g., [{"machine_spec": {"machine_type": "n1-standard-16"}},{},{},{"machine_spec": {"machine_type": "n1-standard-16"}}] reduce_search_space_mode: The reduce search space mode. Possible values: "regular" (default), "minimal", "full". - num_selected_trials: Number of selected trials. The number of weak - learners in the final model is 5 * num_selected_trials. - num_selected_features: Number of selected features. The number of - features to learn in the NN models. - deadline_hours: Number of hours the cross-validation trainer - should run. - disable_early_stopping: True if disable early stopping. Default - value is false. + num_selected_trials: Number of selected trials. The number of weak learners in the final model is 5 * num_selected_trials. + num_selected_features: Number of selected features. The number of features to learn in the NN models. + deadline_hours: Number of hours the cross-validation trainer should run. + disable_early_stopping: True if disable early stopping. Default value is false. num_parallel_trials: Number of parallel training trials. single_run_max_secs: Max number of seconds each training trial runs. metadata: The tabular example gen metadata. transform_output: The transform output artifact. - materialized_train_split: The materialized train - split. + materialized_train_split: The materialized train split. materialized_eval_split: The materialized eval split. encryption_spec_key_name: Customer-managed encryption key. - run_distillation: True if in distillation mode. The default value - is false. + run_distillation: True if in distillation mode. The default value is false. Returns: - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. 
For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. tuning_result_output: The trained model and architectures. execution_metrics: Core metrics in dictionary of component execution. """ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py index 73ee939b0d..d3794ea3d4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Stats and Example Generation component spec.""" from typing import Optional @@ -71,62 +70,28 @@ def tabular_stats_and_example_gen( """Generates stats and training instances for tabular data. Args: - project: Project to run dataset statistics and example - generation. - location: Location for running dataset statistics and example - generation. + project: Project to run dataset statistics and example generation. + location: Location for running dataset statistics and example generation. root_dir: The Cloud Storage location to store the output. target_column_name: The target column name. weight_column_name: The weight column name. - prediction_type: The prediction type. Supported values: - "classification", "regression". - optimization_objective: Objective function the model is optimizing - towards. The training process creates a model that maximizes/minimizes - the value of the objective function over the validation set. The - supported optimization objectives depend on the prediction type. If the - field is not set, a default objective function is used. - classification: "maximize-au-roc" (default) - Maximize the - area under the receiver operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a - specified precision value. - classification (multi-class): "minimize-log-loss" (default) - Minimize - log loss. - regression: "minimize-rmse" (default) - Minimize root-mean-squared - error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - optimization_objective_recall_value: Required when - optimization_objective is "maximize-precision-at-recall". Must be - between 0 and 1, inclusive. - optimization_objective_precision_value: Required when - optimization_objective is "maximize-recall-at-precision". Must be - between 0 and 1, inclusive. - transformations: Quote escaped JSON string for transformations. Each - transformation will apply transform function to given input column. And - the result will be used for training. When creating transformation for - BigQuery Struct column, the column should be flattened using "." as the - delimiter. - transformations_path: Path to a GCS file containing JSON - string for transformations. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. 
- dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. - dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. + prediction_type: The prediction type. Supported values: "classification", "regression". + optimization_objective: Objective function the model is optimizing towards. The training process creates a model that maximizes/minimizes the value of the objective function over the validation set. The supported optimization objectives depend on the prediction type. If the field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall curve. "maximize-precision-at-recall" - Maximize precision for a specified recall value. "maximize-recall-at-precision" - Maximize recall for a specified precision value. classification (multi-class): "minimize-log-loss" (default) - Minimize log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when optimization_objective is "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + transformations: Quote escaped JSON string for transformations. Each transformation will apply transform function to given input column. And the result will be used for training. When creating transformation for BigQuery Struct column, the column should be flattened using "." as the delimiter. + transformations_path: Path to a GCS file containing JSON string for transformations. + dataflow_machine_type: The machine type used for dataflow jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More + details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + dataflow_service_account: Custom service account to run dataflow jobs. encryption_spec_key_name: Customer-managed encryption key. - run_distillation: True if in distillation mode. The default value - is false. + run_distillation: True if in distillation mode. The default value is false. Returns: dataset_schema: The schema of the dataset. @@ -138,8 +103,7 @@ def tabular_stats_and_example_gen( downsampled_test_split_json: The downsampled test split JSON object. 
instance_baseline: The instance baseline used to calculate explanations. metadata: The tabular example gen metadata. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py index 62fae23365..6ec30c3b44 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Training Configurator and Validator component spec.""" from typing import Optional @@ -58,85 +57,38 @@ def training_configurator_and_validator( """Configures training and validates data and user-input configurations. Args: - dataset_stats: Dataset stats generated by - feature transform engine. - split_example_counts: JSON string of data split example counts for - train, validate, and test splits. - training_schema_path: Schema of input data to the tf_model - at training time. - instance_schema: Schema of input data to the tf_model at - serving time. + dataset_stats: Dataset stats generated by feature transform engine. + split_example_counts: JSON string of data split example counts for train, validate, and test splits. + training_schema_path: Schema of input data to the tf_model at training time. + instance_schema: Schema of input data to the tf_model at serving time. target_column: Target column of input data. weight_column: Weight column of input data. - prediction_type: Model prediction type. One of "classification", - "regression", "time_series". - optimization_objective: Objective function the model is optimizing - towards. The training process creates a model that maximizes/minimizes - the value of the objective function over the validation set. The - supported optimization objectives depend on the prediction type. If the - field is not set, a default objective function is used. - classification: "maximize-au-roc" (default) - Maximize the - area under the receiver operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified - recall value. "maximize-recall-at-precision" - Maximize recall for a - specified precision value. - classification (multi-class): "minimize-log-loss" (default) - Minimize - log loss. - regression: "minimize-rmse" (default) - Minimize root-mean-squared - error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - optimization_objective_recall_value: Required when - optimization_objective is "maximize-precision-at-recall". Must be - between 0 and 1, inclusive. 
- optimization_objective_precision_value: Required when - optimization_objective is "maximize-recall-at-precision". Must be - between 0 and 1, inclusive. - run_evaluation: Whether we are running evaluation in the training - pipeline. - run_distill: Whether the distillation should be applied to the - training. - enable_probabilistic_inference: If probabilistic inference is - enabled, the model will fit a distribution that captures the uncertainty - of a prediction. At inference time, the predictive distribution is used - to make a point prediction that minimizes the optimization objective. - For example, the mean of a predictive distribution is the point - prediction that minimizes RMSE loss. If quantiles are specified, then - the quantiles of the distribution are also returned. - time_series_identifier_column: [Deprecated] The time series identifier - column. Used by forecasting only. Raises exception if used - - use the "time_series_identifier_column" field instead. - time_series_identifier_columns: The list of time series identifier columns. - Used by forecasting only. - time_column: The column that indicates the time. Used by forecasting - only. - time_series_attribute_columns: The column names of the time series - attributes. - available_at_forecast_columns: The names of the columns that are - available at forecast time. - unavailable_at_forecast_columns: The names of the columns that are - not available at forecast time. + prediction_type: Model prediction type. One of "classification", "regression", "time_series". + optimization_objective: Objective function the model is optimizing towards. The training process creates a model that maximizes/minimizes the value of the objective function over the validation set. The supported optimization objectives depend on the prediction type. If the field is not set, a default objective function is used. + classification: "maximize-au-roc" (default) - Maximize the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall curve. "maximize-precision-at-recall" - Maximize precision for a specified recall value. "maximize-recall-at-precision" - Maximize recall for a specified precision value. classification (multi-class): "minimize-log-loss" (default) - Minimize log loss. + regression: "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + optimization_objective_recall_value: Required when optimization_objective is "maximize-precision-at-recall". Must be between 0 and 1, inclusive. + optimization_objective_precision_value: Required when optimization_objective is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. + run_evaluation: Whether we are running evaluation in the training pipeline. + run_distill: Whether the distillation should be applied to the training. + enable_probabilistic_inference: If probabilistic inference is enabled, the model will fit a distribution that captures the uncertainty of a prediction. At inference time, the predictive distribution is used to make a point prediction that minimizes the optimization objective. For example, the mean of a predictive distribution is the point prediction that minimizes RMSE loss. If quantiles are specified, then the quantiles of the distribution are also returned. 
+ time_series_identifier_column: [Deprecated] The time series identifier column. Used by forecasting only. Raises exception if used - use the "time_series_identifier_column" field instead. + time_series_identifier_columns: The list of time series identifier columns. Used by forecasting only. + time_column: The column that indicates the time. Used by forecasting only. + time_series_attribute_columns: The column names of the time series attributes. + available_at_forecast_columns: The names of the columns that are available at forecast time. + unavailable_at_forecast_columns: The names of the columns that are not available at forecast time. quantiles: All quantiles that the model need to predict. context_window: The length of the context window. forecast_horizon: The length of the forecast horizon. forecasting_model_type: The model types, e.g. l2l, seq2seq, tft. - forecasting_transformations: Dict mapping auto and/or type-resolutions to - feature columns. The supported types are auto, categorical, numeric, - text, and timestamp. - stage_1_deadline_hours: Stage 1 training budget in - hours. - stage_2_deadline_hours: Stage 2 training budget in - hours. - group_columns: A list of time series attribute column - names that define the time series hierarchy. - group_total_weight: The weight of the loss for - predictions aggregated over time series in the same group. - temporal_total_weight: The weight of the loss for - predictions aggregated over the horizon for a single time series. - group_temporal_total_weight: The weight of the loss for - predictions aggregated over both the horizon and time series in the same - hierarchy group. + forecasting_transformations: Dict mapping auto and/or type-resolutions to feature columns. The supported types are auto, categorical, numeric, text, and timestamp. + stage_1_deadline_hours: Stage 1 training budget in hours. + stage_2_deadline_hours: Stage 2 training budget in hours. + group_columns: A list of time series attribute column names that define the time series hierarchy. + group_total_weight: The weight of the loss for predictions aggregated over time series in the same group. + temporal_total_weight: The weight of the loss for predictions aggregated over the horizon for a single time series. + group_temporal_total_weight: The weight of the loss for predictions aggregated over both the horizon and time series in the same hierarchy group. Returns: metadata: The tabular example gen metadata. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py index 0bef5c4d66..8b1a366956 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """AutoML Transform component spec.""" from typing import Optional @@ -59,19 +58,13 @@ def automl_tabular_transform( train_split: The train split. eval_split: The eval split. test_split: The test split. - dataflow_machine_type: The machine type used for dataflow - jobs. If not set, default to n1-standard-16. - dataflow_max_num_workers: The number of workers to run the - dataflow job. If not set, default to 25. 
- dataflow_disk_size_gb: The disk size, in gigabytes, to use - on each Dataflow worker instance. If not set, default to 40. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More + dataflow_machine_type: The machine type used for dataflow jobs. If not set, default to n1-standard-16. + dataflow_max_num_workers: The number of workers to run the dataflow job. If not set, default to 25. + dataflow_disk_size_gb: The disk size, in gigabytes, to use on each Dataflow worker instance. If not set, default to 40. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - dataflow_service_account: Custom service account to run - dataflow jobs. + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + dataflow_service_account: Custom service account to run dataflow jobs. encryption_spec_key_name: Customer-managed encryption key. Returns: @@ -80,8 +73,7 @@ def automl_tabular_transform( materialized_eval_split: The materialized test split. training_schema_uri: The training schema. transform_output: The transform output artifact. - gcp_resources: GCP resources created by this component. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: GCP resources created by this component. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py index 6889db79ae..21221e5bd1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py @@ -135,10 +135,10 @@ def _get_default_pipeline_params( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -155,7 +155,7 @@ def _get_default_pipeline_params( transform component. 
dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -209,8 +209,7 @@ def _get_default_pipeline_params( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ if not study_spec_parameters_override: study_spec_parameters_override = [] @@ -504,10 +503,10 @@ def get_automl_tabular_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -524,7 +523,7 @@ def get_automl_tabular_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -575,8 +574,7 @@ def get_automl_tabular_pipeline_and_parameters( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ parameter_values = _get_default_pipeline_params( project=project, @@ -668,8 +666,7 @@ def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: Args: input_dict: The input json dictionary. - Returns: - The encoded string used for parameter. + Returns: The encoded string used for parameter. """ if not input_dict: return '' @@ -742,10 +739,10 @@ def get_skip_evaluation_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. 
stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -762,14 +759,13 @@ def get_skip_evaluation_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. additional_experiments: Use this field to config private preview features. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ return get_default_pipeline_and_parameters( project=project, @@ -884,10 +880,10 @@ def get_default_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -904,7 +900,7 @@ def get_default_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. 
encryption_spec_key_name: The KMS key name.
@@ -931,8 +927,7 @@ def get_default_pipeline_and_parameters( distill_batch_predict_max_replica_count: The max number of prediction server for batch predict component in the model distillation.
- Returns: - Tuple of pipeline_definition_path and parameter_values.
+ Returns: Tuple of pipeline_definition_path and parameter_values.
""" warnings.warn( 'This method is deprecated,'
@@ -1184,7 +1179,7 @@ def get_skip_architecture_search_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format
- https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
+ https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for
@@ -1201,7 +1196,7 @@ def get_skip_architecture_search_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example:
- https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+ https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name.
@@ -1229,8 +1224,7 @@ def get_skip_architecture_search_pipeline_and_parameters( evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components.
- Returns: - Tuple of pipeline_definition_path and parameter_values.
+ Returns: Tuple of pipeline_definition_path and parameter_values.
""" return get_automl_tabular_pipeline_and_parameters(
@@ -1327,7 +1321,9 @@ def get_distill_skip_evaluation_pipeline_and_parameters( distill_batch_predict_starting_replica_count: int = 25, distill_batch_predict_max_replica_count: int = 25, ) -> Tuple[str, Dict[str, Any]]:
- """Get the AutoML Tabular training pipeline that distill and skips evaluation.
+ """Get the AutoML Tabular training pipeline that distills and skips
+ evaluation.
+
Args: project: The GCP project that runs the pipeline components.
@@ -1360,10 +1356,10 @@ def get_distill_skip_evaluation_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format
- https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172.
+ https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -1380,7 +1376,7 @@ def get_distill_skip_evaluation_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -1392,8 +1388,7 @@ def get_distill_skip_evaluation_pipeline_and_parameters( distill_batch_predict_max_replica_count: The max number of prediction server for batch predict component in the model distillation. - Returns: - Tuple of pipeline_definition_path and parameter_values. + Returns: Tuple of pipeline_definition_path and parameter_values. """ warnings.warn( 'Depreciated. Please use get_automl_tabular_pipeline_and_parameters.' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py index 3b00d88901..1f5612bcc4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide) for image, text, video, and forecasting.""" +"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex- + +ai/docs/beginner/beginners-guide) for image, text, video, and forecasting. +""" from google_cloud_pipeline_components.v1.automl.training_job.automl_forecasting_training_job.component import automl_forecasting_training_job as AutoMLForecastingTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_image_training_job.component import automl_image_training_job as AutoMLImageTrainingJobRunOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py index d214a3a255..e2466fc76f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_forecasting_training_job/component.py @@ -78,260 +78,65 @@ def automl_forecasting_training_job( # fmt: off """Runs the training job and returns a model. 
- If training on a Vertex AI dataset, you can use one of the following split configurations: - - Data fraction splits: - Any of `training_fraction_split`, `validation_fraction_split` and - `test_fraction_split` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, `predefined_split_column_name` must be provided. - Supported only for tabular Datasets. - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - - Supported only for tabular Datasets. + If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: Any of `training_fraction_split`, `validation_fraction_split` and `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. If using predefined splits, `predefined_split_column_name` must be provided. Supported only for tabular Datasets. Timestamp splits: Assigns input data to training, validation, and test sets based on a provided timestamps. The youngest data pieces are assigned to training set, next to validation set, and the oldest to the test set. Supported only for tabular Datasets. Args: - dataset: The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to - training, to pick and choose from. - target_column: Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column: Name of the column that identifies time order in the time series. - This column must be available at forecast. + dataset: The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For time series Datasets, all their data is exported to training, to pick and choose from. + target_column: Name of the column that the Model is to predict values for. This column must be unavailable at forecast. + time_column: Name of the column that identifies time order in the time series. This column must be available at forecast. time_series_identifier_column: Name of the column that identifies the time series. - unavailable_at_forecast_columns: Column names of columns that are unavailable at forecast. 
- Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns: Column names of columns that are available at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: - The amount of time into the future for which forecasted values for the target are - returned. Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit: The data granularity unit. Accepted values are `minute`, - `hour`, `day`, `week`, `month`, `year`. - data_granularity_count: The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. - training_fraction_split: The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split: The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split: The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - predefined_split_column_name: The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {`TRAIN`, - `VALIDATE`, `TEST`}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - Supported only for tabular and time series Datasets. - timestamp_split_column_name: The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column: Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. - time_series_attribute_columns: Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window: The amount of time into the past training and prediction data is used for - model training and prediction respectively. Expressed in number of units defined by the - [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the - default value of 0 which means the model sets each series context window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items: Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. 
-    export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions.
-      Expected format:
-      `bq://<project_id>:<dataset_id>:<table>
` - If not specified, then results are exported to the following auto-created BigQuery - table: - `:export_evaluated_examples__.evaluated_examples` - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles: Quantiles to use for the `minimize-quantile-loss` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options: Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline - budget_milli_node_hours: The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name: If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - If not provided upon creation, the job's display_name is used. - model_labels: The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - model_id: The ID to use for the Model produced by this job, - which will become the final component of the model resource name. - This value may be up to 63 characters, and valid characters - are `[a-z0-9_-]`. The first character cannot be a number or hyphen. - parent_model: The resource name or model ID of an existing model. - The new model uploaded by this job will be a version of `parent_model`. - Only set this field when training a new version of an existing model. - is_default_version: When set to True, the newly uploaded model version will - automatically have alias "default" included. Subsequent uses of - the model produced by this job without a version specified will - use this "default" version. - When set to False, the "default" alias will not be moved. - Actions targeting the model version produced by this job will need - to specifically reference this version by ID or alias. - New model uploads, i.e. version 1, will always be "default" aliased. 
- model_version_aliases: User provided version aliases so that the model version - uploaded by this job can be referenced via alias instead of - auto-generated version ID. A default version alias will be created - for the first version of the model. - The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] + unavailable_at_forecast_columns: Column names of columns that are unavailable at forecast. Each column contains information for the given entity (identified by the [time_series_identifier_column]) that is unknown before the forecast (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns: Column names of columns that are available at forecast. Each column contains information for the given entity (identified by the [time_series_identifier_column]) that is known at forecast. + forecast_horizon: The amount of time into the future for which forecasted values for the target are returned. Expressed in number of units defined by the [data_granularity_unit] and [data_granularity_count] field. Inclusive. + data_granularity_unit: The data granularity unit. Accepted values are `minute`, `hour`, `day`, `week`, `month`, `year`. + data_granularity_count: The number of data granularity units between data points in the training data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other values of [data_granularity_unit], must be 1. + training_fraction_split: The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. + validation_fraction_split: The fraction of the input data that is to be used to validate the Model. This is ignored if Dataset is not provided. + test_fraction_split: The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + predefined_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or value in the column) must be one of {`TRAIN`, `VALIDATE`, `TEST`}, and it defines to which set the given piece of data is assigned. If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. Supported only for tabular and time series Datasets. + timestamp_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key values of the key (the values in the column) must be in RFC 3339 `date-time` format, where `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. Supported only for tabular and time series Datasets. This parameter must be used with training_fraction_split, validation_fraction_split, and test_fraction_split. + weight_column: Name of the column that should be used as the weight column. Higher values in this column give more importance to the row during Model training. The column must have numeric values between 0 and 10000 inclusively, and 0 value means that the row is ignored. If the weight column field is not set, then all rows are assumed to have equal weight of 1. + time_series_attribute_columns: Column names that should be used as attribute columns. Each column is constant within a time series. + context_window: The amount of time into the past training and prediction data is used for model training and prediction respectively. Expressed in number of units defined by the [data_granularity_unit] and [data_granularity_count] fields. 
When not provided uses the default value of 0 which means the model sets each series context window to be 0 (also known as "cold start"). Inclusive. + export_evaluated_data_items: Whether to export the test set predictions to a BigQuery table. If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions. Expected format: `bq://::
` If not specified, then results are exported to the following auto-created BigQuery table: `:export_evaluated_examples__.evaluated_examples` Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], if the table exists, for exported test set predictions. If False, and the table exists, then the training job will fail. Applies only if [export_evaluated_data_items] is True and [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles: Quantiles to use for the `minimize-quantile-loss` [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in this case. Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. Each quantile must be unique. + validation_options: Validation options for the data validation component. The available options are: "fail-pipeline" - (default), will validate against the validation and fail the pipeline if it fails. "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours: The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. The training cost of the model will not exceed this budget. The final cost will be attempted to be close to the budget, though may end up being (even) noticeably smaller - at the backend's discretion. This especially may happen when further model training ceases to provide any improvements. If the budget is set to a value known to be insufficient to train a Model for the given training set, the training won't be attempted and will error. The minimum value is 1000 and the maximum is 72000. + model_display_name: If the script produces a managed Vertex AI Model. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + model_id: The ID to use for the Model produced by this job, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. + parent_model: The resource name or model ID of an existing model. The new model uploaded by this job will be a version of `parent_model`. Only set this field when training a new version of an existing model. + is_default_version: When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of the model produced by this job without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the model version produced by this job will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. + model_version_aliases: User provided version aliases so that the model version uploaded by this job can be referenced via alias instead of auto-generated version ID. 
A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] model_version_description: The description of the model version being uploaded by this job. - hierarchy_group_columns: A list of time series attribute column names that - define the time series hierarchy. Only one level of hierarchy is - supported, ex. `region` for a hierarchy of stores or - `department` for a hierarchy of products. If multiple columns - are specified, time series will be grouped by their combined - values, ex. (`blue`, `large`) for `color` and `size`, up - to 5 columns are accepted. If no group columns are specified, - all time series are considered to be part of the same group. - hierarchy_group_total_weight: The weight of the loss for predictions aggregated over - time series in the same hierarchy group. - hierarchy_temporal_total_weight: The weight of the loss for predictions aggregated over - the horizon for a single time series. - hierarchy_group_temporal_total_weight: The weight of the loss for predictions aggregated over - both the horizon and time series in the same hierarchy group. - window_column: Name of the column that should be used to filter input - rows. The column should contain either booleans or string - booleans; if the value of the row is True, generate a sliding - window from that row. - window_stride_length: Step length used to generate input examples. Every - `window_stride_length` rows will be used to generate a sliding - window. - window_max_count: Number of rows that should be used to generate input - examples. If the total row count is larger than this number, the - input data will be randomly sampled to hit the count. - holiday_regions: The geographical regions to use when creating holiday - features. This option is only allowed when data_granularity_unit - is `day`. Acceptable values can come from any of the following - levels: - Top level: GLOBAL - Second level: continental regions - NA: North America - JAPAC: Japan and Asia Pacific - EMEA: Europe, the Middle East and Africa - LAC: Latin America and the Caribbean - Third level: countries from ISO 3166-1 Country codes. + hierarchy_group_columns: A list of time series attribute column names that define the time series hierarchy. Only one level of hierarchy is supported, ex. `region` for a hierarchy of stores or `department` for a hierarchy of products. If multiple columns are specified, time series will be grouped by their combined values, ex. (`blue`, `large`) for `color` and `size`, up to 5 columns are accepted. If no group columns are specified, all time series are considered to be part of the same group. + hierarchy_group_total_weight: The weight of the loss for predictions aggregated over time series in the same hierarchy group. + hierarchy_temporal_total_weight: The weight of the loss for predictions aggregated over the horizon for a single time series. + hierarchy_group_temporal_total_weight: The weight of the loss for predictions aggregated over both the horizon and time series in the same hierarchy group. + window_column: Name of the column that should be used to filter input rows. The column should contain either booleans or string booleans; if the value of the row is True, generate a sliding window from that row. + window_stride_length: Step length used to generate input examples. Every `window_stride_length` rows will be used to generate a sliding window. + window_max_count: Number of rows that should be used to generate input examples. 
If the total row count is larger than this number, the input data will be randomly sampled to hit the count. + holiday_regions: The geographical regions to use when creating holiday features. This option is only allowed when data_granularity_unit is `day`. Acceptable values can come from any of the following levels: + Top level: GLOBAL Second level: continental regions + NA: North America + JAPAC: Japan and Asia Pacific + EMEA: Europe, the Middle East and Africa + LAC: Latin America and the Caribbean Third level: countries from ISO 3166-1 Country codes. display_name: The user-defined name of this TrainingPipeline. - optimization_objective: Objective function the model is to be optimized towards. - The training process creates a Model that optimizes the value of the objective - function over the validation set. The supported optimization objectives: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). - "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) and mean-absolute-error (MAE). - "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. - (Set this objective to build quantile forecasts.) - column_specs: Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - column_transformations: - Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated eventually. + optimization_objective: Objective function the model is to be optimized towards. The training process creates a Model that optimizes the value of the objective function over the validation set. The supported optimization objectives: "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) and mean-absolute-error (MAE). "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. (Set this objective to build quantile forecasts.) 
+ column_specs: Alternative to column_transformations where the keys of the dict are column names and their respective values are one of AutoMLTabularTrainingJob.column_data_types. When creating transformation for BigQuery Struct column, the column should be flattened using "." as the delimiter. Only columns with no child should have a transformation. If an input column has no transformations on it, such a column is ignored by the training, except for the targetColumn, which should have no transformations defined on. Only one of column_transformations or column_specs should be passed. + column_transformations: Transformations to apply to the input columns (i.e. columns other than the targetColumn). Each transformation may produce multiple result values from the column's value, and all are used for training. When creating transformation for BigQuery Struct column, the column should be flattened using "." as the delimiter. Only columns with no child should have a transformation. If an input column has no transformations on it, such a column is ignored by the training, except for the targetColumn, which should have no transformations defined on. Only one of column_transformations or column_specs should be passed. Consider using column_specs as column_transformations will be deprecated eventually. project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. - labels: The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured - by this key if `model_to_upload` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. + labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. 
Note: Model trained by this TrainingPipeline is also secured by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. additional_experiments: Additional experiment flags for the time series forcasting training. Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. + model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py index f68fe04169..68a5d62700 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py @@ -51,176 +51,33 @@ def automl_image_training_job( # fmt: off """Runs the AutoML Image training job and returns a model. - If training on a Vertex AI dataset, you can use one of the following split configurations: - - Data fraction splits: - Any of `training_fraction_split`, `validation_fraction_split` and - `test_fraction_split` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - Data filter splits: - Assigns input data to training, validation, and test sets - based on the given filters, data pieces not matched by any - filter are ignored. Currently only supported for Datasets - containing DataItems. - If any of the filters in this message are to match nothing, then - they can be set as '-' (the minus sign). - If using filter splits, all of `training_filter_split`, `validation_filter_split` and - `test_filter_split` must be provided. - - Supported only for unstructured Datasets. + If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: Any of `training_fraction_split`, `validation_fraction_split` and `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Data filter splits: Assigns input data to training, validation, and test sets based on the given filters, data pieces not matched by any filter are ignored. Currently only supported for Datasets containing DataItems. If any of the filters in this message are to match nothing, then they can be set as '-' (the minus sign). If using filter splits, all of `training_filter_split`, `validation_filter_split` and `test_filter_split` must be provided. 
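The fraction-split rule described above is shared by these training-job components: any fractions that are provided must sum to at most 1, Vertex AI assigns the remainder, and omitting all three falls back to roughly 80% training, 10% validation, and 10% test. A toy sketch of that rule, not part of the components themselves:

def check_fraction_splits(training=None, validation=None, test=None):
    """Toy illustration of the documented fraction-split behaviour."""
    provided = [f for f in (training, validation, test) if f is not None]
    if not provided:
        return 0.8, 0.1, 0.1  # approximate Vertex AI default split
    if sum(provided) > 1.0:
        raise ValueError("training/validation/test fractions must sum to at most 1")
    return training, validation, test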
Supported only for unstructured Datasets. Args: - dataset: The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. - training_fraction_split: The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split: The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split: The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - training_filter_split: A filter on DataItems of the Dataset. DataItems that match - this filter are used to train the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - Example usage: training_filter_split="labels.aiplatform.googleapis.com/ml_use=training". - validation_filter_split: A filter on DataItems of the Dataset. DataItems that match - this filter are used to validate the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - Example usage: validation_filter_split= "labels.aiplatform.googleapis.com/ml_use=validation". - test_filter_split: A filter on DataItems of the Dataset. DataItems that match - this filter are used to test the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - Example usage: test_filter_split= "labels.aiplatform.googleapis.com/ml_use=test". - budget_milli_node_hours: The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - Defaults by `prediction_type`: - `classification` - For Cloud models the budget must be: 8,000 - 800,000 - milli node hours (inclusive). The default value is 192,000 which - represents one day in wall time, assuming 8 nodes are used. - `object_detection` - For Cloud models the budget must be: 20,000 - 900,000 - milli node hours (inclusive). The default value is 216,000 which represents - one day in wall time, assuming 9 nodes are used. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. 
If the budget is set to a value known to be insufficient to - train a Model for the given training set, the training won't be attempted and - will error. - model_display_name: The display name of the managed Vertex AI Model. The name - can be up to 128 characters long and can be consist of any UTF-8 - characters. If not provided upon creation, the job's display_name is used. - model_labels: The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - disable_early_stopping: bool = False - If true, the entire budget is used. This disables the early stopping - feature. By default, the early stopping feature is enabled, which means - that training might stop before the entire training budget has been - used, if further training does no longer brings significant improvement - to the model. + dataset: The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. + training_fraction_split: The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. + validation_fraction_split: The fraction of the input data that is to be used to validate the Model. This is ignored if Dataset is not provided. + test_fraction_split: The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + training_filter_split: A filter on DataItems of the Dataset. DataItems that match this filter are used to train the Model. A filter with same syntax as the one used in DatasetService.ListDataItems may be used. If a single DataItem is matched by more than one of the FilterSplit filters, then it is assigned to the first set that applies to it in the training, validation, test order. This is ignored if Dataset is not provided. Example usage: training_filter_split="labels.aiplatform.googleapis.com/ml_use=training". + validation_filter_split: A filter on DataItems of the Dataset. DataItems that match this filter are used to validate the Model. A filter with same syntax as the one used in DatasetService.ListDataItems may be used. If a single DataItem is matched by more than one of the FilterSplit filters, then it is assigned to the first set that applies to it in the training, validation, test order. This is ignored if Dataset is not provided. Example usage: validation_filter_split= "labels.aiplatform.googleapis.com/ml_use=validation". + test_filter_split: A filter on DataItems of the Dataset. DataItems that match this filter are used to test the Model. A filter with same syntax as the one used in DatasetService.ListDataItems may be used. If a single DataItem is matched by more than one of the FilterSplit filters, then it is assigned to the first set that applies to it in the training, validation, test order. This is ignored if Dataset is not provided. Example usage: test_filter_split= "labels.aiplatform.googleapis.com/ml_use=test". 
+ budget_milli_node_hours: The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. Defaults by `prediction_type`: `classification` - For Cloud models the budget must be: 8,000 - 800,000 milli node hours (inclusive). The default value is 192,000 which represents one day in wall time, assuming 8 nodes are used. `object_detection` - For Cloud models the budget must be: 20,000 - 900,000 milli node hours (inclusive). The default value is 216,000 which represents one day in wall time, assuming 9 nodes are used. The training cost of the model will not exceed this budget. The final cost will be attempted to be close to the budget, though may end up being (even) noticeably smaller - at the backend's discretion. This especially may happen when further model training ceases to provide any improvements. If the budget is set to a value known to be insufficient to train a Model for the given training set, the training won't be attempted and will error. + model_display_name: The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + disable_early_stopping: bool = False If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. display_name: The user-defined name of this TrainingPipeline. - prediction_type: The type of prediction the Model is to produce, one of: - "classification" - Predict one out of multiple target values is picked for each row. - "object_detection" - Predict a value based on its relation to other values. - This type is available only to columns that contain - semantically numeric values, i.e. integers or floating - point number, even if stored as e.g. strings. - multi_label: bool = False - Default is False. - If false, a single-label (multi-class) Model will be trained - (i.e. assuming that for each image just up to one annotation may be - applicable). If true, a multi-label Model will be trained (i.e. - assuming that for each image multiple annotations may be applicable). - This is only applicable for the "classification" prediction_type and - will be ignored otherwise. - model_type: str = "CLOUD" - One of the following: - "CLOUD" - Default for Image Classification. - A Model best tailored to be used within Google Cloud, and - which cannot be exported. - "CLOUD_HIGH_ACCURACY_1" - Default for Image Object Detection. - A model best tailored to be used within Google Cloud, and - which cannot be exported. Expected to have a higher latency, - but should also have a higher prediction quality than other - cloud models. - "CLOUD_LOW_LATENCY_1" - A model best tailored to be used within - Google Cloud, and which cannot be exported. Expected to have a - low latency, but may have lower prediction quality than other - cloud models. 
- "MOBILE_TF_LOW_LATENCY_1" - A model that, in addition to being - available within Google Cloud, can also be exported as TensorFlow - or Core ML model and used on a mobile or edge device afterwards. - Expected to have low latency, but may have lower prediction - quality than other mobile models. - "MOBILE_TF_VERSATILE_1" - A model that, in addition to being - available within Google Cloud, can also be exported as TensorFlow - or Core ML model and used on a mobile or edge device with afterwards. - "MOBILE_TF_HIGH_ACCURACY_1" - A model that, in addition to being - available within Google Cloud, can also be exported as TensorFlow - or Core ML model and used on a mobile or edge device afterwards. - Expected to have a higher latency, but should also have a higher - prediction quality than other mobile models. - base_model: Optional[models.Model] = None - Only permitted for Image Classification models. - If it is specified, the new model will be trained based on the `base` model. - Otherwise, the new model will be trained from scratch. The `base` model - must be in the same Project and Location as the new Model to train, - and have the same model_type. + prediction_type: The type of prediction the Model is to produce, one of: "classification" - Predict one out of multiple target values is picked for each row. "object_detection" - Predict a value based on its relation to other values. This type is available only to columns that contain semantically numeric values, i.e. integers or floating point number, even if stored as e.g. strings. + multi_label: bool = False Default is False. If false, a single-label (multi-class) Model will be trained (i.e. assuming that for each image just up to one annotation may be applicable). If true, a multi-label Model will be trained (i.e. assuming that for each image multiple annotations may be applicable). This is only applicable for the "classification" prediction_type and will be ignored otherwise. + model_type: str = "CLOUD" One of the following: "CLOUD" - Default for Image Classification. A Model best tailored to be used within Google Cloud, and which cannot be exported. "CLOUD_HIGH_ACCURACY_1" - Default for Image Object Detection. A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a higher latency, but should also have a higher prediction quality than other cloud models. "CLOUD_LOW_LATENCY_1" - A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a low latency, but may have lower prediction quality than other cloud models. "MOBILE_TF_LOW_LATENCY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have low latency, but may have lower prediction quality than other mobile models. "MOBILE_TF_VERSATILE_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device with afterwards. "MOBILE_TF_HIGH_ACCURACY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have a higher latency, but should also have a higher prediction quality than other mobile models. + base_model: Optional[models.Model] = None Only permitted for Image Classification models. 
If it is specified, the new model will be trained based on the `base` model. Otherwise, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same model_type. project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. - labels: The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured - by this key if `model_to_upload` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. + labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. + model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. 
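A minimal sketch of wiring this component into a KFP pipeline through the AutoMLImageTrainingJobRunOp alias exported above. The importer URI form, project, budget, and display names are illustrative assumptions; an upstream dataset-creation component could supply the dataset artifact instead of dsl.importer.

from kfp import dsl
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.automl.training_job import AutoMLImageTrainingJobRunOp

@dsl.pipeline(name="automl-image-training")
def image_training_pipeline(project: str, location: str, dataset_resource_name: str):
    # Import an existing Vertex AI image dataset as a VertexDataset artifact
    # (passing the resource name as the URI is an assumption; adjust as needed).
    dataset_importer = dsl.importer(
        artifact_uri=dataset_resource_name,
        artifact_class=artifact_types.VertexDataset,
        metadata={"resourceName": dataset_resource_name},
    )
    AutoMLImageTrainingJobRunOp(
        project=project,
        location=location,
        display_name="train-image-classifier",  # illustrative
        dataset=dataset_importer.output,
        prediction_type="classification",
        model_type="CLOUD",
        budget_milli_node_hours=8000,
        model_display_name="image-classifier",  # illustrative
    )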
""" # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py index f77d56cc92..89b057117d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_tabular_training_job/component.py @@ -62,216 +62,44 @@ def automl_tabular_training_job( # fmt: off """Runs the training job and returns a model. - If training on a Vertex AI dataset, you can use one of the following split configurations: - - Data fraction splits: - Any of `training_fraction_split`, `validation_fraction_split` and - `test_fraction_split` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, `predefined_split_column_name` must be provided. - Supported only for tabular Datasets. - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - - Supported only for tabular Datasets. + If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: Any of `training_fraction_split`, `validation_fraction_split` and `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. If using predefined splits, `predefined_split_column_name` must be provided. Supported only for tabular Datasets. Timestamp splits: Assigns input data to training, validation, and test sets based on a provided timestamps. The youngest data pieces are assigned to training set, next to validation set, and the oldest to the test set. Supported only for tabular Datasets. Args: - dataset: The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. + dataset: The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. 
target_column: The name of the column values of which the Model is to predict. - training_fraction_split: The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split: The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split: The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - predefined_split_column_name: The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {`training`, - `validation`, `test`}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - Supported only for tabular and time series Datasets. - timestamp_split_column_name: The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. - weight_column: Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. - budget_milli_node_hours: The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name: If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - If not provided upon creation, the job's display_name is used. - model_labels: The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - model_id: The ID to use for the Model produced by this job, - which will become the final component of the model resource name. - This value may be up to 63 characters, and valid characters - are `[a-z0-9_-]`. The first character cannot be a number or hyphen. - parent_model: The resource name or model ID of an existing model. 
- The new model uploaded by this job will be a version of `parent_model`. - Only set this field when training a new version of an existing model. - is_default_version: When set to True, the newly uploaded model version will - automatically have alias "default" included. Subsequent uses of - the model produced by this job without a version specified will - use this "default" version. - When set to False, the "default" alias will not be moved. - Actions targeting the model version produced by this job will need - to specifically reference this version by ID or alias. - New model uploads, i.e. version 1, will always be "default" aliased. - model_version_aliases: User provided version aliases so that the model version - uploaded by this job can be referenced via alias instead of - auto-generated version ID. A default version alias will be created - for the first version of the model. - The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] + training_fraction_split: The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. + validation_fraction_split: The fraction of the input data that is to be used to validate the Model. This is ignored if Dataset is not provided. + test_fraction_split: The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + predefined_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or value in the column) must be one of {`training`, `validation`, `test`}, and it defines to which set the given piece of data is assigned. If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. Supported only for tabular and time series Datasets. + timestamp_split_column_name: The key is a name of one of the Dataset's data columns. The value of the key values of the key (the values in the column) must be in RFC 3339 `date-time` format, where `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. Supported only for tabular and time series Datasets. This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. + weight_column: Name of the column that should be used as the weight column. Higher values in this column give more importance to the row during Model training. The column must have numeric values between 0 and 10000 inclusively, and 0 value means that the row is ignored. If the weight column field is not set, then all rows are assumed to have equal weight of 1. + budget_milli_node_hours: The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. The training cost of the model will not exceed this budget. The final cost will be attempted to be close to the budget, though may end up being (even) noticeably smaller - at the backend's discretion. This especially may happen when further model training ceases to provide any improvements. If the budget is set to a value known to be insufficient to train a Model for the given training set, the training won't be attempted and will error. The minimum value is 1000 and the maximum is 72000. + model_display_name: If the script produces a managed Vertex AI Model. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. 
If not provided upon creation, the job's display_name is used. + model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + model_id: The ID to use for the Model produced by this job, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. + parent_model: The resource name or model ID of an existing model. The new model uploaded by this job will be a version of `parent_model`. Only set this field when training a new version of an existing model. + is_default_version: When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of the model produced by this job without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the model version produced by this job will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. + model_version_aliases: User provided version aliases so that the model version uploaded by this job can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] model_version_description: The description of the model version being uploaded by this job. - disable_early_stopping: If true, the entire budget is used. This disables the early stopping - feature. By default, the early stopping feature is enabled, which means - that training might stop before the entire training budget has been - used, if further training does no longer brings significant improvement - to the model. - export_evaluated_data_items: Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions. - Expected format: - `bq://::
` - If not specified, then results are exported to the following auto-created BigQuery - table: - `:export_evaluated_examples__.evaluated_examples` - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. + disable_early_stopping: If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. + export_evaluated_data_items: Whether to export the test set predictions to a BigQuery table. If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri: URI of desired destination BigQuery table for exported test set predictions. Expected format: `bq://::
` If not specified, then results are exported to the following auto-created BigQuery table: `:export_evaluated_examples__.evaluated_examples` Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination: Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], if the table exists, for exported test set predictions. If False, and the table exists, then the training job will fail. Applies only if [export_evaluated_data_items] is True and [export_evaluated_data_items_bigquery_destination_uri] is specified. display_name: The user-defined name of this TrainingPipeline. - optimization_prediction_type: The type of prediction the Model is to produce. - "classification" - Predict one out of multiple target values is - picked for each row. - "regression" - Predict a value based on its relation to other values. - This type is available only to columns that contain - semantically numeric values, i.e. integers or floating - point number, even if stored as e.g. strings. - optimization_objective: Objective function the Model is to be optimized towards. The training - task creates a Model that maximizes/minimizes the value of the objective - function over the validation set. - The supported optimization objectives depend on the prediction type, and - in the case of classification also the number of distinct values in the - target column (two distint values -> binary, 3 or more distinct values - -> multi class). - If the field is not set, the default objective function is used. - Classification: "maximize-au-roc" (default) - Maximize the area under the receiver operating characteristic (ROC) curve. - "minimize-log-loss" - Minimize log loss. - "maximize-au-prc" - Maximize the area under the precision-recall curve. - "maximize-precision-at-recall" - Maximize precision for a specified recall value. - "maximize-recall-at-precision" - Maximize recall for a specified precision value. - Classification (multi class): - "minimize-log-loss" (default) - Minimize log loss. - Regression: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - column_specs: Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - column_transformations: - Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. 
- Consider using column_specs as column_transformations will be deprecated eventually. - optimization_objective_recall_value: Required when maximize-precision-at-recall optimizationObjective was - picked, represents the recall value at which the optimization is done. - The minimum value is 0 and the maximum is 1.0. - optimization_objective_precision_value: Required when maximize-recall-at-precision optimizationObjective was - picked, represents the precision value at which the optimization is - done. - The minimum value is 0 and the maximum is 1.0. + optimization_prediction_type: The type of prediction the Model is to produce. "classification" - Predict one out of multiple target values is picked for each row. "regression" - Predict a value based on its relation to other values. This type is available only to columns that contain semantically numeric values, i.e. integers or floating point number, even if stored as e.g. strings. + optimization_objective: Objective function the Model is to be optimized towards. The training task creates a Model that maximizes/minimizes the value of the objective function over the validation set. The supported optimization objectives depend on the prediction type, and in the case of classification also the number of distinct values in the target column (two distint values -> binary, 3 or more distinct values -> multi class). If the field is not set, the default objective function is used. Classification: "maximize-au-roc" (default) - Maximize the area under the receiver operating characteristic (ROC) curve. "minimize-log-loss" - Minimize log loss. "maximize-au-prc" - Maximize the area under the precision-recall curve. "maximize-precision-at-recall" - Maximize precision for a specified recall value. "maximize-recall-at-precision" - Maximize recall for a specified precision value. Classification (multi class): "minimize-log-loss" (default) - Minimize log loss. Regression: "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). "minimize-mae" - Minimize mean-absolute error (MAE). "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + column_specs: Alternative to column_transformations where the keys of the dict are column names and their respective values are one of AutoMLTabularTrainingJob.column_data_types. When creating transformation for BigQuery Struct column, the column should be flattened using "." as the delimiter. Only columns with no child should have a transformation. If an input column has no transformations on it, such a column is ignored by the training, except for the targetColumn, which should have no transformations defined on. Only one of column_transformations or column_specs should be passed. + column_transformations: Transformations to apply to the input columns (i.e. columns other than the targetColumn). Each transformation may produce multiple result values from the column's value, and all are used for training. When creating transformation for BigQuery Struct column, the column should be flattened using "." as the delimiter. Only columns with no child should have a transformation. If an input column has no transformations on it, such a column is ignored by the training, except for the targetColumn, which should have no transformations defined on. Only one of column_transformations or column_specs should be passed. Consider using column_specs as column_transformations will be deprecated eventually. 
+ optimization_objective_recall_value: Required when maximize-precision-at-recall optimizationObjective was picked, represents the recall value at which the optimization is done. The minimum value is 0 and the maximum is 1.0. + optimization_objective_precision_value: Required when maximize-recall-at-precision optimizationObjective was picked, represents the precision value at which the optimization is done. The minimum value is 0 and the maximum is 1.0. project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. - labels: The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured - by this key if `model_to_upload` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. + labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. + model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. 
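The reflowed docstring above is dense, so a short usage sketch of the tabular training component may help readers of this patch. It is illustrative only and not part of the diff: it assumes the GCPC v1 `TabularDatasetCreateOp` and `AutoMLTabularTrainingJobRunOp` components are available, and every project, URI, column name, and display name below is a placeholder.

```python
# Minimal sketch, not part of this patch. Assumes google-cloud-pipeline-components
# v1 is installed; all URIs, names, and columns below are placeholders.
from google_cloud_pipeline_components.v1.automl.training_job import AutoMLTabularTrainingJobRunOp
from google_cloud_pipeline_components.v1.dataset import TabularDatasetCreateOp
from kfp import dsl


@dsl.pipeline(name="automl-tabular-training-sketch")
def tabular_training(project: str, location: str = "us-central1"):
    # Create a tabular dataset from a BigQuery table (placeholder URI).
    dataset_op = TabularDatasetCreateOp(
        project=project,
        location=location,
        display_name="tabular-dataset",
        bq_source="bq://my-project.my_dataset.training_table",
    )
    # Train with explicit fraction splits; they may sum to at most 1, and any
    # remainder is assigned by Vertex AI.
    AutoMLTabularTrainingJobRunOp(
        project=project,
        location=location,
        display_name="tabular-training",
        dataset=dataset_op.outputs["dataset"],
        target_column="label",
        optimization_prediction_type="classification",
        optimization_objective="minimize-log-loss",
        training_fraction_split=0.8,
        validation_fraction_split=0.1,
        test_fraction_split=0.1,
        budget_milli_node_hours=1000,  # 1 node hour, the documented minimum
    )
```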
""" # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py index 541ef7a1bf..6ad67e24d0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_text_training_job/component.py @@ -45,99 +45,24 @@ def automl_text_training_job( # fmt: off """Runs the training job and returns a model. - If training on a Vertex AI dataset, you can use one of the following split configurations: - - Data fraction splits: - Any of `training_fraction_split`, `validation_fraction_split` and - `test_fraction_split` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - Data filter splits: - Assigns input data to training, validation, and test sets - based on the given filters, data pieces not matched by any - filter are ignored. Currently only supported for Datasets - containing DataItems. - If any of the filters in this message are to match nothing, then - they can be set as '-' (the minus sign). - - Supported only for unstructured Datasets. + If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: Any of `training_fraction_split`, `validation_fraction_split` and `test_fraction_split` may optionally be provided, they must sum to up to 1. If the provided ones sum to less than 1, the remainder is assigned to sets as decided by Vertex AI. If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. Data filter splits: Assigns input data to training, validation, and test sets based on the given filters, data pieces not matched by any filter are ignored. Currently only supported for Datasets containing DataItems. If any of the filters in this message are to match nothing, then they can be set as '-' (the minus sign). Supported only for unstructured Datasets. Args: - dataset: The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - training_fraction_split: The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split: The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split: The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - model_display_name: The display name of the managed Vertex AI Model. - The name can be up to 128 characters long and can consist - of any UTF-8 characters. - If not provided upon creation, the job's display_name is used. - model_labels: The labels with user-defined metadata to - organize your Models. 
- Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. + dataset: The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + training_fraction_split: The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. + validation_fraction_split: The fraction of the input data that is to be used to validate the Model. This is ignored if Dataset is not provided. + test_fraction_split: The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + model_display_name: The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. display_name: The user-defined name of this TrainingPipeline. - prediction_type: The type of prediction the Model is to produce, one of: - "classification" - A classification model analyzes text data and returns a list of categories that apply to the text found in the data. - Vertex AI offers both single-label and multi-label text classification models. - "extraction" - An entity extraction model inspects text data known entities referenced in the data and labels those entities in the text. - "sentiment" - A sentiment analysis model inspects text data and identifies the prevailing emotional opinion within it, especially to determine a writer's attitude as positive, negative, or neutral. - multi_label: Required and only applicable for text classification task. If false, a single-label (multi-class) Model will be trained (i.e. - assuming that for each text snippet just up to one annotation may be - applicable). If true, a multi-label Model will be trained (i.e. - assuming that for each text snippet multiple annotations may be - applicable). - sentiment_max: Required and only applicable for sentiment task. A sentiment is expressed as an integer - ordinal, where higher value means a more - positive sentiment. The range of sentiments that - will be used is between 0 and sentimentMax - (inclusive on both ends), and all the values in - the range must be represented in the dataset - before a model can be created. - Only the Annotations with this sentimentMax will - be used for training. sentimentMax value must be - between 1 and 10 (inclusive). + prediction_type: The type of prediction the Model is to produce, one of: "classification" - A classification model analyzes text data and returns a list of categories that apply to the text found in the data. Vertex AI offers both single-label and multi-label text classification models. 
"extraction" - An entity extraction model inspects text data known entities referenced in the data and labels those entities in the text. "sentiment" - A sentiment analysis model inspects text data and identifies the prevailing emotional opinion within it, especially to determine a writer's attitude as positive, negative, or neutral. + multi_label: Required and only applicable for text classification task. If false, a single-label (multi-class) Model will be trained (i.e. assuming that for each text snippet just up to one annotation may be applicable). If true, a multi-label Model will be trained (i.e. assuming that for each text snippet multiple annotations may be applicable). + sentiment_max: Required and only applicable for sentiment task. A sentiment is expressed as an integer ordinal, where higher value means a more positive sentiment. The range of sentiments that will be used is between 0 and sentimentMax (inclusive on both ends), and all the values in the range must be represented in the dataset before a model can be created. Only the Annotations with this sentimentMax will be used for training. sentimentMax value must be between 1 and 10 (inclusive). project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. - labels: The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured - by this key if `model_to_upload` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. + labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. 
+ model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. Returns: model: The trained Vertex AI Model resource. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py index 4e14de8df0..6818d02577 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_video_training_job/component.py @@ -43,108 +43,25 @@ def automl_video_training_job( # fmt: off """Runs the AutoML Video training job and returns a model. - If training on a Vertex AI dataset, you can use one of the following split configurations: - - Data fraction splits: - `training_fraction_split`, and `test_fraction_split` may optionally - be provided, they must sum to up to 1. If none of the fractions are set, - by default roughly 80% of data will be used for training, and 20% for test. - Data filter splits: - Assigns input data to training, validation, and test sets - based on the given filters, data pieces not matched by any - filter are ignored. Currently only supported for Datasets - containing DataItems. - If any of the filters in this message are to match nothing, then - they can be set as '-' (the minus sign). - - Supported only for unstructured Datasets. + If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: `training_fraction_split`, and `test_fraction_split` may optionally be provided, they must sum to up to 1. If none of the fractions are set, by default roughly 80% of data will be used for training, and 20% for test. Data filter splits: Assigns input data to training, validation, and test sets based on the given filters, data pieces not matched by any filter are ignored. Currently only supported for Datasets containing DataItems. If any of the filters in this message are to match nothing, then they can be set as '-' (the minus sign). Supported only for unstructured Datasets. Args: - dataset: The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. - training_fraction_split: The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - test_fraction_split: The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - model_display_name: The display name of the managed Vertex AI Model. The name - can be up to 128 characters long and can be consist of any UTF-8 - characters. If not provided upon creation, the job's display_name is used. 
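A matching sketch for the text training component reflowed just above may be easier to scan than the flattened argument list. It is illustrative only: the dataset import schema constant and GCS path are assumptions, not values taken from this patch.

```python
# Minimal sketch, not part of this patch; paths and the schema constant are placeholders.
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.automl.training_job import AutoMLTextTrainingJobRunOp
from google_cloud_pipeline_components.v1.dataset import TextDatasetCreateOp
from kfp import dsl


@dsl.pipeline(name="automl-text-training-sketch")
def text_training(project: str, location: str = "us-central1"):
    # Import text data from a placeholder JSONL file using a single-label schema.
    dataset_op = TextDatasetCreateOp(
        project=project,
        location=location,
        display_name="text-dataset",
        gcs_source="gs://my-bucket/text/import.jsonl",
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
    )
    AutoMLTextTrainingJobRunOp(
        project=project,
        location=location,
        display_name="text-classification-training",
        dataset=dataset_op.outputs["dataset"],
        prediction_type="classification",
        multi_label=False,  # single-label (multi-class) model
        training_fraction_split=0.8,
        validation_fraction_split=0.1,
        test_fraction_split=0.1,
        model_display_name="text-classifier",
    )
```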
- model_labels: The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. + dataset: The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. + training_fraction_split: The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. + test_fraction_split: The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. + model_display_name: The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. display_name: The user-defined name of this TrainingPipeline. - prediction_type: The type of prediction the Model is to produce, one of: - "classification" - A video classification model classifies shots and segments in your videos according to your own defined labels. - "object_tracking" - A video object tracking model detects and tracks multiple objects in shots and segments. You can use these models to track objects in your videos according to your own pre-defined, custom labels. - "action_recognition" - A video action reconition model pinpoints the location of actions with short temporal durations (~1 second). - model_type: str = "CLOUD" - One of the following: - "CLOUD" - available for "classification", "object_tracking" and "action_recognition" - A Model best tailored to be used within Google Cloud, - and which cannot be exported. - "MOBILE_VERSATILE_1" - available for "classification", "object_tracking" and "action_recognition" - A model that, in addition to being available within Google - Cloud, can also be exported (see ModelService.ExportModel) - as a TensorFlow or TensorFlow Lite model and used on a - mobile or edge device with afterwards. - "MOBILE_CORAL_VERSATILE_1" - available only for "object_tracking" - A versatile model that is meant to be exported (see - ModelService.ExportModel) and used on a Google Coral device. - "MOBILE_CORAL_LOW_LATENCY_1" - available only for "object_tracking" - A model that trades off quality for low latency, to be - exported (see ModelService.ExportModel) and used on a - Google Coral device. - "MOBILE_JETSON_VERSATILE_1" - available only for "object_tracking" - A versatile model that is meant to be exported (see - ModelService.ExportModel) and used on an NVIDIA Jetson device. 
- "MOBILE_JETSON_LOW_LATENCY_1" - available only for "object_tracking" - A model that trades off quality for low latency, to be - exported (see ModelService.ExportModel) and used on an - NVIDIA Jetson device. + prediction_type: The type of prediction the Model is to produce, one of: "classification" - A video classification model classifies shots and segments in your videos according to your own defined labels. "object_tracking" - A video object tracking model detects and tracks multiple objects in shots and segments. You can use these models to track objects in your videos according to your own pre-defined, custom labels. "action_recognition" - A video action reconition model pinpoints the location of actions with short temporal durations (~1 second). + model_type: str = "CLOUD" One of the following: "CLOUD" - available for "classification", "object_tracking" and "action_recognition" A Model best tailored to be used within Google Cloud, and which cannot be exported. "MOBILE_VERSATILE_1" - available for "classification", "object_tracking" and "action_recognition" A model that, in addition to being available within Google Cloud, can also be exported (see ModelService.ExportModel) as a TensorFlow or TensorFlow Lite model and used on a mobile or edge device with afterwards. "MOBILE_CORAL_VERSATILE_1" - available only for "object_tracking" A versatile model that is meant to be exported (see ModelService.ExportModel) and used on a Google Coral device. "MOBILE_CORAL_LOW_LATENCY_1" - available only for "object_tracking" A model that trades off quality for low latency, to be exported (see ModelService.ExportModel) and used on a Google Coral device. "MOBILE_JETSON_VERSATILE_1" - available only for "object_tracking" A versatile model that is meant to be exported (see ModelService.ExportModel) and used on an NVIDIA Jetson device. "MOBILE_JETSON_LOW_LATENCY_1" - available only for "object_tracking" A model that trades off quality for low latency, to be exported (see ModelService.ExportModel) and used on an NVIDIA Jetson device. project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. - labels: The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this TrainingPipeline will be secured by this key. - Note: Model trained by this TrainingPipeline is also secured - by this key if `model_to_upload` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. 
+ labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + training_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this TrainingPipeline will be secured by this key. Note: Model trained by this TrainingPipeline is also secured by this key if `model_to_upload` is not set separately. Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, the trained Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. + model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py index 7a836012ad..bc8b2730f2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Serve batch predictions from your models using [Vertex AI Batch Predictions](https://cloud.google.com/vertex-ai/docs/predictions/overview?_ga=2.161419069.-1686833729.1684288907#batch_predictions).""" +# fmt: on from google_cloud_pipeline_components.v1.batch_predict_job.component import model_batch_predict as ModelBatchPredictOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py index 0f3166a431..b179913af9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py @@ -65,219 +65,42 @@ def model_batch_predict( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a Google Cloud Vertex [BatchPredictionJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs) and waits for it to complete. - - For more details, see [BatchPredictionJob.Create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/create).
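Before the batch prediction component, here is a sketch for the video training component whose docstring was reflowed above. It is illustrative only: the import schema constant and GCS path are assumptions and not taken from this patch, and video training only accepts training/test fraction splits.

```python
# Minimal sketch, not part of this patch; paths and the schema constant are placeholders.
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.automl.training_job import AutoMLVideoTrainingJobRunOp
from google_cloud_pipeline_components.v1.dataset import VideoDatasetCreateOp
from kfp import dsl


@dsl.pipeline(name="automl-video-training-sketch")
def video_training(project: str, location: str = "us-central1"):
    # Import video data from a placeholder CSV using the classification schema.
    dataset_op = VideoDatasetCreateOp(
        project=project,
        location=location,
        display_name="video-dataset",
        gcs_source="gs://my-bucket/video/import.csv",
        import_schema_uri=aiplatform.schema.dataset.ioformat.video.classification,
    )
    AutoMLVideoTrainingJobRunOp(
        project=project,
        location=location,
        display_name="video-classification-training",
        dataset=dataset_op.outputs["dataset"],
        prediction_type="classification",
        model_type="CLOUD",
        # Only training/test fractions apply here (roughly 80/20 by default).
        training_fraction_split=0.8,
        test_fraction_split=0.2,
    )
```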
+ """Creates a Google Cloud Vertex [BatchPredictionJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs) and waits for it to complete. For more details, see [BatchPredictionJob.Create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs/create). Args: job_display_name: The user-defined name of this BatchPredictionJob. location: Location for creating the BatchPredictionJob. - instances_format: The format in which instances are - given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)'s supportedInputStorageFormats. - For more details about this input config, see - [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.) - predictions_format: The format in which Vertex AI gives the predictions. Must be one of the - Model's supportedOutputStorageFormats. - For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig). - model: The Model used to get predictions via this job. Must share the same - ancestor Location. Starting this job has no impact on any existing - deployments of the Model and their resources. Either this or - `unmanaged_container_model` must be specified. - unmanaged_container_model: The unmanaged container model used to get predictions via this job. - This should be used for models that are not uploaded to Vertex. Either - this or model must be specified. - gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction - on. They must match `instances_format`. May contain wildcards. For more - information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). - For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). - bigquery_source_input_uri: BigQuery URI to a table, up to 2000 characters long. For example: - `projectId.bqDatasetId.bqTableId` For more details about this input - config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + instances_format: The format in which instances are given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)'s supportedInputStorageFormats. For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.) + predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig). + model: The Model used to get predictions via this job. Must share the same ancestor Location. Starting this job has no impact on any existing deployments of the Model and their resources. Either this or `unmanaged_container_model` must be specified. + unmanaged_container_model: The unmanaged container model used to get predictions via this job. This should be used for models that are not uploaded to Vertex. Either this or model must be specified. 
+ gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. They must match `instances_format`. May contain wildcards. For more information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). + bigquery_source_input_uri: BigQuery URI to a table, up to 2000 characters long. For example: `projectId.bqDatasetId.bqTableId` For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. model_parameters: The parameters that govern the predictions. The schema of the parameters - instance_type: The format of the instance that the Model - accepts. Vertex AI will convert compatible - [InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) - to the specified format. Supported values are: - `object`: Each input is converted to JSON object format. - * For `bigquery`, each row is converted to an object. - * For `jsonl`, each line of the JSONL input must be an object. - * Does not apply to `csv`, `file-list`, `tf-record`, or `tf-record-gzip`. - `array`: Each input is converted to JSON array format. - * For `bigquery`, each row is converted to an array. The order - of columns is determined by the BigQuery column order, unless - [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) is populated. - `included_fields` must be populated for specifying field orders. - * For `jsonl`, if each line of the JSONL input is an object, - `included_fields` must be populated for specifying field orders. - * Does not apply to `csv`, `file-list`, `tf-record`, or - `tf-record-gzip`. - If not specified, Vertex AI converts the batch prediction input as - follows: - * For `bigquery` and `csv`, the behavior is the same as `array`. The - order of columns is the same as defined in the file or table, unless - included_fields is populated. - * For `jsonl`, the prediction instance format is determined by - each line of the input. - * For `tf-record`/`tf-record-gzip`, each record will be converted to - an object in the format of `{"b64": }`, where `` is - the Base64-encoded string of the content of the record. - * For `file-list`, each file in the list will be converted to an - object in the format of `{"b64": }`, where `` is - the Base64-encoded string of the content of the file. - key_field: The name of the field that is considered as a key. - The values identified by the key field is not included in the - transformed instances that is sent to the Model. This is similar to - specifying this name of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). In addition, - the batch prediction output will not include the instances. Instead the - output will only include the value of the key field, in a field named - `key` in the output: - * For `jsonl` output format, the output will have a `key` field - instead of the `instance` field. - * For `csv`/`bigquery` output format, the output will have have a `key` - column instead of the instance feature columns. - The input must be JSONL with objects at each line, CSV, BigQuery - or TfRecord. 
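For the batch prediction inputs documented above (`model`, `instances_format`, `bigquery_source_input_uri`), the following sketch shows one way they might be wired together. It is an assumption-heavy illustration rather than part of this patch: the `dsl.importer` call with a `resourceName` metadata entry follows a commonly used GCPC pattern for referencing an existing Vertex model, and all resource names and BigQuery URIs are placeholders.

```python
# Minimal sketch, not part of this patch; the model resource name and BigQuery
# URIs are placeholders, and the importer metadata shape is an assumption.
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
from kfp import dsl


@dsl.pipeline(name="batch-predict-bigquery-sketch")
def batch_predict_bq(project: str, location: str = "us-central1"):
    # Reference a model that already exists in the Vertex AI Model Registry.
    model_importer = dsl.importer(
        artifact_uri=(
            "https://us-central1-aiplatform.googleapis.com/v1/"
            "projects/my-project/locations/us-central1/models/1234567890"
        ),
        artifact_class=artifact_types.VertexModel,
        metadata={
            "resourceName": "projects/my-project/locations/us-central1/models/1234567890",
        },
    )
    ModelBatchPredictOp(
        project=project,
        location=location,
        job_display_name="batch-predict-from-bigquery",
        model=model_importer.output,
        instances_format="bigquery",
        predictions_format="bigquery",
        bigquery_source_input_uri="bq://my-project.my_dataset.input_table",
        bigquery_destination_output_uri="bq://my-project",
    )
```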
- included_fields: Fields that will be included in the prediction instance that is - sent to the Model. - If `instance_type` is `array`, the order of field names in - `included_fields` also determines the order of the values in the array. - When `included_fields` is populated, `excluded_fields` must be empty. - The input must be JSONL with objects at each line, CSV, BigQuery - or TfRecord. - excluded_fields: Fields that will be excluded in the prediction instance that is - sent to the Model. - Excluded will be attached to the batch prediction output if - key_field is not specified. - When `excluded_fields` is populated, `included_fields` must be empty. - The input must be JSONL with objects at each line, CSV, BigQuery - or TfRecord. - may be specified via the Model's `parameters_schema_uri`. - gcs_destination_output_uri_prefix: The Google Cloud - Storage location of the directory where the output is to be written - to. In the given directory a new directory is created. Its name is - `prediction--`, where timestamp - is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, - ..., `predictions_N.` are created where `` - depends on chosen `predictions_format`, and N may equal 0001 and - depends on the total number of successfully predicted instances. If - the Model has both `instance` and `prediction` schemata defined - then each such file contains predictions as per the - `predictions_format`. If prediction for any instance failed - (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number - of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional `error` field which as - value has `google.rpc.Status` containing only `code` and - `message` fields. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In - the given project a new dataset is created with name - `prediction__` where is made - BigQuery-dataset-name compatible (for example, most special characters - become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ - "based on ISO-8601" format. In the dataset two tables will be created, - `predictions`, and `errors`. If the Model has both `instance` - and `prediction` schemata defined then the tables have columns as - follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the - Model's instance and prediction schemata. The `errors` table - contains rows for which the prediction has failed, it has instance - columns, as per the instance schema, followed by a single "errors" - column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only `code` and - `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - machine_type: The type of machine for running batch - prediction on dedicated resources. If the Model supports - DEDICATED_RESOURCES this config may be provided (and the job will use - these resources). If the Model doesn't support AUTOMATIC_RESOURCES, - this config must be provided. 
For more details about the - BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `accelerator_count`. Only used if - `machine_type` is set. For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - accelerator_count: The number of accelerators to attach - to the `machine_type`. Only used if `machine_type` is set. For more - details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - starting_replica_count: The number of machine replicas - used at the start of the batch operation. If not set, Vertex AI - decides starting number, not greater than `max_replica_count`. Only - used if `machine_type` is set. - max_replica_count: The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. - manual_batch_tuning_parameters_batch_size: The number of - the records (e.g. instances) of the operation given in each batch to a - machine replica. Machine type, and size of a single record should be - considered when setting this parameter, higher value speeds up the - batch operation's execution, but too high value will result in a whole - batch not fitting in a machine's memory, and the whole operation will - fail. - generate_explanation: Generate explanation along with - the batch prediction results. This will cause the batch prediction - output to include explanations based on the `prediction_format`: - - `bigquery`: output includes a column named `explanation`. The value is - a struct that conforms to the [aiplatform.gapic.Explanation] object. - - `jsonl`: The JSON objects on each line include an additional entry - keyed `explanation`. The value of the entry is a JSON object that - conforms to the [aiplatform.gapic.Explanation] object. - `csv`: - Generating explanations for CSV format is not supported. If this - field is set to true, either the Model.explanation_spec or - explanation_metadata and explanation_parameters must be populated. - explanation_metadata: Explanation metadata - configuration for this BatchPredictionJob. Can be specified only if - `generate_explanation` is set to `True`. This value overrides the - value of `Model.explanation_metadata`. All fields of - `explanation_metadata` are optional in the request. If a field of the - `explanation_metadata` object is not populated, the corresponding - field of the `Model.explanation_metadata` object is inherited. For - more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - explanation_parameters: Parameters to configure - explaining for Model's predictions. Can be specified only if - `generate_explanation` is set to `True`. This value overrides the - value of `Model.explanation_parameters`. All fields of - `explanation_parameters` are optional in the request. If a field of - the `explanation_parameters` object is not populated, the - corresponding field of the `Model.explanation_parameters` object is - inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. 
- labels: The labels with user-defined metadata to - organize your BatchPredictionJobs. Label keys and values can be no - longer than 64 characters (Unicode codepoints), can only contain - lowercase letters, numeric characters, underscores and dashes. - International characters are allowed. See https://goo.gl/xmQnxf for - more information and examples of labels. - encryption_spec_key_name: Customer-managed encryption - key options for a BatchPredictionJob. If this is set, then all - resources created by the BatchPredictionJob will be encrypted with the - provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource - is created. + instance_type: The format of the instance that the Model accepts. Vertex AI will convert compatible [InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) to the specified format. Supported values are: `object`: Each input is converted to JSON object format. * For `bigquery`, each row is converted to an object. * For `jsonl`, each line of the JSONL input must be an object. * Does not apply to `csv`, `file-list`, `tf-record`, or `tf-record-gzip`. `array`: Each input is converted to JSON array format. * For `bigquery`, each row is converted to an array. The order of columns is determined by the BigQuery column order, unless [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig) is populated. `included_fields` must be populated for specifying field orders. * For `jsonl`, if each line of the JSONL input is an object, `included_fields` must be populated for specifying field orders. * Does not apply to `csv`, `file-list`, `tf-record`, or `tf-record-gzip`. If not specified, Vertex AI converts the batch prediction input as follows: * For `bigquery` and `csv`, the behavior is the same as `array`. The order of columns is the same as defined in the file or table, unless included_fields is populated. * For `jsonl`, the prediction instance format is determined by each line of the input. * For `tf-record`/`tf-record-gzip`, each record will be converted to an object in the format of `{"b64": }`, where `` is the Base64-encoded string of the content of the record. * For `file-list`, each file in the list will be converted to an object in the format of `{"b64": }`, where `` is the Base64-encoded string of the content of the file. + key_field: The name of the field that is considered as a key. The values identified by the key field is not included in the transformed instances that is sent to the Model. This is similar to specifying this name of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig). In addition, the batch prediction output will not include the instances. Instead the output will only include the value of the key field, in a field named `key` in the output: * For `jsonl` output format, the output will have a `key` field instead of the `instance` field. * For `csv`/`bigquery` output format, the output will have have a `key` column instead of the instance feature columns. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. + included_fields: Fields that will be included in the prediction instance that is sent to the Model. 
If `instance_type` is `array`, the order of field names in `included_fields` also determines the order of the values in the array. When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. + excluded_fields: Fields that will be excluded in the prediction instance that is sent to the Model. The excluded fields will be attached to the batch prediction output if key_field is not specified. When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery or TfRecord. may be specified via the Model's `parameters_schema_uri`. + gcs_destination_output_uri_prefix: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction-<model-display-name>-<job-create-time>`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.<extension>`, `predictions_0002.<extension>`, ..., `predictions_N.<extension>` are created where `<extension>` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.<extension>`, `errors_0002.<extension>`, ..., `errors_N.<extension>` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction_<model-display-name>_<job-create-time>` where `<model-display-name>` is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has [google.rpc.Status](Status) represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + accelerator_type: The type of accelerator(s) that may be attached to the machine as per `accelerator_count`. Only used if `machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + accelerator_count: The number of accelerators to attach to the `machine_type`. Only used if `machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + manual_batch_tuning_parameters_batch_size: The number of the records (e.g. instances) of the operation given in each batch to a machine replica. Machine type, and size of a single record should be considered when setting this parameter, higher value speeds up the batch operation's execution, but too high value will result in a whole batch not fitting in a machine's memory, and the whole operation will fail. + generate_explanation: Generate explanation along with the batch prediction results. This will cause the batch prediction output to include explanations based on the `prediction_format`: - `bigquery`: output includes a column named `explanation`. The value is a struct that conforms to the [aiplatform.gapic.Explanation] object. - `jsonl`: The JSON objects on each line include an additional entry keyed `explanation`. The value of the entry is a JSON object that conforms to the [aiplatform.gapic.Explanation] object. - `csv`: Generating explanations for CSV format is not supported. If this field is set to true, either the Model.explanation_spec or explanation_metadata and explanation_parameters must be populated. + explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + labels: The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. 
+ encryption_spec_key_name: Customer-managed encryption key options for a BatchPredictionJob. If this is set, then all resources created by the BatchPredictionJob will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. project: Project to create the BatchPredictionJob. Defaults to the project in which the PipelineJob is run. Returns: - batchpredictionjob: [**Deprecated. Use gcs_output_directory and bigquery_output_table - instead.**] Artifact - representation of the created batch prediction job. - gcs_output_directory: Artifact tracking the batch prediction job output. This is only - available if - gcs_destination_output_uri_prefix is specified. - bigquery_output_table: Artifact tracking the batch prediction job output. This is only - available if - bigquery_output_table is specified. - gcp_resources: Serialized gcp_resources proto tracking the batch prediction job. - - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + batchpredictionjob: [**Deprecated. Use gcs_output_directory and bigquery_output_table instead.**] Artifact representation of the created batch prediction job. + gcs_output_directory: Artifact tracking the batch prediction job output. This is only available if gcs_destination_output_uri_prefix is specified. + bigquery_output_table: Artifact tracking the batch prediction job output. This is only available if bigquery_output_table is specified. + gcp_resources: Serialized gcp_resources proto tracking the batch prediction job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py index 156ea9fc19..de0fb5cf3f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Create and execute machine learning models via SQL using [Google Cloud BigQuery ML](https://cloud.google.com/bigquery/docs/bqml-introduction).""" +# fmt: on from google_cloud_pipeline_components.v1.bigquery.create_model.component import bigquery_create_model_job as BigqueryCreateModelJobOp from google_cloud_pipeline_components.v1.bigquery.detect_anomalies_model.component import bigquery_detect_anomalies_job as BigqueryDetectAnomaliesModelJobOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py index d68ca55473..7b36700e47 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/create_model/component.py @@ -39,32 +39,17 @@ def bigquery_create_model_job( """Launch a BigQuery create model job and waits for it to finish. 
Args: - location: Location of the job to create the BigQuery model. If not set, default to - `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - query: SQL query text to execute. Only standard SQL is - supported. If query are both specified in here and in - job_configuration_query, the value in here will override the other - one. - query_parameters: Query parameters for standard SQL queries. - If query_parameters are both specified in here and in - job_configuration_query, the value in here will override the other one. - job_configuration_query: A json formatted string describing the rest of the job configuration. - For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + query: SQL query text to execute. Only standard SQL is supported. If the query is specified both here and in job_configuration_query, the value here will override the other one. + query_parameters: Query parameters for standard SQL queries. If query_parameters are specified both here and in job_configuration_query, the value here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. Returns: model: Describes the model which is created. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
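For reference, a minimal KFP usage sketch of the create-model component documented above, assuming a hypothetical project, dataset, and CREATE MODEL statement (the BigqueryCreateModelJobOp alias is the one exported from google_cloud_pipeline_components.v1.bigquery):

from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp

@dsl.pipeline(name='bqml-create-model-example')
def bqml_create_model_pipeline(project: str = 'my-project'):
    # 'my-project', the dataset, and table names are placeholders.
    create_model = BigqueryCreateModelJobOp(
        project=project,
        location='US',
        query=(
            'CREATE OR REPLACE MODEL `my_dataset.my_model` '
            "OPTIONS(model_type='linear_reg', input_label_cols=['label']) AS "
            'SELECT * FROM `my_dataset.training_table`'
        ),
    )
    # create_model.outputs['model'] is the BQML model artifact; the other
    # BigQuery ML components in this package consume it via their `model` input.

Compiling such a pipeline with `kfp.compiler.Compiler().compile(...)` yields the same kind of pipeline spec YAML that the test data elsewhere in this patch exercises.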
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py index 95f080f23d..d281320922 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/detect_anomalies_model/component.py @@ -46,63 +46,22 @@ def bigquery_detect_anomalies_job( """Launch a BigQuery detect anomalies model job and waits for it to finish. Args: - location: Location to run the BigQuery model prediction job. If not set, default - to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for prediction. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#model_name - table_name: BigQuery table id of the input table that contains the data. For more - details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#table_name - query_statement: Query statement string used to generate - the data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#query_statement - contamination: Contamination is the proportion of anomalies in the training dataset - that are used to create the - AUTOENCODER, KMEANS, or PCA input models. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#contamination - anomaly_prob_threshold: The ARIMA_PLUS model supports the - anomaly_prob_threshold custom threshold for anomaly detection. The - value of the anomaly probability at each timestamp is calculated - using the actual time-series data value and the values of the - predicted time-series data and the variance from the model - training. The actual time-series data value at a specific - timestamp is identified as anomalous if the anomaly probability - exceeds the anomaly_prob_threshold value. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#anomaly_prob_threshold - query_parameters: Query parameters for standard SQL queries. - If query_parameters are both specified in here and in - job_configuration_query, the value in here will override the other one. - job_configuration_query: A json formatted string describing the rest of the job configuration. - For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for prediction. 
For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#model_name + table_name: BigQuery table id of the input table that contains the data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#table_name + query_statement: Query statement string used to generate the data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#query_statement + contamination: Contamination is the proportion of anomalies in the training dataset that are used to create the AUTOENCODER, KMEANS, or PCA input models. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#contamination + anomaly_prob_threshold: The ARIMA_PLUS model supports the anomaly_prob_threshold custom threshold for anomaly detection. The value of the anomaly probability at each timestamp is calculated using the actual time-series data value and the values of the predicted time-series data and the variance from the model training. The actual time-series data value at a specific timestamp is identified as anomalous if the anomaly probability exceeds the anomaly_prob_threshold value. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-detect-anomalies#anomaly_prob_threshold + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model prediction results should be - stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. 
For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model prediction results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py index 32660189e8..359b1f1209 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/drop_model/component.py @@ -38,31 +38,16 @@ def bigquery_drop_model_job( """Launch a BigQuery drop model job and waits for it to finish. Args: - location: Location of the job to drop the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location + location: Location of the job to drop the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location model: BigQuery ML model to drop. - query_parameters: Query parameters for standard SQL queries. - If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery model drop job. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. 
- For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py index cc1db25803..6a9cbb3a23 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/evaluate_model/component.py @@ -45,55 +45,21 @@ def bigquery_evaluate_model_job( """Launch a BigQuery evaluate model job and waits for it to finish. Args: - location: Location to run the BigQuery model evaluation - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for evaluation. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_model_name - table_name: BigQuery table id of the input table that - contains the evaluation data, as in ML.EVALUATE(MODEL model_name[, - {TABLE table_name | (query_statement)}] For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_table_name - query_statement: Query statement string used to generate - the evaluation data, as in ML.EVALUATE(MODEL model_name[, {TABLE - table_name | (query_statement)}] For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_query_statement - threshold: A custom threshold for the binary-class - classification model to be used for evaluation. The default value is - 0.5. The threshold value that is supplied must be of type STRUCT. - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_threshold - query_parameters: jobs.query parameters for standard - SQL queries. If query_parameters are both specified in here and in - job_configuration_query, the value in here will override the other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can use - these to organize and group your jobs. Label keys and values can be no - longer than 63 characters, can only containlowercase letters, numeric - characters, underscores and dashes. International characters are - allowed. Label values are optional. Label keys must start with a letter - and each label in the list must have a different key. + location: Location to run the BigQuery model evaluation job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for evaluation. 
For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_model_name + table_name: BigQuery table id of the input table that contains the evaluation data, as in ML.EVALUATE(MODEL model_name[, {TABLE table_name | (query_statement)}] For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_table_name + query_statement: Query statement string used to generate the evaluation data, as in ML.EVALUATE(MODEL model_name[, {TABLE table_name | (query_statement)}] For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_query_statement + threshold: A custom threshold for the binary-class classification model to be used for evaluation. The default value is 0.5. The threshold value that is supplied must be of type STRUCT. https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#eval_threshold + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model evaluation job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model prediction results should be - stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model prediction results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. 
+ gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py index 861385cef8..cb3521abdd 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_forecast_model/component.py @@ -47,49 +47,20 @@ def bigquery_explain_forecast_model_job( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - location: Location to run the BigQuery job. If not set, - default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for ML.EXPLAIN_FORECAST. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast - horizon: Horizon is the number of time points to explain forecast. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#horizon - confidence_level: The percentage of the future values that fall in the prediction - interval. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#confidence_level - query_parameters: Query parameters for standard SQL queries. If query_parameters are both - specified in here and in job_configuration_query, the value in here will - override the other one. - job_configuration_query: A json formatted string describing the rest of the job configuration. - For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.EXPLAIN_FORECAST. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast + horizon: Horizon is the number of time points to explain forecast. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#horizon + confidence_level: The percentage of the future values that fall in the prediction interval. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#confidence_level + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. 
+ job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model explain forecast results should - be stored. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#mlexplain_forecast_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model explain forecast results should be stored. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-forecast#mlexplain_forecast_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py index f133af8a9e..5d5312c0fc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/explain_predict_model/component.py @@ -46,67 +46,23 @@ def bigquery_explain_predict_model_job( """Launch a BigQuery explain predict model job and waits for it to finish. Args: - location: Location to run the BigQuery model prediction - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for explaining - prediction. 
For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#model_name - table_name: BigQuery table id of the input table that - contains the prediction data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#table_name - query_statement: Query statement string used to generate - the prediction data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#query_statement - top_k_features: This argument specifies how many top - feature attribution pairs are generated per row of input data. The - features are ranked by the absolute values of their attributions. For - more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#top_k_features - threshold: A custom threshold for the binary logistic - regression model used as the cutoff between two labels. Predictions - above the threshold are treated as positive prediction. Predictions - below the threshold are negative predictions. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#threshold - num_integral_steps: This argument specifies the number - of steps to sample between the example being explained and its - baseline for approximating the integral in integrated gradients - attribution methods. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#num_integral_steps - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for explaining prediction. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#model_name + table_name: BigQuery table id of the input table that contains the prediction data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#table_name + query_statement: Query statement string used to generate the prediction data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#query_statement + top_k_features: This argument specifies how many top feature attribution pairs are generated per row of input data. The features are ranked by the absolute values of their attributions. 
For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#top_k_features + threshold: A custom threshold for the binary logistic regression model used as the cutoff between two labels. Predictions above the threshold are treated as positive prediction. Predictions below the threshold are negative predictions. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#threshold + num_integral_steps: This argument specifies the number of steps to sample between the example being explained and its baseline for approximating the integral in integrated gradients attribution methods. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict#num_integral_steps + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model prediction results should be - stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model prediction results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
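A similar sketch for the explain-predict component above, assuming an upstream create-model task and placeholder table names:

from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.explain_predict_model.component import (
    bigquery_explain_predict_model_job,
)

@dsl.pipeline(name='bqml-explain-predict-example')
def explain_predict_pipeline(project: str = 'my-project'):
    # Dataset, table names, and SQL are illustrative placeholders.
    train = BigqueryCreateModelJobOp(
        project=project,
        location='US',
        query=(
            'CREATE OR REPLACE MODEL `my_dataset.churn_model` '
            "OPTIONS(model_type='logistic_reg', input_label_cols=['churned']) AS "
            'SELECT * FROM `my_dataset.training_table`'
        ),
    )
    # Explain predictions over a scoring table; top_k_features caps the number
    # of feature attribution pairs returned per row.
    bigquery_explain_predict_model_job(
        project=project,
        location='US',
        model=train.outputs['model'],
        table_name='my_dataset.scoring_table',
        top_k_features=5,
    )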
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py index 772b00d04c..da56098be7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/export_model/component.py @@ -39,30 +39,17 @@ def bigquery_export_model_job( """Launch a BigQuery export model job and waits for it to finish. Args: - location: Location of the job to export the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location + location: Location of the job to export the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location model: BigQuery ML model to export. - model_destination_path: - The gcs bucket to export the - model to. - job_configuration_extract: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + model_destination_path: The gcs bucket to export the model to. + job_configuration_extract: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery model export job. Defaults to the project in which the PipelineJob is run. Returns: exported_model_path: The gcs bucket path where you export the model to. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py index 68fa94f944..f6ae5a23a1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/feature_importance/component.py @@ -43,43 +43,18 @@ def bigquery_ml_feature_importance_job( finish. Args: - location: Location of the job to create the BigQuery - model. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for feature - importance. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for feature importance. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. 
The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. Returns: - feature_importance: Describes common metrics applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-importance - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + feature_importance: Describes common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-importance + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py index e2c0ce7f72..c94d3aa3dc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/forecast_model/component.py @@ -47,49 +47,21 @@ def bigquery_forecast_model_job( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - location: Location to run the BigQuery job. If not set, - default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for ML.FORECAST. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast - horizon: Horizon is the number of time points to - forecast. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#horizon - confidence_level: The percentage of the future values - that fall in the prediction interval. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#confidence_level - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.FORECAST. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast + horizon: Horizon is the number of time points to forecast. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#horizon + confidence_level: The percentage of the future values that fall in the prediction interval. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#confidence_level + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model forecast results should be - stored. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#mlforecast_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model forecast results should be stored. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-forecast#mlforecast_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
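A comparable sketch for the forecast component above, assuming an ARIMA_PLUS model trained on a hypothetical daily-sales table:

from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.forecast_model.component import (
    bigquery_forecast_model_job,
)

@dsl.pipeline(name='bqml-forecast-example')
def forecast_pipeline(project: str = 'my-project'):
    # Dataset, table, and column names are illustrative placeholders.
    train = BigqueryCreateModelJobOp(
        project=project,
        location='US',
        query=(
            'CREATE OR REPLACE MODEL `my_dataset.sales_arima` '
            "OPTIONS(model_type='ARIMA_PLUS', time_series_timestamp_col='day', "
            "time_series_data_col='sales') AS "
            'SELECT day, sales FROM `my_dataset.daily_sales`'
        ),
    )
    # Forecast 30 future points with a 90% prediction interval; the forecast
    # rows land in the task's `destination_table` output.
    bigquery_forecast_model_job(
        project=project,
        location='US',
        model=train.outputs['model'],
        horizon=30,
        confidence_level=0.9,
    )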
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py index 3791cc5298..dbd64a7e14 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/global_explain/component.py @@ -43,26 +43,14 @@ def bigquery_ml_global_explain_job( """Launch a BigQuery global explain fetching job and waits for it to finish. Args: - location: Location of the job to create the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for global - explain. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name - class_level_explain: For classification - models, if class_level_explain is set to TRUE then global feature - importances are returned for each class. Otherwise, the global - feature importance of the entire model is returned rather than that - of each class. By default, class_level_explain is set to FALSE. This - option only applies to classification models. Regression models only - have model-level global feature importance. + location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for global explain. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name + class_level_explain: For classification models, if class_level_explain is set to TRUE then global feature importances are returned for each class. Otherwise, the global feature importance of the entire model is returned rather than that of each class. By default, class_level_explain is set to FALSE. This option only applies to classification models. Regression models only have model-level global feature importance. project: Project to run BigQuery model creation job. Defaults to the project in which the PipelineJob is run. Returns: destination_table: Describes the table where the global explain results should be stored. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py index d41ccee4dc..a51e414d10 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_advanced_weights/component.py @@ -41,36 +41,17 @@ def bigquery_ml_advanced_weights_job( """Launch a BigQuery ml advanced weights job and waits for it to finish. 
Args: - location: Location of the job to create the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for ml advanced - weights job. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ml advanced weights job. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery ml advanced weights job. Defaults to the project in which the PipelineJob is run. Returns: - weights: Describes different output columns for different models. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-advanced-weights#mladvanced_weights_output. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + weights: Describes different output columns for different models. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-advanced-weights#mladvanced_weights_output. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
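As with the other ML introspection components in this file set, the call surface here is just the model plus the shared job plumbing. A minimal hedged sketch is below; the upstream BigqueryCreateModelJobOp task, its 'model' output key, and the label values are illustrative assumptions.

# Hedged sketch: fetch advanced weights for a model trained earlier in the
# same pipeline; 'train' is assumed to be a BigqueryCreateModelJobOp task.
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.ml_advanced_weights.component import (
    bigquery_ml_advanced_weights_job,
)


@dsl.pipeline(name='bq-advanced-weights-sketch')
def advanced_weights_sketch(project: str, location: str = 'US'):
    train = BigqueryCreateModelJobOp(
        project=project,
        location=location,
        query='CREATE OR REPLACE MODEL `my_dataset.linear_model` '
              "OPTIONS (model_type='LINEAR_REG', input_label_cols=['y']) "
              'AS SELECT * FROM `my_dataset.training_data`',
    )
    bigquery_ml_advanced_weights_job(
        project=project,
        location=location,
        model=train.outputs['model'],
        # Labels follow the constraints spelled out in the docstring above.
        labels={'team': 'forecasting', 'stage': 'debug'},
    )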
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py index ac1e2eea70..748c1547b6 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_coefficients/component.py @@ -45,36 +45,16 @@ def bigquery_ml_arima_coefficients( This function only applies to the time-series ARIMA_PLUS and ARIMA models. Args: - location: Location to run the BigQuery job. If not set, - default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for - ML.ARIMA_COEFFICIENTS. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-coefficients - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + location: Location to run the BigQuery job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.ARIMA_COEFFICIENTS. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-coefficients + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run the BigQuery job. Defaults to the project in which the PipelineJob is run. Returns: - arima_coefficients: Describes arima_coefficients to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-coefficients#mlarima_coefficients_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + arima_coefficients: Describes arima_coefficients to the type of model supplied. 
For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-coefficients#mlarima_coefficients_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py index 130a3e105b..841a095966 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_arima_evaluate/component.py @@ -43,49 +43,19 @@ def bigquery_ml_arima_evaluate_job( """Launch a BigQuery ML.ARIMA_EVALUATE job and waits for it to finish. Args: - location: Location to run the BigQuery model evaluation - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for - ML.ARIMA_EVALUATE. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#model_name - show_all_candidate_models: You can use - show_all_candidate_models to show evaluation metrics or an error - message for either all candidate models or for only the best model - with the lowest AIC. The value is type BOOL and is part of the - settings STRUCT. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#show_all_candidate_models - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery model evaluation job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.ARIMA_EVALUATE. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#model_name + show_all_candidate_models: You can use show_all_candidate_models to show evaluation metrics or an error message for either all candidate models or for only the best model with the lowest AIC. The value is type BOOL and is part of the settings STRUCT. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#show_all_candidate_models + query_parameters: jobs.query parameters for standard SQL queries. 
If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model evaluation job. Defaults to the project in which the PipelineJob is run. Returns: - arima_evaluation_metrics: Describes arima metrics. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#mlarima_evaluate_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + arima_evaluation_metrics: Describes arima metrics. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-arima-evaluate#mlarima_evaluate_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py index 71e2493914..5c30d8f1d7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_centroids/component.py @@ -43,47 +43,19 @@ def bigquery_ml_centroids_job( """Launch a BigQuery ML.CENTROIDS job and waits for it to finish. Args: - location: Location to run the BigQuery ML.CENTROIDS job. - If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for ML.CENTROIDS. 
- For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_syntax - standardize: Determines whether the centroid features - should be standardized to assume that all features have a mean of zero - and a standard deviation of one. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_syntax - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ML.CENTROIDS job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.CENTROIDS. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_syntax + standardize: Determines whether the centroid features should be standardized to assume that all features have a mean of zero and a standard deviation of one. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_syntax + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery ML.CENTROIDS job. 
Defaults to the project in which the PipelineJob is run. Returns: - centroids: Information about the centroids in a k-means model. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + centroids: Information about the centroids in a k-means model. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-centroids#mlcentroids_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py index fa86384ac7..7ce55a27d7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_confusion_matrix/component.py @@ -44,44 +44,20 @@ def bigquery_ml_confusion_matrix_job( """Launch a BigQuery confusion matrix job and waits for it to finish. Args: - location: Location to run the BigQuery confusion matrix - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for confusion - matrix. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_model_name - table_name: BigQuery table id of the input table that - contains the evaluation data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_table_name - query_statement: Query statement string used to generate - the evaluation data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_query_statement - threshold: A custom threshold for your binary - classification model used for evaluation. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_threshold - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery confusion matrix job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for confusion matrix. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_model_name + table_name: BigQuery table id of the input table that contains the evaluation data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_table_name + query_statement: Query statement string used to generate the evaluation data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_query_statement + threshold: A custom threshold for your binary classification model used for evaluation. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#eval_threshold + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery confusion matrix job. Defaults to the project in which the PipelineJob is run. Returns: - confusion_matrix: Describes common metrics applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#mlconfusion_matrix_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + confusion_matrix: Describes common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-confusion#mlconfusion_matrix_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py index 6af06e09a3..4ce9884154 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_feature_info/component.py @@ -41,36 +41,17 @@ def bigquery_ml_feature_info_job( """Launch a BigQuery feature info job and waits for it to finish. Args: - location: Location of the job to run BigQuery feature - info job. If not set, default to `US` multi-region. 
For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for evaluation. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to run BigQuery feature info job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for evaluation. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery feature info job. Defaults to the project in which the PipelineJob is run. Returns: - feature_info: Describes common metrics applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-feature#mlfeature_info_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + feature_info: Describes common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-feature#mlfeature_info_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
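This component needs nothing beyond a model handle and the shared job arguments, so a very small sketch suffices; the upstream create-model task and dataset names below are assumptions.

# Hedged sketch: ML.FEATURE_INFO over a freshly trained model.
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.ml_feature_info.component import (
    bigquery_ml_feature_info_job,
)


@dsl.pipeline(name='bq-feature-info-sketch')
def feature_info_sketch(project: str, location: str = 'US'):
    train = BigqueryCreateModelJobOp(
        project=project,
        location=location,
        query="CREATE OR REPLACE MODEL `my_dataset.classifier` "
              "OPTIONS (model_type='LOGISTIC_REG', input_label_cols=['label']) "
              "AS SELECT * FROM `my_dataset.training_data`",
    )
    bigquery_ml_feature_info_job(
        project=project,
        location=location,
        model=train.outputs['model'],
    )

Like any KFP v2 pipeline, such a sketch would compile with kfp.compiler.Compiler().compile(feature_info_sketch, 'pipeline.yaml'), after which the feature_info and gcp_resources outputs described above surface as task outputs.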
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py index 16af856380..505b301a6a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_component_info/component.py @@ -43,45 +43,18 @@ def bigquery_ml_principal_component_info_job( finish. Args: - location: Location to run the BigQuery - ML.principal_component_info job. If not set, default to `US` - multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for - ML.principal_component_info. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-component-info#mlprincipal_component_info_syntax - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ML.principal_component_info job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.principal_component_info. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-component-info#mlprincipal_component_info_syntax + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. 
+ encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery ML.principal_component_info job. Defaults to the project in which PipelineJob is run. Returns: - destination_table: Describes the table which stores common metrics applicable to the type - of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-component-info#mlprincipal_component_info_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table which stores common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-component-info#mlprincipal_component_info_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py index bf1858dd8f..a3b7ede1c8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_principal_components/component.py @@ -42,44 +42,18 @@ def bigquery_ml_principal_components_job( """Launch a BigQuery ML.principal_components job and waits for it to finish. Args: - location: Location to run the BigQuery - ML.principal_components job. If not set, default to `US` multi-region. - For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for - ML.principal_components. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-components#mlprincipal_components_syntax - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ML.principal_components job. If not set, default to `US` multi-region. 
For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.principal_components. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-components#mlprincipal_components_syntax + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. - project: Project to run BigQuery ML.principal_components - job. Defaults to the project in which the PipelineJob is run. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ML.principal_components job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table which stores common metrics applicable to the type - of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-components#mlprincipal_components_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table which stores common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-principal-components#mlprincipal_components_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
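Because ML.PRINCIPAL_COMPONENTS only makes sense against a PCA model, a sketch pairing the two may save readers a round trip to the linked docs. The PCA options, names, and the upstream create-model task below are illustrative assumptions, not part of this change.

# Hedged sketch: principal components of a PCA model trained in-pipeline.
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.ml_principal_components.component import (
    bigquery_ml_principal_components_job,
)


@dsl.pipeline(name='bq-principal-components-sketch')
def principal_components_sketch(project: str, location: str = 'US'):
    train = BigqueryCreateModelJobOp(
        project=project,
        location=location,
        query="CREATE OR REPLACE MODEL `my_dataset.pca_model` "
              "OPTIONS (model_type='PCA', num_principal_components=3) "
              "AS SELECT * EXCEPT(id) FROM `my_dataset.features`",
    )
    bigquery_ml_principal_components_job(
        project=project,
        location=location,
        model=train.outputs['model'],
    )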
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py index da86596e7e..cda1362b0f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_recommend/component.py @@ -44,48 +44,20 @@ def bigquery_ml_recommend_job( """Launch a BigQuery ML.Recommend job and waits for it to finish. Args: - location: Location to run the BigQuery ML.Recommend job. - If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for ML.Recoomend. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_model_name - table_name: BigQuery table id of the input table that - contains the the user and/or item data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_table_name - query_statement: query statement string used to generate - the evaluation data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_query_statement - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ML.Recommend job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for ML.Recoomend. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_model_name + table_name: BigQuery table id of the input table that contains the the user and/or item data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_table_name + query_statement: query statement string used to generate the evaluation data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-recommend#recommend_query_statement + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. 
You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery ML.Recommend job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the recommendation results should be stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the recommendation results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py index b65bac2645..4a80cfb88e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_reconstruction_loss/component.py @@ -43,49 +43,20 @@ def bigquery_ml_reconstruction_loss_job( """Launch a BigQuery ml reconstruction loss job and waits for it to finish. Args: - location: Location to run the BigQuery ml reconstruction - loss job. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model. For more details, - see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_model_name - table_name: BigQuery table id of the input table that - contains the input data. 
For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_table_name - query_statement: Query statement string used to generate - the input data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_query_statement - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ml reconstruction loss job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_model_name + table_name: BigQuery table id of the input table that contains the input data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_table_name + query_statement: Query statement string used to generate the input data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-reconstruction-loss#reconstruction_loss_query_statement + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. - project: Project to run BigQuery ml reconstruction loss - job. Defaults to the project in which the PipelineJob is run. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. 
The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. + project: Project to run BigQuery ml reconstruction loss job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the ml reconstruction loss job results - should be stored. This property must be set for large results that - exceed the maximum response size. For queries that produce anonymous - (cached) results, this field will be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the ml reconstruction loss job results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py index c10e723a49..c7a7f2f841 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_roc_curve/component.py @@ -44,44 +44,20 @@ def bigquery_ml_roc_curve_job( """Launch a BigQuery roc curve job and waits for it to finish. Args: - location: Location of the job to run BigQuery roc curve - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for BigQuery roc - curv job. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_model_name - table_name: BigQuery table id of the input table that - contains the evaluation data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_table_name - query_statement: Query statement string used to generate - the evaluation data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_query_statement - thresholds: Percentile values of the prediction output. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_thresholds - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. 
Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to run BigQuery roc curve job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for BigQuery roc curv job. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_model_name + table_name: BigQuery table id of the input table that contains the evaluation data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_table_name + query_statement: Query statement string used to generate the evaluation data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_query_statement + thresholds: Percentile values of the prediction output. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#roc_thresholds + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery roc curve job. Defaults to the project in which the PipelineJob is run. Returns: - roc_curve: Describes common metrics applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#mlroc_curve_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + roc_curve: Describes common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-roc#mlroc_curve_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
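The ROC curve component is one of the few in this group that takes evaluation data directly, either as table_name or as query_statement, plus optional thresholds. A hedged sketch using query_statement and explicit thresholds follows; the upstream training task, the dataset names, and the assumed list type for thresholds are illustrative, not taken from this patch.

# Hedged sketch: ROC curve at explicit thresholds, with evaluation rows
# supplied via query_statement rather than table_name.
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryCreateModelJobOp
from google_cloud_pipeline_components.v1.bigquery.ml_roc_curve.component import (
    bigquery_ml_roc_curve_job,
)


@dsl.pipeline(name='bq-roc-curve-sketch')
def roc_curve_sketch(project: str, location: str = 'US'):
    train = BigqueryCreateModelJobOp(
        project=project,
        location=location,
        query="CREATE OR REPLACE MODEL `my_dataset.classifier` "
              "OPTIONS (model_type='LOGISTIC_REG', input_label_cols=['label']) "
              "AS SELECT * FROM `my_dataset.training_data`",
    )
    bigquery_ml_roc_curve_job(
        project=project,
        location=location,
        model=train.outputs['model'],
        query_statement='SELECT * FROM `my_dataset.holdout_data`',
        # thresholds is assumed here to accept a list of cut-points.
        thresholds=[0.25, 0.5, 0.75],
    )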
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py index 1cb6aceb43..4cb871769b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_training_info/component.py @@ -42,37 +42,17 @@ def bigquery_ml_training_info_job( finish. Args: - location: Location of the job to create the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - query: SQL query text to execute. Only standard SQL is - supported. If query are both specified in here and in - job_configuration_query, the value in here will override the other - one. - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + query: SQL query text to execute. Only standard SQL is supported. If query are both specified in here and in job_configuration_query, the value in here will override the other one. + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery ML training info job. Defaults to the project in which the PipelineJob is run. Returns: - ml_training_info: Describes common metrics applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#mlevaluate_output - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. 
- For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + ml_training_info: Describes common metrics applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#mlevaluate_output + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py index f3dda16415..87e61cfc90 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_trial_info/component.py @@ -42,40 +42,18 @@ def bigquery_ml_trial_info_job( """Launch a BigQuery ml trial info job and waits for it to finish. Args: - location: Location to run the BigQuery ml trial info - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model. For more details, - see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-trial-info#predict_model_name - query_parameters: Query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery ml trial info job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-trial-info#predict_model_name + query_parameters: Query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. 
Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery ml trial info job. Defaults to the project in which the PipelineJob is run. Returns: - trial_info: Describes the trial info applicable to the type of model supplied. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-trial-info - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + trial_info: Describes the trial info applicable to the type of model supplied. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-trial-info + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py index eae4732d05..10841021a2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/ml_weights/component.py @@ -41,37 +41,17 @@ def bigquery_ml_weights_job( """Launch a BigQuery ml weights job and waits for it to finish. Args: - location: Location of the job to create the BigQuery - model. If not set, default to `US` multi-region. For more details, - see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - query: SQL query text to execute. Only standard SQL is - supported. If query are both specified in here and in - job_configuration_query, the value in here will override the other - one. - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. 
+ location: Location of the job to create the BigQuery model. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + query: SQL query text to execute. Only standard SQL is supported. If query are both specified in here and in job_configuration_query, the value in here will override the other one. + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. project: Project to run BigQuery ml weights job. Defaults to the project in which the PipelineJob is run. Returns: - weights: Describes different output columns for different models. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-weights#mlweights_output. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + weights: Describes different output columns for different models. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-weights#mlweights_output. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py index bc6a9456c9..8fbb21d354 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/predict_model/component.py @@ -45,55 +45,21 @@ def bigquery_predict_model_job( """Launch a BigQuery predict model job and waits for it to finish. Args: - location: Location to run the BigQuery model prediction - job. If not set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - model: BigQuery ML model for prediction. - For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name - table_name: BigQuery table id of the input table that - contains the prediction data. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_table_name - query_statement: Query statement string used to generate - the prediction data. 
For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_query_statement - threshold: A custom threshold for the binary logistic - regression model used as the cutoff between two labels. Predictions - above the threshold are treated as positive prediction. Predictions - below the threshold are negative predictions. For more details, see - https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#threshold - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location to run the BigQuery model prediction job. If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + model: BigQuery ML model for prediction. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_model_name + table_name: BigQuery table id of the input table that contains the prediction data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_table_name + query_statement: Query statement string used to generate the prediction data. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#predict_query_statement + threshold: A custom threshold for the binary logistic regression model used as the cutoff between two labels. Predictions above the threshold are treated as positive prediction. Predictions below the threshold are negative predictions. For more details, see https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict#threshold + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: Describes the Cloud KMS - encryption key that will be used to protect destination BigQuery - table. 
The BigQuery Service Account associated with your project - requires access to this encryption key. If encryption_spec_key_name - are both specified in here and in job_configuration_query, the value - in here will override the other one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run BigQuery model prediction job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the model prediction results should be - stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the model prediction results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py index 1662156b4f..625330844d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/bigquery/query_job/component.py @@ -40,46 +40,18 @@ def bigquery_query_job( """Launch a BigQuery query job and waits for it to finish. Args: - location: Location for creating the BigQuery job. If not - set, default to `US` multi-region. For more details, see - https://cloud.google.com/bigquery/docs/locations#specifying_your_location - query: SQL query text to execute. Only standard SQL is - supported. If query are both specified in here and in - job_configuration_query, the value in here will override the other - one. - query_parameters: jobs.query parameters for - standard SQL queries. If query_parameters are both specified in here - and in job_configuration_query, the value in here will override the - other one. - job_configuration_query: A json formatted string - describing the rest of the job configuration. For more details, see - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery - labels: The labels associated with this job. You can - use these to organize and group your jobs. Label keys and values can - be no longer than 63 characters, can only containlowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. Label values are optional. Label keys must start with a - letter and each label in the list must have a different key. + location: Location for creating the BigQuery job. 
If not set, default to `US` multi-region. For more details, see https://cloud.google.com/bigquery/docs/locations#specifying_your_location + query: SQL query text to execute. Only standard SQL is supported. If query are both specified in here and in job_configuration_query, the value in here will override the other one. + query_parameters: jobs.query parameters for standard SQL queries. If query_parameters are both specified in here and in job_configuration_query, the value in here will override the other one. + job_configuration_query: A json formatted string describing the rest of the job configuration. For more details, see https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery + labels: The labels associated with this job. You can use these to organize and group your jobs. Label keys and values can be no longer than 63 characters, can only containlowercase letters, numeric characters, underscores and dashes. International characters are allowed. Label values are optional. Label keys must start with a letter and each label in the list must have a different key. Example: { "name": "wrench", "mass": "1.3kg", "count": "3" }. - encryption_spec_key_name: - Describes the Cloud - KMS encryption key that will be used to protect destination - BigQuery table. The BigQuery Service Account associated with your - project requires access to this encryption key. If - encryption_spec_key_name are both specified in here and in - job_configuration_query, the value in here will override the other - one. + encryption_spec_key_name: Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account associated with your project requires access to this encryption key. If encryption_spec_key_name are both specified in here and in job_configuration_query, the value in here will override the other one. project: Project to run the BigQuery query job. Defaults to the project in which the PipelineJob is run. Returns: - destination_table: Describes the table where the query results should be stored. - This property must be set for large results that exceed the maximum - response size. - For queries that produce anonymous (cached) results, this field will - be populated by BigQuery. - gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. - For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + destination_table: Describes the table where the query results should be stored. This property must be set for large results that exceed the maximum response size. For queries that produce anonymous (cached) results, this field will be populated by BigQuery. + gcp_resources: Serialized gcp_resources proto tracking the BigQuery job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py index 397119290d..0075f3f221 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
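# Illustrative usage sketch (not from this patch) for the bigquery_query_job
# component reflowed above. Assumes the BigqueryQueryJobOp alias exported from
# google_cloud_pipeline_components.v1.bigquery; the project, dataset, and table
# names are placeholders, and job_configuration_query follows the BigQuery
# JobConfigurationQuery REST schema.
from kfp import dsl
from google_cloud_pipeline_components.v1.bigquery import BigqueryQueryJobOp


@dsl.pipeline(name='bq-query-sketch')
def bq_query_pipeline(project: str = 'my-project'):
    # Run a standard SQL query and land the result in a destination table.
    BigqueryQueryJobOp(
        project=project,
        location='US',
        query='SELECT * FROM `my_dataset.source_table` LIMIT 1000',
        job_configuration_query={
            'destinationTable': {
                'projectId': 'my-project',
                'datasetId': 'my_dataset',
                'tableId': 'query_results',
            },
        },
    )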
# See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Run KFP components as [Vertex AI Custom Training Jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) with customized worker and cloud configurations.""" +# fmt: on from google_cloud_pipeline_components.v1.custom_job.component import custom_training_job as CustomTrainingJobOp from google_cloud_pipeline_components.v1.custom_job.utils import create_custom_training_job_from_component diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py index 25c0b259f3..5134a5e4d3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/component.py @@ -39,58 +39,22 @@ def custom_training_job( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Launch a Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - - See [Create custom training jobs - ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for - more information. + """Launch a Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: - location: Location for creating the custom training job. - If not set, default to us-central1. + location: Location for creating the custom training job. If not set, default to us-central1. display_name: The name of the CustomJob. - worker_pool_specs: Serialized json spec of the - worker pools including machine type and Docker image. All worker pools - except the first one are optional and can be skipped by providing an - empty value. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#WorkerPoolSpec). - timeout: The maximum job running time. The default is 7 days. A duration in - seconds with up to nine fractional digits, terminated by 's', for example: - "3.5s". - restart_job_on_worker_restart: Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by distributed training jobs that - are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as - account. The [service account - ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - running the pipeline submitting jobs must have act-as permission on this - run-as account. If unspecified, the Vertex AI Custom Code [Service Agent - ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - tensorboard: The name of a Vertex AI Tensorboard resource to which this - CustomJob will upload Tensorboard logs. - enable_web_access: Whether you want Vertex AI to enable [interactive shell - access - ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) - to training containers. 
If `True`, you can access interactive shells at - the URIs given by [CustomJob.web_access_uris][]. - network: The full name of the Compute Engine network to which the job should - be peered. For example, `projects/12345/global/networks/myVPC`. Format - is of the form `projects/{project}/global/networks/{network}`. Where - `{project}` is a project number, as in `12345`, and `{network}` is a - network name. Private services access must already be configured for the - network. If left unspecified, the job is not peered with any network. - reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC - network that can be used for this job. If set, we will deploy the job - within the provided IP ranges. Otherwise, the job will be deployed to any - IP ranges under the provided VPC network. - base_output_directory: The Cloud Storage location to store the output of - this CustomJob or HyperparameterTuningJob. See [more information - ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). - labels: The labels with user-defined metadata to organize the CustomJob. See - [more information](https://goo.gl/xmQnxf). - encryption_spec_key_name: Customer-managed encryption key options for the - CustomJob. If this is set, then all resources created by the CustomJob - will be encrypted with the provided encryption key. + worker_pool_specs: Serialized json spec of the worker pools including machine type and Docker image. All worker pools except the first one are optional and can be skipped by providing an empty value. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#WorkerPoolSpec). + timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". + restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. + service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. 
Otherwise, the job will be deployed to any IP ranges under the provided VPC network. + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). + encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index cea7f018a8..4faba30c68 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -33,8 +33,7 @@ def _replace_executor_placeholder( Args: container_input: Container command or args. - Returns: - container_input with executor placeholder replaced. + Returns: container_input with executor placeholder replaced. """ # Executor replacement is used as executor content needs to be jsonified before # injection into the payload, since payload is already a JSON serialized string. @@ -70,98 +69,36 @@ def create_custom_training_job_from_component( base_output_directory: str = '', labels: Optional[Dict[str, str]] = None, ) -> Callable: + # fmt: off """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - This utility converts a [KFP component - ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) - provided to `component_spec` into `CustomTrainingJobOp` component. Your - components inputs, outputs, and logic are carried over, with additional - [CustomJob - ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) - parameters exposed. - - Note that this utility constructs a ClusterSpec where the master and all the - workers use the same spec, meaning all disk/machine spec related parameters - will apply to all replicas. This is suitable for uses cases such as executing - a training component over multiple replicas with [MultiWorkerMirroredStrategy - ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) - or [MirroredStrategy - ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). - - See [Create custom training jobs - ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for - more information. + This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. 
This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: - component_spec: A KFP component. - display_name: The name of the CustomJob. If not provided the component's - name will be used instead. - replica_count: The count of instances in the cluster. One replica always - counts towards the master in worker_pool_spec[0] and the remaining - replicas will be allocated in worker_pool_spec[1]. See [more information. - ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) - machine_type: The type of the machine to run the CustomJob. The default - value is "n1-standard-4". See [more information - ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). - accelerator_type: The type of accelerator(s) that may be attached to the - machine per `accelerator_count`. See [more information - ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). - accelerator_count: The number of accelerators to attach to the machine. - Defaults to 1 if `accelerator_type` is set. - boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: - "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent - Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot - be changed as a pipeline parameter. - boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). - `boot_disk_size_gb` is set as a static value and cannot be changed as a - pipeline parameter. - timeout: The maximum job running time. The default is 7 days. A duration in - seconds with up to nine fractional digits, terminated by 's', for - example: "3.5s". - restart_job_on_worker_restart: Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by distributed training jobs - that are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as - account. The [service account - ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) - running the pipeline submitting jobs must have act-as permission on this - run-as account. If unspecified, the Vertex AI Custom Code [Service Agent - ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) - for the CustomJob's project. - network: The full name of the Compute Engine network to which the job - should be peered. For example, `projects/12345/global/networks/myVPC`. - Format is of the form `projects/{project}/global/networks/{network}`. - Where `{project}` is a project number, as in `12345`, and `{network}` is - a network name. Private services access must already be configured for - the network. If left unspecified, the job is not peered with any network. - encryption_spec_key_name: Customer-managed encryption key options for the - CustomJob. If this is set, then all resources created by the CustomJob - will be encrypted with the provided encryption key. - tensorboard: The name of a Vertex AI TensorBoard resource to which this - CustomJob will upload TensorBoard logs. 
- enable_web_access: Whether you want Vertex AI to enable [interactive shell - access - ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) - to training containers. If `True`, you can access interactive shells at - the URIs given by [CustomJob.web_access_uris][]. - reserved_ip_ranges: A list of names for the reserved IP ranges under the - VPC network that can be used for this job. If set, we will deploy the job - within the provided IP ranges. Otherwise, the job will be deployed to any - IP ranges under the provided VPC network. - nfs_mounts: A list of [NfsMount - ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) - resource specs in Json dict format. For more details about mounting NFS - for CustomJob, see [Mount an NFS share for custom training - ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). - base_output_directory: The Cloud Storage location to store the output of - this CustomJob or HyperparameterTuningJob. See [more information - ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). - labels: The labels with user-defined metadata to organize the CustomJob. - See [more information](https://goo.gl/xmQnxf). + component_spec: A KFP component. + display_name: The name of the CustomJob. If not provided the component's name will be used instead. + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. + boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. + boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. + timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". + restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. + service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. 
+ network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. + tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. + nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). Returns: - A KFP component with CustomJob specification applied. + A KFP component with CustomJob specification applied. """ + # fmt: on # This function constructs a Custom Job component based on the input # component, by performing a 3-way merge of the inputs/outputs of the # input component, the Custom Job component and the arguments given to this @@ -330,13 +267,6 @@ def create_custom_training_job_op_from_component(*args, **kwargs) -> Callable: """Deprecated. Please use create_custom_training_job_from_component instead. - - Args: - *args: Positional arguments for create_custom_training_job_from_component. - **kwargs: Keyword arguments for create_custom_training_job_from_component. - - Returns: - A KFP component with CustomJob features applied. """ warnings.warn( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py index 9b766731b5..9a57623884 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
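# Illustrative usage sketch (not from this patch) for the
# create_custom_training_job_from_component utility documented above: wrap a
# lightweight KFP component so it runs as a Vertex AI CustomJob. The component
# body, machine shape, and accelerator choice are placeholders.
from kfp import dsl
from google_cloud_pipeline_components.v1.custom_job import (
    create_custom_training_job_from_component,
)


@dsl.component
def train(epochs: int) -> str:
    # Stand-in training step; a real component would fit and export a model here.
    return f'trained for {epochs} epochs'


# Build a CustomJob-backed version of the component with a single GPU worker.
custom_train = create_custom_training_job_from_component(
    train,
    display_name='custom-train-sketch',
    replica_count=1,
    machine_type='n1-standard-8',
    accelerator_type='NVIDIA_TESLA_T4',
    accelerator_count=1,
)


@dsl.pipeline(name='custom-training-sketch')
def training_pipeline(epochs: int = 10):
    # The wrapped component keeps the original inputs and also exposes CustomJob
    # parameters such as project and location with sensible defaults.
    custom_train(epochs=epochs)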
+# fmt: off """Create [Google Cloud Dataflow](https://cloud.google.com/dataflow) jobs from within Vertex AI Pipelines.""" +# fmt: on from google_cloud_pipeline_components.v1.dataflow.python_job.component import dataflow_python as DataflowPythonJobOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py index 40d213cb2f..5f965d8814 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/python_job/component.py @@ -35,19 +35,15 @@ def dataflow_python( Dataflow Runner. Args: - location: Location of the Dataflow job. If not set, defaults to - `'us-central1'`. + location: Location of the Dataflow job. If not set, defaults to `'us-central1'`. python_module_path: The GCS path to the Python file to run. - temp_location: A GCS path for Dataflow to stage temporary job - files created during the execution of the pipeline. + temp_location: A GCS path for Dataflow to stage temporary job files created during the execution of the pipeline. requirements_file_path: The GCS path to the pip requirements file. - args: The list of args to pass to the Python file. Can include additional - parameters for the Dataflow Runner. + args: The list of args to pass to the Python file. Can include additional parameters for the Dataflow Runner. project: Project to create the Dataflow job. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py index c23660af80..c739fb403a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
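# Illustrative usage sketch (not from this patch) for the DataflowPythonJobOp
# alias imported in the dataflow __init__ above; the GCS paths and Beam pipeline
# args are placeholders.
from kfp import dsl
from google_cloud_pipeline_components.v1.dataflow import DataflowPythonJobOp


@dsl.pipeline(name='dataflow-python-sketch')
def dataflow_pipeline(project: str = 'my-project'):
    # Launch an Apache Beam module stored on GCS on the Dataflow runner.
    DataflowPythonJobOp(
        project=project,
        location='us-central1',
        python_module_path='gs://my-bucket/beam/wordcount.py',
        temp_location='gs://my-bucket/tmp',
        requirements_file_path='gs://my-bucket/beam/requirements.txt',
        args=['--output', 'gs://my-bucket/wordcount/output'],
    )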
+# fmt: off """Create [Google Cloud Dataproc](https://cloud.google.com/dataproc) jobs from within Vertex AI Pipelines.""" +# fmt: on from google_cloud_pipeline_components.v1.dataproc.create_pyspark_batch.component import dataproc_create_pyspark_batch as DataprocPySparkBatchOp from google_cloud_pipeline_components.v1.dataproc.create_spark_batch.component import dataproc_create_spark_batch as DataprocSparkBatchOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py index 5276ea785c..73efc04c23 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_pyspark_batch/component.py @@ -50,55 +50,29 @@ def dataproc_create_pyspark_batch( """Create a Dataproc PySpark batch workload and wait for it to finish. Args: - location: Location of the Dataproc batch workload. If - not set, defaults to `"us-central1"`. - batch_id: The ID to use for the batch, which will become - the final component of the batch's resource name. If none is - specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. - labels: The labels to associate with this batch. Label - keys must contain 1 to 63 characters, and must conform to RFC 1035. - Label values may be empty, but, if present, must contain 1 to 63 - characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of `"key": - value` pairs. - Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. - container_image: Optional custom container image for the - job runtime environment. If not specified, a default container image - will be used. + location: Location of the Dataproc batch workload. If not set, defaults to `"us-central1"`. + batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. + labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can be associated with a batch. An object containing a list of `"key": value` pairs. Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. + container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. runtime_config_version: Version of the batch runtime. runtime_config_properties: Runtime configuration for the workload. service_account: Service account that is used to execute the workload. - network_tags: Tags used for network traffic - control. + network_tags: Tags used for network traffic control. kms_key: The Cloud KMS key to use for encryption. network_uri: Network URI to connect workload to. subnetwork_uri: Subnetwork URI to connect workload to. - metastore_service: Resource name of an existing Dataproc - Metastore service. - spark_history_dataproc_cluster: The Spark History Server - configuration for the workload. 
- main_python_file_uri: The HCFS URI of the main Python - file to use as the Spark driver. Must be a `.py` file. - python_file_uris: HCFS file URIs of Python files to - pass to the PySpark framework. Supported file types: `.py`, `.egg`, - and `.zip`. - jar_file_uris: HCFS URIs of jar files to add to the - classpath of the Spark driver and tasks. - file_uris: HCFS URIs of files to be placed in the - working directory of each executor. - archive_uris: HCFS URIs of archives to be extracted - into the working directory of each executor. Supported file types: - `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. - args: The arguments to pass to the driver. Do not - include arguments that can be set as batch properties, such as - `--conf`, since a collision can occur that causes an incorrect batch - submission. + metastore_service: Resource name of an existing Dataproc Metastore service. + spark_history_dataproc_cluster: The Spark History Server configuration for the workload. + main_python_file_uri: The HCFS URI of the main Python file to use as the Spark driver. Must be a `.py` file. + python_file_uris: HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: `.py`, `.egg`, and `.zip`. + jar_file_uris: HCFS URIs of jar files to add to the classpath of the Spark driver and tasks. + file_uris: HCFS URIs of files to be placed in the working directory of each executor. + archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. + args: The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py index ab6a860969..7d705247d5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_batch/component.py @@ -50,55 +50,29 @@ def dataproc_create_spark_batch( """Create a Dataproc Spark batch workload and wait for it to finish. Args: - location: Location of the Dataproc batch workload. If - not set, defaults to `"us-central1"`. - batch_id: The ID to use for the batch, which will become - the final component of the batch's resource name. If none is - specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. - labels: The labels to associate with this batch. Label - keys must contain 1 to 63 characters, and must conform to RFC 1035. 
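# Illustrative usage sketch (not from this patch) for the PySpark batch component
# documented above, using the DataprocPySparkBatchOp alias imported in the
# dataproc __init__ above; the bucket paths and batch id are placeholders.
from kfp import dsl
from google_cloud_pipeline_components.v1.dataproc import DataprocPySparkBatchOp


@dsl.pipeline(name='dataproc-pyspark-sketch')
def dataproc_pipeline(project: str = 'my-project'):
    # Submit a serverless PySpark batch workload and wait for it to finish.
    DataprocPySparkBatchOp(
        project=project,
        location='us-central1',
        batch_id='pyspark-batch-sketch-001',
        main_python_file_uri='gs://my-bucket/spark/job.py',
        args=['--input', 'gs://my-bucket/data/input.csv'],
    )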
- Label values may be empty, but, if present, must contain 1 to 63 - characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of `"key": - value` pairs. - Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. - container_image: Optional custom container image for the - job runtime environment. If not specified, a default container image - will be used. + location: Location of the Dataproc batch workload. If not set, defaults to `"us-central1"`. + batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. + labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can be associated with a batch. An object containing a list of `"key": value` pairs. Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. + container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. runtime_config_version: Version of the batch runtime. runtime_config_properties: Runtime configuration for the workload. service_account: Service account that is used to execute the workload. - network_tags: Tags used for network traffic - control. + network_tags: Tags used for network traffic control. kms_key: The Cloud KMS key to use for encryption. network_uri: Network URI to connect workload to. subnetwork_uri: Subnetwork URI to connect workload to. - metastore_service: Resource name of an existing Dataproc - Metastore service. - spark_history_dataproc_cluster: The Spark History Server - configuration for the workload. - main_jar_file_uri: The HCFS URI of the jar file that - contains the main class. - main_class: The name of the driver main class. The jar - file that contains the class must be in the classpath or specified in - jar_file_uris. - jar_file_uris: HCFS URIs of jar files to add to the classpath of the Spark - driver and tasks. - file_uris: HCFS URIs of files to be placed in the working directory of - each executor. - archive_uris: HCFS URIs of archives to be extracted into the working - directory of each executor. Supported file types: - `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. - args: The arguments to pass to the driver. Do not - include arguments that can be set as batch properties, such as - `--conf`, since a collision can occur that causes an incorrect batch - submission. + metastore_service: Resource name of an existing Dataproc Metastore service. + spark_history_dataproc_cluster: The Spark History Server configuration for the workload. + main_jar_file_uri: The HCFS URI of the jar file that contains the main class. + main_class: The name of the driver main class. The jar file that contains the class must be in the classpath or specified in jar_file_uris. + jar_file_uris: HCFS URIs of jar files to add to the classpath of the Spark driver and tasks. + file_uris: HCFS URIs of files to be placed in the working directory of each executor. + archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. + args: The arguments to pass to the driver. 
Do not include arguments that can be set as batch properties, such as `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py index 811ba5cc8e..8ed58fd66d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_r_batch/component.py @@ -48,22 +48,10 @@ def dataproc_create_spark_r_batch( """Create a Dataproc SparkR batch workload and wait for it to finish. Args: - location: Location of the Dataproc batch workload. If not set, defaults to - `"us-central1"`. - batch_id: The ID to use for the batch, which will become - the final component of the batch's resource name. If none is - specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. - labels: The labels to associate with this batch. Label - keys must contain 1 to 63 characters, and must conform to RFC 1035. - Label values may be empty, but, if present, must contain 1 to 63 - characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of `"key": - value` pairs. - Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. - container_image: Optional custom container image for the - job runtime environment. If not specified, a default container image - will be used. + location: Location of the Dataproc batch workload. If not set, defaults to `"us-central1"`. + batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`. + labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can be associated with a batch. An object containing a list of `"key": value` pairs. Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. + container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. runtime_config_version: Version of the batch runtime. runtime_config_properties: Runtime configuration for the workload. service_account: Service account that is used to execute the workload. @@ -71,26 +59,16 @@ def dataproc_create_spark_r_batch( kms_key: The Cloud KMS key to use for encryption. network_uri: Network URI to connect workload to. 
subnetwork_uri: Subnetwork URI to connect workload to. - metastore_service: Resource name of an existing Dataproc Metastore - service. - spark_history_dataproc_cluster: The Spark History Server configuration for - the workload. - main_r_file_uri: The HCFS URI of the main R file to use as the driver. - Must be a `.R` or `.r` file. - file_uris: HCFS URIs of files to be placed in the working directory of - each executor. - archive_uris: HCFS URIs of archives to be extracted into the working - directory of each executor. Supported file types: - `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. - args: The arguments to pass to the driver. Do not - include arguments that can be set as batch properties, such as - `--conf`, since a collision can occur that causes an incorrect batch - submission. + metastore_service: Resource name of an existing Dataproc Metastore service. + spark_history_dataproc_cluster: The Spark History Server configuration for the workload. + main_r_file_uri: The HCFS URI of the main R file to use as the driver. Must be a `.R` or `.r` file. + file_uris: HCFS URIs of files to be placed in the working directory of each executor. + archive_uris: HCFS URIs of archives to be extracted into the working directory of each executor. Supported file types: `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`. + args: The arguments to pass to the driver. Do not include arguments that can be set as batch properties, such as `--conf`, since a collision can occur that causes an incorrect batch submission. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py index 6a9120e024..76611cd64e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataproc/create_spark_sql_batch/component.py @@ -46,22 +46,10 @@ def dataproc_create_spark_sql_batch( """Create a Dataproc Spark SQL batch workload and wait for it to finish. Args: - location: Location of the Dataproc batch workload. If - not set, defaults to `"us-central1"`. - batch_id: The ID to use for the batch, which will become - the final component of the batch's resource name. If none is - specified, a default name will be generated by the component. This - value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - labels: The labels to associate with this batch. Label - keys must contain 1 to 63 characters, and must conform to RFC 1035. - Label values may be empty, but, if present, must contain 1 to 63 - characters, and must conform to RFC 1035. No more than 32 labels can - be associated with a batch. An object containing a list of `"key": - value` pairs. - Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. 
- container_image: Optional custom container image for the - job runtime environment. If not specified, a default container image - will be used. + location: Location of the Dataproc batch workload. If not set, defaults to `"us-central1"`. + batch_id: The ID to use for the batch, which will become the final component of the batch's resource name. If none is specified, a default name will be generated by the component. This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. + labels: The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035. Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035. No more than 32 labels can be associated with a batch. An object containing a list of `"key": value` pairs. Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. + container_image: Optional custom container image for the job runtime environment. If not specified, a default container image will be used. runtime_config_version: Version of the batch runtime. runtime_config_properties: Runtime configuration for the workload. service_account: Service account that is used to execute the workload. @@ -69,23 +57,15 @@ def dataproc_create_spark_sql_batch( kms_key: The Cloud KMS key to use for encryption. network_uri: Network URI to connect workload to. subnetwork_uri: Subnetwork URI to connect workload to. - metastore_service: Resource name of an existing Dataproc Metastore - service. - spark_history_dataproc_cluster: The Spark History Server configuration for - the workload. - query_file_uri: The HCFS URI of the script that contains Spark SQL queries - to execute. - query_variables: Mapping of query variable names to values (equivalent to - the Spark SQL command: `SET name="value";`). An object containing a - list of `"key": value` pairs. - Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. - jar_file_uris: HCFS URIs of jar files to be added to the Spark - `CLASSPATH`. + metastore_service: Resource name of an existing Dataproc Metastore service. + spark_history_dataproc_cluster: The Spark History Server configuration for the workload. + query_file_uri: The HCFS URI of the script that contains Spark SQL queries to execute. + query_variables: Mapping of query variable names to values (equivalent to the Spark SQL command: `SET name="value";`). An object containing a list of `"key": value` pairs. Example: `{ "name": "wrench", "mass": "1.3kg", "count": "3" }`. + jar_file_uris: HCFS URIs of jar files to be added to the Spark `CLASSPATH`. project: Project to run the Dataproc batch workload. Defaults to the project in which the PipelineJob is run. Returns: - gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
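As a point of reference for readers of this patch, here is a minimal, hypothetical usage sketch of the Spark SQL batch component whose docstring is reformatted above. It imports the component function from the module path shown in the diff header; the batch ID, query file URI, and query variables are illustrative placeholders rather than values taken from this repository.

from kfp import dsl
from google_cloud_pipeline_components.v1.dataproc.create_spark_sql_batch.component import (
    dataproc_create_spark_sql_batch,
)


@dsl.pipeline(name="dataproc-spark-sql-batch-example")
def spark_sql_batch_pipeline(project: str, location: str = "us-central1"):
    # Submit a Spark SQL script as a Dataproc serverless batch workload.
    dataproc_create_spark_sql_batch(
        project=project,
        location=location,
        batch_id="daily-report-0001",  # illustrative; must match /[a-z][0-9]-/
        query_file_uri="gs://my-bucket/sql/daily_report.sql",  # illustrative
        query_variables={"run_date": "2023-06-21"},  # equivalent to SET run_date="2023-06-21";
    )

In practice, the aliased name re-exported by the package __init__ (rather than the deep module path used here) may be the more convenient import.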
""" # fmt: on return ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py index 852ad1bb81..5bb4c1a7c0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Manage datasets via [Vertex AI Datasets](https://cloud.google.com/vertex-ai/docs/training/using-managed-datasets).""" +# fmt: on from google_cloud_pipeline_components.v1.dataset.create_image_dataset.component import image_dataset_create as ImageDatasetCreateOp from google_cloud_pipeline_components.v1.dataset.create_tabular_dataset.component import tabular_dataset_create as TabularDatasetCreateOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py index fcce18eb33..b1bef2a3fa 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_image_dataset/component.py @@ -34,54 +34,16 @@ def image_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new image [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when - `source` and `import_schema_uri` are passed. + """Creates a new image [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when `source` and `import_schema_uri` are passed. Args: - display_name: The user-defined name of the Dataset. - The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. - import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). - data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, e.g. jsonl file. + display_name: The user-defined name of the Dataset. 
The name can be up to 128 characters long and can be consist of any UTF-8 characters. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, e.g. jsonl file. location: Optional location to retrieve Dataset from. - labels: Labels with user-defined metadata to organize your Tensorboards. - Label keys and values can be no longer than 64 characters - (Unicode codepoints), can only contain lowercase letters, numeric - characters, underscores and dashes. International characters are allowed. - No more than 64 user labels can be associated with one Tensorboard - (System labels are excluded). - See https://goo.gl/xmQnxf for more information and examples of labels. - System reserved label keys are prefixed with "aiplatform.googleapis.com/" - and are immutable. - encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the Dataset. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides `encryption_spec_key_name` set in `aiplatform.init`. + labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. No more than 64 user labels can be associated with one Tensorboard (System labels are excluded). See https://goo.gl/xmQnxf for more information and examples of labels. System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. + encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
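Since these arguments are easiest to read in context, here is a short, hypothetical sketch of the image Dataset creation component inside a KFP pipeline, using the ImageDatasetCreateOp alias exported by the dataset package __init__ earlier in this patch. The display name, GCS source, and import schema URI are illustrative placeholders.

from kfp import dsl
from google_cloud_pipeline_components.v1.dataset import ImageDatasetCreateOp


@dsl.pipeline(name="image-dataset-create-example")
def image_dataset_pipeline(project: str, location: str = "us-central1"):
    # Create an image Dataset and import labeled data in one step.
    ImageDatasetCreateOp(
        project=project,
        location=location,
        display_name="flowers",  # illustrative
        gcs_source="gs://my-bucket/flowers/import.csv",  # illustrative
        # Illustrative schema for single-label image classification; verify the
        # exact URI against the current Vertex AI schema catalog.
        import_schema_uri="gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_single_label_io_format_1.0.0.yaml",
    )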
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py index c5e901ba69..4dcc1e7513 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_tabular_dataset/component.py @@ -37,34 +37,12 @@ def tabular_dataset_create( """Creates a new tabular [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). Args: - display_name: The user-defined name of the Dataset. - The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. + display_name: The user-defined name of the Dataset. The name can be up to 128 characters long and can be consist of any UTF-8 characters. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. bq_source: BigQuery URI to the input table. For example, "bq://project.dataset.table_name". location: Optional location to retrieve Dataset from. - labels: Labels with user-defined metadata to organize your Tensorboards. - Label keys and values can be no longer than 64 characters - (Unicode codepoints), can only contain lowercase letters, numeric - characters, underscores and dashes. International characters are allowed. - No more than 64 user labels can be associated with one Tensorboard - (System labels are excluded). - See https://goo.gl/xmQnxf for more information and examples of labels. - System reserved label keys are prefixed with "aiplatform.googleapis.com/" - and are immutable. - encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the Dataset. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides `encryption_spec_key_name` set in `aiplatform.init`. + labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. No more than 64 user labels can be associated with one Tensorboard (System labels are excluded). See https://goo.gl/xmQnxf for more information and examples of labels. System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. + encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. 
If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py index a466396a28..4819d39b9e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_text_dataset/component.py @@ -35,54 +35,18 @@ def text_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new text [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when - `source` and `import_schema_uri` are passed. + """Creates a new text [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when `source` and `import_schema_uri` are passed. Args: - display_name: The user-defined name of the Dataset. - The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. - import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). - data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, e.g. jsonl file. + display_name: The user-defined name of the Dataset. The name can be up to 128 characters long and can be consist of any UTF-8 characters. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems.
If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, e.g. jsonl file. location: Optional location to retrieve Dataset from. - labels: Labels with user-defined metadata to organize your Tensorboards. - Label keys and values can be no longer than 64 characters - (Unicode codepoints), can only contain lowercase letters, numeric - characters, underscores and dashes. International characters are allowed. - No more than 64 user labels can be associated with one Tensorboard - (System labels are excluded). - See https://goo.gl/xmQnxf for more information and examples of labels. - System reserved label keys are prefixed with "aiplatform.googleapis.com/" - and are immutable. - encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the Dataset. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides `encryption_spec_key_name` set in `aiplatform.init`. + labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. No more than 64 user labels can be associated with one Tensorboard (System labels are excluded). See https://goo.gl/xmQnxf for more information and examples of labels. System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. + encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py index 2e93a41c15..1c8fb70d97 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_time_series_dataset/component.py @@ -37,34 +37,12 @@ def time_series_dataset_create( """Creates a new time series [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets). 
Args: - display_name: The user-defined name of the Dataset. - The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. + display_name: The user-defined name of the Dataset. The name can be up to 128 characters long and can be consist of any UTF-8 characters. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. bq_source: BigQuery URI to the input table. For example, bq://project.dataset.table_name". location: Optional location to retrieve Dataset from. - labels: Labels with user-defined metadata to organize your Tensorboards. - Label keys and values can be no longer than 64 characters - (Unicode codepoints), can only contain lowercase letters, numeric - characters, underscores and dashes. International characters are allowed. - No more than 64 user labels can be associated with one Tensorboard - (System labels are excluded). - See https://goo.gl/xmQnxf for more information and examples of labels. - System reserved label keys are prefixed with "aiplatform.googleapis.com/" - and are immutable. - encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the dataset. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides `encryption_spec_key_name` set in `aiplatform.init`. + labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. No more than 64 user labels can be associated with one Tensorboard (System labels are excluded). See https://goo.gl/xmQnxf for more information and examples of labels. System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. + encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
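For the time series variant, a comparable hypothetical sketch that imports the component function from the module path in the diff header; the BigQuery table reference is an illustrative placeholder.

from kfp import dsl
from google_cloud_pipeline_components.v1.dataset.create_time_series_dataset.component import (
    time_series_dataset_create,
)


@dsl.pipeline(name="time-series-dataset-create-example")
def time_series_dataset_pipeline(project: str, location: str = "us-central1"):
    # Create a time series Dataset backed by a BigQuery table.
    time_series_dataset_create(
        project=project,
        location=location,
        display_name="sales-history",  # illustrative
        bq_source="bq://my-project.sales.history",  # illustrative
    )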
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py index 78c10227d4..4789d9aa8d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/create_video_dataset/component.py @@ -35,55 +35,16 @@ def video_dataset_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a new video [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when - `source` and `import_schema_uri` are passed. + """Creates a new video [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) and optionally imports data into Dataset when `source` and `import_schema_uri` are passed. Args: - display_name: The user-defined name of the Dataset. - The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. - import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema - Object](https://tinyurl.com/y538mdwt). - data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, + display_name: The user-defined name of the Dataset. The name can be up to 128 characters long and can be consist of any UTF-8 characters. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, `"gs://bucket/file.csv"` or `["gs://bucket/file1.csv", "gs://bucket/file2.csv"]`. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. 
If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, location: Optional location to retrieve Dataset from. - labels: Labels with user-defined metadata to organize your Tensorboards. - Label keys and values can be no longer than 64 characters - (Unicode codepoints), can only contain lowercase letters, numeric - characters, underscores and dashes. International characters are allowed. - No more than 64 user labels can be associated with one Tensorboard - (System labels are excluded). - See https://goo.gl/xmQnxf for more information and examples of labels. - System reserved label keys are prefixed with "aiplatform.googleapis.com/" - and are immutable. - encryption_spec_key_name: The Cloud KMS resource identifier of the customer - managed encryption key used to protect the Dataset. Has the - form: - `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute - resource is created. - If set, this Dataset and all sub-resources of this Dataset will be secured by this key. - Overrides `encryption_spec_key_name` set in `aiplatform.init`. + labels: Labels with user-defined metadata to organize your Tensorboards. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. No more than 64 user labels can be associated with one Tensorboard (System labels are excluded). See https://goo.gl/xmQnxf for more information and examples of labels. System reserved label keys are prefixed with "aiplatform.googleapis.com/" and are immutable. + encryption_spec_key_name: The Cloud KMS resource identifier of the customer managed encryption key used to protect the Dataset. Has the form: `projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. Overrides `encryption_spec_key_name` set in `aiplatform.init`. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py index f109013786..f948067f64 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_image_dataset/component.py @@ -34,18 +34,7 @@ def image_dataset_export( """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: - output_dir: The Google Cloud Storage location where the output is to - be written to. In the given directory a new directory will be - created with name: - `export-data--` - where timestamp is in YYYYMMDDHHMMSS format. All export - output will be written into that directory. 
Inside that - directory, annotations with the same schema will be grouped - into sub directories which are named with the corresponding - annotations' schema title. Inside these sub directories, a - schema.yaml will be created to describe the output format. - If the uri doesn't end with '/', a '/' will be automatically - appended. The directory is created if it doesn't exist. + output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped into sub directories which are named with the corresponding annotations' schema title. Inside these sub directories, a schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. location: Optional location to retrieve Dataset from. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py index 68ec1b5bcd..aa33a38150 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_tabular_dataset/component.py @@ -34,18 +34,7 @@ def tabular_dataset_export( """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: - output_dir: The Google Cloud Storage location where the output is to - be written to. In the given directory a new directory will be - created with name: - `export-data--` - where timestamp is in YYYYMMDDHHMMSS format. All export - output will be written into that directory. Inside that - directory, annotations with the same schema will be grouped - into sub directories which are named with the corresponding - annotations' schema title. Inside these sub directories, a - schema.yaml will be created to describe the output format. - If the uri doesn't end with '/', a '/' will be automatically - appended. The directory is created if it doesn't exist. + output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped into sub directories which are named with the corresponding annotations' schema title. Inside these sub directories, a schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. location: Optional location to retrieve Dataset from. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. 
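The export components are easiest to understand chained after a create step, so here is a hypothetical sketch for the tabular exporter reformatted above. It assumes the create component exposes the resulting Dataset artifact under the output key "dataset" and that the exporter accepts it via a `dataset` parameter (neither is spelled out in the docstring fragments touched by this patch); the BigQuery source and output directory are illustrative placeholders.

from kfp import dsl
from google_cloud_pipeline_components.v1.dataset import TabularDatasetCreateOp
from google_cloud_pipeline_components.v1.dataset.export_tabular_dataset.component import (
    tabular_dataset_export,
)


@dsl.pipeline(name="tabular-dataset-export-example")
def tabular_dataset_export_pipeline(project: str, location: str = "us-central1"):
    create_task = TabularDatasetCreateOp(
        project=project,
        location=location,
        display_name="transactions",  # illustrative
        bq_source="bq://my-project.billing.transactions",  # illustrative
    )
    # Assumed wiring: the Dataset artifact from the create step feeds the exporter.
    tabular_dataset_export(
        project=project,
        location=location,
        dataset=create_task.outputs["dataset"],  # assumed output key
        output_dir="gs://my-bucket/exports/",  # illustrative
    )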
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py index 0f78b4bbb8..e3a0e9956c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_text_dataset/component.py @@ -34,18 +34,7 @@ def text_dataset_export( """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: - output_dir: The Google Cloud Storage location where the output is to - be written to. In the given directory a new directory will be - created with name: - `export-data--` - where timestamp is in YYYYMMDDHHMMSS format. All export - output will be written into that directory. Inside that - directory, annotations with the same schema will be grouped - into sub directories which are named with the corresponding - annotations' schema title. Inside these sub directories, a - schema.yaml will be created to describe the output format. - If the uri doesn't end with '/', a '/' will be automatically - appended. The directory is created if it doesn't exist. + output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped into sub directories which are named with the corresponding annotations' schema title. Inside these sub directories, a schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. location: Optional location to retrieve Dataset from. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py index fd74cf451b..144770ddcb 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_time_series_dataset/component.py @@ -34,18 +34,7 @@ def time_series_dataset_export( """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: - output_dir: The Google Cloud Storage location where the output is to - be written to. In the given directory a new directory will be - created with name: - `export-data--` - where timestamp is in YYYYMMDDHHMMSS format. All export - output will be written into that directory. Inside that - directory, annotations with the same schema will be grouped - into sub directories which are named with the corresponding - annotations' schema title. Inside these sub directories, a - schema.yaml will be created to describe the output format. - If the uri doesn't end with '/', a '/' will be automatically - appended. The directory is created if it doesn't exist. + output_dir: The Google Cloud Storage location where the output is to be written to. 
In the given directory a new directory will be created with name: `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped into sub directories which are named with the corresponding annotations' schema title. Inside these sub directories, a schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. location: Optional location to retrieve Datasetfrom. project: Project to retrieve Datasetfrom. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py index abbd43daf3..4fb30c8e10 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/export_video_dataset/component.py @@ -34,18 +34,7 @@ def video_dataset_export( """Exports [Dataset](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.datasets) to a GCS output directory. Args: - output_dir: The Google Cloud Storage location where the output is to - be written to. In the given directory a new directory will be - created with name: - `export-data--` - where timestamp is in YYYYMMDDHHMMSS format. All export - output will be written into that directory. Inside that - directory, annotations with the same schema will be grouped - into sub directories which are named with the corresponding - annotations' schema title. Inside these sub directories, a - schema.yaml will be created to describe the output format. - If the uri doesn't end with '/', a '/' will be automatically - appended. The directory is created if it doesn't exist. + output_dir: The Google Cloud Storage location where the output is to be written to. In the given directory a new directory will be created with name: `export-data--` where timestamp is in YYYYMMDDHHMMSS format. All export output will be written into that directory. Inside that directory, annotations with the same schema will be grouped into sub directories which are named with the corresponding annotations' schema title. Inside these sub directories, a schema.yaml will be created to describe the output format. If the uri doesn't end with '/', a '/' will be automatically appended. The directory is created if it doesn't exist. location: Optional location to retrieve Dataset from. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py index 61dec5950f..cfe3c35715 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_image_dataset/component.py @@ -41,29 +41,9 @@ def image_dataset_import( Args: location: Optional location to retrieve Dataset from. dataset: The Dataset to be updated. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. 
For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. - import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). - data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, e.g. jsonl file. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py index fe7ea37320..c43497437c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_text_dataset/component.py @@ -40,31 +40,9 @@ def text_dataset_import( Args: location: Optional location to retrieve Datasetfrom. dataset: The Datasetto be updated. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. 
- import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema - Object](https://tinyurl.com/y538mdwt). - data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, - e.g. jsonl file. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py index fb6c275590..b6c5a83281 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataset/import_video_dataset/component.py @@ -41,31 +41,9 @@ def video_dataset_import( Args: location: Optional location to retrieve Dataset from. dataset: The Dataset to be updated. - gcs_source: - Google Cloud Storage URI(-s) to the - input file(s). May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. - For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. - import_schema_uri: Points to a YAML file stored on Google Cloud - Storage describing the import format. Validation will be - done against the schema. The schema is defined as an - [OpenAPI 3.0.2 Schema - Object](https://tinyurl.com/y538mdwt). 
- data_item_labels: Labels that will be applied to newly imported DataItems. If - an identical DataItem as one being imported already exists - in the Dataset, then these labels will be appended to these - of the already existing one, and if labels with identical - key is imported before, the old label value will be - overwritten. If two DataItems are identical in the same - import data operation, the labels will be combined and if - key collision happens in this case, one of the values will - be picked randomly. Two DataItems are considered identical - if their content bytes are identical (e.g. image bytes or - pdf bytes). These labels will be overridden by Annotation - labels specified inside index file refenced by - `import_schema_uri`, - e.g. jsonl file. + gcs_source: Google Cloud Storage URI(-s) to the input file(s). May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For example, "gs://bucket/file.csv" or ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]. + import_schema_uri: Points to a YAML file stored on Google Cloud Storage describing the import format. Validation will be done against the schema. The schema is defined as an [OpenAPI 3.0.2 Schema Object](https://tinyurl.com/y538mdwt). + data_item_labels: Labels that will be applied to newly imported DataItems. If an identical DataItem as one being imported already exists in the Dataset, then these labels will be appended to these of the already existing one, and if labels with identical key is imported before, the old label value will be overwritten. If two DataItems are identical in the same import data operation, the labels will be combined and if key collision happens in this case, one of the values will be picked randomly. Two DataItems are considered identical if their content bytes are identical (e.g. image bytes or pdf bytes). These labels will be overridden by Annotation labels specified inside index file refenced by `import_schema_uri`, e.g. jsonl file. project: Project to retrieve Dataset from. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py index 93a41c6bf1..ad76f4ebac 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
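To round out the dataset section, a hypothetical sketch of importing data into an existing Dataset with the image import component documented above. The `dataset` parameter is documented; the output key of the create step and all URIs are assumptions or placeholders.

from kfp import dsl
from google_cloud_pipeline_components.v1.dataset import ImageDatasetCreateOp
from google_cloud_pipeline_components.v1.dataset.import_image_dataset.component import (
    image_dataset_import,
)


@dsl.pipeline(name="image-dataset-import-example")
def image_dataset_import_pipeline(project: str, location: str = "us-central1"):
    # Create an empty image Dataset, then import labeled data into it.
    create_task = ImageDatasetCreateOp(
        project=project,
        location=location,
        display_name="flowers",  # illustrative
    )
    image_dataset_import(
        project=project,
        location=location,
        dataset=create_task.outputs["dataset"],  # assumed output key
        gcs_source="gs://my-bucket/flowers/import.csv",  # illustrative
        # Illustrative schema URI; verify against the Vertex AI schema catalog.
        import_schema_uri="gs://google-cloud-aiplatform/schema/dataset/ioformat/image_classification_single_label_io_format_1.0.0.yaml",
    )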
+# fmt: off """Manage model serving endpoints via [Vertex AI Endpoints](https://cloud.google.com/vertex-ai/docs/predictions/overview?_ga=2.161419069.-1686833729.1684288907#model_deployment).""" +# fmt: on from google_cloud_pipeline_components.v1.endpoint.create_endpoint.component import endpoint_create as EndpointCreateOp from google_cloud_pipeline_components.v1.endpoint.delete_endpoint.component import endpoint_delete as EndpointDeleteOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py index 7a827e4c4a..e25e24a996 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/create_endpoint/component.py @@ -37,38 +37,15 @@ def endpoint_create( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """[Creates](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) and waits for it to be ready. - - - See the [Endpoint create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) method for more information. + """[Creates](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) and waits for it to be ready. See the [Endpoint create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/create) method for more information. Args: - location: Location to create the Endpoint. If not set, - default to us-central1. - display_name: The user-defined name of the Endpoint. The - name can be up to 128 characters long and can be consist of any UTF-8 - characters. + location: Location to create the Endpoint. If not set, default to us-central1. + display_name: The user-defined name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description: The description of the Endpoint. - labels: The labels with user-defined metadata to - organize your Endpoints. Label keys and values can be no longer than - 64 characters (Unicode codepoints), can only contain lowercase - letters, numeric characters, underscores and dashes. International - characters are allowed. See https://goo.gl/xmQnxf for more - information and examples of labels. - encryption_spec_key_name: Customer-managed encryption - key spec for an Endpoint. If set, this Endpoint and all of this - Endoint's sub-resources will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource - is created. If set, this Endpoint and all sub-resources of this - Endpoint will be secured by this key. - network: The full name of the Google Compute Engine - network to which the Endpoint should be peered. Private services - access must already be configured for the network. If left - unspecified, the Endpoint is not peered with any network. - [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): - `projects/{project}/global/networks/{network}`. 
Where `{project}` is a - project number, as in `'12345'`, and `{network}` is network name. + labels: The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. + encryption_spec_key_name: Customer-managed encryption key spec for an Endpoint. If set, this Endpoint and all of this Endoint's sub-resources will be secured by this key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key. + network: The full name of the Google Compute Engine network to which the Endpoint should be peered. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert): `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `'12345'`, and `{network}` is network name. project: Project to create the Endpoint. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py index ca05eea9a0..6ee491e180 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/delete_endpoint/component.py @@ -25,9 +25,7 @@ def endpoint_delete( gcp_resources: dsl.OutputPath(str), ): # fmt: off - """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints). - - See the [Endpoint delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) method for more information. + """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) a Google Cloud Vertex [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints). See the [Endpoint delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/delete) method for more information. Args: endpoint: The Endpoint to be deleted. 
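
For reference, the endpoint components above compose naturally in a pipeline. A minimal sketch, assuming the `endpoint` output key exposed by EndpointCreateOp; the pipeline name and display name are illustrative only:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.endpoint import EndpointCreateOp, EndpointDeleteOp


    @dsl.pipeline(name='endpoint-lifecycle')
    def endpoint_lifecycle(project: str, location: str = 'us-central1'):
        # Create a Vertex AI Endpoint; display_name may be up to 128 UTF-8 characters.
        create_task = EndpointCreateOp(
            project=project,
            location=location,
            display_name='example-endpoint',
        )
        # Tear the Endpoint down once downstream work has finished.
        EndpointDeleteOp(endpoint=create_task.outputs['endpoint'])

The deploy and undeploy components described below can be chained between these two steps in the same way.
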
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py index 773ff9fe20..5cc5fa78f2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/deploy_model/component.py @@ -45,92 +45,25 @@ def model_deploy( explanation_parameters: Dict[str, str] = {}, ): # fmt: off - """[Deploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) a Google Cloud Vertex Model to an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) creating a - [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within it. - - See the [deploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) method for more information. + """[Deploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) a Google Cloud Vertex Model to an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints) creating a [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within it. See the [deploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/deployModel) method for more information. Args: model: The model to be deployed. - endpoint: The Endpoint to be deployed - to. - deployed_model_display_name: The display name of the - DeployedModel. If not provided upon creation, the Model's display_name - is used. - traffic_split: - A map from a DeployedModel's - ID to the percentage of this Endpoint's traffic that should be - forwarded to that DeployedModel. If this field is non-empty, then the - Endpoint's trafficSplit will be overwritten with it. To refer to the - ID of the just being deployed Model, a "0" should be used, and the - actual ID of the new DeployedModel will be filled in its place by this - method. The traffic percentage values must add up to 100. If this - field is empty, then the Endpoint's trafficSplit is not updated. - dedicated_resources_machine_type: The specification of a - single machine used by the prediction. This field is required if - `automatic_resources_min_replica_count` is not specified. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#dedicatedresources). - dedicated_resources_accelerator_type: Hardware - accelerator type. Must also set accelerator_count if used. See [available options](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType). This field is required if - `dedicated_resources_machine_type` is specified. - dedicated_resources_accelerator_count: The number of - accelerators to attach to a worker replica. - dedicated_resources_min_replica_count: The minimum - number of machine replicas this DeployedModel will be always deployed - on. This value must be greater than or equal to 1. If traffic against - the DeployedModel increases, it may dynamically be deployed onto more - replicas, and as traffic decreases, some of these extra replicas may - be freed. 
- dedicated_resources_max_replica_count: The maximum - number of replicas this deployed model may the larger value of - min_replica_count or 1 will be used. If value provided is smaller than - min_replica_count, it will automatically be increased to be - min_replica_count. The maximum number of replicas this deployed model - may be deployed on when the traffic against it increases. If requested - value is too large, the deployment will error, but if deployment - succeeds then the ability to scale the model to that many replicas is - guaranteed (barring service outages). If traffic against the deployed - model increases beyond what its replicas at maximum may handle, a - portion of the traffic will be dropped. If this value is not provided, - will use `dedicated_resources_min_replica_count` as the default value. - automatic_resources_min_replica_count: The minimum - number of replicas this DeployedModel will be always deployed on. If - traffic against it increases, it may dynamically be deployed onto more - replicas up to `automatic_resources_max_replica_count`, and as traffic - decreases, some of these extra replicas may be freed. If the requested - value is too large, the deployment will error. This field is required - if `dedicated_resources_machine_type` is not specified. - automatic_resources_max_replica_count: The maximum - number of replicas this DeployedModel may be deployed on when the - traffic against it increases. If the requested value is too large, the - deployment will error, but if deployment succeeds then the ability to - scale the model to that many replicas is guaranteed (barring service - outages). If traffic against the DeployedModel increases beyond what - its replicas at maximum may handle, a portion of the traffic will be - dropped. If this value is not provided, a no upper bound for scaling - under heavy traffic will be assume, though Vertex AI may be unable to - scale beyond certain replica number. - service_account: The service account that the - DeployedModel's container runs as. Specify the email address of the - service account. If this service account is not specified, the - container runs as a service account that doesn't have access to the - resource project. Users deploying the Model must have the - `iam.serviceAccounts.actAs` permission on this service account. - disable_container_logging: For custom-trained Models - and AutoML Tabular Models, the container of the DeployedModel - instances will send stderr and stdout streams to Stackdriver Logging - by default. Please note that the logs incur cost, which are subject to - Cloud Logging pricing. User can disable container logging by setting - this flag to true. - enable_access_logging: These logs are like standard - server access logs, containing information like timestamp and latency - for each prediction request. Note that Stackdriver logs may incur a - cost, especially if your project receives prediction requests at a - high queries per second rate (QPS). Estimate your costs before - enabling this option. - explanation_metadata: Metadata describing the Model's - input and output for explanation. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata). - explanation_parameters: Parameters that configure - explaining information of the Model's predictions. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata). + endpoint: The Endpoint to be deployed to. 
+ deployed_model_display_name: The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used.
+ traffic_split: A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If this field is non-empty, then the Endpoint's trafficSplit will be overwritten with it. To refer to the ID of the just being deployed Model, a "0" should be used, and the actual ID of the new DeployedModel will be filled in its place by this method. The traffic percentage values must add up to 100. If this field is empty, then the Endpoint's trafficSplit is not updated.
+ dedicated_resources_machine_type: The specification of a single machine used by the prediction. This field is required if `automatic_resources_min_replica_count` is not specified. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#dedicatedresources).
+ dedicated_resources_accelerator_type: Hardware accelerator type. Must also set accelerator_count if used. See [available options](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType). This field is required if `dedicated_resources_machine_type` is specified.
+ dedicated_resources_accelerator_count: The number of accelerators to attach to a worker replica.
+ dedicated_resources_min_replica_count: The minimum number of machine replicas this DeployedModel will always be deployed on. This value must be greater than or equal to 1. If traffic against the DeployedModel increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed.
+ dedicated_resources_max_replica_count: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the provided value is smaller than min_replica_count, it will automatically be increased to min_replica_count. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, `dedicated_resources_min_replica_count` will be used as the default value.
+ automatic_resources_min_replica_count: The minimum number of replicas this DeployedModel will always be deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas up to `automatic_resources_max_replica_count`, and as traffic decreases, some of these extra replicas may be freed. If the requested value is too large, the deployment will error. This field is required if `dedicated_resources_machine_type` is not specified.
+ automatic_resources_max_replica_count: The maximum number of replicas this DeployedModel may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the DeployedModel increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, no upper bound for scaling under heavy traffic will be assumed, though Vertex AI may be unable to scale beyond a certain number of replicas.
+ service_account: The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account.
+ disable_container_logging: For custom-trained Models and AutoML Tabular Models, the container of the DeployedModel instances will send stderr and stdout streams to Stackdriver Logging by default. Please note that the logs incur costs, which are subject to Cloud Logging pricing. Users can disable container logging by setting this flag to true.
+ enable_access_logging: These logs are like standard server access logs, containing information like timestamp and latency for each prediction request. Note that Stackdriver logs may incur a cost, especially if your project receives prediction requests at a high queries per second rate (QPS). Estimate your costs before enabling this option.
+ explanation_metadata: Metadata describing the Model's input and output for explanation. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata).
+ explanation_parameters: Parameters that configure explaining information of the Model's predictions. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata).

 Returns:
 gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the deploy Model's long-running operation.
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py
index 1461a4fda3..6c2876060f 100644
--- a/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py
+++ b/components/google-cloud/google_cloud_pipeline_components/v1/endpoint/undeploy_model/component.py
@@ -30,20 +30,12 @@ def model_undeploy(
    traffic_split: Dict[str, str] = {},
 ):
   # fmt: off
-  """[Undeploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) a Google Cloud Vertex [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints).
-
-  See the [undeploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) method for more information.
-
+  """[Undeploys](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) a Google Cloud Vertex [DeployedModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints#deployedmodel) within an [Endpoint](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints). See the [undeploy Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.endpoints/undeployModel) method for more information.

   Args:
     model: The model that was deployed to the Endpoint.
endpoint: The Endpoint for the DeployedModel to be undeployed from. - traffic_split: - If this field is provided, then the Endpoint's trafficSplit will be overwritten with it. - If last DeployedModel is being undeployed from the Endpoint, the - [Endpoint.traffic_split] will always end up empty when this call returns. - A DeployedModel will be successfully undeployed only if it doesn't have any traffic - assigned to it when this method executes, or if this field unassigns any traffic to it. + traffic_split: If this field is provided, then the Endpoint's trafficSplit will be overwritten with it. If last DeployedModel is being undeployed from the Endpoint, the [Endpoint.traffic_split] will always end up empty when this call returns. A DeployedModel will be successfully undeployed only if it doesn't have any traffic assigned to it when this method executes, or if this field unassigns any traffic to it. Returns: gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the undeploy Model's long-running operation. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py index f7e39641ef..5ef59e4403 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Compose [tabular data forecasting](https://cloud.google.com/vertex-ai/docs/tabular-data/forecasting/overview) pipelines.""" +# fmt: on from google_cloud_pipeline_components.v1.forecasting.prepare_data_for_train.component import prepare_data_for_train as ForecastingPrepareDataForTrainOp from google_cloud_pipeline_components.v1.forecasting.preprocess.component import forecasting_preprocessing as ForecastingPreprocessingOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/prepare_data_for_train/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/prepare_data_for_train/component.py index b926b852fb..2651515ad0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/prepare_data_for_train/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/prepare_data_for_train/component.py @@ -48,31 +48,23 @@ def prepare_data_for_train( AutoMLForecastingTrainingJobRunOp. Args: - input_tables: Serialized Json array that specifies - input BigQuery tables and specs. - preprocess_metadata: The output of - ForecastingPreprocessingOp that is a serialized dictionary with 2 fields: - processed_bigquery_table_uri and column_metadata. - model_feature_columns: Serialized list of column names - that will be used as input feature in the training step. If None, all - columns will be used in training. + input_tables: Serialized Json array that specifies input BigQuery tables and specs. + preprocess_metadata: The output of ForecastingPreprocessingOp that is a serialized dictionary with 2 fields: processed_bigquery_table_uri and column_metadata. + model_feature_columns: Serialized list of column names that will be used as input feature in the training step. 
If None, all columns will be used in training. Returns: - NamedTuple: - time_series_identifier_column: Name of the column that identifies the time series. - time_series_attribute_columns: Serialized column names that should be used as attribute columns. - available_at_forecast_columns: Serialized column names of columns that are available at forecast. - unavailable_at_forecast_columns: Serialized column names of columns that are unavailable at forecast. - column_transformations: Serialized transformations to apply to the input columns. - preprocess_bq_uri: The BigQuery table that saves the preprocessing result and will be - used as training input. - target_column: The name of the column values of which the Model is to predict. - time_column: Name of the column that identifies time order in the time series. - predefined_split_column: Name of the column that specifies an ML use of the row. - weight_column: Name of the column that should be used as the weight column. - data_granularity_unit: The data granularity unit. - data_granularity_count: The number of data granularity units between data points in the - training data. + time_series_identifier_column: Name of the column that identifies the time series. + time_series_attribute_columns: Serialized column names that should be used as attribute columns. + available_at_forecast_columns: Serialized column names of columns that are available at forecast. + unavailable_at_forecast_columns: Serialized column names of columns that are unavailable at forecast. + column_transformations: Serialized transformations to apply to the input columns. + preprocess_bq_uri: The BigQuery table that saves the preprocessing result and will be used as training input. + target_column: The name of the column values of which the Model is to predict. + time_column: Name of the column that identifies time order in the time series. + predefined_split_column: Name of the column that specifies an ML use of the row. + weight_column: Name of the column that should be used as the weight column. + data_granularity_unit: The data granularity unit. + data_granularity_count: The number of data granularity units between data points in the training data. """ # fmt: on # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/preprocess/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/preprocess/component.py index 3dc7705a3e..8fc49f6e9a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/preprocess/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/forecasting/preprocess/component.py @@ -37,8 +37,7 @@ def forecasting_preprocessing( Args: project: The GCP project id that runs the pipeline. input_tables: Serialized Json array that specifies input BigQuery tables and specs. - preprocessing_bigquery_dataset: Optional BigQuery dataset to save the preprocessing result BigQuery table. - If not present, a new dataset will be created by the component. + preprocessing_bigquery_dataset: Optional BigQuery dataset to save the preprocessing result BigQuery table. If not present, a new dataset will be created by the component. location: Optional location for the BigQuery data, default is US. 
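
As a rough illustration of how the two forecasting components chain together, a sketch follows. The table spec keys and the `preprocess_metadata` output name are assumptions for illustration, not part of this patch:

    import json

    from kfp import dsl
    from google_cloud_pipeline_components.v1.forecasting import (
        ForecastingPrepareDataForTrainOp,
        ForecastingPreprocessingOp,
    )

    # Input tables are passed as a serialized JSON array of table specs
    # (the keys below are illustrative only).
    INPUT_TABLES = json.dumps([{
        'bigquery_uri': 'bq://my-project.my_dataset.sales',
        'table_type': 'FORECASTING_PRIMARY',
    }])


    @dsl.pipeline(name='forecasting-data-prep')
    def forecasting_data_prep(project: str):
        preprocess_task = ForecastingPreprocessingOp(
            project=project,
            input_tables=INPUT_TABLES,
        )
        # 'preprocess_metadata' is the assumed output key of the preprocessing step.
        ForecastingPrepareDataForTrainOp(
            input_tables=INPUT_TABLES,
            preprocess_metadata=preprocess_task.outputs['preprocess_metadata'],
        )
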
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py index 49fd217295..6b527c15b1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/__init__.py @@ -11,8 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# fmt: off """Create [hyperparameter tuning jobs](https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning) via a [Vertex AI Custom Training Job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job).""" - +# fmt: on from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.component import hyperparameter_tuning_job as HyperparameterTuningJobRunOp from google_cloud_pipeline_components.v1.hyperparameter_tuning_job.utils import serialize_metrics diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py index 511bc5ccd6..34d0424239 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/component.py @@ -42,106 +42,42 @@ def hyperparameter_tuning_job( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """Creates a Vertex AI hyperparameter tuning job and waits for - it to complete. + """Creates a Vertex AI hyperparameter tuning job and waits for it to + complete. See [more information](https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning). Args: - display_name: The user-defined name of the - HyperparameterTuningJob. The name can be up to 128 characters long and - can be consist of any UTF-8 characters. - base_output_directory: The Cloud Storage location to - store the output of this HyperparameterTuningJob. The - base_output_directory of each child CustomJob backing a Trial is set - to a subdirectory with name as the trial id under its parent - HyperparameterTuningJob's `base_output_directory`. The following Vertex - AI environment variables will be passed to containers or Python - modules when this field is set: - * AIP_MODEL_DIR = `\/\/model\/` - * AIP_CHECKPOINT_DIR = `\/\/checkpoints\/` - * AIP_TENSORBOARD_LOG_DIR = `\/\/logs\/` - worker_pool_specs: The spec of the worker pools - including machine type and Docker image. All worker pools except the - first one are optional and can be skipped by providing an empty value. - study_spec_metrics: - List serialized from dictionary representing the metrics to optimize. The dictionary key is the metric_id, which is reported by your training job, and the - dictionary value is the optimization goal of the metric (`'minimize'` or - `'maximize'`). + display_name: The user-defined name of the HyperparameterTuningJob. The name can be up to 128 characters long and can be consist of any UTF-8 characters. + base_output_directory: The Cloud Storage location to store the output of this HyperparameterTuningJob. 
The base_output_directory of each child CustomJob backing a Trial is set to a subdirectory with name as the trial id under its parent HyperparameterTuningJob's `base_output_directory`. The following Vertex AI environment variables will be passed to containers or Python modules when this field is set: * AIP_MODEL_DIR = `\/\/model\/` * AIP_CHECKPOINT_DIR = `\/\/checkpoints\/` * AIP_TENSORBOARD_LOG_DIR = `\/\/logs\/` + worker_pool_specs: The spec of the worker pools including machine type and Docker image. All worker pools except the first one are optional and can be skipped by providing an empty value. + study_spec_metrics: List serialized from dictionary representing the metrics to optimize. The dictionary key is the metric_id, which is reported by your training job, and the dictionary value is the optimization goal of the metric (`'minimize'` or `'maximize'`). Example: - Example: - :: + metrics = hyperparameter_tuning_job.serialize_metrics({ 'loss': 'minimize', 'accuracy': 'maximize' }) - metrics = hyperparameter_tuning_job.serialize_metrics({ - 'loss': 'minimize', - 'accuracy': 'maximize' - }) + study_spec_parameters: List serialized from the parameter dictionary. The dictionary represents parameters to optimize. The dictionary key is the parameter_id, which is passed into your training job as a command line key word argument, and the dictionary value is the parameter specification of the metric. Example: - study_spec_parameters: List serialized from the - parameter dictionary. The dictionary represents parameters to - optimize. The dictionary key is the parameter_id, which is passed into - your training job as a command line key word argument, and the - dictionary value is the parameter specification of the metric. + from google.cloud.aiplatform import hyperparameter_tuning as hpt + from google_cloud_pipeline_components.v1 import hyperparameter_tuning_job + parameters = hyperparameter_tuning_job.serialize_parameters({ 'lr': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'), 'units': hpt.IntegerParameterSpec(min=4, max=128, scale='linear'), 'activation': hpt.CategoricalParameterSpec(values=['relu', 'selu']), 'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear') }) - :Example: - :: + Parameters specs should be subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. - from google.cloud.aiplatform import hyperparameter_tuning as hpt - from google_cloud_pipeline_components.v1 import hyperparameter_tuning_job - - parameters = hyperparameter_tuning_job.serialize_parameters({ - 'lr': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'), - 'units': hpt.IntegerParameterSpec(min=4, max=128, scale='linear'), - 'activation': hpt.CategoricalParameterSpec(values=['relu', 'selu']), - 'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear') - }) - - Parameters specs should be subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, - `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. max_trial_count: The desired total number of Trials. 
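
The serialization helpers and the component are typically used together. The sketch below reuses the docstring examples above; note that the aiplatform class is spelled `DiscreteParameterSpec` (the older docstring example misspells it), and the trainer image in `worker_pool_specs` is a hypothetical placeholder:

    from google.cloud.aiplatform import hyperparameter_tuning as hpt
    from google_cloud_pipeline_components.v1 import hyperparameter_tuning_job
    from kfp import dsl

    # Serialize the study spec as shown in the docstrings above.
    metrics = hyperparameter_tuning_job.serialize_metrics({'loss': 'minimize'})
    parameters = hyperparameter_tuning_job.serialize_parameters({
        'learning_rate': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'),
        'batch_size': hpt.DiscreteParameterSpec(values=[64, 128, 256], scale='linear'),
    })

    # Single-node trainer spec; the container image is an assumption.
    worker_pool_specs = [{
        'machine_spec': {'machine_type': 'n1-standard-4'},
        'replica_count': 1,
        'container_spec': {'image_uri': 'gcr.io/my-project/my-trainer:latest'},
    }]


    @dsl.pipeline(name='hparam-tuning')
    def tuning_pipeline(project: str, staging_dir: str):
        hyperparameter_tuning_job.HyperparameterTuningJobRunOp(
            display_name='example-tuning-job',
            project=project,
            base_output_directory=staging_dir,
            worker_pool_specs=worker_pool_specs,
            study_spec_metrics=metrics,
            study_spec_parameters=parameters,
            max_trial_count=8,
            parallel_trial_count=2,
        )
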
- parallel_trial_count: The desired number of Trials to - run in parallel. - max_failed_trial_count: The number of failed Trials that - need to be seen before failing the HyperparameterTuningJob. If set to - 0, Vertex AI decides how many Trials must fail before the whole job - fails. - location: Location to run the HyperparameterTuningJob - in, defaults to `'us-central1'`. - study_spec_algorithm: The search algorithm specified for - the Study. Accepts one of the following: - * `'ALGORITHM_UNSPECIFIED'` - If you do not specify an algorithm, your job uses the default Vertex AI algorithm. The default algorithm applies Bayesian optimization to arrive at the optimal solution with a more effective search over the parameter space. - * `'GRID_SEARCH'` - A simple grid search within the feasible space. This option is particularly useful if you want to specify a quantity of trials that is greater than the number of points in the feasible space. In such cases, if you do not specify a grid search, the Vertex AI default algorithm may generate duplicate suggestions. To use grid search, all parameter specs must be of type `IntegerParameterSpec`, `CategoricalParameterSpace`, or `DiscreteParameterSpec`. - * `'RANDOM_SEARCH'` - A simple random search within the feasible space. - study_spec_measurement_selection_type: This indicates - which measurement to use if/when the service automatically selects the - final measurement from previously reported intermediate measurements. - - Accepts: `'BEST_MEASUREMENT'` or `'LAST_MEASUREMENT'`. Choose this based on - two considerations: A) Do you expect your measurements to - monotonically improve? If so, choose `'LAST_MEASUREMENT'`. On the - other hand, if you're in a situation where your system can - "over-train" and you expect the performance to get better for a - while but then start declining, choose `'BEST_MEASUREMENT'`. B) Are - your measurements significantly noisy and/or irreproducible? If - so, `'BEST_MEASUREMENT'` will tend to be over-optimistic, and it may - be better to choose `'LAST_MEASUREMENT'`. If both or neither of (A) - and (B) apply, it doesn't matter which selection type is chosen. - encryption_spec_key_name: Customer-managed encryption - key options for a HyperparameterTuningJob. If this is set, then all - resources created by the HyperparameterTuningJob will be encrypted - with the provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource - is created. - service_account: Specifies the service account for - workload run-as account. Users submitting jobs must have act-as - permission on this run-as account. - network: The full name of the Compute Engine network to - which the job should be peered. For example, - `projects/12345/global/networks/myVPC`. Private services access must - already be configured for the network. If left unspecified, the job is - not peered with any network. + parallel_trial_count: The desired number of Trials to run in parallel. + max_failed_trial_count: The number of failed Trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides how many Trials must fail before the whole job fails. + location: Location to run the HyperparameterTuningJob in, defaults to `'us-central1'`. + study_spec_algorithm: The search algorithm specified for the Study. 
Accepts one of the following: + + * `'ALGORITHM_UNSPECIFIED'` - If you do not specify an algorithm, your job uses the default Vertex AI algorithm. The default algorithm applies Bayesian optimization to arrive at the optimal solution with a more effective search over the parameter space. + * `'GRID_SEARCH'` - A simple grid search within the feasible space. This option is particularly useful if you want to specify a quantity of trials that is greater than the number of points in the feasible space. In such cases, if you do not specify a grid search, the Vertex AI default algorithm may generate duplicate suggestions. To use grid search, all parameter specs must be of type `IntegerParameterSpec`, `CategoricalParameterSpace`, or `DiscreteParameterSpec`. + * `'RANDOM_SEARCH'` - A simple random search within the feasible space. + + study_spec_measurement_selection_type: This indicates which measurement to use if/when the service automatically selects the final measurement from previously reported intermediate measurements. Accepts: `'BEST_MEASUREMENT'` or `'LAST_MEASUREMENT'`. Choose this based on two considerations: A) Do you expect your measurements to monotonically improve? If so, choose `'LAST_MEASUREMENT'`. On the other hand, if you're in a situation where your system can "over-train" and you expect the performance to get better for a while but then start declining, choose `'BEST_MEASUREMENT'`. B) Are your measurements significantly noisy and/or irreproducible? If so, `'BEST_MEASUREMENT'` will tend to be over-optimistic, and it may be better to choose `'LAST_MEASUREMENT'`. If both or neither of (A) and (B) apply, it doesn't matter which selection type is chosen. + encryption_spec_key_name: Customer-managed encryption key options for a HyperparameterTuningJob. If this is set, then all resources created by the HyperparameterTuningJob will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + service_account: Specifies the service account for workload run-as account. Users submitting jobs must have act-as permission on this run-as account. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. project: Project to run the HyperparameterTuningJob in. Defaults to the project in which the PipelineJob is run. Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py index 8a503fcb22..fa1d171f74 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/hyperparameter_tuning_job/utils.py @@ -26,27 +26,13 @@ def serialize_parameters( """Utility for converting a hyperparameter tuning [ParameterSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/StudySpec#ParameterSpec) into a list of dictionaries. 
Args: - parameters (Dict[str, hyperparameter_tuning._ParameterSpec]): Dictionary - of parameter ids to subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, - `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. + parameters (Dict[str, hyperparameter_tuning._ParameterSpec]): Dictionary of parameter ids to subclasses of [_ParameterSpec](https://github.com/googleapis/python-aiplatform/blob/1fda4172baaf200414d95e7217bfef0e500cc16a/google/cloud/aiplatform/hyperparameter_tuning.py#L51). Supported subclasses include: `DoubleParameterSpec`, `IntegerParameterSpec`, `CategoricalParameterSpace`, `DiscreteParameterSpec`. Example: - :Example: - :: - - from google.cloud.aiplatform import hyperparameter_tuning as hpt - - parameters = { - 'decay': - hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'), - 'learning_rate': - hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'), - 'batch_size': - hpt.DiscreteParamterSpec( - values=[4, 8, 16, 32, 64, 128], scale='linear') - } + from google.cloud.aiplatform import hyperparameter_tuning as hpt + parameters = { 'decay': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'), 'learning_rate': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'), 'batch_size': hpt.DiscreteParamterSpec( values=[4, 8, 16, 32, 64, 128], scale='linear') } Returns: - List of `ParameterSpec` dictionaries. + List of `ParameterSpec` dictionaries. """ # fmt: on # the to_dict function is used here instead of the to_json function for compatibility with GAPIC @@ -63,18 +49,11 @@ def serialize_metrics(metric_spec: Dict[str, str]) -> List[Dict[str, Any]]: """Utility for converting a hyperparameter tuning [MetricSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/StudySpec#metricspec) into a list of dictionaries. Args: - metric_spec (Dict[str, str]): Dictionary representing metrics to - optimize. The dictionary key is the metric_id, which is reported by your - training job, and the dictionary value is the optimization goal of the - metric (`'minimize'` or `'maximize'`). + metric_spec (Dict[str, str]): Dictionary representing metrics to optimize. The dictionary key is the metric_id, which is reported by your training job, and the dictionary value is the optimization goal of the metric (`'minimize'` or `'maximize'`). Example: - :Example: - :: + metrics = {'loss': 'minimize', 'accuracy': 'maximize'} - metrics = {'loss': 'minimize', 'accuracy': 'maximize'} - - Returns: - List of `MetricSpec` dictionaries. + Returns: List of `MetricSpec` dictionaries. """ # fmt: on return [ diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py index 3aecbeb2c9..2295c68d53 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# fmt: off """Manage models via [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction).""" +# fmt: on from google_cloud_pipeline_components.v1.model.delete_model.component import model_delete as ModelDeleteOp from google_cloud_pipeline_components.v1.model.export_model.component import model_export as ModelExportOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py index 5f4e98078d..157a14f06d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py @@ -21,11 +21,7 @@ @dsl.container_component def model_delete(model: Input[VertexModel], gcp_resources: dsl.OutputPath(str)): # fmt: off - """[Deletes](https://cloud.google.com/vertex- - ai/docs/reference/rest/v1/projects.locations.models/delete) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models). - - See the [Model delete](https://cloud.google.com/vertex- - ai/docs/reference/rest/v1/projects.locations.models/delete) method for more information. + """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models). See the [Model delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) method for more information. Args: model: The name of the Model resource to be deleted. Format: `projects/{project}/locations/{location}/models/{model}`. [More information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete#path-parameters). diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py index 1baa950b32..d184ef385a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/export_model/component.py @@ -33,46 +33,16 @@ def model_export( image_destination: str = '', ): # fmt: off - """[Exports](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) to a user-specified location. - - The Model must be exportable. A Model is considered to be exportable if it has at least one supported - export format. - - See the [Model export](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) method for more information. + """[Exports](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) to a user-specified location. The Model must be exportable. A Model is considered to be exportable if it has at least one supported export format. See the [Model export](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export) method for more information. Args: model: The Model to export. 
- export_format_id: The ID of the format in which the Model must be - exported. Each Model lists the export formats it supports. If no value - is provided here, then the first from the list of the Model's - supported formats is used by default. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) - artifact_destination: The Cloud Storage location where - the Model artifact is to be written to. Under the directory given as - the destination a new one with name - `"model-export--"`, - where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format, will - be created. Inside, the Model and any of its supporting files will be - written. This field should only be set when, in - [Model.supported_export_formats], the value for the key given in - `export_format_id` contains `ARTIFACT`. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) - image_destination: The Google Container Registry or - Artifact Registry URI where the Model container image will be copied - to. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) - - Accepted forms: - - - Google Container Registry path. For example: `gcr.io/projectId/imageName:tag`. - - Artifact Registry path. - - For example: - - `us-central1-docker.pkg.dev/projectId/repoName/imageName:tag`. - - This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains `IMAGE`. + export_format_id: The ID of the format in which the Model must be exported. Each Model lists the export formats it supports. If no value is provided here, then the first from the list of the Model's supported formats is used by default. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) + artifact_destination: The Cloud Storage location where the Model artifact is to be written to. Under the directory given as the destination a new one with name `"model-export--"`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format, will be created. Inside, the Model and any of its supporting files will be written. This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains `ARTIFACT`. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) + image_destination: The Google Container Registry or Artifact Registry URI where the Model container image will be copied to. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/export#OutputConfig) Accepted forms: - Google Container Registry path. For example: `gcr.io/projectId/imageName:tag`. - Artifact Registry path. For example: `us-central1-docker.pkg.dev/projectId/repoName/imageName:tag`. This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains `IMAGE`. Returns: - output_info: Details of the completed export with output destination paths to - the artifacts or container image. + output_info: Details of the completed export with output destination paths to the artifacts or container image. 
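
A hedged sketch of exporting and then deleting a registered Model with the aliases imported above; the importer's `resourceName` metadata key, the export format id, and the destination bucket are assumptions:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.model import ModelDeleteOp, ModelExportOp


    @dsl.pipeline(name='model-export-then-delete')
    def export_then_delete(model_resource_name: str):
        # Stand-in for an upstream step that produces a VertexModel artifact.
        get_model = dsl.importer(
            artifact_uri=model_resource_name,
            artifact_class=artifact_types.VertexModel,
            metadata={'resourceName': model_resource_name},  # assumed metadata key
        )
        export_task = ModelExportOp(
            model=get_model.output,
            export_format_id='tf-saved-model',  # must be a format this Model supports
            artifact_destination='gs://my-bucket/exports/',  # hypothetical bucket
        )
        # Delete the Model from the registry only after the export has finished.
        ModelDeleteOp(model=get_model.output).after(export_task)
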
gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the export Model's long-running operation. """ # fmt: on diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py index 030a47dc9d..5ab4a29911 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py @@ -44,52 +44,24 @@ def model_upload( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """[Uploads](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) and returns a Model artifact representing the uploaded Model - resource. - - See [Model upload](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) method for more information. + """[Uploads](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) and returns a Model artifact representing the uploaded Model resource. See [Model upload](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) method for more information. Args: - location: Optional location to upload this Model to. If - not set, defaults to `us-central1`. - display_name: The display name of the Model. The name - can be up to 128 characters long and can be consist of any UTF-8 - characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) + location: Optional location to upload this Model to. If not set, defaults to `us-central1`. + display_name: The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) parent_model: An artifact of a model which to upload a new version to. Only specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) - unmanaged_container_model: The unmanaged container model to be uploaded. The Model can be passed from an upstream step or imported via a KFP `dsl.importer`. - - :Examples: - :: + unmanaged_container_model: The unmanaged container model to be uploaded. The Model can be passed from an upstream step or imported via a KFP `dsl.importer`. 
Example: - from kfp import dsl - from google_cloud_pipeline_components.types import artifact_types + from kfp import dsl + from google_cloud_pipeline_components.types import artifact_types - importer_spec = dsl.importer( - artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model', - artifact_class=artifact_types.UnmanagedContainerModel, - metadata={ - 'containerSpec': { 'imageUri': - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod' - } - }) + importer_spec = dsl.importer( artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model', artifact_class=artifact_types.UnmanagedContainerModel, metadata={ 'containerSpec': { 'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod' } }) - explanation_metadata: Metadata describing the Model's - input and output for explanation. Both `explanation_metadata` and `explanation_parameters` must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata) - explanation_parameters: Parameters to configure - explaining for Model's predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters) - encryption_spec_key_name: Customer-managed encryption - key spec for a Model. If set, this Model and all sub-resources of this - Model will be secured by this key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource - is created. - labels: The labels with user-defined metadata to - organize your model. Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only contain lowercase letters, - numeric characters, underscores and dashes. International characters - are allowed. See https://goo.gl/xmQnxf for more information and - examples of labels. + explanation_metadata: Metadata describing the Model's input and output for explanation. Both `explanation_metadata` and `explanation_parameters` must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata) + explanation_parameters: Parameters to configure explaining for Model's predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters) + encryption_spec_key_name: Customer-managed encryption key spec for a Model. If set, this Model and all sub-resources of this Model will be secured by this key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + labels: The labels with user-defined metadata to organize your model. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. project: Project to upload this Model to. Defaults to the project in which the PipelineJob is run. 
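
A sketch of wiring the docstring's importer example into an upload step; the `ModelUploadOp` alias and the importer's `artifact` output key are assumed here:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.model import ModelUploadOp  # assumed alias


    @dsl.pipeline(name='upload-unmanaged-model')
    def upload_model_pipeline():
        # Importer taken from the docstring example above.
        importer_spec = dsl.importer(
            artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                'containerSpec': {
                    'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
                }
            },
        )
        ModelUploadOp(
            display_name='example-uploaded-model',
            unmanaged_container_model=importer_spec.outputs['artifact'],
        )
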
Returns: diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/__init__.py index 7d75f68480..78d839098f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Model evaluation pipelines.""" from google_cloud_pipeline_components.v1.model_evaluation.classification_component import model_evaluation_classification as ModelEvaluationClassificationOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py index 5e424a9689..cbdef55e13 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/classification_component.py @@ -64,114 +64,40 @@ def model_evaluation_classification( Args: location: Location for running the evaluation. - predictions_format: The file format for the batch - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - formats, from Vertex Batch Prediction. - predictions_gcs_source: An artifact with its - URI pointing toward a GCS directory with prediction or explanation files - to be used for this evaluation. For prediction results, the files should - be named "prediction.results-*" or "predictions_". For explanation - results, the files should be named "explanation.results-*". - predictions_bigquery_source: BigQuery table - with prediction or explanation data to be used for this evaluation. For - prediction results, the table column should be named "predicted_*". - ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. `jsonl`, - `csv`, and `bigquery` are the allowed formats. - ground_truth_gcs_source: Required for custom - tabular and non tabular data. The GCS URIs representing where the ground - truth is located. Used to provide ground truth for each prediction - instance when they are not part of the batch prediction jobs prediction - instance. - ground_truth_bigquery_source: Required for custom tabular. - The BigQuery table URI representing where the ground truth is located. - Used to provide ground truth for each prediction instance when they are - not part of the batch prediction jobs prediction instance. - classification_type: The type of classification problem, - either `multiclass` or `multilabel`. - class_labels: The list of class names for the - target_field_name, in the same order they appear in the batch - predictions jobs predictions output file. For instance, if the values of - target_field_name could be either `1` or `0`, and the predictions output - contains ["1", "0"] for the prediction_label_column, then the - class_labels input will be ["1", "0"]. If not set, defaults to the - classes found in the prediction_label_column in the batch prediction - jobs predictions file. - target_field_name: The full name path of the features target field - in the predictions file. 
Formatted to be able to find nested columns, - delimited by `.`. Alternatively referred to as the ground truth (or - ground_truth_column) field. - model: The Vertex model used for evaluation. Must be located in the same - region as the location argument. It is used to set the default - configurations for AutoML and custom-trained models. - prediction_score_column: The column name of the field - containing batch prediction scores. Formatted to be able to find nested - columns, delimited by `.`. - prediction_label_column: The column name of the field - containing classes the model is scoring. Formatted to be able to find - nested columns, delimited by `.`. - slicing_specs: List of - `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When - provided, compute metrics for each defined slice. See sample code in - https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component - Below is an example of how to format this input. + predictions_format: The file format for the batch prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. + predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named "prediction.results-*" or "predictions_". For explanation results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". + ground_truth_format: Required for custom tabular and non tabular data. The file format for the ground truth files. `jsonl`, `csv`, and `bigquery` are the allowed formats. + ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. + ground_truth_bigquery_source: Required for custom tabular. The BigQuery table URI representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. + classification_type: The type of classification problem, either `multiclass` or `multilabel`. + class_labels: The list of class names for the target_field_name, in the same order they appear in the batch predictions jobs predictions output file. For instance, if the values of target_field_name could be either `1` or `0`, and the predictions output contains ["1", "0"] for the prediction_label_column, then the class_labels input will be ["1", "0"]. If not set, defaults to the classes found in the prediction_label_column in the batch prediction jobs predictions file. + target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. + model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. + prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. 
+ prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component Below is an example of how to format this input. - 1: First, create a SlicingSpec. - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` - - `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` - - `slicing_spec = SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))})` - 2: Create a list to store the slicing specs into. - `slicing_specs = []` - 3: Format each SlicingSpec into a JSON or Dict. - `slicing_spec_json = json_format.MessageToJson(slicing_spec)` - or - `slicing_spec_dict = json_format.MessageToDict(slicing_spec)` - 4: Combine each slicing_spec JSON into a list. - `slicing_specs.append(slicing_spec_json)` - 5: Finally, pass slicing_specs as an parameter for this component. - `ModelEvaluationClassificationOp(slicing_specs=slicing_specs)` - For more details on configuring slices, see - https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice - positive_classes: The list of class - names to create binary classification metrics based on one-vs-rest for - each value of positive_classes provided. - dataflow_service_account: Service account to run - the Dataflow job. If not set, Dataflow will use the default worker - service account. For more details, see - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account - dataflow_disk_size_gb: The disk size (in GB) of the machine - executing the evaluation run. - dataflow_machine_type: The machine type executing the - evaluation run. - dataflow_workers_num: The number of workers executing the - evaluation run. - dataflow_max_workers_num: The max number of workers - executing the evaluation run. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - encryption_spec_key_name: Customer-managed encryption key options. - If set, resources created by this pipeline will be encrypted with the - provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` - and `Dataflow`. + 1: First, create a SlicingSpec. `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice import SliceSpec` `from google.cloud.aiplatform_v1.types.ModelEvaluationSlice.Slice.SliceSpec import SliceConfig` `slicing_spec = SliceSpec(configs={ 'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))})` + 2: Create a list to store the slicing specs into. `slicing_specs = []` + 3: Format each SlicingSpec into a JSON or Dict. 
`slicing_spec_json = json_format.MessageToJson(slicing_spec)` or `slicing_spec_dict = json_format.MessageToDict(slicing_spec)` + 4: Combine each slicing_spec JSON into a list. `slicing_specs.append(slicing_spec_json)` + 5: Finally, pass slicing_specs as a parameter for this component. `ModelEvaluationClassificationOp(slicing_specs=slicing_specs)` For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice + positive_classes: The list of class names to create binary classification metrics based on one-vs-rest for each value of positive_classes provided. + dataflow_service_account: Service account to run the Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. + dataflow_machine_type: The machine type executing the evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: - evaluation_metrics: - `google.ClassificationMetrics` representing the classification - evaluation metrics in GCS. - gcp_resources: Serialized gcp_resources proto tracking the Dataflow - job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + evaluation_metrics: `google.ClassificationMetrics` representing the classification evaluation metrics in GCS. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
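A consolidated sketch of the five slicing_specs steps above: it assumes the nested `SliceSpec`/`SliceConfig` classes are reached through `google.cloud.aiplatform_v1.types.ModelEvaluationSlice` (the import paths shown in the steps are abbreviated) and that the serialized JSON is produced from the underlying protobuf message via `._pb`; the other component arguments are placeholders:

from google.cloud.aiplatform_v1.types import ModelEvaluationSlice
from google.protobuf import json_format
from kfp import dsl
from google_cloud_pipeline_components.v1.model_evaluation import ModelEvaluationClassificationOp

SliceSpec = ModelEvaluationSlice.Slice.SliceSpec
SliceConfig = SliceSpec.SliceConfig

# Step 1: create a SlicingSpec that slices on a single value of 'feature_a'.
slicing_spec = SliceSpec(
    configs={'feature_a': SliceConfig(SliceSpec.Value(string_value='label_a'))})

# Steps 2-4: serialize the spec and collect the results in a list.
slicing_specs = [json_format.MessageToJson(slicing_spec._pb)]

# Step 5: pass the list to the component inside a pipeline definition.
@dsl.pipeline(name='classification-eval')
def eval_pipeline():
    ModelEvaluationClassificationOp(
        project='my-project',
        location='us-central1',
        target_field_name='species',
        classification_type='multiclass',
        slicing_specs=slicing_specs)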
""" # fmt: on return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index c1c0797f7e..9b6cba7866 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -56,6 +56,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): + # fmt: off """The evaluation vision error analysis pipeline. This pipeline can help you to continuously discover dataset example errors @@ -65,101 +66,32 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v Args: location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction, in the format of - `projects/{project}/locations/{location}/models/{model}` or - `projects/{project}/locations/{location}/models/{model}@{model_version_id - or model_version_alias}` - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - test_dataset_resource_name: A Vertex dataset resource name of the test - dataset. If `test_dataset_storage_source_uris` is also provided, this - argument will override the GCS source. - test_dataset_annotation_set_name: A string of the annotation_set resource - name containing the ground truth of the test datset used for evaluation. - training_dataset_resource_name: A Vertex dataset resource name of the - training dataset. If `training_dataset_storage_source_uris` is also - provided, this argument will override the GCS source. - training_dataset_annotation_set_name: A string of the annotation_set - resource name containing the ground truth of the test datset used for - feature extraction. - test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged - test datasets.`jsonl` is currently the only allowed format. If - `test_dataset` is also provided, this field will be overridden by the - provided Vertex Dataset. 
- training_dataset_storage_source_uris: Google Cloud Storage URI(-s) to - unmanaged test datasets.`jsonl` is currently the only allowed format. If - `training_dataset` is also provided, this field will be overridden by the - provided Vertex Dataset. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. + model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of `projects/{project}/locations/{location}/models/{model}` or `projects/{project}/locations/{location}/models/{model}@{model_version_id or model_version_alias}` + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + test_dataset_resource_name: A Vertex dataset resource name of the test dataset. If `test_dataset_storage_source_uris` is also provided, this argument will override the GCS source. + test_dataset_annotation_set_name: A string of the annotation_set resource name containing the ground truth of the test dataset used for evaluation. + training_dataset_resource_name: A Vertex dataset resource name of the training dataset. If `training_dataset_storage_source_uris` is also provided, this argument will override the GCS source. + training_dataset_annotation_set_name: A string of the annotation_set resource name containing the ground truth of the test dataset used for feature extraction. + test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged test datasets. `jsonl` is currently the only allowed format. If `test_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. + training_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged training datasets. `jsonl` is currently the only allowed format. If `training_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set.
dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: The disk size (in GB) of the machine executing the - evaluation run. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to force. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. """ + # fmt: on evaluation_display_name = 'automl-vision-error-analysis-pipeline' with dsl.Condition( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index 12bff9008f..4d33383c86 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -51,95 +51,34 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): + # fmt: off """The evaluation evaluated annotation pipeline. Args: location: The GCP region that runs the pipeline components.
- model_name: The Vertex model resource name to be imported and used for batch - prediction, in the format of - `projects/{project}/locations/{location}/models/{model}` or - `projects/{project}/locations/{location}/models/{model}@{model_version_id - or model_version_alias}` - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - test_dataset_resource_name: A Vertex dataset resource name of the test - dataset. If `test_dataset_storage_source_uris` is also provided, this - argument will override the GCS source. - test_dataset_annotation_set_name: A string of the annotation_set name - containing the ground truth of the test datset used for evaluation. - test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged - test datasets.`jsonl` is currently the only allowed format. If - `test_dataset` is also provided, this field will be overridden by the - provided Vertex Dataset. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. 
- batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. + model_name: The Vertex model resource name to be imported and used for batch prediction, in the format of `projects/{project}/locations/{location}/models/{model}` or `projects/{project}/locations/{location}/models/{model}@{model_version_id or model_version_alias}` + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + test_dataset_resource_name: A Vertex dataset resource name of the test dataset. If `test_dataset_storage_source_uris` is also provided, this argument will override the GCS source. + test_dataset_annotation_set_name: A string of the annotation_set name containing the ground truth of the test dataset used for evaluation. + test_dataset_storage_source_uris: Google Cloud Storage URI(-s) to unmanaged test datasets. `jsonl` is currently the only allowed format. If `test_dataset` is also provided, this field will be overridden by the provided Vertex Dataset. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided.
For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: The disk size (in GB) of the machine executing the - evaluation run. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to force. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run.
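One way to run this pipeline, sketched under the assumption that it is compiled with the KFP compiler and submitted through Vertex AI Pipelines; the project, bucket, and resource names in parameter_values are placeholders that mirror the Args above:

from kfp import compiler
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.model_evaluation.evaluated_annotation_pipeline import evaluated_annotation_pipeline

# Compile the pipeline function to a job spec file.
compiler.Compiler().compile(
    pipeline_func=evaluated_annotation_pipeline,
    package_path='evaluated_annotation_pipeline.json')

# Submit the compiled spec; parameter names follow the docstring Args.
aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='evaluated-annotation-run',
    template_path='evaluated_annotation_pipeline.json',
    parameter_values={
        'location': 'us-central1',
        'model_name': 'projects/my-project/locations/us-central1/models/123',
        'test_dataset_resource_name': 'projects/my-project/locations/us-central1/datasets/456',
        'test_dataset_annotation_set_name': 'my-annotation-set',
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/evaluated-annotation-output',
    })
job.submit()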
""" + # fmt: off evaluation_display_name = 'automl-vision-evaluated-annotation-pipeline' get_test_dataset_task = GetVertexDatasetOp( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index f8a2e748e1..174c80b346 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -62,145 +62,45 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, ): + # fmt: off """The evaluation AutoML tabular pipeline with feature attribution for classification models. - This pipeline guarantees support for AutoML Tabular models that contain a - valid explanation_spec. This pipeline does not include the - target_field_data_remover component, which is needed for many tabular custom - models. + This pipeline guarantees support for AutoML Tabular models that contain a valid explanation_spec. This pipeline does not include the target_field_data_remover component, which is needed for many tabular custom models. Args: location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. 
For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. 
For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - slicing_specs: List of - `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When - provided, compute metrics for each defined slice. See sample code in - https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component - For more details on configuring slices, see - https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. 
+ batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. 
For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. 
If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to force. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. Returns: - A google.ClassificationMetrics artifact. + A google.ClassificationMetrics artifact. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -340,138 +240,43 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint evaluation_metrics=RegressionMetrics, evaluation_resource_name=str, ): + # fmt: off """The evaluation AutoML tabular pipeline with feature attribution for regression models. - This pipeline guarantees support for AutoML Tabular models that contain a - valid explanation_spec. This pipeline does not include the - target_field_data_remover component, which is needed for many tabular custom - models. + This pipeline guarantees support for AutoML Tabular models that contain a valid explanation_spec. This pipeline does not include the target_field_data_remover component, which is needed for many tabular custom models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction.
For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. 
For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. 
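The batch-prediction I/O arguments described above are plain pipeline parameters, so they are supplied as `parameter_values` when the compiled pipeline is submitted to Vertex AI Pipelines. A minimal sketch for the GCS-sourced case, assuming the `kfp` and `google-cloud-aiplatform` SDKs are installed; the import path, project, bucket, model and column names below are placeholders and should be checked against the installed `google-cloud-pipeline-components` version:

from google.cloud import aiplatform
from kfp import compiler

# Assumed import path for the pipeline function shown in this diff.
from google_cloud_pipeline_components.v1.model_evaluation import (
    evaluation_automl_tabular_feature_attribution_regression_pipeline,
)

# Compile the pipeline function into a job spec that Vertex AI can run.
compiler.Compiler().compile(
    pipeline_func=evaluation_automl_tabular_feature_attribution_regression_pipeline,
    package_path='evaluation_pipeline.json',
)

aiplatform.init(project='my-project', location='us-central1')  # placeholder values

# GCS in, GCS out: instances come from JSONL files and results land under the
# destination directory (see the batch_predict_gcs_* descriptions above).
job = aiplatform.PipelineJob(
    display_name='automl-tabular-eval',
    template_path='evaluation_pipeline.json',
    parameter_values={
        'project': 'my-project',
        'location': 'us-central1',
        'model_name': 'projects/my-project/locations/us-central1/models/1234567890',
        'target_field_name': 'price',
        'batch_predict_instances_format': 'jsonl',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/eval/instances-*.jsonl'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval/output',
    },
)
job.submit()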
+ batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. 
All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. - Returns: - A google.RegressionMetrics artifact. + Returns: A google.RegressionMetrics artifact. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=RegressionMetrics, @@ -606,6 +411,7 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d encryption_spec_key_name: str = '', force_runner_mode: str = '', ): + # fmt: off """The evaluation AutoML tabular pipeline with feature attribution. 
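The hunks above repeat the same two conventions: the docstring is wrapped in `# fmt: off` / `# fmt: on` so the formatter leaves the long single-line `Args:` entries alone, and the pipeline outputs are declared through a functional `NamedTuple`. A toy sketch of that pattern on a stand-in component (not GCPC code; whether the guard is honored depends on the formatter configured for the repo):

from typing import NamedTuple

from kfp import dsl


@dsl.component
def toy_metrics(text: str) -> NamedTuple('outputs', length=int, upper=str):
    # fmt: off
    """A toy component using the same docstring and outputs conventions.

    Args:
        text: An argument whose description stays on one long line because the fmt guard keeps the formatter from rewrapping it.

    Returns:
        A NamedTuple with the text length and its upper-cased form.
    """
    # fmt: on
    outputs = NamedTuple('outputs', length=int, upper=str)
    return outputs(length=len(text), upper=text.upper())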
This pipeline guarantees support for AutoML Tabular classification and @@ -616,132 +422,34 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. 
In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. 
For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - slicing_specs: List of - `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When - provided, compute metrics for each defined slice. See sample code in - https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component - For more details on configuring slices, see - https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + prediction_type: The type of prediction the model is to produce. "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. 
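As the surrounding entries note, the batch-prediction source and destination can be BigQuery instead of Cloud Storage. A hedged sketch of the corresponding `parameter_values` for this wrapper pipeline, with hypothetical project, dataset, table and model identifiers (in practice only one of the GCS or BigQuery source styles is supplied):

# BigQuery in, BigQuery out: predictions and errors tables are created under a
# timestamped dataset in the destination project (see the description below).
bigquery_parameter_values = {
    'project': 'my-project',
    'location': 'us-central1',
    'prediction_type': 'classification',
    'model_name': 'projects/my-project/locations/us-central1/models/1234567890',
    'target_field_name': 'churned',
    'batch_predict_instances_format': 'bigquery',
    'batch_predict_predictions_format': 'bigquery',
    'batch_predict_bigquery_source_uri': 'bq://my-project.eval_data.instances',
    'batch_predict_bigquery_destination_output_uri': 'bq://my-project',
}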
+ batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. 
Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. 
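The Dataflow, CMEK and runner-mode arguments listed above are optional overrides passed straight through to the evaluation components. A short, hypothetical sketch of those overrides; the resource names are placeholders and must follow the formats given in the descriptions above:

# Optional infrastructure overrides for the evaluation components.
infra_parameter_values = {
    'dataflow_machine_type': 'n1-standard-8',
    'dataflow_max_num_workers': 10,
    'dataflow_disk_size_gb': 100,
    'dataflow_service_account': 'dataflow-runner@my-project.iam.gserviceaccount.com',
    # Fully qualified subnetwork name, per the Dataflow networking guide linked above.
    'dataflow_subnetwork': 'https://www.googleapis.com/compute/v1/projects/my-project/regions/us-central1/subnetworks/my-subnet',
    'dataflow_use_public_ips': False,
    # CMEK key in the same region as the compute resources it protects.
    'encryption_spec_key_name': 'projects/my-project/locations/us-central1/keyRings/my-kr/cryptoKeys/my-key',
    # 'Dataflow' or 'DirectRunner'; an empty string lets the components decide.
    'force_runner_mode': 'DirectRunner',
}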
""" + # fmt: on with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index 77b39a5780..027d72c14b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -56,7 +56,10 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, ): - """The evaluation AutoML tabular pipeline with no feature attribution for classification models. + # fmt: off + """The evaluation AutoML tabular pipeline with no feature attribution for. + + classification models. This pipeline guarantees support for AutoML Tabular models. This pipeline does not include the target_field_data_remover component, which is needed for many @@ -64,118 +67,34 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger Args: location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. 
- batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - slicing_specs: List of - `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When - provided, compute metrics for each defined slice. 
See sample code in - https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component - For more details on configuring slices, see - https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. 
If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. 
Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. Returns: - A google.ClassificationMetrics artifact and imported - evaluation_resource_name. + A google.ClassificationMetrics artifact and imported evaluation_resource_name. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -277,119 +196,40 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- evaluation_metrics=RegressionMetrics, evaluation_resource_name=str, ): + # fmt: off """The evaluation AutoML tabular pipeline with no feature attribution for regression models. - This pipeline guarantees support for AutoML Tabular models. This pipeline does - not include the target_field_data_remover component, which is needed for many - tabular custom models. + This pipeline guarantees support for AutoML Tabular models. This pipeline does not include the target_field_data_remover component, which is needed for many tabular custom models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. 
If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. 
Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. 
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: - A google.RegressionMetrics artifact and imported - evaluation_resource_name. + A google.RegressionMetrics artifact and imported evaluation_resource_name. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=RegressionMetrics, @@ -488,124 +328,39 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val encryption_spec_key_name: str = '', force_runner_mode: str = '', ): + # fmt: off """The evaluation AutoML tabular pipeline with no feature attribution. - This pipeline guarantees support for AutoML Tabular classification and - regression models. This pipeline does not include the - target_field_data_remover component, which is needed for many tabular custom - models and AutoML Tabular Forecasting. + This pipeline guarantees support for AutoML Tabular classification and regression models. This pipeline does not include the target_field_data_remover component, which is needed for many tabular custom models and AutoML Tabular Forecasting. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances to run batch prediction on. May contain wildcards. For more - information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. 
- For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - slicing_specs: List of - `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When - provided, compute metrics for each defined slice. See sample code in - https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component - For more details on configuring slices, see - https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. + prediction_type: The type of prediction the model is to produce. "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances to run batch prediction on. 
May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. 
When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. """ + # fmt: on with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index a35026914b..59fa225b10 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -60,133 +60,42 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, ): + # fmt: off """The evaluation pipeline with ground truth and no feature attribution for classification models. - This pipeline is used for all classification unstructured AutoML models, - including Text, Video, Image and Custom models. 
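For reference, the guard pattern these hunks introduce around every affected docstring can be sketched as follows. This is a minimal illustration, not part of the patch: the function and its parameters are placeholders, and which autoformatter honors the `# fmt: off` / `# fmt: on` markers is an assumption rather than something the diff states. The point is only that the single-line Args entries added above are protected from being rewrapped.

def example_pipeline(model_name: str, target_field_name: str):
    # fmt: off
    """One-line summary of the pipeline.

    Args:
      model_name: The Vertex model resource name to be imported and used for batch prediction.
      target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`.
    """
    # fmt: on
    # Pipeline body omitted; this sketch only shows the docstring guard placement.
    ...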
+ This pipeline is used for all classification unstructured AutoML models, including Text, Video, Image and Custom models. Args: location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. Formatted like - projects/{project}/locations/{location}/models/{model} or - projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. 
In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_label_column: The column name of the field containing - classes the model is scoring. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_class_labels: Required for classification prediction type. The - list of class names for the target_field_name, in the same order they - appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either `1` or `0`, then the class_labels input - will be ["1", "0"]. + model_name: The Vertex model resource name to be imported and used for batch prediction. Formatted like projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. 
Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. 
Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. - Returns: - A Tuple of google.ClassificationMetrics artifact and the imported - evaluation metrics resource name. + Returns: A Tuple of google.ClassificationMetrics artifact and the imported evaluation metrics resource name. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -314,123 +223,42 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d evaluation_metrics=RegressionMetrics, evaluation_resource_name=str, ): - """The evaluation pipeline with ground truth and no feature attribution for regression models. + # fmt: off + """The evaluation pipeline with ground truth and no feature attribution for. + + regression models. This pipeline is used for all custom tabular regression models. Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. Formatted like - projects/{project}/locations/{location}/models/{model} or - projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. 
If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. + model_name: The Vertex model resource name to be imported and used for batch prediction. Formatted like projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. 
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. dataflow_machine_type: The Dataflow machine type for evaluation components. 
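As a hypothetical illustration of how the parameters documented in the Args blocks above would be supplied, the sketch below compiles one of these evaluation pipelines and submits it as a Vertex AI PipelineJob. The import path is assumed to mirror the module edited in this patch, and the project, bucket, model resource name, and parameter values are invented placeholders, not values taken from the diff.

from google.cloud import aiplatform
from kfp import compiler

# Assumed import path, mirroring the module location edited in this patch.
from google_cloud_pipeline_components.v1.model_evaluation import (
    evaluation_automl_unstructure_data_regression_pipeline,
)

# Compile the pipeline function into a pipeline package.
compiler.Compiler().compile(
    pipeline_func=evaluation_automl_unstructure_data_regression_pipeline,
    package_path='evaluation_regression_pipeline.yaml',
)

# Submit it as a Vertex AI PipelineJob; every value below is a placeholder.
aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='automl-unstructured-regression-eval',
    template_path='evaluation_regression_pipeline.yaml',
    parameter_values={
        'project': 'my-project',
        'location': 'us-central1',
        'model_name': 'projects/my-project/locations/us-central1/models/1234567890',
        'target_field_name': 'price',
        'batch_predict_instances_format': 'jsonl',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/eval-data/*.jsonl'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval-output',
        'dataflow_machine_type': 'n1-standard-4',
    },
)
job.submit()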
- dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. - Returns: - A Tuple of google.RegressionMetrics artifact and the imported evaluation - metrics resource name. + Returns: A Tuple of google.RegressionMetrics artifact and the imported evaluation metrics resource name. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=RegressionMetrics, @@ -555,6 +383,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de encryption_spec_key_name: str = '', force_runner_mode: str = '', ): + # fmt: off """The evaluation pipeline with ground truth and no feature attribution. This pipeline is used for all unstructured AutoML models, including Text, @@ -563,122 +392,33 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - model_name: The Vertex model resource name to be imported and used for batch - prediction. Formatted like - projects/{project}/locations/{location}/models/{model} or - projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. 
For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. 
For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_label_column: The column name of the field containing - classes the model is scoring. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_class_labels: Required for classification prediction type. The - list of class names for the target_field_name, in the same order they - appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either `1` or `0`, then the class_labels input - will be ["1", "0"]. + prediction_type: The type of prediction the model is to produce. "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch prediction. Formatted like projects/{project}/locations/{location}/models/{model} or projects/{project}/locations/{location}/models/{model}@{model_version_id_or_model_version_alias}. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. 
Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. 
For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. 
If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. """ + # fmt: on with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 8eea0e9f32..3c0a630c6f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -63,149 +63,45 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, ): + # fmt: off """The evaluation custom tabular pipeline with feature attribution for classification models. - This pipeline gives support for custom models that contain a - valid explanation_spec. This pipeline includes the target_field_data_remover - component, which is needed for many tabular custom models. + This pipeline gives support for custom models that contain a valid explanation_spec. This pipeline includes the target_field_data_remover component, which is needed for many tabular custom models. Args: location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. 
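For orientation alongside the `with kfp.dsl.Condition(...)` gate shown above, here is a minimal, self-contained sketch of the same branching pattern; the component and pipeline names are placeholders and are not part of this change.

```python
from kfp import dsl


@dsl.component
def evaluate_stub(kind: str) -> str:
    # Stand-in for the real evaluation steps selected inside each branch.
    return kind


@dsl.pipeline(name='prediction-type-branching-sketch')
def branching_sketch(prediction_type: str = 'classification'):
    # Only the branch whose condition matches the input runs at execution time,
    # mirroring the classification/regression gates in the pipeline body above.
    with dsl.Condition(prediction_type == 'classification', name='classification'):
        evaluate_stub(kind='classification')
    with dsl.Condition(prediction_type == 'regression', name='regression'):
        evaluate_stub(kind='regression')
```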
For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. 
Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_label_column: The column name of the field containing - classes the model is scoring. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_class_labels: Required for classification prediction type. The - list of class names for the target_field_name, in the same order they - appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either `1` or `0`, then the class_labels input - will be ["1", "0"]. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. 
If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. 
Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. 
If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. - project: The GCP project that runs the pipeline components. Defaults to the - project in which the PipelineJob is run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. + project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. - Returns: - A google.ClassificationMetrics artifact. + Returns: A google.ClassificationMetrics artifact. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, evaluation_resource_name=str, @@ -373,7 +269,10 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange evaluation_metrics=RegressionMetrics, evaluation_resource_name=str, ): - """The evaluation custom tabular pipeline with feature attribution for regression models. + # fmt: off + """The evaluation custom tabular pipeline with feature attribution for regression models. This pipeline gives support for custom models that contain a valid explanation_spec. This pipeline includes the target_field_data_remover @@ -382,131 +281,36 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances.
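As a side note on the `outputs = NamedTuple(...)` declaration above, the functional NamedTuple form can be exercised on its own; the field types and values below are stand-ins for the GCPC artifact classes and an illustrative evaluation resource name, not anything produced by this pipeline.

```python
from typing import NamedTuple

# Stand-in field types; the real declaration above uses the GCPC
# ClassificationMetrics artifact class and a plain string.
outputs = NamedTuple(
    'outputs',
    evaluation_metrics=dict,
    evaluation_resource_name=str,
)

# The functional form behaves like a small class with two named, typed fields;
# downstream tasks would read them by name from the pipeline's outputs.
result = outputs(
    evaluation_metrics={'example_metric': 0.9},
    evaluation_resource_name='projects/123/locations/us-central1/models/456/evaluations/789',
)
print(result.evaluation_metrics, result.evaluation_resource_name)
```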
If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
- For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
- The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: - A google.RegressionMetrics artifact. + A google.RegressionMetrics artifact. """ + # fmt: on outputs = NamedTuple( 'outputs', evaluation_metrics=RegressionMetrics, @@ -671,6 +475,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul encryption_spec_key_name: str = '', force_runner_mode: str = '', ): + # fmt: off """The evaluation custom tabular pipeline with feature attribution. This pipeline gives support for custom models that contain a @@ -680,138 +485,36 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul Args: project: The GCP project that runs the pipeline components. location: The GCP region that runs the pipeline components. - prediction_type: The type of prediction the model is to produce. - "classification" or "regression". - model_name: The Vertex model resource name to be imported and used for batch - prediction. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - batch_predict_instances_format: The format in which instances are given, - must be one of the Model's supportedInputStorageFormats. For more details - about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_gcs_destination_output_uri: The Google Cloud Storage location - of the directory where the output is to be written to. In the given - directory a new directory is created. Its name is - `prediction--`, where timestamp is in - YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - `predictions_0001.`, `predictions_0002.`, ..., - `predictions_N.` are created where `` depends on - chosen `predictions_format`, and N may equal 0001 and depends on the total - number of successfully predicted instances. If the Model has both - `instance` and `prediction` schemata defined then each such file contains - predictions as per the `predictions_format`. If prediction for any - instance failed (partially or completely), then an additional - `errors_0001.`, `errors_0002.`,..., - `errors_N.` files are created (N depends on total number of - failed predictions). These files contain the failed instances, as per - their schema, followed by an additional `error` field which as value has - `google.rpc.Status` containing only `code` and `message` fields. 
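For context, a hedged sketch of how a pipeline such as `evaluation_feature_attribution_regression_pipeline` might be compiled and submitted; the import path mirrors the file edited in this diff, while the project, region, model resource name, and GCS paths are placeholders and only a few of the documented arguments are shown.

```python
from google.cloud import aiplatform
from kfp import compiler

# Import path mirrors the module edited in this diff; the published alias may differ.
from google_cloud_pipeline_components.v1.model_evaluation.evaluation_feature_attribution_pipeline import (
    evaluation_feature_attribution_regression_pipeline,
)

compiler.Compiler().compile(
    pipeline_func=evaluation_feature_attribution_regression_pipeline,
    package_path='evaluation_pipeline.json',
)

# Placeholder values throughout; only a handful of the documented arguments are set.
aiplatform.init(project='my-project', location='us-central1')
job = aiplatform.PipelineJob(
    display_name='feature-attribution-regression-eval',
    template_path='evaluation_pipeline.json',
    parameter_values={
        'location': 'us-central1',
        'model_name': 'projects/my-project/locations/us-central1/models/123',
        'target_field_name': 'target',
        'batch_predict_instances_format': 'jsonl',
        'batch_predict_gcs_source_uris': ['gs://my-bucket/eval/*.jsonl'],
        'batch_predict_gcs_destination_output_uri': 'gs://my-bucket/eval-output',
    },
)
job.submit()
```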
For more - details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your - instances data to run batch prediction on. The instances data should also - contain the ground truth (target) data, used for evaluation. May contain - wildcards. For more information on wildcards, see - https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For - more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to - run batch prediction on. May contain wildcards. For more details about - this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_predictions_format: The format in which Vertex AI gives the - predictions. Must be one of the Model's supportedOutputStorageFormats. For - more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_bigquery_destination_output_uri: The BigQuery project location - where the output is to be written to. In the given project a new dataset - is created with name `prediction__` - where is made BigQuery-dataset-name compatible (for example, most special - characters become underscores), and timestamp is in - YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two - tables will be created, `predictions`, and `errors`. If the Model has both - `instance` and `prediction` schemata defined then the tables have columns - as follows: The `predictions` table contains instances for which the - prediction succeeded, it has columns as per a concatenation of the Model's - instance and prediction schemata. The `errors` table contains rows for - which the prediction has failed, it has instance columns, as per the - instance schema, followed by a single "errors" column, which as values has - `google.rpc.Status` represented as a STRUCT, and containing only `code` - and `message`. For more details about this output config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. - batch_predict_machine_type: The type of machine for running batch prediction - on dedicated resources. If the Model supports DEDICATED_RESOURCES this - config may be provided (and the job will use these resources). If the - Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. - For more details about the BatchDedicatedResources, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. - For more details about the machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_starting_replica_count: The number of machine replicas used at - the start of the batch operation. If not set, Vertex AI decides starting - number, not greater than `max_replica_count`. Only used if `machine_type` - is set. - batch_predict_max_replica_count: The maximum number of machine replicas the - batch operation may be scaled to. Only used if `machine_type` is set. - batch_predict_explanation_metadata: Explanation metadata configuration for - this BatchPredictionJob. 
Can be specified only if `generate_explanation` - is set to `True`. This value overrides the value of - `Model.explanation_metadata`. All fields of `explanation_metadata` are - optional in the request. If a field of the `explanation_metadata` object - is not populated, the corresponding field of the - `Model.explanation_metadata` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. - batch_predict_explanation_parameters: Parameters to configure explaining for - Model's predictions. Can be specified only if `generate_explanation` is - set to `True`. This value overrides the value of - `Model.explanation_parameters`. All fields of `explanation_parameters` are - optional in the request. If a field of the `explanation_parameters` object - is not populated, the corresponding field of the - `Model.explanation_parameters` object is inherited. For more details, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. - batch_predict_explanation_data_sample_size: Desired size to downsample the - input dataset that will then be used for batch explanation. - batch_predict_accelerator_type: The type of accelerator(s) that may be - attached to the machine as per `batch_predict_accelerator_count`. Only - used if `batch_predict_machine_type` is set. For more details about the - machine spec, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec - batch_predict_accelerator_count: The number of accelerators to attach to the - `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is - set. - evaluation_prediction_label_column: The column name of the field containing - classes the model is scoring. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_prediction_score_column: The column name of the field containing - batch prediction scores. Formatted to be able to find nested columns, - delimited by `.`. - evaluation_class_labels: Required for classification prediction type. The - list of class names for the target_field_name, in the same order they - appear in a file in batch_predict_gcs_source_uris. For instance, if the - target_field_name could be either `1` or `0`, then the class_labels input - will be ["1", "0"]. + prediction_type: The type of prediction the model is to produce. "classification" or "regression". + model_name: The Vertex model resource name to be imported and used for batch prediction. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_gcs_destination_output_uri: The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files `predictions_0001.`, `predictions_0002.`, ..., `predictions_N.` are created where `` depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the `predictions_format`. If prediction for any instance failed (partially or completely), then an additional `errors_0001.`, `errors_0002.`,..., `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional `error` field which as value has `google.rpc.Status` containing only `code` and `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your instances data to run batch prediction on. The instances data should also contain the ground truth (target) data, used for evaluation. May contain wildcards. For more information on wildcards, see https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_bigquery_source_uri: Google BigQuery URI to your instances to run batch prediction on. May contain wildcards. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_bigquery_destination_output_uri: The BigQuery project location where the output is to be written to. In the given project a new dataset is created with name `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, `predictions`, and `errors`. If the Model has both `instance` and `prediction` schemata defined then the tables have columns as follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has `google.rpc.Status` represented as a STRUCT, and containing only `code` and `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_machine_type: The type of machine for running batch prediction on dedicated resources. If the Model supports DEDICATED_RESOURCES this config may be provided (and the job will use these resources). If the Model doesn't support AUTOMATIC_RESOURCES, this config must be provided. For more details about the BatchDedicatedResources, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. 
For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. + batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + batch_predict_explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. + batch_predict_explanation_parameters: Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters. + batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. + batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec + batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. + evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. dataflow_machine_type: The Dataflow machine type for evaluation components. - dataflow_max_num_workers: The max number of Dataflow workers for evaluation - components. - dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation - components. + dataflow_max_num_workers: The max number of Dataflow workers for evaluation components. + dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. dataflow_service_account: Custom service account to run Dataflow jobs. - dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty - the default subnetwork will be used. 
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow workers use public IP - addresses. - encryption_spec_key_name: Customer-managed encryption key options. If set, - resources created by this pipeline will be encrypted with the provided - encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Indicate the runner mode to use forcely. Valid options - are `Dataflow` and `DirectRunner`. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. """ + # fmt: on with kfp.dsl.Condition( prediction_type == 'classification', name='classification' ): diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py index 927ececbf4..ed133ce391 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/forecasting_component.py @@ -61,83 +61,33 @@ def model_evaluation_forecasting( Args: location: Location for running the evaluation. - predictions_format: The file format for the batch - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - formats, from Vertex Batch Prediction. - predictions_gcs_source: An artifact with its - URI pointing toward a GCS directory with prediction or explanation files - to be used for this evaluation. For prediction results, the files should - be named "prediction.results-*". For explanation results, the files - should be named "explanation.results-*". - predictions_bigquery_source: BigQuery table - with prediction or explanation data to be used for this evaluation. For - prediction results, the table column should be named "predicted_*". - ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. `jsonl`, - `csv`, and `bigquery` are the allowed formats. - ground_truth_gcs_source: Required for custom - tabular and non tabular data. The GCS URIs representing where the ground - truth is located. Used to provide ground truth for each prediction - instance when they are not part of the batch prediction jobs prediction - instance. - ground_truth_bigquery_source: Required for custom tabular. - The BigQuery table URI representing where the ground truth is located. - Used to provide ground truth for each prediction instance when they are - not part of the batch prediction jobs prediction instance. 
- forecasting_type: The forecasting type being addressed by - this evaluation run. `point` and `quantile` are the supported types. - forecasting_quantiles: Required for a - `quantile` forecasting_type. The list of quantiles in the same order - appeared in the quantile prediction score column. - point_evaluation_quantile: Required for a `quantile` - forecasting_type. A quantile in the list of forecasting_quantiles that - will be used for point evaluation metrics. - target_field_name: The full name path of the features target field - in the predictions file. Formatted to be able to find nested columns, - delimited by `.`. Alternatively referred to as the ground truth (or - ground_truth_column) field. - model: The Vertex model used for evaluation. Must be located in the same - region as the location argument. It is used to set the default - configurations for AutoML and custom-trained models. - prediction_score_column: The column name of the field - containing batch prediction scores. Formatted to be able to find nested - columns, delimited by `.`. - dataflow_service_account: Service account to run the - Dataflow job. If not set, Dataflow will use the default worker service - account. For more details, see - https://cloud.google.com/dataflow/docs/concepts/secURIty-and-permissions#default_worker_service_account - dataflow_disk_size_gb: The disk size (in GB) of the machine - executing the evaluation run. - dataflow_machine_type: The machine type executing the - evaluation run. - dataflow_workers_num: The number of workers executing the - evaluation run. - dataflow_max_workers_num: The max number of workers - executing the evaluation run. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - encryption_spec_key_name: Customer-managed encryption key options. - If set, resources created by this pipeline will be encrypted with the - provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` - and `Dataflow`. + predictions_format: The file format for the batch prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. + predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named "prediction.results-*". For explanation results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". + ground_truth_format: Required for custom tabular and non tabular data. The file format for the ground truth files. `jsonl`, `csv`, and `bigquery` are the allowed formats. + ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. 
+ ground_truth_bigquery_source: Required for custom tabular. The BigQuery table URI representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. + forecasting_type: The forecasting type being addressed by this evaluation run. `point` and `quantile` are the supported types. + forecasting_quantiles: Required for a `quantile` forecasting_type. The list of quantiles in the same order appeared in the quantile prediction score column. + point_evaluation_quantile: Required for a `quantile` forecasting_type. A quantile in the list of forecasting_quantiles that will be used for point evaluation metrics. + target_field_name: The full name path of the features target field in the predictions file. Formatted to be able to find nested columns, delimited by `.`. Alternatively referred to as the ground truth (or ground_truth_column) field. + model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. + prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + dataflow_service_account: Service account to run the Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see https://cloud.google.com/dataflow/docs/concepts/secURIty-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. + dataflow_machine_type: The machine type executing the evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Flag to choose Beam runner. Valid options are `DirectRunner` and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: - evaluation_metrics: - `google.ForecastingMetrics` representing the forecasting - evaluation metrics in GCS. - gcp_resources: Serialized gcp_resources proto tracking the Dataflow - job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + evaluation_metrics: `google.ForecastingMetrics` representing the forecasting evaluation metrics in GCS. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. 
""" # fmt: off return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py index 48bdc63b36..e989181814 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/regression_component.py @@ -58,73 +58,29 @@ def model_evaluation_regression( Args: location: Location for running the evaluation. - predictions_format: The file format for the batch - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - formats, from Vertex Batch Prediction. - predictions_gcs_source: An artifact with its - URI pointing toward a GCS directory with prediction or explanation files - to be used for this evaluation. For prediction results, the files should - be named "prediction.results-*". For explanation results, the files - should be named "explanation.results-*". - predictions_bigquery_source: BigQuery table - with prediction or explanation data to be used for this evaluation. For - prediction results, the table column should be named "predicted_*". - ground_truth_format: Required for custom tabular and non - tabular data. The file format for the ground truth files. `jsonl`, - `csv`, and `bigquery` are the allowed formats. - ground_truth_gcs_source: Required for custom - tabular and non tabular data. The GCS URIs representing where the ground - truth is located. Used to provide ground truth for each prediction - instance when they are not part of the batch prediction jobs prediction - instance. - ground_truth_bigquery_source: Required for custom tabular. - The BigQuery table URI representing where the ground truth is located. - Used to provide ground truth for each prediction instance when they are - not part of the batch prediction jobs prediction instance. - target_field_name: The target field's name. Formatted to be able to find - nested columns, delimited by `.`. Prefixed with 'instance.' on the - component for Vertex Batch Prediction. - model: The Vertex model used for evaluation. Must be located in the same - region as the location argument. It is used to set the default - configurations for AutoML and custom-trained models. - prediction_score_column: The column name of the field - containing batch prediction scores. Formatted to be able to find nested - columns, delimited by `.`. - dataflow_service_account: Service account to run the - Dataflow job. If not set, Dataflow will use the default worker service - account. For more details, see - https://cloud.google.com/dataflow/docs/concepts/secURIty-and-permissions#default_worker_service_account - dataflow_disk_size_gb: The disk size (in GB) of the machine - executing the evaluation run. - dataflow_machine_type: The machine type executing the - evaluation run. - dataflow_workers_num: The number of workers executing the - evaluation run. - dataflow_max_workers_num: The max number of workers - executing the evaluation run. - dataflow_subnetwork: Dataflow's fully qualified subnetwork - name, when empty the default subnetwork will be used. More - details: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications - dataflow_use_public_ips: Specifies whether Dataflow - workers use public IP addresses. - encryption_spec_key_name: Customer-managed encryption key options. 
- If set, resources created by this pipeline will be encrypted with the - provided encryption key. Has the form: - `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. - The key needs to be in the same region as where the compute resource is - created. - force_runner_mode: Flag to choose Beam runner. Valid options are - `DirectRunner` and `Dataflow`. + predictions_format: The file format for the batch prediction results. `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex Batch Prediction. + predictions_gcs_source: An artifact with its URI pointing toward a GCS directory with prediction or explanation files to be used for this evaluation. For prediction results, the files should be named "prediction.results-*". For explanation results, the files should be named "explanation.results-*". + predictions_bigquery_source: BigQuery table with prediction or explanation data to be used for this evaluation. For prediction results, the table column should be named "predicted_*". + ground_truth_format: Required for custom tabular and non tabular data. The file format for the ground truth files. `jsonl`, `csv`, and `bigquery` are the allowed formats. + ground_truth_gcs_source: Required for custom tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. + ground_truth_bigquery_source: Required for custom tabular. The BigQuery table URI representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are not part of the batch prediction jobs prediction instance. + target_field_name: The target field's name. Formatted to be able to find nested columns, delimited by `.`. Prefixed with 'instance.' on the component for Vertex Batch Prediction. + model: The Vertex model used for evaluation. Must be located in the same region as the location argument. It is used to set the default configurations for AutoML and custom-trained models. + prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. + dataflow_service_account: Service account to run the Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see https://cloud.google.com/dataflow/docs/concepts/secURIty-and-permissions#default_worker_service_account + dataflow_disk_size_gb: The disk size (in GB) of the machine executing the evaluation run. + dataflow_machine_type: The machine type executing the evaluation run. + dataflow_workers_num: The number of workers executing the evaluation run. + dataflow_max_workers_num: The max number of workers executing the evaluation run. + dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. More details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + force_runner_mode: Flag to choose Beam runner. 
Valid options are `DirectRunner` and `Dataflow`. project: Project to run evaluation container. Defaults to the project in which the PipelineJob is run. Returns: - evaluation_metrics: - `google.RegressionMetrics` representing the regression - evaluation metrics in GCS. - gcp_resources: Serialized gcp_resources proto tracking the Dataflow - job. For more details, see - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. + evaluation_metrics: `google.RegressionMetrics` representing the regression evaluation metrics in GCS. + gcp_resources: Serialized gcp_resources proto tracking the Dataflow job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on return dsl.ContainerSpec( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py index 34e78bafc2..6c8a35f2a9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/vertex_notification_email/component.py @@ -28,11 +28,7 @@ def vertex_pipelines_notification_email( # fmt: off """Send notification email(s) when an upstream task/DAG completes. - This component can only be used as an [ExitHandler](https://www.kubeflow.org/docs/components/pipelines/v2/pipelines/control-flow/#exit-handling-dslexithandler)'s exit task. Note that the [PipelineTaskFinalStatus](https://kubeflow-pipelines.readthedocs.io/en/latest/source/dsl.html#kfp.dsl.PipelineTaskFinalStatus) is provided automatically by Vertex Pipelines at runtime. You should not provide any input to this parameter when you instantiate this component as a task. - - This component works only on Vertex Pipelines. This component raises an exception when run on Kubeflow Pipelines. - - See a [usage example](https://cloud.google.com/vertex-ai/docs/pipelines/email-notifications). + This component can only be used as an [ExitHandler](https://www.kubeflow.org/docs/components/pipelines/v2/pipelines/control-flow/#exit-handling-dslexithandler)'s exit task. Note that the [PipelineTaskFinalStatus](https://kubeflow-pipelines.readthedocs.io/en/latest/source/dsl.html#kfp.dsl.PipelineTaskFinalStatus) is provided automatically by Vertex Pipelines at runtime. You should not provide any input to this parameter when you instantiate this component as a task. This component works only on Vertex Pipelines. This component raises an exception when run on Kubeflow Pipelines. See a [usage example](https://cloud.google.com/vertex-ai/docs/pipelines/email-notifications). Args: recipients: A list of email addresses to send a notification to. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/__init__.py index 851fcc2c24..6bc60df9ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/__init__.py @@ -11,7 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
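The notification email component above is only usable as an `ExitHandler`'s exit task. A minimal sketch of that wiring, assuming the component is exported as `VertexNotificationEmailOp` from `google_cloud_pipeline_components.v1.vertex_notification_email` and using a hypothetical `train` step:

```python
from kfp import dsl
from google_cloud_pipeline_components.v1.vertex_notification_email import VertexNotificationEmailOp


@dsl.component
def train() -> str:
    # Hypothetical stand-in for a real training step.
    return 'done'


@dsl.pipeline(name='notify-on-completion')
def my_pipeline():
    # PipelineTaskFinalStatus is injected automatically by Vertex Pipelines at runtime,
    # so only the recipient list is passed here.
    notify = VertexNotificationEmailOp(recipients=['someone@example.com'])
    with dsl.ExitHandler(exit_task=notify):
        train()
```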
+# fmt: off """Wait on the completion of GCP resources spawned from an upstream pipeline component.""" +# fmt: on from google_cloud_pipeline_components.v1.wait_gcp_resources.component import wait_gcp_resources as WaitGcpResourcesOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py index 9e26933388..4a8647d8d3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/wait_gcp_resources/component.py @@ -24,31 +24,20 @@ def wait_gcp_resources( output__gcp_resources: OutputPath(str), ): # fmt: off - """Waits for the completion of one or more GCP resources by polling for completion statuses. + """Waits for the completion of one or more GCP resources by polling for + completion statuses. - Currently this component only supports waiting on a [DataflowJob](https://cloud.google.com/config-connector/docs/reference/resource-docs/dataflow/dataflowjob) resource. + Currently this component only supports waiting on a [DataflowJob](https://cloud.google.com/config-connector/docs/reference/resource-docs/dataflow/dataflowjob) resource. To use this component, first create a component that outputs a `gcp_resources` proto as JSON, then pass it to this component's `gcp_resources` parameter. See [details](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) on how to create a `gcp_resources` proto as a component output. - To use this component, first create a component that outputs a `gcp_resources` proto as JSON, then pass it to this component's `gcp_resources` parameter. - - See [details](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) on how to create a `gcp_resources` proto as a component output. - - Examples: - :: - - dataflow_python_op = gcpc.v1.dataflow.LaunchPythonOp( - python_file_path=... - ) - - dataflow_wait_op = WaitGcpResourcesOp( - gcp_resources=dataflow_python_op.outputs["gcp_resources"] - ) + ``` + dataflow_python_op = gcpc.v1.dataflow.LaunchPythonOp( python_file_path=... ) dataflow_wait_op = WaitGcpResourcesOp( gcp_resources=dataflow_python_op.outputs["gcp_resources"] ) + ``` Args: gcp_resources: Serialized JSON of `gcp_resources` proto, indicating the resource(s) this component should wait on. Returns: gcp_resources: The `gcp_resource`, including any relevant error information. 
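The usage snippet in the rewritten docstring above is collapsed onto a single line; spread out, with the op names and the elided `python_file_path` kept exactly as they appear in that snippet, it reads:

```
dataflow_python_op = gcpc.v1.dataflow.LaunchPythonOp(
    python_file_path=...
)

dataflow_wait_op = WaitGcpResourcesOp(
    gcp_resources=dataflow_python_op.outputs["gcp_resources"]
)
```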
- """ # fmt: on return dsl.ContainerSpec( From 659c54bbeb4348d366d443602d009c7893256255 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 14 Sep 2023 10:37:30 -0700 Subject: [PATCH 150/253] chore(sdk): release KFP SDK 2.2.0 (#9984) --- docs/conf.py | 4 ++-- sdk/RELEASE.md | 12 +++--------- sdk/python/kfp/__init__.py | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 56d6c769ae..faff716cd7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -134,9 +134,9 @@ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags { 'version': - 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.1.3/', + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.2.0/', 'title': - '2.1.3', + '2.2.0', 'aliases': ['stable'], }, { diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index bc2effc705..ac81cc00bf 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -2,7 +2,7 @@ ## Features -* Add support for `dsl.If`, `dsl.Elif`, and `dsl.Else` control flow context managers; deprecate `dsl.Condition` in favor of `dsl.If` [\#9894](https://github.com/kubeflow/pipelines/pull/9894) + ## Breaking changes @@ -11,18 +11,12 @@ ## Bug fixes and other changes ## Documentation updates -# 2.1.3 +# 2.2.0 ## Features +* Add support for `dsl.If`, `dsl.Elif`, and `dsl.Else` control flow context managers; deprecate `dsl.Condition` in favor of `dsl.If` [\#9894](https://github.com/kubeflow/pipelines/pull/9894) * Create "dependency-free" runtime package (only `typing_extensions` required) for Lightweight Python Components to reduce runtime dependency resolution errors [\#9710](https://github.com/kubeflow/pipelines/pull/9710), [\#9886](https://github.com/kubeflow/pipelines/pull/9886) -## Breaking changes - -## Deprecations - -## Bug fixes and other changes - -## Documentation updates # 2.0.1 ## Features diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index ce4bee8252..926df57d4a 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.1.3' +__version__ = '2.2.0' TYPE_CHECK = True From 8518e95efcdb2c78a4ae719af66b72caac082267 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 14 Sep 2023 10:54:33 -0700 Subject: [PATCH 151/253] feat(components): Implement helper functions for Jinja2-based Templated Custom Job Launcher PiperOrigin-RevId: 565417061 --- .../container/_implementation/llm/__init__.py | 14 ++ .../llm/templated_custom_job/__init__.py | 20 +++ .../llm/templated_custom_job/remote_runner.py | 129 ++++++++++++++++++ components/google-cloud/setup.py | 1 + 4 files changed, 164 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/__init__.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/__init__.py new file mode 100644 index 0000000000..7ef02c25d0 --- /dev/null +++ 
b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline LLM Components container code.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/__init__.py new file mode 100644 index 0000000000..f3293916fb --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Components - LLM Templated Custom Job Remote Launcher. + +Note: The `v1/custom_job/` launcher is the preferred Custom Job launcher +whenever possible, because this Jinja2-based templated launcher does not take +advantage of the Vertex Pipeline backend optimization and will thus launch a +Custom Job that runs a Custom Job. +""" diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py new file mode 100644 index 0000000000..d8b81f28cd --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py @@ -0,0 +1,129 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""GCP remote runner for templated custom jobs based on the AI Platform SDK.""" + +import json +from typing import Any, Callable, Dict, List, Optional +import jinja2 +from jinja2 import sandbox + +# Note that type annotations need to match the python version in the GCPC docker +# image in addition to the internal python version. + + +def _json_escape_filter(value: str) -> str: + """A Jinja2 filter for JSON escaping.""" + return json.dumps(value)[1:-1] + + +def render_payload(payload_str: str, params: Dict[str, Any]) -> str: + """Renders a base64-encoded Custom Job payload in Jinja2 format.""" + env = sandbox.ImmutableSandboxedEnvironment( + autoescape=False, undefined=jinja2.StrictUndefined + ) + + # We add an additional `json_dumps` filter because the builtin filter + # `tojson`, which is implemented as `htmlsafe_json_dumps`, converts special + # symbols to `\u` format, e.g., `&` -> `\u0026`, `<` -> `\u003c`. + env.filters['json_dumps'] = json.dumps + env.filters['json_escape'] = _json_escape_filter + + template = env.from_string(payload_str) + return template.render(**params) + + +def convert_key_value_param_list( + param_list: Optional[List[List[str]]], + type_cast: Callable[[str], Any], + cmd_flag: str, +) -> Dict[str, Any]: + """Converts a list of (key, [value]) pairs to a dictionary. + + Args: + param_list: A list of (key, [value]) pairs of string type. + type_cast: A function to cast `value`, if exists, from string to a + specific type. + cmd_flag: The command-line flag for this list of parameters. This is used + to provide better error message when raising an exception. + + Returns: + A dictionary of the converted (key, value) pairs. + """ + params = {} + if not param_list: + return params + for key_value in param_list: + if 1 <= len(key_value) <= 2: + key = key_value[0] + if 1 == len(key_value): + params[key] = None + else: + try: + params[key] = type_cast(key_value[1]) + except json.JSONDecodeError as e: + raise ValueError( + f'Cannot decode value for [{key}]: {key_value[1]}' + ) from e + else: + raise ValueError( + f'{cmd_flag} can only take 1 or 2 params, but found {key_value}' + ) + return params + + +def convert_integer_params( + integer_params: Optional[List[List[str]]], +) -> Dict[str, Optional[int]]: + """Converts a list of (key, [integer]) pairs to a dictionary.""" + return convert_key_value_param_list( + param_list=integer_params, type_cast=int, cmd_flag='--set_integer' + ) + + +def convert_string_params( + string_params: Optional[List[List[str]]], +) -> Dict[str, Optional[str]]: + """Converts a list of (key, [string]) pairs to a dictionary.""" + return convert_key_value_param_list( + param_list=string_params, type_cast=str, cmd_flag='--set_string' + ) + + +def convert_float_params( + float_params: Optional[List[List[str]]], +) -> Dict[str, Optional[float]]: + """Converts a list of (key, [float]) pairs to a dictionary.""" + return convert_key_value_param_list( + param_list=float_params, type_cast=float, cmd_flag='--set_float' + ) + + +def convert_boolean_params( + boolean_params: Optional[List[List[str]]], +) -> Dict[str, Optional[bool]]: + """Converts a list of (key, [boolean]) pairs to a dictionary.""" + return convert_key_value_param_list( + param_list=boolean_params, + type_cast=lambda x: x.lower() in ['1', 'true', 'y', 'yes'], + cmd_flag='--set_boolean', + ) + + +def convert_json_params( + json_params: Optional[List[List[str]]], +) -> Dict[str, Any]: + """Converts a list of (key, [json objects]) pairs to a dictionary.""" + return convert_key_value_param_list( + 
param_list=json_params, type_cast=json.loads, cmd_flag='--set_json' + ) diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index c861843f8d..fa530e0cf2 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -84,6 +84,7 @@ "google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "kfp>=2.0.0b10,<=2.1.3", "google-cloud-aiplatform>=1.14.0,<2", + "Jinja2==3.1.2", ], project_urls={ "User Documentation": "https://cloud.google.com/vertex-ai/docs/pipelines/components-introduction", From fc1f12b7bd2f28390c838abcf3dd020723ad573a Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 14 Sep 2023 11:13:44 -0700 Subject: [PATCH 152/253] feat(components): add `persistent_resource_id` to preview GCPC custom job components/utils PiperOrigin-RevId: 565423232 --- components/google-cloud/RELEASE.md | 1 + .../docs/source/api/preview/custom_job.rst | 4 + .../docs/source/api/preview/index.rst | 1 + .../preview/custom_job/__init__.py | 29 ++ .../preview/custom_job/component.py | 108 +++++++ .../preview/custom_job/utils.py | 282 ++++++++++++++++++ 6 files changed, 425 insertions(+) create mode 100644 components/google-cloud/docs/source/api/preview/custom_job.rst create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 91c5d1e8ba..3b80a02f43 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,4 +1,5 @@ ## Upcoming release +* Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` * Add feature_selection_pipeline to preview.automl.tabular. diff --git a/components/google-cloud/docs/source/api/preview/custom_job.rst b/components/google-cloud/docs/source/api/preview/custom_job.rst new file mode 100644 index 0000000000..ed3a195457 --- /dev/null +++ b/components/google-cloud/docs/source/api/preview/custom_job.rst @@ -0,0 +1,4 @@ +Custom Job +========================== + +.. automodule:: preview.custom_job \ No newline at end of file diff --git a/components/google-cloud/docs/source/api/preview/index.rst b/components/google-cloud/docs/source/api/preview/index.rst index 1b5072b589..8074a6c6b8 100644 --- a/components/google-cloud/docs/source/api/preview/index.rst +++ b/components/google-cloud/docs/source/api/preview/index.rst @@ -5,6 +5,7 @@ Preview Components :maxdepth: 1 automl/index + custom_job dataflow llm model_evaluation \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py new file mode 100644 index 0000000000..ac8c9aeb78 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# fmt: off +"""Run KFP components as [Vertex AI Custom Training Jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) with +customized worker and cloud configurations. +""" +# fmt: on + +from google_cloud_pipeline_components.preview.custom_job.component import custom_training_job as CustomTrainingJobOp +from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_from_component +from google_cloud_pipeline_components.preview.custom_job.utils import create_custom_training_job_op_from_component + +__all__ = [ + 'CustomTrainingJobOp', + 'create_custom_training_job_op_from_component', + 'create_custom_training_job_from_component', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py new file mode 100644 index 0000000000..b155e39130 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/component.py @@ -0,0 +1,108 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List + +from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components import utils +from kfp import dsl + + +# keep consistent with create_custom_training_job_from_component +@dsl.container_component +def custom_training_job( + display_name: str, + gcp_resources: dsl.OutputPath(str), + location: str = 'us-central1', + worker_pool_specs: List[Dict[str, str]] = [], + timeout: str = '604800s', + restart_job_on_worker_restart: bool = False, + service_account: str = '', + tensorboard: str = '', + enable_web_access: bool = False, + network: str = '', + reserved_ip_ranges: List[str] = [], + base_output_directory: str = '', + labels: Dict[str, str] = {}, + encryption_spec_key_name: str = '', + persistent_resource_id: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, +): + # fmt: off + """Launch a Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. 
+ + Args: + location: Location for creating the custom training job. If not set, default to us-central1. + display_name: The name of the CustomJob. + worker_pool_specs: Serialized json spec of the worker pools including machine type and Docker image. All worker pools except the first one are optional and can be skipped by providing an empty value. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#WorkerPoolSpec). + timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". + restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. + service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. + reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). + encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. + persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) + project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. 
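The `worker_pool_specs` argument above follows the CustomJob `WorkerPoolSpec` schema. A minimal single-replica sketch, with a hypothetical training image and the new `persistent_resource_id` field, assuming the exports from the `preview.custom_job` package introduced in this change:

```python
from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import CustomTrainingJobOp

# Keys mirror WorkerPoolSpec; the image, command, and resource ID are hypothetical.
WORKER_POOL_SPECS = [{
    'machine_spec': {'machine_type': 'n1-standard-4'},
    'replica_count': 1,
    'container_spec': {
        'image_uri': 'gcr.io/my-project/trainer:latest',
        'command': ['python3', 'task.py'],
        'args': ['--epochs', '10'],
    },
}]


@dsl.pipeline(name='custom-job-on-persistent-resource')
def my_pipeline():
    CustomTrainingJobOp(
        display_name='my-custom-job',
        worker_pool_specs=WORKER_POOL_SPECS,
        persistent_resource_id='my-persistent-resource',
    )
```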
+ Returns: + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. + """ + # fmt: on + return dsl.ContainerSpec( + image=_image.GCPC_IMAGE_TAG, + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.preview.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--payload', + utils.container_component_dumps({ + 'display_name': display_name, + 'job_spec': { + 'worker_pool_specs': worker_pool_specs, + 'scheduling': { + 'timeout': timeout, + 'restart_job_on_worker_restart': ( + restart_job_on_worker_restart + ), + }, + 'service_account': service_account, + 'tensorboard': tensorboard, + 'enable_web_access': enable_web_access, + 'network': network, + 'reserved_ip_ranges': reserved_ip_ranges, + 'base_output_directory': { + 'output_uri_prefix': base_output_directory + }, + 'persistent_resource_id': persistent_resource_id, + }, + 'labels': labels, + 'encryption_spec': {'kms_key_name': encryption_spec_key_name}, + }), + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + ], + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py new file mode 100644 index 0000000000..73849ed29a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py @@ -0,0 +1,282 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module for supporting Google Vertex AI Custom Training Job Op.""" + +import copy +import textwrap +from typing import Callable, Dict, List, Optional +import warnings + +from google_cloud_pipeline_components.preview.custom_job import component +from kfp import components +import yaml + +from google.protobuf import json_format + + +def _replace_executor_placeholder( + container_input: List[str], +) -> List[str]: + """Replace executor placeholder in container command or args. + + Args: + container_input: Container command or args. + + Returns: container_input with executor placeholder replaced. + """ + # Executor replacement is used as executor content needs to be jsonified before + # injection into the payload, since payload is already a JSON serialized string. 
+ EXECUTOR_INPUT_PLACEHOLDER = '{{$}}' + JSON_ESCAPED_EXECUTOR_INPUT_PLACEHOLDER = '{{$.json_escape[1]}}' + return [ + JSON_ESCAPED_EXECUTOR_INPUT_PLACEHOLDER + if cmd_part == EXECUTOR_INPUT_PLACEHOLDER + else cmd_part + for cmd_part in container_input + ] + + +# keep identical to CustomTrainingJobOp +def create_custom_training_job_from_component( + component_spec: Callable, + display_name: str = '', + replica_count: int = 1, + machine_type: str = 'n1-standard-4', + accelerator_type: str = '', + accelerator_count: int = 1, + boot_disk_type: str = 'pd-ssd', + boot_disk_size_gb: int = 100, + timeout: str = '604800s', + restart_job_on_worker_restart: bool = False, + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', + tensorboard: str = '', + enable_web_access: bool = False, + reserved_ip_ranges: Optional[List[str]] = None, + nfs_mounts: Optional[List[Dict[str, str]]] = None, + base_output_directory: str = '', + labels: Optional[Dict[str, str]] = None, + persistent_resource_id: str = '', +) -> Callable: + # fmt: off + """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. + + This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. + + Args: + component_spec: A KFP component. + display_name: The name of the CustomJob. If not provided the component's name will be used instead. + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. + boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). 
boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. + boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. + timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". + restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. + service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. + tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. + nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). + persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) 
+ + Returns: + A KFP component with CustomJob specification applied. + """ + # fmt: on + # This function constructs a Custom Job component based on the input + # component, by performing a 3-way merge of the inputs/outputs of the + # input component, the Custom Job component and the arguments given to this + # function. + # + # It first retrieves the PipelineSpec (as a Python dict) for each of the two + # components (the input component and the Custom Job component). + # Note: The advantage of using the PipelineSpec here is that the + # placeholders are (mostly) serialized, so there is less processing + # needed (and avoids unnecessary dependency on KFP internals). + # + # The arguments to this function are first inserted into each input parameter + # of the Custom Job component as a default value (which will be used at + # runtime, unless when overridden by specifying the input). + # One particular input parameter that needs detailed construction is the + # worker_pool_spec, before being inserted into the Custom Job component. + # + # After inserting the arguments into the Custom Job input parameters as + # default values, the input/output parameters from the input component are + # then merged with the Custom Job input/output parameters. Preference is given + # to Custom Job input parameters to make sure they are not overridden (which + # follows the same logic as the original version). + # + # It is assumed that the Custom Job component itself has no input/output + # artifacts, so the artifacts from the input component needs no merging. + # (There is a unit test to make sure this is the case, otherwise merging of + # artifacts need to be done here.) + # + # Once the above is done, and the dict of the Custom Job is converted back + # into a KFP component (by first converting to YAML, then using + # load_component_from_text to load the YAML). + # After adding the appropriate description and the name, the new component + # is returned. 
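For orientation, a minimal sketch of how this wrapper is typically invoked; the component body and the persistent resource ID are hypothetical, and the import path follows the package `__init__.py` added in this change:

```python
from kfp import dsl
from google_cloud_pipeline_components.preview.custom_job import create_custom_training_job_from_component


@dsl.component
def trainer(epochs: int) -> str:
    # Hypothetical training logic.
    return f'trained for {epochs} epochs'


custom_trainer = create_custom_training_job_from_component(
    trainer,
    machine_type='n1-standard-8',
    replica_count=2,
    persistent_resource_id='my-persistent-resource',
)


@dsl.pipeline(name='train-on-persistent-resource')
def my_pipeline():
    # The wrapped task keeps the original component's inputs and gains the CustomJob parameters.
    custom_trainer(epochs=10)
```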
+ + cj_pipeline_spec = json_format.MessageToDict( + component.custom_training_job.pipeline_spec + ) + user_pipeline_spec = json_format.MessageToDict(component_spec.pipeline_spec) + + user_component_container = list( + user_pipeline_spec['deploymentSpec']['executors'].values() + )[0]['container'] + + worker_pool_spec = { + 'machine_spec': {'machine_type': machine_type}, + 'replica_count': 1, + 'container_spec': { + 'image_uri': user_component_container['image'], + 'command': _replace_executor_placeholder( + user_component_container.get('command', []) + ), + 'args': _replace_executor_placeholder( + user_component_container.get('args', []) + ), + }, + } + if accelerator_type: + worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type + worker_pool_spec['machine_spec']['accelerator_count'] = accelerator_count + if boot_disk_type: + worker_pool_spec['disk_spec'] = { + 'boot_disk_type': boot_disk_type, + 'boot_disk_size_gb': boot_disk_size_gb, + } + if nfs_mounts: + worker_pool_spec['nfs_mounts'] = nfs_mounts + + worker_pool_specs = [worker_pool_spec] + + if int(replica_count) > 1: + additional_worker_pool_spec = copy.deepcopy(worker_pool_spec) + additional_worker_pool_spec['replica_count'] = replica_count - 1 + worker_pool_specs.append(additional_worker_pool_spec) + + # get the component spec for both components + cj_component_spec_key = list(cj_pipeline_spec['components'].keys())[0] + cj_component_spec = cj_pipeline_spec['components'][cj_component_spec_key] + + user_component_spec_key = list(user_pipeline_spec['components'].keys())[0] + user_component_spec = user_pipeline_spec['components'][ + user_component_spec_key + ] + + # add custom job defaults based on user-provided args + custom_job_param_defaults = { + 'display_name': display_name or component_spec.component_spec.name, + 'worker_pool_specs': worker_pool_specs, + 'timeout': timeout, + 'restart_job_on_worker_restart': restart_job_on_worker_restart, + 'service_account': service_account, + 'tensorboard': tensorboard, + 'enable_web_access': enable_web_access, + 'network': network, + 'reserved_ip_ranges': reserved_ip_ranges or [], + 'base_output_directory': base_output_directory, + 'labels': labels or {}, + 'encryption_spec_key_name': encryption_spec_key_name, + 'persistent_resource_id': persistent_resource_id, + } + + for param_name, default_value in custom_job_param_defaults.items(): + cj_component_spec['inputDefinitions']['parameters'][param_name][ + 'defaultValue' + ] = default_value + + # merge parameters from user component into the customjob component + cj_component_spec['inputDefinitions']['parameters'].update( + user_component_spec.get('inputDefinitions', {}).get('parameters', {}) + ) + cj_component_spec['outputDefinitions']['parameters'].update( + user_component_spec.get('outputDefinitions', {}).get('parameters', {}) + ) + # use artifacts from user component + ## assign artifacts, not update, since customjob has no artifact outputs + cj_component_spec['inputDefinitions']['artifacts'] = user_component_spec.get( + 'inputDefinitions', {} + ).get('artifacts', {}) + cj_component_spec['outputDefinitions']['artifacts'] = user_component_spec.get( + 'outputDefinitions', {} + ).get('artifacts', {}) + + # copy the input definitions to the root, which will have an identical interface for a single-step pipeline + cj_pipeline_spec['root']['inputDefinitions'] = copy.deepcopy( + cj_component_spec['inputDefinitions'] + ) + cj_pipeline_spec['root']['outputDefinitions'] = copy.deepcopy( + cj_component_spec['outputDefinitions'] + ) + + 
# update the customjob task with the user inputs + cj_task_key = list(cj_pipeline_spec['root']['dag']['tasks'].keys())[0] + user_task_key = list(user_pipeline_spec['root']['dag']['tasks'].keys())[0] + + cj_pipeline_spec['root']['dag']['tasks'][cj_task_key]['inputs'].update( + user_pipeline_spec['root']['dag']['tasks'][user_task_key].get( + 'inputs', {} + ) + ) + + # reload the pipelinespec as a component using KFP + new_component = components.load_component_from_text( + yaml.safe_dump(cj_pipeline_spec) + ) + + # Copy the component name and description + # TODO(b/262360354): The inner .component_spec.name is needed here as that is + # the name that is retrieved by the FE for display. Can simply reference the + # outer .name once setter is implemented. + new_component.component_spec.name = component_spec.component_spec.name + + if component_spec.description: + component_description = textwrap.dedent(f""" + A CustomJob that wraps {component_spec.component_spec.name}. + + Original component description: + {component_spec.description} + + Custom Job wrapper description: + {component.custom_training_job.description} + """) + new_component.description = component_description + + return new_component + + +def create_custom_training_job_op_from_component(*args, **kwargs) -> Callable: + """Deprecated. + + Please use create_custom_training_job_from_component instead. + """ + + warnings.warn( + f'{create_custom_training_job_op_from_component.__name__!r} is' + ' deprecated. Please use' + f' {create_custom_training_job_from_component.__name__!r} instead.', + DeprecationWarning, + ) + + return create_custom_training_job_from_component(*args, **kwargs) From a13e1430387566e183ca6610a14784b48d323142 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 14 Sep 2023 13:02:30 -0700 Subject: [PATCH 153/253] test: fix sdk execution test (#9988) * fix sdk execution test * fix missing key * fix state * fix state * fix capitalization --- sdk/python/test_data/test_data_config.yaml | 1 + test/sdk-execution-tests/sdk_execution_tests.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/test_data/test_data_config.yaml b/sdk/python/test_data/test_data_config.yaml index b40267f35c..42e12c7c79 100644 --- a/sdk/python/test_data/test_data_config.yaml +++ b/sdk/python/test_data/test_data_config.yaml @@ -239,6 +239,7 @@ components: execute: false - module: component_with_metadata_fields name: dataset_joiner + execute: false - module: component_with_task_final_status name: exit_comp execute: false diff --git a/test/sdk-execution-tests/sdk_execution_tests.py b/test/sdk-execution-tests/sdk_execution_tests.py index 5e3bbba19d..1613cb627b 100644 --- a/test/sdk-execution-tests/sdk_execution_tests.py +++ b/test/sdk-execution-tests/sdk_execution_tests.py @@ -65,7 +65,7 @@ def create_test_case_parameters() -> List[TestCase]: return parameters -def wait(run_result: client.client.RunPipelineResult) -> kfp_server_api.ApiRun: +def wait(run_result: client.client.RunPipelineResult) -> kfp_server_api.V2beta1Run: return kfp_client.wait_for_run_completion( run_id=run_result.run_id, timeout=int(TIMEOUT_SECONDS)) @@ -122,4 +122,4 @@ async def test(test_case: TestCase, mocker) -> None: f'Error triggering pipeline {test_case.name}.') from e api_run = await event_loop.run_in_executor(None, wait, run_result) - assert api_run.run.status == 'Succeeded', f'Pipeline {test_case.name} ended with incorrect status: {api_run.run.status}. 
More info: {run_url}' + assert api_run.state == 'SUCCEEDED', f'Pipeline {test_case.name} ended with incorrect status: {api_run.state}. More info: {run_url}' From ad058b5321f5fe74bbd10845778e6627fb9aa5cb Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 14 Sep 2023 15:54:28 -0700 Subject: [PATCH 154/253] chore(components): bump KFP SDK version number in GCPC PiperOrigin-RevId: 565500642 --- components/google-cloud/RELEASE.md | 1 + components/google-cloud/setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 3b80a02f43..21cd1a3136 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -3,6 +3,7 @@ * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` * Add feature_selection_pipeline to preview.automl.tabular. +* Bump supported KFP versions to kfp>=2.0.0b10,<=2.2.0 ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index fa530e0cf2..adfc020872 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -82,7 +82,7 @@ # Pin google-api-core version for the bug fixing in 1.31.5 # https://github.com/googleapis/python-api-core/releases/tag/v1.31.5 "google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "kfp>=2.0.0b10,<=2.1.3", + "kfp>=2.0.0b10,<=2.2.0", "google-cloud-aiplatform>=1.14.0,<2", "Jinja2==3.1.2", ], From e307545e689516c1249d1211e4131db49a346ffd Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 15 Sep 2023 11:32:08 -0700 Subject: [PATCH 155/253] feat(components): Implement `create_templated_custom_job` for Templated Custom Job Launcher PiperOrigin-RevId: 565733608 --- .../llm/templated_custom_job/remote_runner.py | 88 +++++++++++++++++-- 1 file changed, 83 insertions(+), 5 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py index d8b81f28cd..3423181124 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/remote_runner.py @@ -13,14 +13,20 @@ # limitations under the License. """GCP remote runner for templated custom jobs based on the AI Platform SDK.""" +import base64 import json +import logging +import sys from typing import Any, Callable, Dict, List, Optional +import google_cloud_pipeline_components.google_cloud_pipeline_components.container.v1.custom_job.remote_runner as custom_job_remote_runner import jinja2 from jinja2 import sandbox # Note that type annotations need to match the python version in the GCPC docker # image in addition to the internal python version. 
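The helpers this patch adds below turn repeated `(key, [value])` command-line pairs into typed template parameters and then render a base64-encoded Jinja2 payload into the final Custom Job spec. As a rough, self-contained sketch of that rendering flow, with the template and parameter names invented for illustration (the real `render_payload` lives elsewhere in this module and also installs custom filters such as JSON escaping):

    import base64
    import json

    from jinja2 import sandbox

    # Hypothetical template and parameters, for illustration only.
    template = '{"displayName": "{{ display_name }}", "replicaCount": {{ replicas }}}'
    encoded = base64.b64encode(template.encode('utf-8')).decode('utf-8')
    params = {'display_name': 'demo-job', 'replicas': 2}

    # Decode and render in a sandboxed Jinja2 environment, then normalize the
    # JSON, roughly mirroring decode_and_render_payload and
    # create_templated_custom_job added below.
    env = sandbox.ImmutableSandboxedEnvironment()
    rendered = env.from_string(base64.b64decode(encoded).decode('utf-8')).render(**params)
    payload_str = json.dumps(json.loads(rendered))
    print(payload_str)  # {"displayName": "demo-job", "replicaCount": 2}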
+ParamListType = Optional[List[List[str]]] + def _json_escape_filter(value: str) -> str: """A Jinja2 filter for JSON escaping.""" @@ -83,7 +89,7 @@ def convert_key_value_param_list( def convert_integer_params( - integer_params: Optional[List[List[str]]], + integer_params: ParamListType, ) -> Dict[str, Optional[int]]: """Converts a list of (key, [integer]) pairs to a dictionary.""" return convert_key_value_param_list( @@ -92,7 +98,7 @@ def convert_integer_params( def convert_string_params( - string_params: Optional[List[List[str]]], + string_params: ParamListType, ) -> Dict[str, Optional[str]]: """Converts a list of (key, [string]) pairs to a dictionary.""" return convert_key_value_param_list( @@ -101,7 +107,7 @@ def convert_string_params( def convert_float_params( - float_params: Optional[List[List[str]]], + float_params: ParamListType, ) -> Dict[str, Optional[float]]: """Converts a list of (key, [float]) pairs to a dictionary.""" return convert_key_value_param_list( @@ -110,7 +116,7 @@ def convert_float_params( def convert_boolean_params( - boolean_params: Optional[List[List[str]]], + boolean_params: ParamListType, ) -> Dict[str, Optional[bool]]: """Converts a list of (key, [boolean]) pairs to a dictionary.""" return convert_key_value_param_list( @@ -121,9 +127,81 @@ def convert_boolean_params( def convert_json_params( - json_params: Optional[List[List[str]]], + json_params: ParamListType, ) -> Dict[str, Any]: """Converts a list of (key, [json objects]) pairs to a dictionary.""" return convert_key_value_param_list( param_list=json_params, type_cast=json.loads, cmd_flag='--set_json' ) + + +# This method will also be used for unit tests. +def decode_and_render_payload( + payload: str, + int_params: ParamListType = None, + string_params: ParamListType = None, + float_params: ParamListType = None, + boolean_params: ParamListType = None, + json_params: ParamListType = None, +) -> str: + """Decodes base64-encoded Jinja2 payload and renders it.""" + params = convert_integer_params(int_params) + params.update(convert_string_params(string_params)) + params.update(convert_float_params(float_params)) + params.update(convert_boolean_params(boolean_params)) + params.update(convert_json_params(json_params)) + + return render_payload(base64.b64decode(payload).decode('utf-8'), params) + + +def create_templated_custom_job( + type: str, # pylint: disable=redefined-builtin + project: str, + location: str, + payload: str, + gcp_resources: str, + dry_run: bool = False, + set_integer: ParamListType = None, + set_string: ParamListType = None, + set_float: ParamListType = None, + set_boolean: ParamListType = None, + set_json: ParamListType = None, +) -> None: + """Creates and polls a Custom Job.""" + rendered_payload = decode_and_render_payload( + payload=payload, + int_params=set_integer, + string_params=set_string, + float_params=set_float, + boolean_params=set_boolean, + json_params=set_json, + ) + + # Call json.loads() to validate that the payload is a valid JSON. + # Call json.dumps() instead of using rendered_payload to remove redundant + # blank spaces in the payload. + try: + payload_str = json.dumps(json.loads(rendered_payload)) + except json.JSONDecodeError as e: + logging.error( + 'Cannot deserialize the rendered payload to JSON: %r', rendered_payload + ) + raise ValueError( + 'The rendered payload is an invalid JSON. Please see the error log for ' + 'details.' 
+ ) from e + + if dry_run: + logging.info( + 'Log rendered payload for dry run and exit with error code 1: %s', + payload_str, + ) + sys.exit(1) + + custom_job_remote_runner.create_custom_job( + type=type, + project=project, + location=location, + payload=payload_str, + gcp_resources=gcp_resources, + ) From e137ae7faccad207fedbeeff80f8502e49a1fbc5 Mon Sep 17 00:00:00 2001 From: axel7083 <42176370+axel7083@users.noreply.github.com> Date: Fri, 15 Sep 2023 22:43:30 +0200 Subject: [PATCH 156/253] Fix(frontend): content is not available (#9720) * Using display name instead of taskname * Creating dedicated function for extracting task label * Propagating fix to side panel * Adding backward compatibility and unit test * running prettier Signed-off-by: STEFANINI Axel (INTERN) <42176370+axel7083@users.noreply.github.com> --------- Signed-off-by: STEFANINI Axel (INTERN) <42176370+axel7083@users.noreply.github.com> --- frontend/src/lib/v2/DynamicFlow.test.ts | 19 ++++++++++++++++ frontend/src/lib/v2/DynamicFlow.ts | 29 +++++++++++++++++-------- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/frontend/src/lib/v2/DynamicFlow.test.ts b/frontend/src/lib/v2/DynamicFlow.test.ts index c35f38de95..feaad8f386 100644 --- a/frontend/src/lib/v2/DynamicFlow.test.ts +++ b/frontend/src/lib/v2/DynamicFlow.test.ts @@ -142,6 +142,25 @@ describe('DynamicFlow', () => { expect(nodeMlmdInfo).toEqual({ execution }); }); + it('execution found with custom name', () => { + const label = 'custom-label'; + const elem: Node = { + id: 'task.exec', + data: { + label: label, + mlmdId: 1, + }, + type: NodeTypeNames.EXECUTION, + position: { x: 1, y: 2 }, + }; + + const execution = new Execution(); + execution.setId(1); + execution.getCustomPropertiesMap().set(TASK_NAME_KEY, new Value().setStringValue(label)); + const nodeMlmdInfo = getNodeMlmdInfo(elem, [execution], [], []); + expect(nodeMlmdInfo).toEqual({ execution }); + }); + it('artifact not exist', () => { const elem: Node = { id: 'artifact.exec.arti', diff --git a/frontend/src/lib/v2/DynamicFlow.ts b/frontend/src/lib/v2/DynamicFlow.ts index 9c2736b687..fc44ccd199 100644 --- a/frontend/src/lib/v2/DynamicFlow.ts +++ b/frontend/src/lib/v2/DynamicFlow.ts @@ -263,12 +263,14 @@ export function updateFlowElementsState( } return flowGraph; } - for (let elem of elems) { let updatedElem = Object.assign({}, elem); if (NodeTypeNames.EXECUTION === elem.type) { - const taskName = getTaskKeyFromNodeKey(elem.id); - const executions = getExecutionsUnderDAG(taskNameToExecution, taskName, executionLayers); + const executions = getExecutionsUnderDAG( + taskNameToExecution, + getTaskLabelByPipelineFlowElement(elem), + executionLayers, + ); if (executions) { (updatedElem.data as ExecutionFlowElementData).state = executions[0]?.getLastKnownState(); (updatedElem.data as ExecutionFlowElementData).mlmdId = executions[0]?.getId(); @@ -291,8 +293,11 @@ export function updateFlowElementsState( (updatedElem.data as ArtifactFlowElementData).mlmdId = linkedArtifact?.artifact?.getId(); } else if (NodeTypeNames.SUB_DAG === elem.type) { // TODO: Update sub-dag state based on future design. 
- const taskName = getTaskKeyFromNodeKey(elem.id); - const executions = getExecutionsUnderDAG(taskNameToExecution, taskName, executionLayers); + const executions = getExecutionsUnderDAG( + taskNameToExecution, + getTaskLabelByPipelineFlowElement(elem), + executionLayers, + ); if (executions) { (updatedElem.data as SubDagFlowElementData).state = executions[0]?.getLastKnownState(); (updatedElem.data as SubDagFlowElementData).mlmdId = executions[0]?.getId(); @@ -303,6 +308,12 @@ export function updateFlowElementsState( return flowGraph; } +function getTaskLabelByPipelineFlowElement(elem: PipelineFlowElement) { + const taskLabel = elem.data?.label; + if (taskLabel === undefined) return getTaskKeyFromNodeKey(elem.id); + return taskLabel; +} + function getExecutionsUnderDAG( taskNameToExecution: Map, taskName: string, @@ -337,9 +348,9 @@ export function getNodeMlmdInfo( ); if (NodeTypeNames.EXECUTION === elem.type) { - const taskName = getTaskKeyFromNodeKey(elem.id); + const taskLabel = getTaskLabelByPipelineFlowElement(elem); const executions = taskNameToExecution - .get(taskName) + .get(taskLabel) ?.filter(exec => exec.getId() === elem.data?.mlmdId); return executions ? { execution: executions[0] } : {}; } else if (NodeTypeNames.ARTIFACT === elem.type) { @@ -361,9 +372,9 @@ export function getNodeMlmdInfo( return { execution, linkedArtifact }; } else if (NodeTypeNames.SUB_DAG === elem.type) { // TODO: Update sub-dag state based on future design. - const taskName = getTaskKeyFromNodeKey(elem.id); + const taskLabel = getTaskLabelByPipelineFlowElement(elem); const executions = taskNameToExecution - .get(taskName) + .get(taskLabel) ?.filter(exec => exec.getId() === elem.data?.mlmdId); return executions ? { execution: executions[0] } : {}; } From f8f01bcd08ba30bb8ac902843468984fdc662033 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 15 Sep 2023 14:53:31 -0700 Subject: [PATCH 157/253] feat(components): Add main entry and command-line flags for Templated Custom Job Launcher PiperOrigin-RevId: 565785651 --- .../llm/templated_custom_job/launcher.py | 139 ++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/launcher.py diff --git a/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/launcher.py b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/launcher.py new file mode 100644 index 0000000000..6fa0a0deb9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/_implementation/llm/templated_custom_job/launcher.py @@ -0,0 +1,139 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Jinja2-based templated launcher for custom jobs. + +Note: Whenever possible, please prefer the vanilla custom_job launcher instead +of this Jinja2-based templated custom job launcher. 
This launcher does not take +advantage of the Vertex Pipeline backend optimization and will thus launch a +Custom Job that runs another Custom Job. +""" + +import argparse +import logging +import sys +from typing import Any, Dict, List + +from google_cloud_pipeline_components.container._implementation.llm.templated_custom_job import remote_runner +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import parser_util + + +def _parse_args(args: List[str]) -> Dict[str, Any]: + """Parse command line arguments.""" + parser, _ = parser_util.parse_default_args(args) + parser.add_argument( + '--dry_run', + dest='dry_run', + action='store_true', + help=( + 'If set, log the rendered payload for the Custom Job and exit with ' + 'error code 1' + ), + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--set_integer', + dest='set_integer', + action='append', + nargs='+', + help='(key, [value]) pairs. If `value` is missing, the value is None', + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--set_string', + dest='set_string', + action='append', + nargs='+', + help='(key, [value]) pairs. If `value` is missing, the value is None', + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--set_float', + dest='set_float', + action='append', + nargs='+', + help='(key, [value]) pairs. If `value` is missing, the value is None', + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--set_boolean', + dest='set_boolean', + action='append', + nargs='+', + help='(key, [value]) pairs. If `value` is missing, the value is None', + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--set_json', + dest='set_json', + action='append', + nargs='+', + help='(key, [value]) pairs. If `value` is missing, the value is None', + default=argparse.SUPPRESS, + ) + parsed_args, _ = parser.parse_known_args(args) + return vars(parsed_args) + + +def main(argv: List[str]) -> None: + """Main entry. + + Expected input args are as follows: + project - Required. The project of which the resource will be launched. + location - Required. The region of which the resource will be launched. + type - Required. GCPC launcher is a single container. This Enum will specify + which resource to be launched. + payload - Required. The base64-encoded Jinja2 payload that will be rendered + to a full serialized JSON of the resource spec. This payload normally + doesn't contain Pipeline Placehoders. + gcp_resources - Required. Placeholder output for returning job_id. + dry_run - If set, log the rendered payload for the Custom Job and exit with + error code 1. + set_integer - A list of `(key, [value])` pairs of strings that'll be used to + render the payload. The `value` will be cast to an integer. If `value` is + missing, it'll be `None`. Note that `value` can contain Pipeline + Placeholders. + set_string - A list of (key, [value]) pairs of strings that'll be used to + render the payload. If `value` is missing, it'll be `None`. Note that + `value` can contain Pipeline Placeholders. + set_float - A list of `(key, [value])` pairs of strings that'll be used to + render the payload. The `value` will be cast to a float number. If `value` + is missing, it'll be `None`. Note that `value` can contain Pipeline + Placeholders. + set_boolean - A list of `(key, [value])` pairs of strings that'll be used to + render the payload. The `value` will be cast to a boolean value. If + `value` is missing, it'll be `None`. Note that `value` can contain + Pipeline Placeholders. 
+ set_json - A list of `(key, [value])` pairs of strings that'll be used to + render the payload. The `value` will be cast to an object by calling + `json.loads()`. If `value` is missing, it'll be `None`. Note that `value` + can contain Pipeline Placeholders. + + Args: + argv: A list of system arguments. + """ + parsed_args = _parse_args(argv) + logging.basicConfig( + format='[%(asctime)s] [%(levelname)s]: %(message)s', level=logging.INFO + ) + job_type = parsed_args['type'] + if job_type != 'TemplatedCustomJob': + raise ValueError('Incorrect job type: ' + job_type) + + logging.info('Job started for type: %s', job_type) + + remote_runner.create_templated_custom_job(**parsed_args) + + +if __name__ == '__main__': + main(sys.argv[1:]) From d001b8055fa7fd11ec6c1a8b94ea4f16ddfc481e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 15 Sep 2023 15:00:30 -0700 Subject: [PATCH 158/253] chore(sdk): refactor python component executor (#9990) --- sdk/python/kfp/dsl/executor.py | 291 +++++++++++++++++---------------- 1 file changed, 147 insertions(+), 144 deletions(-) diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp/dsl/executor.py index 63fcbb039d..7429c0de2b 100644 --- a/sdk/python/kfp/dsl/executor.py +++ b/sdk/python/kfp/dsl/executor.py @@ -14,69 +14,76 @@ import inspect import json import os +import re from typing import Any, Callable, Dict, List, Optional, Union +from kfp import dsl from kfp.dsl import task_final_status from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations -class Executor(): - """Executor executes v2-based Python function components.""" +class Executor: + """Executor executes Python function components.""" def __init__( - self, executor_input: Dict, + self, + executor_input: Dict, function_to_execute: Union[Callable, - 'python_component.PythonComponent']): + 'python_component.PythonComponent'], + ): + if hasattr(function_to_execute, 'python_func'): - self._func = function_to_execute.python_func + self.func = function_to_execute.python_func else: - self._func = function_to_execute + self.func = function_to_execute - self._input = executor_input - self._input_artifacts: Dict[str, - Union[artifact_types.Artifact, - List[artifact_types.Artifact]]] = {} - self._output_artifacts: Dict[str, artifact_types.Artifact] = {} + self.executor_input = executor_input + self.input_artifacts: Dict[str, Union[dsl.Artifact, + List[dsl.Artifact]]] = {} + self.output_artifacts: Dict[str, dsl.Artifact] = {} + self.assign_input_and_output_artifacts() - for name, artifacts in self._input.get('inputs', - {}).get('artifacts', {}).items(): + self.return_annotation = inspect.signature(self.func).return_annotation + self.excutor_output = {} + + def assign_input_and_output_artifacts(self) -> None: + for name, artifacts in self.executor_input.get('inputs', + {}).get('artifacts', + {}).items(): list_of_artifact_proto_structs = artifacts.get('artifacts') if list_of_artifact_proto_structs: - annotation = self._func.__annotations__[name] + annotation = self.func.__annotations__[name] # InputPath has no attribute __origin__ and also should be handled as a single artifact if type_annotations.is_Input_Output_artifact_annotation( annotation) and type_annotations.is_list_of_artifacts( annotation.__origin__): - self._input_artifacts[name] = [ + self.input_artifacts[name] = [ self.make_artifact( msg, name, - self._func, + self.func, ) for msg in list_of_artifact_proto_structs ] else: - self._input_artifacts[name] = self.make_artifact( + self.input_artifacts[name] = 
self.make_artifact( list_of_artifact_proto_structs[0], name, - self._func, + self.func, ) - for name, artifacts in self._input.get('outputs', - {}).get('artifacts', {}).items(): + for name, artifacts in self.executor_input.get('outputs', + {}).get('artifacts', + {}).items(): list_of_artifact_proto_structs = artifacts.get('artifacts') if list_of_artifact_proto_structs: output_artifact = self.make_artifact( list_of_artifact_proto_structs[0], name, - self._func, + self.func, ) - self._output_artifacts[name] = output_artifact - self.makedirs_recursively(output_artifact.path) - - self._return_annotation = inspect.signature( - self._func).return_annotation - self._executor_output = {} + self.output_artifacts[name] = output_artifact + makedirs_recursively(output_artifact.path) def make_artifact( self, @@ -99,56 +106,51 @@ def make_artifact( return create_artifact_instance( runtime_artifact, artifact_cls=artifact_cls) - def makedirs_recursively(self, path: str) -> None: - os.makedirs(os.path.dirname(path), exist_ok=True) + def get_input_artifact(self, name: str) -> Optional[dsl.Artifact]: + return self.input_artifacts.get(name) - def _get_input_artifact(self, name: str): - return self._input_artifacts.get(name) + def get_output_artifact(self, name: str) -> Optional[dsl.Artifact]: + return self.output_artifacts.get(name) - def _get_output_artifact(self, name: str): - return self._output_artifacts.get(name) - - def _get_input_parameter_value(self, parameter_name: str): - parameter_values = self._input.get('inputs', - {}).get('parameterValues', None) + def get_input_parameter_value(self, parameter_name: str) -> Optional[str]: + parameter_values = self.executor_input.get('inputs', {}).get( + 'parameterValues', None) if parameter_values is not None: return parameter_values.get(parameter_name, None) return None - def _get_output_parameter_path(self, parameter_name: str): - parameter = self._input.get('outputs', - {}).get('parameters', - {}).get(parameter_name, None) + def get_output_parameter_path(self, parameter_name: str) -> Optional[str]: + parameter = self.executor_input.get('outputs', {}).get( + 'parameters', {}).get(parameter_name, None) if parameter is None: return None - import os path = parameter.get('outputFile', None) if path: os.makedirs(os.path.dirname(path), exist_ok=True) return path - def _get_output_artifact_path(self, artifact_name: str): - output_artifact = self._output_artifacts.get(artifact_name) + def get_output_artifact_path(self, artifact_name: str) -> str: + output_artifact = self.output_artifacts.get(artifact_name) if not output_artifact: raise ValueError( f'Failed to get output artifact path for artifact name {artifact_name}' ) return output_artifact.path - def _get_input_artifact_path(self, artifact_name: str): - input_artifact = self._input_artifacts.get(artifact_name) + def get_input_artifact_path(self, artifact_name: str) -> str: + input_artifact = self.input_artifacts.get(artifact_name) if not input_artifact: raise ValueError( f'Failed to get input artifact path for artifact name {artifact_name}' ) return input_artifact.path - def _write_output_parameter_value(self, name: str, - value: Union[str, int, float, bool, dict, - list, Dict, List]): + def write_output_parameter_value( + self, name: str, value: Union[str, int, float, bool, dict, list, + Dict, List]) -> None: if isinstance(value, (float, int)): output = str(value) elif isinstance(value, str): @@ -161,66 +163,19 @@ def _write_output_parameter_value(self, name: str, f'Unable to serialize unknown type `{value}` for 
parameter input with value `{type(value)}`' ) - if not self._executor_output.get('parameterValues'): - self._executor_output['parameterValues'] = {} + if not self.excutor_output.get('parameterValues'): + self.excutor_output['parameterValues'] = {} - self._executor_output['parameterValues'][name] = value + self.excutor_output['parameterValues'][name] = value - def _write_output_artifact_payload(self, name: str, value: Any): - path = self._get_output_artifact_path(name) + def write_output_artifact_payload(self, name: str, value: Any) -> None: + path = self.get_output_artifact_path(name) with open(path, 'w') as f: f.write(str(value)) - # TODO: extract to a util - @classmethod - def _get_short_type_name(cls, type_name: str) -> str: - """Extracts the short form type name. - - This method is used for looking up serializer for a given type. - - For example: - typing.List -> List - typing.List[int] -> List - typing.Dict[str, str] -> Dict - List -> List - str -> str - - Args: - type_name: The original type name. - - Returns: - The short form type name or the original name if pattern doesn't match. - """ - import re - match = re.match('(typing\.)?(?P\w+)(?:\[.+\])?', type_name) - return match.group('type') if match else type_name - - # TODO: merge with type_utils.is_parameter_type - @classmethod - def _is_parameter(cls, annotation: Any) -> bool: - if type(annotation) == type: - return annotation in [str, int, float, bool, dict, list] - - # Annotation could be, for instance `typing.Dict[str, str]`, etc. - return cls._get_short_type_name(str(annotation)) in ['Dict', 'List'] - - @classmethod - def _is_artifact(cls, annotation: Any) -> bool: - if type(annotation) == type: - return type_annotations.is_artifact_class(annotation) - return False - - @classmethod - def _is_named_tuple(cls, annotation: Any) -> bool: - if type(annotation) == type: - return issubclass(annotation, tuple) and hasattr( - annotation, '_fields') and hasattr(annotation, - '__annotations__') - return False - - def _handle_single_return_value(self, output_name: str, - annotation_type: Any, return_value: Any): - if self._is_parameter(annotation_type): + def handle_single_return_value(self, output_name: str, annotation_type: Any, + return_value: Any) -> None: + if is_parameter(annotation_type): origin_type = getattr(annotation_type, '__origin__', None) or annotation_type # relax float-typed return to allow both int and float. @@ -231,19 +186,19 @@ def _handle_single_return_value(self, output_name: str, accepted_types = origin_type if not isinstance(return_value, accepted_types): raise ValueError( - f'Function `{self._func.__name__}` returned value of type {type(return_value)}; want type {origin_type}' + f'Function `{self.func.__name__}` returned value of type {type(return_value)}; want type {origin_type}' ) - self._write_output_parameter_value(output_name, return_value) - elif self._is_artifact(annotation_type): - self._write_output_artifact_payload(output_name, return_value) + self.write_output_parameter_value(output_name, return_value) + elif is_artifact(annotation_type): + self.write_output_artifact_payload(output_name, return_value) else: raise RuntimeError( f'Unknown return type: {annotation_type}. 
Must be one of the supported data types: https://www.kubeflow.org/docs/components/pipelines/v2/data-types/' ) - def _write_executor_output(self, - func_output: Optional[Any] = None - ) -> Optional[str]: + def write_executor_output(self, + func_output: Optional[Any] = None + ) -> Optional[str]: """Writes executor output containing the Python function output. The executor output file will not be written if this code is executed from a non-chief node in a mirrored execution strategy. @@ -254,10 +209,10 @@ def _write_executor_output(self, Returns: Optional[str]: Returns the location of the executor_output file as a string if the file is written. Else, None. """ - if self._output_artifacts: - self._executor_output['artifacts'] = {} + if self.output_artifacts: + self.excutor_output['artifacts'] = {} - for name, artifact in self._output_artifacts.items(): + for name, artifact in self.output_artifacts.items(): runtime_artifact = { 'name': artifact.name, 'uri': artifact.uri, @@ -265,32 +220,32 @@ def _write_executor_output(self, } artifacts_list = {'artifacts': [runtime_artifact]} - self._executor_output['artifacts'][name] = artifacts_list + self.excutor_output['artifacts'][name] = artifacts_list if func_output is not None: - if self._is_parameter(self._return_annotation) or self._is_artifact( - self._return_annotation): + if is_parameter(self.return_annotation) or is_artifact( + self.return_annotation): # Note: single output is named `Output` in component.yaml. - self._handle_single_return_value('Output', - self._return_annotation, - func_output) - elif self._is_named_tuple(self._return_annotation): - if len(self._return_annotation._fields) != len(func_output): + self.handle_single_return_value('Output', + self.return_annotation, + func_output) + elif is_named_tuple(self.return_annotation): + if len(self.return_annotation._fields) != len(func_output): raise RuntimeError( - f'Expected {len(self._return_annotation._fields)} return values from function `{self._func.__name__}`, got {len(func_output)}' + f'Expected {len(self.return_annotation._fields)} return values from function `{self.func.__name__}`, got {len(func_output)}' ) - for i in range(len(self._return_annotation._fields)): - field = self._return_annotation._fields[i] - field_type = self._return_annotation.__annotations__[field] + for i in range(len(self.return_annotation._fields)): + field = self.return_annotation._fields[i] + field_type = self.return_annotation.__annotations__[field] if type(func_output) == tuple: field_value = func_output[i] else: field_value = getattr(func_output, field) - self._handle_single_return_value(field, field_type, - field_value) + self.handle_single_return_value(field, field_type, + field_value) else: raise RuntimeError( - f'Unknown return type: {self._return_annotation}. Must be one of `str`, `int`, `float`, a subclass of `Artifact`, or a NamedTuple collection of these types.' + f'Unknown return type: {self.return_annotation}. Must be one of `str`, `int`, `float`, a subclass of `Artifact`, or a NamedTuple collection of these types.' ) # This check is to ensure only one worker (in a mirrored, distributed training/compute strategy) attempts to write to the same executor output file at the same time using gcsfuse, which enforces immutability of files. 
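When a component function returns a NamedTuple, the branch above walks its fields and hands each one to `handle_single_return_value`, so every field becomes an individually named output. A condensed, standalone sketch of that fan-out, assuming a hypothetical `Outputs` tuple (the real executor also type-checks each value and can write artifact payloads instead of parameters):

    from typing import NamedTuple

    class Outputs(NamedTuple):
        accuracy: float
        model_name: str

    func_output = Outputs(accuracy=0.92, model_name='demo-model')

    # Each NamedTuple field becomes one named output parameter in the
    # executor output, keyed by the field name.
    executor_output = {'parameterValues': {}}
    for field in Outputs._fields:
        executor_output['parameterValues'][field] = getattr(func_output, field)

    print(executor_output)
    # {'parameterValues': {'accuracy': 0.92, 'model_name': 'demo-model'}}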
@@ -304,10 +259,10 @@ def _write_executor_output(self, write_file = cluster_spec['task']['type'] in CHIEF_NODE_LABELS if write_file: - executor_output_path = self._input['outputs']['outputFile'] + executor_output_path = self.executor_input['outputs']['outputFile'] os.makedirs(os.path.dirname(executor_output_path), exist_ok=True) with open(executor_output_path, 'w') as f: - f.write(json.dumps(self._executor_output)) + f.write(json.dumps(self.excutor_output)) return executor_output_path return None @@ -320,7 +275,7 @@ def execute(self) -> Optional[str]: Returns: Optional[str]: Returns the location of the executor_output file as a string if the file is written. Else, None. """ - annotations = inspect.getfullargspec(self._func).annotations + annotations = inspect.getfullargspec(self.func).annotations # Function arguments. func_kwargs = {} @@ -335,7 +290,7 @@ def execute(self) -> Optional[str]: v = type_annotations.maybe_strip_optional_from_annotation(v) if v == task_final_status.PipelineTaskFinalStatus: - value = self._get_input_parameter_value(k) + value = self.get_input_parameter_value(k) func_kwargs[k] = task_final_status.PipelineTaskFinalStatus( state=value.get('state'), pipeline_job_resource_name=value.get( @@ -345,33 +300,33 @@ def execute(self) -> Optional[str]: error_message=value.get('error').get('message', None), ) - elif self._is_parameter(v): - value = self._get_input_parameter_value(k) + elif is_parameter(v): + value = self.get_input_parameter_value(k) if value is not None: func_kwargs[k] = value elif type_annotations.is_Input_Output_artifact_annotation(v): if type_annotations.is_input_artifact(v): - func_kwargs[k] = self._get_input_artifact(k) + func_kwargs[k] = self.get_input_artifact(k) if type_annotations.is_output_artifact(v): - func_kwargs[k] = self._get_output_artifact(k) + func_kwargs[k] = self.get_output_artifact(k) elif isinstance(v, type_annotations.OutputPath): - if self._is_parameter(v.type): - func_kwargs[k] = self._get_output_parameter_path(k) + if is_parameter(v.type): + func_kwargs[k] = self.get_output_parameter_path(k) else: - func_kwargs[k] = self._get_output_artifact_path(k) + func_kwargs[k] = self.get_output_artifact_path(k) elif isinstance(v, type_annotations.InputPath): - func_kwargs[k] = self._get_input_artifact_path(k) + func_kwargs[k] = self.get_input_artifact_path(k) - result = self._func(**func_kwargs) - return self._write_executor_output(result) + result = self.func(**func_kwargs) + return self.write_executor_output(result) def create_artifact_instance( runtime_artifact: Dict, - artifact_cls=artifact_types.Artifact, + artifact_cls=dsl.Artifact, ) -> type: """Creates an artifact class instances from a runtime artifact dictionary.""" @@ -388,3 +343,51 @@ def create_artifact_instance( name=runtime_artifact.get('name', ''), metadata=runtime_artifact.get('metadata', {}), ) + + +def get_short_type_name(type_name: str) -> str: + """Extracts the short form type name. + + This method is used for looking up serializer for a given type. + + For example: + typing.List -> List + typing.List[int] -> List + typing.Dict[str, str] -> Dict + List -> List + str -> str + + Args: + type_name: The original type name. + + Returns: + The short form type name or the original name if pattern doesn't match. 
+ """ + match = re.match('(typing\.)?(?P\w+)(?:\[.+\])?', type_name) + return match['type'] if match else type_name + + +# TODO: merge with type_utils.is_parameter_type +def is_parameter(annotation: Any) -> bool: + if type(annotation) == type: + return annotation in [str, int, float, bool, dict, list] + + # Annotation could be, for instance `typing.Dict[str, str]`, etc. + return get_short_type_name(str(annotation)) in ['Dict', 'List'] + + +def is_artifact(annotation: Any) -> bool: + if type(annotation) == type: + return type_annotations.is_artifact_class(annotation) + return False + + +def is_named_tuple(annotation: Any) -> bool: + if type(annotation) == type: + return issubclass(annotation, tuple) and hasattr( + annotation, '_fields') and hasattr(annotation, '__annotations__') + return False + + +def makedirs_recursively(path: str) -> None: + os.makedirs(os.path.dirname(path), exist_ok=True) From df4bc46725798d27a32c3935e48dae2384e7d4b9 Mon Sep 17 00:00:00 2001 From: Changyu Zhu Date: Fri, 15 Sep 2023 15:41:27 -0700 Subject: [PATCH 159/253] feat(components): Add AutoML image training job v1 remote runner PiperOrigin-RevId: 565796626 --- .../v1/automl_training_job/image/__init__.py | 14 ++ .../v1/automl_training_job/image/launcher.py | 59 +++++++++ .../image/remote_runner.py | 120 ++++++++++++++++++ .../v1/gcp_launcher/pipeline_remote_runner.py | 7 +- 4 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/__init__.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/__init__.py new file mode 100644 index 0000000000..cbe4dd022e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Components - AutoML Image Training Job Launcher and Remote Runner.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py new file mode 100644 index 0000000000..25ad5d2484 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py @@ -0,0 +1,59 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""GCP launcher for AutoML image training jobs based on the AI Platform SDK.""" + +import logging +import sys + +from google_cloud_pipeline_components.container.v1.automl_training_job.image import remote_runner +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import parser_util + + +def _parse_args(args): + """Parse command line arguments.""" + _, parsed_args = parser_util.parse_default_args(args) + return vars(parsed_args) + + +def main(argv): + """Main entry. + + Expected input args are as follows: + Project - Required. The project of which the resource will be launched. + Region - Required. The region of which the resource will be launched. + Type - Required. GCP launcher is a single container. This Enum will + specify which resource to be launched. + Request payload - Required. The full serialized json of the resource spec. + Note this can contain the Pipeline Placeholders. + gcp_resources - placeholder output for returning job_id. + + Args: + argv: A list of system arguments. + """ + parsed_args = _parse_args(argv) + job_type = parsed_args['type'] + + if job_type != 'AutoMLImageTrainingJob': + raise ValueError('Incorrect job type: ' + job_type) + + logging.info( + 'Starting AutoMLImageTrainingJob using the following arguments: %s', + parsed_args, + ) + + remote_runner.create_pipeline(**parsed_args) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py new file mode 100644 index 0000000000..e0d46c825b --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py @@ -0,0 +1,120 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""GCP remote runner for AutoML image training pipelines based on the AI Platform SDK.""" + +import logging +from typing import Any + +from google.api_core import retry +from google.cloud.aiplatform import gapic +from google_cloud_pipeline_components.container.v1.gcp_launcher import pipeline_remote_runner +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util + + +_GET_PIPELINE_RETRY_DEADLINE_SECONDS = 10.0 * 60.0 + + +def create_pipeline_with_client( + pipeline_client: gapic.PipelineServiceClient, + parent, + pipeline_spec: Any, +): + """Creates a training pipeline with the client.""" + created_pipeline = None + try: + logging.info( + 'Creating AutoML Vision training pipeline with sanitized pipeline' + ' spec: %s', + pipeline_spec, + ) + created_pipeline = pipeline_client.create_training_pipeline( + parent=parent, training_pipeline=pipeline_spec + ) + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) + return created_pipeline + + +def get_pipeline_with_client( + pipeline_client: gapic.PipelineServiceClient, pipeline_name: str +): + """Gets training pipeline state with the client.""" + get_automl_vision_training_pipeline = None + try: + get_automl_vision_training_pipeline = pipeline_client.get_training_pipeline( + name=pipeline_name, + retry=retry.Retry(deadline=_GET_PIPELINE_RETRY_DEADLINE_SECONDS), + ) + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) + return get_automl_vision_training_pipeline + + +def create_pipeline( + type: str, # pylint: disable=redefined-builtin + project: str, + location: str, + payload: str, + gcp_resources: str, +): + """Create and poll AutoML Vision training pipeline status till it reaches a final state. + + This follows the typical launching logic: + 1. Read if the training pipeline already exists in gcp_resources + - If already exists, jump to step 3 and poll the pipeline status. This + happens + if the launcher container experienced unexpected termination, such as + preemption + 2. Deserialize the payload into the pipeline spec and create the training + pipeline + 3. Poll the training pipeline status every + pipeline_remote_runner._POLLING_INTERVAL_IN_SECONDS seconds + - If the training pipeline is succeeded, return succeeded + - If the training pipeline is cancelled/paused, it's an unexpected + scenario so return failed + - If the training pipeline is running, continue polling the status + + Also retry on ConnectionError up to + pipeline_remote_runner._CONNECTION_ERROR_RETRY_LIMIT times during the poll. + + Args: + type: Job type. + project: Project name. + location: Location to start the training job. + payload: Serialized JSON payload. + gcp_resources: URI for storing GCP resources. 
+ """ + remote_runner = pipeline_remote_runner.PipelineRemoteRunner( + type, project, location, gcp_resources + ) + + try: + # Create AutoML vision training pipeline if it does not exist + pipeline_name = remote_runner.check_if_pipeline_exists() + if pipeline_name is None: + logging.info( + 'AutoML Vision training payload formatted: %s', + payload, + ) + pipeline_name = remote_runner.create_pipeline( + create_pipeline_with_client, + payload, + ) + + # Poll AutoML Vision training pipeline status until + # "PipelineState.PIPELINE_STATE_SUCCEEDED" + remote_runner.poll_pipeline(get_pipeline_with_client, pipeline_name) + + except (ConnectionError, RuntimeError) as err: + error_util.exit_with_internal_error(err.args[0]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/pipeline_remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/pipeline_remote_runner.py index 8a807d590a..c5c77c2975 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/pipeline_remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/pipeline_remote_runner.py @@ -29,13 +29,15 @@ from google.cloud.aiplatform_v1.types import pipeline_job from google.cloud.aiplatform_v1.types import pipeline_state from google.cloud.aiplatform_v1.types import training_pipeline +from google_cloud_pipeline_components.container.utils import execution_context +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util +from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util from google_cloud_pipeline_components.proto import gcp_resources_pb2 import requests from google.rpc import code_pb2 from google.protobuf import json_format -from google_cloud_pipeline_components.container.utils import execution_context -from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util, error_util + _POLLING_INTERVAL_IN_SECONDS = 20 _CONNECTION_ERROR_RETRY_LIMIT = 5 @@ -231,6 +233,7 @@ def poll_pipeline( error_util.exit_with_internal_error( f'Request failed after {_CONNECTION_ERROR_RETRY_LIMIT} retries.' ) + return # Not necessary, only to please the linter. 
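The `create_pipeline` docstring above spells out the launch-then-poll contract: create the training pipeline only if `gcp_resources` does not already record one, then poll its state on a fixed interval while tolerating a bounded number of connection errors. A stripped-down sketch of that polling loop, with the function and constant names assumed for illustration (the real `poll_pipeline` also persists `gcp_resources` and maps pipeline states to success or failure):

    import time

    POLL_INTERVAL_SECONDS = 20  # in the spirit of _POLLING_INTERVAL_IN_SECONDS
    RETRY_LIMIT = 5             # in the spirit of _CONNECTION_ERROR_RETRY_LIMIT

    def poll_until_terminal(get_state, terminal_states):
        """Polls get_state() until it reports a terminal state."""
        retries = 0
        while True:
            try:
                state = get_state()
                retries = 0  # a successful call resets the retry budget
            except ConnectionError:
                retries += 1
                if retries > RETRY_LIMIT:
                    raise RuntimeError(f'Request failed after {RETRY_LIMIT} retries.')
                state = None
            if state in terminal_states:
                return state
            time.sleep(POLL_INTERVAL_SECONDS)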
if ( get_pipeline_response.state From 6468b4db11c2cd60a7b2dba7482ab170a129982d Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 15 Sep 2023 16:42:43 -0700 Subject: [PATCH 160/253] feat(components): Use t5-xl reward model when tuning t5-xxl PiperOrigin-RevId: 565809352 --- .../_implementation/llm/function_based.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index a71783d296..b4da8a350a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -221,8 +221,8 @@ def resolve_reference_model_metadata( reference_model_path=( 'gs://t5-data/pretrained_models/t5x/flan_t5_xxl/' ), - reward_model_reference='T5_XXL', - reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_xxl', + reward_model_reference='T5_XL', + reward_model_path='gs://t5-data/pretrained_models/t5x/t5_1_1_xl', is_supported=True, ), 'palm-tiny': reference_model_metadata( From 20218a9c1d9f1b4202738eb30d67606239d28bf4 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 18 Sep 2023 15:51:27 -0700 Subject: [PATCH 161/253] chore(sdk): test observability, refactorings, and cleanup (#10005) * chore(sdk): test observability, refactorings, and cleanup * address review feedback * rename classes --- .../kfp/cli/utils/aliased_plurals_group.py | 6 +- .../kfp/cli/utils/deprecated_alias_group.py | 4 +- sdk/python/kfp/client/auth.py | 13 ++- .../kfp/compiler/pipeline_spec_builder.py | 5 +- sdk/python/kfp/compiler/read_write_test.py | 6 +- sdk/python/kfp/dsl/component_factory.py | 14 ++- sdk/python/kfp/dsl/structures.py | 4 +- .../kfp/dsl/types/custom_artifact_types.py | 5 +- sdk/python/kfp/dsl/types/type_annotations.py | 49 +++++++--- .../kfp/dsl/types/type_annotations_test.py | 97 ++++++++++++++++++- sdk/python/kfp/dsl/types/type_utils.py | 10 +- .../pipeline_with_google_artifact_type.py | 8 +- 12 files changed, 169 insertions(+), 52 deletions(-) diff --git a/sdk/python/kfp/cli/utils/aliased_plurals_group.py b/sdk/python/kfp/cli/utils/aliased_plurals_group.py index 79631827de..79c152868a 100644 --- a/sdk/python/kfp/cli/utils/aliased_plurals_group.py +++ b/sdk/python/kfp/cli/utils/aliased_plurals_group.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List, Tuple, Union +from typing import List, Optional, Tuple import click @@ -30,8 +30,8 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> click.Command: raise click.UsageError(f"Unrecognized command '{cmd_name}'") def resolve_command( - self, ctx: click.Context, args: List[str] - ) -> Tuple[Union[str, None], Union[click.Command, None], List[str]]: + self, ctx: click.Context, + args: List[str]) -> Tuple[Optional[str], Optional[None], List[str]]: # always return the full command name _, cmd, args = super().resolve_command(ctx, args) return cmd.name, cmd, args # type: ignore diff --git a/sdk/python/kfp/cli/utils/deprecated_alias_group.py b/sdk/python/kfp/cli/utils/deprecated_alias_group.py index b4720ab92f..307fdabb49 100644 --- a/sdk/python/kfp/cli/utils/deprecated_alias_group.py +++ b/sdk/python/kfp/cli/utils/deprecated_alias_group.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Optional, Tuple import click @@ -58,7 +58,7 @@ def get_command(self, ctx: click.Context, def resolve_command( self, ctx: click.Context, args: List[str] - ) -> Tuple[Union[str, None], Union[click.Command, None], List[str]]: + ) -> Tuple[Optional[str], Optional[click.Command], List[str]]: # always return the full command name _, cmd, args = super().resolve_command(ctx, args) return cmd.name, cmd, args # type: ignore diff --git a/sdk/python/kfp/client/auth.py b/sdk/python/kfp/client/auth.py index 75b0faef48..4d71b40433 100644 --- a/sdk/python/kfp/client/auth.py +++ b/sdk/python/kfp/client/auth.py @@ -16,7 +16,7 @@ import json import logging import os -from typing import Any, Callable, Dict, Generator, Iterable, Tuple, Union +from typing import Any, Callable, Dict, Generator, Iterable, Optional, Tuple from urllib.parse import parse_qs from urllib.parse import urlparse from webbrowser import open_new_tab @@ -38,7 +38,7 @@ LOCAL_KFP_CREDENTIAL = os.path.expanduser('~/.config/kfp/credentials.json') -def get_gcp_access_token() -> Union[str, None]: +def get_gcp_access_token() -> Optional[str]: """Gets GCP access token for the current Application Default Credentials. Returns: @@ -60,7 +60,7 @@ def get_gcp_access_token() -> Union[str, None]: def get_auth_token(client_id: str, other_client_id: str, - other_client_secret: str) -> Tuple[Union[str, None], bool]: + other_client_secret: str) -> Tuple[Optional[str], bool]: """Gets auth token from default service account or user account. Returns: @@ -122,7 +122,7 @@ def get_auth_token(client_id: str, other_client_id: str, return token, is_refresh_token -def get_auth_token_from_sa(client_id: str) -> Union[str, None]: +def get_auth_token_from_sa(client_id: str) -> Optional[str]: """Gets auth token from default service account. Returns: @@ -135,8 +135,7 @@ def get_auth_token_from_sa(client_id: str) -> Union[str, None]: def get_service_account_credentials( - client_id: str -) -> Union[google.oauth2.service_account.Credentials, None]: + client_id: str) -> Optional[google.oauth2.service_account.Credentials]: """Figure out what environment we're running in and get some preliminary information about the service account. @@ -318,7 +317,7 @@ def get_auth_response_ssh(host: str, port: int, auth_url: str) -> str: def get_auth_response_local(host: str, port: int, - auth_url: str) -> Union[str, None]: + auth_url: str) -> Optional[str]: """Fetches OAuth authorization response URL using a local web-server. 
Args: diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 75be4eb647..83b55e3b61 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -24,6 +24,7 @@ from google.protobuf import struct_pb2 import kfp from kfp.compiler import compiler_utils +from kfp.dsl import component_factory from kfp.dsl import for_loop from kfp.dsl import pipeline_channel from kfp.dsl import pipeline_context @@ -45,8 +46,6 @@ tasks_group.TasksGroupType.EXIT_HANDLER: tasks_group.ExitHandler, } -_SINGLE_OUTPUT_NAME = 'Output' - def to_protobuf_value(value: type_utils.PARAMETER_TYPES) -> struct_pb2.Value: """Creates a google.protobuf.struct_pb2.Value message out of a provide @@ -1774,7 +1773,7 @@ def convert_pipeline_outputs_to_dict( if pipeline_outputs is None: return {} elif isinstance(pipeline_outputs, pipeline_channel.PipelineChannel): - return {_SINGLE_OUTPUT_NAME: pipeline_outputs} + return {component_factory.SINGLE_OUTPUT_NAME: pipeline_outputs} elif isinstance(pipeline_outputs, tuple) and hasattr( pipeline_outputs, '_asdict'): return dict(pipeline_outputs._asdict()) diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index 29c76db03e..7f33d73394 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -146,7 +146,8 @@ def _test_serialization_deserialization_consistency(self, yaml_file: str): original_component) self.assertEqual( handle_expected_diffs(original_component.component_spec), - handle_expected_diffs(reloaded_component.component_spec)) + handle_expected_diffs(reloaded_component.component_spec), + f'\n\n\nError with (de)serialization consistency of: {yaml_file}') def _test_serialization_correctness( self, @@ -158,7 +159,8 @@ def _test_serialization_correctness( pipeline = import_obj_from_file(python_file, function_name) compiled_result = self._compile_and_read_yaml(pipeline) golden_result = load_compiled_file(yaml_file) - self.assertEqual(compiled_result, golden_result) + self.assertEqual(compiled_result, golden_result, + f'\n\n\nError with compiling: {python_file}') @parameterized.parameters(create_test_cases()) def test( diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index b3547d980e..f34dd33fe0 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -35,6 +35,7 @@ from kfp.dsl.types import type_utils _DEFAULT_BASE_IMAGE = 'python:3.7' +SINGLE_OUTPUT_NAME = 'Output' @dataclasses.dataclass @@ -209,7 +210,6 @@ def extract_component_interface( description: Optional[str] = None, name: Optional[str] = None, ) -> structures.ComponentSpec: - single_output_name_const = 'Output' signature = inspect.signature(func) parameters = list(signature.parameters.values()) @@ -280,10 +280,9 @@ def extract_component_interface( if passing_style in [ type_annotations.OutputAnnotation, type_annotations.OutputPath ]: - if io_name == single_output_name_const: + if io_name == SINGLE_OUTPUT_NAME: raise ValueError( - f'"{single_output_name_const}" is an invalid parameter name.' 
- ) + f'"{SINGLE_OUTPUT_NAME}" is an invalid parameter name.') io_name = _maybe_make_unique(io_name, output_names) output_names.add(io_name) if type_annotations.is_artifact_class(parameter_type): @@ -359,8 +358,7 @@ def extract_component_interface( output_spec = structures.OutputSpec(type=output_type_struct) outputs[name] = output_spec elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty: - output_name = _maybe_make_unique(single_output_name_const, - output_names) + output_name = _maybe_make_unique(SINGLE_OUTPUT_NAME, output_names) # Fixes exotic, but possible collision: # `def func(output_path: OutputPath()) -> str: ...` output_names.add(output_name) @@ -403,7 +401,7 @@ def assign_descriptions( def parse_docstring_with_return_as_args( docstring: Union[str, - None]) -> Union[docstring_parser.Docstring, None]: + None]) -> Optional[docstring_parser.Docstring]: """Modifies docstring so that a return section can be treated as an args section, then parses the docstring.""" if docstring is None: @@ -663,7 +661,7 @@ def create_graph_component_from_func( def get_pipeline_description( decorator_description: Union[str, None], docstring: docstring_parser.Docstring, -) -> Union[str, None]: +) -> Optional[str]: """Obtains the correct pipeline description from the pipeline decorator's description argument and the parsed docstring. diff --git a/sdk/python/kfp/dsl/structures.py b/sdk/python/kfp/dsl/structures.py index d9e03dd947..4b4e6fba1c 100644 --- a/sdk/python/kfp/dsl/structures.py +++ b/sdk/python/kfp/dsl/structures.py @@ -797,7 +797,7 @@ def outputs_dict_from_component_spec_dict( } def extract_description_from_command( - commands: List[str]) -> Union[str, None]: + commands: List[str]) -> Optional[str]: for command in commands: if isinstance(command, str) and 'import kfp' in command: for node in ast.walk(ast.parse(command)): @@ -846,7 +846,7 @@ def from_yaml_documents(cls, component_yaml: str) -> 'ComponentSpec': ComponentSpec: The ComponentSpec object. 
""" - def extract_description(component_yaml: str) -> Union[str, None]: + def extract_description(component_yaml: str) -> Optional[str]: heading = '# Description: ' multi_line_description_prefix = '# ' index_of_heading = 2 diff --git a/sdk/python/kfp/dsl/types/custom_artifact_types.py b/sdk/python/kfp/dsl/types/custom_artifact_types.py index 484dfa6508..beba576dc6 100644 --- a/sdk/python/kfp/dsl/types/custom_artifact_types.py +++ b/sdk/python/kfp/dsl/types/custom_artifact_types.py @@ -53,9 +53,8 @@ def get_param_to_custom_artifact_class(func: Callable) -> Dict[str, type]: artifact_class = type_annotations.get_io_artifact_class(annotation) if artifact_class not in kfp_artifact_classes: param_to_artifact_cls[name] = artifact_class - elif type_annotations.is_artifact_class(annotation): - param_to_artifact_cls[name] = annotation - if artifact_class not in kfp_artifact_classes: + elif type_annotations.issubclass_of_artifact(annotation): + if annotation not in kfp_artifact_classes: param_to_artifact_cls[name] = artifact_class return_annotation = signature.return_annotation diff --git a/sdk/python/kfp/dsl/types/type_annotations.py b/sdk/python/kfp/dsl/types/type_annotations.py index aa39d2002e..1d9e2f2b0e 100644 --- a/sdk/python/kfp/dsl/types/type_annotations.py +++ b/sdk/python/kfp/dsl/types/type_annotations.py @@ -17,7 +17,7 @@ """ import re -from typing import List, Type, TypeVar, Union +from typing import Any, List, Optional, Type, TypeVar, Union from kfp.dsl.types import artifact_types from kfp.dsl.types import type_annotations @@ -99,7 +99,7 @@ def __eq__(self, other): def construct_type_for_inputpath_or_outputpath( - type_: Union[str, Type, None]) -> Union[str, None]: + type_: Union[str, Type, None]) -> Optional[str]: if type_annotations.is_artifact_class(type_): return type_utils.create_bundled_artifact_type(type_.schema_title, type_.schema_version) @@ -233,13 +233,40 @@ def is_artifact_class(artifact_class_or_instance: Type) -> bool: def is_list_of_artifacts( - type_var: Union[Type[List[artifact_types.Artifact]], - Type[artifact_types.Artifact]] + annotation: Union[Type[List[artifact_types.Artifact]], + Type[artifact_types.Artifact]] ) -> bool: - # the type annotation for this function's `type_var` parameter may not actually be a subclass of the KFP SDK's Artifact class for custom artifact types - is_list_or_list_generic = getattr(type_var, '__origin__', None) == list - # in >= python3.9, List wont have .__args__ if it's used as `-> List` with no inner type argument - contains_artifact = hasattr( - type_var, '__args__') and type_annotations.is_artifact_class( - type_var.__args__[0]) - return is_list_or_list_generic and contains_artifact + """Checks if an object is a list of list of artifacts annotation (e.g., + List[Artifact], List[Dataset])""" + return is_generic_list(annotation) and issubclass_of_artifact( + get_inner_type(annotation)) + + +def get_inner_type(annotation: Any) -> Optional[Any]: + """Returns the inner type of a generic annotation. + + For Union or Optional types with multiple inner types, a tuple of + types is returned. 
+ """ + # Check if the annotation has '__args__' attribute + if hasattr(annotation, '__args__'): + if len(annotation.__args__) == 1: + return annotation.__args__[0] + else: + return tuple(annotation.__args__) + return None + + +def issubclass_of_artifact(obj: Any) -> bool: + """Checks if an object is a class and a subclass of a dsl.Artifact.""" + return type(obj) == type and issubclass(obj, artifact_types.Artifact) + + +def is_generic_list(annotation: Any) -> bool: + # handles generics from the typing module for python<3.9 + typing_generic_list = getattr(annotation, '__origin__', + None) is list or getattr( + annotation, '__origin__', None) is List + # handles built-in generics for python>=3.9 + built_in_generic_list = annotation == list + return typing_generic_list or built_in_generic_list diff --git a/sdk/python/kfp/dsl/types/type_annotations_test.py b/sdk/python/kfp/dsl/types/type_annotations_test.py index 099208c1b1..b57e254082 100644 --- a/sdk/python/kfp/dsl/types/type_annotations_test.py +++ b/sdk/python/kfp/dsl/types/type_annotations_test.py @@ -13,10 +13,12 @@ # limitations under the License. """Tests for kfp.dsl.types.type_annotations.""" -from typing import Any, Dict, List, Optional +import sys +from typing import Any, Dict, List, Optional, Union import unittest from absl.testing import parameterized +from kfp import dsl from kfp.dsl import Input from kfp.dsl import Output from kfp.dsl.types import artifact_types @@ -209,17 +211,102 @@ def test_false(self, obj): def test_false_no_schema_title(self): - class NotArtifact: + class MissingSchemaTitle: schema_version = '' - self.assertFalse(type_annotations.is_artifact_class(NotArtifact)) + self.assertFalse(type_annotations.is_artifact_class(MissingSchemaTitle)) def test_false_no_schema_version(self): - class NotArtifact: + class MissingSchemaVersion: schema_title = '' - self.assertFalse(type_annotations.is_artifact_class(NotArtifact)) + self.assertFalse( + type_annotations.is_artifact_class(MissingSchemaVersion)) + + +class ArtifactSubclass(dsl.Artifact): + pass + + +class NotArtifactSubclass: + pass + + +class TestIsSubclassOfArtifact(parameterized.TestCase): + + @parameterized.parameters([{ + 'obj': obj + } for obj in [ + dsl.Artifact, + dsl.Dataset, + dsl.Metrics, + ArtifactSubclass, + ]]) + def test_true(self, obj): + self.assertTrue(type_annotations.issubclass_of_artifact(obj)) + + @parameterized.parameters([{ + 'obj': obj + } for obj in [ + dsl.Artifact(), + dsl.Dataset(), + 1, + NotArtifactSubclass, + ]]) + def test_false(self, obj): + self.assertFalse(type_annotations.issubclass_of_artifact(obj)) + + +class TestIsGenericList(parameterized.TestCase): + + @parameterized.parameters([{ + 'obj': obj + } for obj in [ + List, + List[str], + List[dsl.Artifact], + List[Dict[str, str]], + ] + ([ + list, + list[str], + ] if sys.version_info >= (3, 9, 0) else [])]) + def test_true(self, obj): + self.assertTrue(type_annotations.is_generic_list(obj)) + + @parameterized.parameters([{ + 'obj': obj + } for obj in [ + Optional[List[str]], + Dict[str, str], + str, + int, + dsl.Artifact, + ]]) + def test_false(self, obj): + self.assertFalse(type_annotations.is_generic_list(obj)) + + +class TestGetInnerType(parameterized.TestCase): + + @parameterized.parameters([{ + 'annotation': annotation, + 'expected': expected + } for annotation, expected in [ + (int, None), + (Optional[int], (int, type(None))), + (Union[int, None], (int, type(None))), + (List[str], str), + (Dict[str, str], (str, str)), + (List[dsl.Artifact], dsl.Artifact), + ] + ([ + 
(list[str], str), + (dict[str, str], (str, str)), + (list[dsl.Artifact], dsl.Artifact), + ] if sys.version_info >= (3, 9, 0) else [])]) + def test(self, annotation, expected): + actual = type_annotations.get_inner_type(annotation) + self.assertEqual(actual, expected) if __name__ == '__main__': diff --git a/sdk/python/kfp/dsl/types/type_utils.py b/sdk/python/kfp/dsl/types/type_utils.py index 12a78eda38..324c971459 100644 --- a/sdk/python/kfp/dsl/types/type_utils.py +++ b/sdk/python/kfp/dsl/types/type_utils.py @@ -54,7 +54,7 @@ LIST = 5 STRUCT = 6 PARAMETER_TYPES_MAPPING = { - 'integer': 2, + 'integer': NUMBER_INTEGER, 'int': NUMBER_INTEGER, 'double': NUMBER_DOUBLE, 'float': NUMBER_DOUBLE, @@ -194,9 +194,13 @@ def get_parameter_type_name( """Gets the parameter type name.""" from kfp.pipeline_spec import pipeline_spec_pb2 - + param_enum_val = get_parameter_type(param_type) + if param_enum_val is None: + raise ValueError( + '`param_type` is not a parameter type. Cannot get ParameterType name.' + ) return pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name( - get_parameter_type(param_type)) + param_enum_val) class InconsistentTypeException(Exception): diff --git a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.py b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.py index 2deb7f0620..91ce665974 100644 --- a/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.py +++ b/sdk/python/test_data/pipelines/pipeline_with_google_artifact_type.py @@ -25,7 +25,9 @@ def create_temporary_google_artifact_package( import os import textwrap - class VertexModel: + from kfp import dsl + + class VertexModel(dsl.Artifact): schema_title = 'google.VertexModel' schema_version = '0.0.0' @@ -38,7 +40,7 @@ def __init__(self, name: str, uri: str, metadata: dict) -> None: def path(self) -> str: return self.uri.replace('gs://', '/') - class VertexDataset: + class VertexDataset(dsl.Artifact): schema_title = 'google.VertexDataset' schema_version = '0.0.0' @@ -51,7 +53,7 @@ def __init__(self, name: str, uri: str, metadata: dict) -> None: def path(self) -> str: return self.uri.replace('gs://', '/') - class_source = textwrap.dedent( + class_source = 'from kfp import dsl' + '\n\n' + textwrap.dedent( inspect.getsource(VertexModel)) + '\n\n' + textwrap.dedent( inspect.getsource(VertexDataset)) From 4945e2cfc5848898727e608ab2f9c607bb99dc92 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 19 Sep 2023 09:47:29 -0700 Subject: [PATCH 162/253] feat(components): Support multiple chunking functions PiperOrigin-RevId: 566660535 --- .../llm_embedding/evaluation_llm_embedding_pipeline.py | 9 +++++++++ .../llm_information_retrieval_preprocessor/component.py | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py index 899c43c78d..f4cbced09f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding/evaluation_llm_embedding_pipeline.py @@ -34,6 +34,7 @@ def evaluation_llm_embedding_pipeline( query_gcs_source: str, golden_docs_gcs_source: str, model_name: str, + embedding_chunking_function: str = 
'langchain-RecursiveCharacterTextSplitter', embedding_chunk_size: int = 0, embedding_chunk_overlap: int = 0, embedding_retrieval_combination_function: str = 'max', @@ -65,6 +66,13 @@ def evaluation_llm_embedding_pipeline( golden_docs_gcs_source: The gcs location for csv file containing mapping of each query to the golden docs. model_name: The path for model to generate embeddings. + embedding_chunking_function: function used to split a document into chunks. + Supported values are `langchain-RecursiveCharacterTextSplitter` and + `sentence-splitter`. langchain-RecursiveCharacterTextSplitter: + langchain.text_splitter.RecursiveCharacterTextSplitter, with configurable + chunk_size and chunk_overlap. sentence-splitter: splitter that will not + break in the middle of a sentence. embedding_chunk_size and + embedding_chunk_overlap are measured by the number of tokens. embedding_chunk_size: The length of each document chunk. If 0, chunking is not enabled. embedding_chunk_overlap: The length of the overlap part between adjacent @@ -127,6 +135,7 @@ def evaluation_llm_embedding_pipeline( service_account=service_account, network=network, runner=runner, + embedding_chunking_function=embedding_chunking_function, embedding_chunk_size=embedding_chunk_size, embedding_chunk_overlap=embedding_chunk_overlap, dataflow_service_account=dataflow_service_account, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py index 8653b916c2..a17faa0ff2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -34,6 +34,7 @@ def llm_information_retrieval_preprocessor( corpus_gcs_source: str, query_gcs_source: str, golden_docs_gcs_source: str, + embedding_chunking_function: str = 'langchain-RecursiveCharacterTextSplitter', embedding_chunk_size: int = 0, embedding_chunk_overlap: int = 0, display_name: str = 'information-retrieval-preprocessor', @@ -63,6 +64,9 @@ def llm_information_retrieval_preprocessor( documents. golden_docs_gcs_source: Required. The path for csv file containing mapping of each query to the golden docs. + embedding_chunking_function: function used to split a document into + chunks. Supported values are `langchain-RecursiveCharacterTextSplitter` + and `sentence-splitter`. embedding_chunk_size: The length of each document chunk. If 0, chunking is not enabled. 
embedding_chunk_overlap: The length of the overlap part between adjacent @@ -138,6 +142,7 @@ def llm_information_retrieval_preprocessor( f'--predictions_query_gcs_source={predictions_query_gcs_source}', f'--predictions_corpus_gcs_source={predictions_corpus_gcs_source}', f'--embedding_retrieval_gcs_source={embedding_retrieval_gcs_source}', + f'--embedding_chunking_function={embedding_chunking_function}', f'--embedding_chunk_size={embedding_chunk_size}', f'--embedding_chunk_overlap={embedding_chunk_overlap}', f'--runner={runner}', From b31d8a57ef5db67a8cd782d7ab60f7e5b131ae7a Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 19 Sep 2023 09:49:49 -0700 Subject: [PATCH 163/253] feat(components): Update default image tag used by LLM implementation components PiperOrigin-RevId: 566661112 --- .../google_cloud_pipeline_components/_implementation/llm/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py index 967788f6eb..7b2bbfbe86 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py @@ -16,7 +16,7 @@ def get_private_image_tag() -> str: - return os.getenv('PRIVATE_IMAGE_TAG', 'live') + return os.getenv('PRIVATE_IMAGE_TAG', '20230918_1327_RC00') def get_use_test_machine_spec() -> bool: From adb86777a0c8bf8c28bb0cee1d936daf70d9a59f Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 19 Sep 2023 14:20:36 -0700 Subject: [PATCH 164/253] No public description PiperOrigin-RevId: 566742090 --- components/google-cloud/RELEASE.md | 1 + .../preview/automl/forecasting/__init__.py | 41 +++++++++++++++++++ .../learn_to_learn_forecasting_pipeline.yaml | 2 +- ...ence_to_sequence_forecasting_pipeline.yaml | 2 +- ...sion_transformer_forecasting_pipeline.yaml | 2 +- ...es_dense_encoder_forecasting_pipeline.yaml | 2 +- 6 files changed, 46 insertions(+), 4 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 21cd1a3136..1a98d48d10 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -4,6 +4,7 @@ * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` * Add feature_selection_pipeline to preview.automl.tabular. * Bump supported KFP versions to kfp>=2.0.0b10,<=2.2.0 +* Add `time_series_dense_encoder_forecasting_pipeline`, `learn_to_learn_forecasting_pipeline`, `sequence_to_sequence_forecasting_pipeline`, and `temporal_fusion_transformer_forecasting_pipeline` to `preview.automl.forecasting`. ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py index e6535b039a..917eb0e145 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/__init__.py @@ -12,13 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Experimental AutoML forecasting components.""" +import os from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_ensemble import automl_forecasting_ensemble as ForecastingEnsembleOp from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_1_tuner import automl_forecasting_stage_1_tuner as ForecastingStage1TunerOp from google_cloud_pipeline_components.preview.automl.forecasting.forecasting_stage_2_tuner import automl_forecasting_stage_2_tuner as ForecastingStage2TunerOp +from kfp import components __all__ = [ 'ForecastingStage1TunerOp', 'ForecastingEnsembleOp', 'ForecastingStage2TunerOp', + 'learn_to_learn_forecasting_pipeline', + 'sequence_to_sequence_forecasting_pipeline', + 'temporal_fusion_transformer_forecasting_pipeline', + 'time_series_dense_encoder_forecasting_pipeline', ] + +learn_to_learn_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), 'learn_to_learn_forecasting_pipeline.yaml' + ) +) + +sequence_to_sequence_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), + 'sequence_to_sequence_forecasting_pipeline.yaml', + ) +) + +temporal_fusion_transformer_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), + 'temporal_fusion_transformer_forecasting_pipeline.yaml', + ) +) + +time_series_dense_encoder_forecasting_pipeline = components.load_component_from_file( + # Note, please don't name it as `component.yaml` which will conflict with + # the generated file. + os.path.join( + os.path.dirname(__file__), + 'time_series_dense_encoder_forecasting_pipeline.yaml', + ) +) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml index a18391bf96..db07733ed7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: learn-to-learn-forecasting -# Description: The AutoML Forecasting pipeline. +# Description: Train a model using Tabular Workflows for Learn to Learn Forecasting pipelines. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml index 244c0d16d1..a56d98b784 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: sequence-to-sequence-forecasting -# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. +# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. Seq2seq is a simple model, and can be trained very fast, but accuracy is not its strength # Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml index 15da388493..073c9cc94f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: temporal-fusion-transformer-forecasting -# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. +# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. The TFT model can produce the feature importance using the built-in mask for each feature, which leads to a very cheap prediction explanability overhead. With this model, the user won't need to explicitly enable the explanability support during serving to get the feature importance for each feature column. # Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml index 954d4f5ef5..5d23d21761 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: time-series-dense-encoder-forecasting -# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. +# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. TiDE is a new model type in Vertex Forecasting and has the best training and inference performance while not sacrificing any model quality. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] From e99f2704fc164039d9106a76223ee4abf9402bfb Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 20 Sep 2023 18:46:37 -0700 Subject: [PATCH 165/253] feat(sdk): support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` (#10010) * support taskfinalstatus in tasks that ignore upstream failure * address review feedback --- sdk/RELEASE.md | 1 + sdk/python/kfp/compiler/compiler_test.py | 135 ++++++++++++++++++ .../kfp/compiler/pipeline_spec_builder.py | 49 ++++++- sdk/python/kfp/dsl/pipeline_task.py | 2 + sdk/python/kfp/dsl/types/type_utils.py | 1 + 5 files changed, 182 insertions(+), 6 deletions(-) diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index ac81cc00bf..1d9e328fda 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -2,6 +2,7 @@ ## Features +* Support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` [\#10010](https://github.com/kubeflow/pipelines/pull/10010) ## Breaking changes diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 56407cd752..dcf68f17c5 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -3127,6 +3127,141 @@ def my_pipeline(sample_input1: str = 'message'): my_pipeline.pipeline_spec.root.dag.tasks['fail-op'].trigger_policy .strategy, 0) + def test_can_use_task_final_status(self): + + @dsl.component + def worker_component() -> str: + return 'hello' + + @dsl.component + def cancel_handler( + status: PipelineTaskFinalStatus, + text: str = '', + ): + print(text) + print(status) + + @dsl.pipeline + def my_pipeline(): + worker_task = worker_component() + exit_task = cancel_handler( + text=worker_task.output).ignore_upstream_failure() + + self.assertEqual( + my_pipeline.pipeline_spec.root.dag.tasks['cancel-handler'] + .trigger_policy.strategy, 2) + self.assertEqual( + my_pipeline.pipeline_spec.root.dag.tasks['cancel-handler'].inputs + .parameters['status'].task_final_status.producer_task, + 'worker-component') + + status_param = my_pipeline.pipeline_spec.components[ + 'comp-cancel-handler'].input_definitions.parameters['status'] + self.assertTrue(status_param.is_optional) + self.assertEqual(status_param.parameter_type, + type_utils.TASK_FINAL_STATUS) + + self.assertEqual( + my_pipeline.pipeline_spec.root.dag.tasks['worker-component'] + .trigger_policy.strategy, 0) + + def test_cannot_use_task_final_status_under_task_group(self): + + @dsl.component + def worker_component() -> str: + return 'hello' + + @dsl.component + def cancel_handler( + status: PipelineTaskFinalStatus, + text: str = '', + ): + print(text) + print(status) + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r"Tasks that use '\.ignore_upstream_failure\(\)' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task within the same control flow scope\. 
Got task 'cancel-handler' beneath a 'dsl\.Condition' that does not also contain the upstream dependent task\.", + ): + + @dsl.pipeline + def my_pipeline(): + worker_task = worker_component() + with dsl.Condition(worker_task.output == 'foo'): + exit_task = cancel_handler( + text=worker_task.output).ignore_upstream_failure() + + def test_cannot_use_final_task_status_if_zero_dependencies(self): + + @dsl.component + def worker_component() -> str: + return 'hello' + + @dsl.component + def cancel_handler(status: PipelineTaskFinalStatus,): + print(status) + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r"Tasks that use '\.ignore_upstream_failure\(\)' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task\. Got task 'cancel-handler with no upstream dependencies\.", + ): + + @dsl.pipeline + def my_pipeline(): + worker_task = worker_component() + exit_task = cancel_handler().ignore_upstream_failure() + + def test_cannot_use_task_final_status_if_more_than_one_dependency_implicit( + self): + + @dsl.component + def worker_component() -> str: + return 'hello' + + @dsl.component + def cancel_handler( + status: PipelineTaskFinalStatus, + a: str = '', + b: str = '', + ): + print(status) + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r"Tasks that use '\.ignore_upstream_failure\(\)' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task\. Got 2 dependent tasks: \['worker-component', 'worker-component-2']\.", + ): + + @dsl.pipeline + def my_pipeline(): + worker_task1 = worker_component() + worker_task2 = worker_component() + exit_task = cancel_handler( + a=worker_task1.output, + b=worker_task2.output).ignore_upstream_failure() + + def test_cannot_use_task_final_status_if_more_than_one_dependency_explicit( + self): + + @dsl.component + def worker_component() -> str: + return 'hello' + + @dsl.component + def cancel_handler(status: PipelineTaskFinalStatus,): + print(status) + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r"Tasks that use '\.ignore_upstream_failure\(\)' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task\. Got 2 dependent tasks: \['worker-component', 'worker-component-2']\.", + ): + + @dsl.pipeline + def my_pipeline(): + worker_task1 = worker_component() + worker_task2 = worker_component() + exit_task = cancel_handler().after( + worker_task1, worker_task2).ignore_upstream_failure() + def test_component_with_no_input_permitted(self): @dsl.component diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 83b55e3b61..3e242892d3 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -301,11 +301,6 @@ def build_task_spec_for_task( 'str, int, float, bool, dict, and list.' 
f'Got {input_value} of type {type(input_value)}.') - if task._ignore_upstream_failure_tag: - pipeline_task_spec.trigger_policy.strategy = ( - pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy.TriggerStrategy - .ALL_UPSTREAM_TASKS_COMPLETED) - return pipeline_task_spec @@ -339,7 +334,8 @@ def build_component_spec_for_task( """ for input_name, input_spec in (task.component_spec.inputs or {}).items(): if not is_exit_task and type_utils.is_task_final_status_type( - input_spec.type) and not is_compiled_component: + input_spec.type + ) and not is_compiled_component and not task._ignore_upstream_failure_tag: raise ValueError( f'PipelineTaskFinalStatus can only be used in an exit task. Parameter {input_name} of a non exit task has type PipelineTaskFinalStatus.' ) @@ -1302,6 +1298,11 @@ def build_spec_by_group( subgroup_task_spec.dependent_tasks.extend( [utils.sanitize_task_name(dep) for dep in group_dependencies]) + # Modify the task inputs for PipelineTaskFinalStatus if ignore_upstream_failure is used + # Must be done after dependencies are added + if isinstance(subgroup, pipeline_task.PipelineTask): + modify_task_for_ignore_upstream_failure( + task=subgroup, pipeline_task_spec=subgroup_task_spec) # Add component spec subgroup_component_name = utils.make_name_unique_by_adding_index( name=subgroup_component_name, @@ -1328,6 +1329,42 @@ def build_spec_by_group( ) +def modify_task_for_ignore_upstream_failure( + task: pipeline_task.PipelineTask, + pipeline_task_spec: pipeline_spec_pb2.PipelineTaskSpec, +): + if task._ignore_upstream_failure_tag: + pipeline_task_spec.trigger_policy.strategy = ( + pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy.TriggerStrategy + .ALL_UPSTREAM_TASKS_COMPLETED) + + for input_name, input_spec in (task.component_spec.inputs or + {}).items(): + if not type_utils.is_task_final_status_type(input_spec.type): + continue + + if len(pipeline_task_spec.dependent_tasks) == 0: + if task.parent_task_group.group_type == tasks_group.TasksGroupType.PIPELINE: + raise compiler_utils.InvalidTopologyException( + f"Tasks that use '.ignore_upstream_failure()' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task. Got task '{pipeline_task_spec.task_info.name} with no upstream dependencies." + ) + else: + # TODO: permit additional PipelineTaskFinalStatus flexibility by "punching the hole" through Condition and ParallelFor groups + raise compiler_utils.InvalidTopologyException( + f"Tasks that use '.ignore_upstream_failure()' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task within the same control flow scope. Got task '{pipeline_task_spec.task_info.name}' beneath a 'dsl.{group_type_to_dsl_class[task.parent_task_group.group_type].__name__}' that does not also contain the upstream dependent task." + ) + + # if >1 dependent task, ambiguous to which upstream task the PipelineTaskFinalStatus should correspond, since there is no ExitHandler that bundles these together + if len(pipeline_task_spec.dependent_tasks) > 1: + raise compiler_utils.InvalidTopologyException( + f"Tasks that use '.ignore_upstream_failure()' and 'PipelineTaskFinalStatus' must have exactly one dependent upstream task. Got {len(pipeline_task_spec.dependent_tasks)} dependent tasks: {pipeline_task_spec.dependent_tasks}." 
+ ) + + pipeline_task_spec.inputs.parameters[ + input_name].task_final_status.producer_task = pipeline_task_spec.dependent_tasks[ + 0] + + def platform_config_to_platform_spec( platform_config: dict, executor_label: str, diff --git a/sdk/python/kfp/dsl/pipeline_task.py b/sdk/python/kfp/dsl/pipeline_task.py index f35cdd752b..ecf1640a88 100644 --- a/sdk/python/kfp/dsl/pipeline_task.py +++ b/sdk/python/kfp/dsl/pipeline_task.py @@ -604,6 +604,8 @@ def my_pipeline(text: str = 'message'): for input_spec_name, input_spec in (self.component_spec.inputs or {}).items(): + if type_utils.is_task_final_status_type(input_spec.type): + continue argument_value = self._inputs[input_spec_name] if (isinstance(argument_value, pipeline_channel.PipelineChannel) ) and (not input_spec.optional) and (argument_value.task_name diff --git a/sdk/python/kfp/dsl/types/type_utils.py b/sdk/python/kfp/dsl/types/type_utils.py index 324c971459..cd84a37041 100644 --- a/sdk/python/kfp/dsl/types/type_utils.py +++ b/sdk/python/kfp/dsl/types/type_utils.py @@ -53,6 +53,7 @@ BOOLEAN = 4 LIST = 5 STRUCT = 6 +TASK_FINAL_STATUS = 7 PARAMETER_TYPES_MAPPING = { 'integer': NUMBER_INTEGER, 'int': NUMBER_INTEGER, From 434b41a19c983432e5f1ba218ac29e5075604db9 Mon Sep 17 00:00:00 2001 From: David van der Spek <28541758+DavidSpek@users.noreply.github.com> Date: Thu, 21 Sep 2023 22:35:37 +0200 Subject: [PATCH 166/253] fix(backend): update requirements scripts (#10009) * fix: update requirements scripts Signed-off-by: David van der Spek * fix: visualization dependency hell Signed-off-by: David van der Spek * bump snapshottest Signed-off-by: David van der Spek * fix: pin jinja2 Signed-off-by: David van der Spek * fix: pin markupsafe to fix ci Signed-off-by: David van der Spek --------- Signed-off-by: David van der Spek --- backend/Dockerfile.visualization | 2 +- backend/README.md | 7 + backend/metadata_writer/Dockerfile | 2 +- backend/metadata_writer/README.md | 3 +- backend/metadata_writer/requirements.txt | 85 ++- .../metadata_writer/update_requirements.sh | 5 + backend/requirements.txt | 43 +- .../visualization/requirements-test.txt | 2 +- .../apiserver/visualization/requirements.in | 14 +- .../apiserver/visualization/requirements.txt | 711 ++++++++++++++---- .../visualization/update_requirements.sh | 4 +- backend/update_requirements.sh | 5 + hack/update-all-requirements.sh | 2 + hack/update-requirements.sh | 6 +- 14 files changed, 685 insertions(+), 206 deletions(-) create mode 100755 backend/metadata_writer/update_requirements.sh create mode 100755 backend/update_requirements.sh diff --git a/backend/Dockerfile.visualization b/backend/Dockerfile.visualization index 4bcdcb03a0..f1e0d8d0d1 100644 --- a/backend/Dockerfile.visualization +++ b/backend/Dockerfile.visualization @@ -18,7 +18,7 @@ # and exporter.py files in the directory specified above. # This image should be in sync with image in backend/src/apiserver/visualization/update_requirements.sh. -FROM tensorflow/tensorflow:2.5.1 +FROM tensorflow/tensorflow:2.10.1 RUN apt-get update \ && apt-get install -y wget curl tar openssl diff --git a/backend/README.md b/backend/README.md index ecbad2f390..853d83dd6b 100644 --- a/backend/README.md +++ b/backend/README.md @@ -55,6 +55,13 @@ need to be regenerated and checked-in. Refer to [backend/api](./api/README.md) f make all ``` +## Updating python dependencies + +[pip-tools](https://github.com/jazzband/pip-tools) is used to manage python +dependencies. 
To update dependencies, edit [requirements.in](requirements.in) +and run `./update_requirements.sh` to update and pin the transitive +dependencies. + # Visualization Server Instructions ## Updating python dependencies diff --git a/backend/metadata_writer/Dockerfile b/backend/metadata_writer/Dockerfile index 6111175f94..63394179ec 100644 --- a/backend/metadata_writer/Dockerfile +++ b/backend/metadata_writer/Dockerfile @@ -1,5 +1,5 @@ # ml-metadata package depends on tensorflow package -FROM python:3.7 +FROM python:3.8 COPY backend/metadata_writer/requirements.txt /kfp/metadata_writer/ RUN python3 -m pip install -r /kfp/metadata_writer/requirements.txt diff --git a/backend/metadata_writer/README.md b/backend/metadata_writer/README.md index 82cccca60f..eb8166edd6 100644 --- a/backend/metadata_writer/README.md +++ b/backend/metadata_writer/README.md @@ -4,6 +4,5 @@ [pip-tools](https://github.com/jazzband/pip-tools) is used to manage python dependencies. To update dependencies, edit [requirements.in](requirements.in) -and run `../update_requirements.sh python:3.7 requirements.txt` to update and pin the transitive +and run `./update_requirements.sh` to update and pin the transitive dependencies. - diff --git a/backend/metadata_writer/requirements.txt b/backend/metadata_writer/requirements.txt index cc3c61470e..071621f3a8 100644 --- a/backend/metadata_writer/requirements.txt +++ b/backend/metadata_writer/requirements.txt @@ -1,32 +1,67 @@ # -# This file is autogenerated by pip-compile -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile --output-file=- - # -absl-py==0.12.0 # via ml-metadata -attrs==20.3.0 # via ml-metadata -cachetools==5.0.0 # via google-auth -certifi==2021.10.8 # via kubernetes, requests -charset-normalizer==2.0.10 # via requests -google-auth==2.4.1 # via kubernetes -grpcio==1.43.0 # via ml-metadata -idna==3.3 # via requests -kubernetes==10.1.0 # via -r - -lru-dict==1.1.7 # via -r - -ml-metadata==1.5.0 # via -r - -oauthlib==3.1.1 # via requests-oauthlib -protobuf==3.19.3 # via ml-metadata -pyasn1-modules==0.2.8 # via google-auth -pyasn1==0.4.8 # via pyasn1-modules, rsa -python-dateutil==2.8.2 # via kubernetes -pyyaml==3.13 # via kubernetes -requests-oauthlib==1.3.0 # via kubernetes -requests==2.27.1 # via kubernetes, requests-oauthlib -rsa==4.8 # via google-auth -six==1.16.0 # via absl-py, google-auth, grpcio, kubernetes, ml-metadata, python-dateutil -urllib3==1.26.8 # via kubernetes, requests -websocket-client==1.2.3 # via kubernetes +absl-py==1.4.0 + # via ml-metadata +attrs==21.4.0 + # via ml-metadata +cachetools==5.3.1 + # via google-auth +certifi==2023.7.22 + # via + # kubernetes + # requests +charset-normalizer==3.2.0 + # via requests +google-auth==2.23.0 + # via kubernetes +grpcio==1.58.0 + # via ml-metadata +idna==3.4 + # via requests +kubernetes==10.1.0 + # via -r - +lru-dict==1.2.0 + # via -r - +ml-metadata==1.14.0 + # via -r - +oauthlib==3.2.2 + # via requests-oauthlib +protobuf==3.20.3 + # via ml-metadata +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +python-dateutil==2.8.2 + # via kubernetes +pyyaml==3.13 + # via kubernetes +requests==2.31.0 + # via + # kubernetes + # requests-oauthlib +requests-oauthlib==1.3.1 + # via kubernetes +rsa==4.9 + # via google-auth +six==1.16.0 + # via + # kubernetes + # ml-metadata + # python-dateutil +urllib3==1.26.16 + # via + # google-auth + # kubernetes + # requests +websocket-client==1.6.3 + # via kubernetes # The 
following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/backend/metadata_writer/update_requirements.sh b/backend/metadata_writer/update_requirements.sh new file mode 100755 index 0000000000..f63fa4972d --- /dev/null +++ b/backend/metadata_writer/update_requirements.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# This image should be in sync with Dockerfile. +IMAGE="python:3.8" +../../hack/update-requirements.sh $IMAGE requirements.txt diff --git a/backend/requirements.txt b/backend/requirements.txt index 00191b2288..68cfc0f33f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,53 +1,55 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.7 # by the following command: # -# pip-compile --no-emit-index-url requirements.in +# pip-compile --output-file=- - # cachetools==5.3.1 # via google-auth -certifi==2023.5.7 +certifi==2023.7.22 # via # kfp-server-api # kubernetes # requests -charset-normalizer==3.1.0 +charset-normalizer==3.2.0 # via requests -click==8.1.3 +click==8.1.7 # via kfp docstring-parser==0.15 # via kfp -google-api-core==2.11.0 +google-api-core==2.11.1 # via # google-cloud-core # google-cloud-storage # kfp -google-auth==2.19.1 +google-auth==2.23.0 # via # google-api-core # google-cloud-core # google-cloud-storage # kfp # kubernetes -google-cloud-core==2.3.2 +google-cloud-core==2.3.3 # via google-cloud-storage -google-cloud-storage==2.9.0 +google-cloud-storage==2.11.0 # via kfp google-crc32c==1.5.0 # via google-resumable-media -google-resumable-media==2.5.0 +google-resumable-media==2.6.0 # via google-cloud-storage -googleapis-common-protos==1.59.0 +googleapis-common-protos==1.60.0 # via google-api-core idna==3.4 # via requests -kfp==2.0.0rc1 - # via -r requirements.in +importlib-metadata==6.7.0 + # via click +kfp==2.0.1 + # via -r - kfp-pipeline-spec==0.2.2 # via kfp -kfp-server-api==2.0.0-rc.1 +kfp-server-api==2.0.1 # via kfp -kubernetes==23.6.0 +kubernetes==26.1.0 # via kfp oauthlib==3.2.2 # via requests-oauthlib @@ -67,7 +69,7 @@ python-dateutil==2.8.2 # via # kfp-server-api # kubernetes -pyyaml==6.0 +pyyaml==6.0.1 # via # kfp # kubernetes @@ -86,12 +88,15 @@ rsa==4.9 # via google-auth six==1.16.0 # via - # google-auth # kfp-server-api # kubernetes # python-dateutil tabulate==0.9.0 # via kfp +typing-extensions==4.7.1 + # via + # importlib-metadata + # kfp urllib3==1.26.16 # via # google-auth @@ -99,8 +104,10 @@ urllib3==1.26.16 # kfp-server-api # kubernetes # requests -websocket-client==1.5.2 +websocket-client==1.6.1 # via kubernetes +zipp==3.15.0 + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/backend/src/apiserver/visualization/requirements-test.txt b/backend/src/apiserver/visualization/requirements-test.txt index d9d5fbfd16..dc9791f7f2 100644 --- a/backend/src/apiserver/visualization/requirements-test.txt +++ b/backend/src/apiserver/visualization/requirements-test.txt @@ -1 +1 @@ -snapshottest==0.5.1 +snapshottest==0.6.0 diff --git a/backend/src/apiserver/visualization/requirements.in b/backend/src/apiserver/visualization/requirements.in index a5b6919c07..61ebd737ba 100644 --- a/backend/src/apiserver/visualization/requirements.in +++ b/backend/src/apiserver/visualization/requirements.in @@ -4,14 +4,16 @@ google-api-python-client==1.7.* itables==0.1.0 ipykernel==5.1.1 ipython==7.12.0 +jinja2==2.11.3 jupyter_client==5.3.* +markupsafe==2.0.1 nbconvert==5.5.0 nbformat==4.4.0 
-scikit_learn==0.21.2 -tensorflow==2.5.1 -tensorflow-metadata==1.2.* -tensorflow-model-analysis==0.33.* -tensorflow-data-validation==1.2.* -tensorflow-serving-api==2.5.1 +scikit-learn==0.24.2 +tensorflow==2.10.1 +tensorflow-metadata==1.9.* +tensorflow-model-analysis==0.40.* +tensorflow-data-validation==1.9.* +tensorflow-serving-api==2.10.1 tornado==6.* mistune<2.0.0 \ No newline at end of file diff --git a/backend/src/apiserver/visualization/requirements.txt b/backend/src/apiserver/visualization/requirements.txt index 1e0e6dd825..bd95a01914 100644 --- a/backend/src/apiserver/visualization/requirements.txt +++ b/backend/src/apiserver/visualization/requirements.txt @@ -1,153 +1,570 @@ # -# This file is autogenerated by pip-compile -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # -# pip-compile --output-file=- - +# pip-compile --output-file=- --resolver=backtracking - # -absl-py==0.12.0 # via tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tfx-bsl -apache-beam[gcp]==2.34.0 # via tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -argon2-cffi-bindings==21.2.0 # via argon2-cffi -argon2-cffi==21.3.0 # via notebook -astunparse==1.6.3 # via tensorflow -attrs==21.2.0 # via jsonschema -avro-python3==1.9.2.1 # via apache-beam -backcall==0.2.0 # via ipython -bleach==4.1.0 # via nbconvert -bokeh==1.2.0 # via -r - -cached-property==1.5.2 # via h5py -cachetools==4.2.4 # via apache-beam, google-auth -certifi==2021.10.8 # via requests -cffi==1.15.0 # via argon2-cffi-bindings -charset-normalizer==2.0.9 # via requests -crcmod==1.7 # via apache-beam -dataclasses==0.8 # via apache-beam, argon2-cffi, libcst, werkzeug -decorator==5.1.0 # via gcsfs, ipython, traitlets -defusedxml==0.7.1 # via nbconvert -dill==0.3.1.1 # via apache-beam -docopt==0.6.2 # via hdfs -entrypoints==0.3 # via nbconvert -fastavro==1.4.7 # via apache-beam -fasteners==0.16.3 # via google-apitools -flatbuffers==1.12 # via tensorflow -future==0.18.2 # via apache-beam -gast==0.4.0 # via tensorflow -gcsfs==0.2.3 # via -r - -google-api-core[grpc,grpcgcp]==1.31.5 # via google-cloud-bigquery, google-cloud-bigquery-storage, google-cloud-bigtable, google-cloud-core, google-cloud-datastore, google-cloud-dlp, google-cloud-language, google-cloud-pubsub, google-cloud-recommendations-ai, google-cloud-spanner, google-cloud-videointelligence, google-cloud-vision -google-api-python-client==1.7.12 # via -r -, tfx-bsl -google-apitools==0.5.31 # via apache-beam -google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.4.6 # via gcsfs, tensorboard -google-auth==1.35.0 # via apache-beam, gcsfs, google-api-core, google-api-python-client, google-auth-httplib2, google-auth-oauthlib, google-cloud-core, tensorboard -google-cloud-bigquery-storage==2.10.1 # via apache-beam -google-cloud-bigquery==2.20.0 # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -google-cloud-bigtable==1.7.0 # via apache-beam -google-cloud-core==1.7.2 # via apache-beam, google-cloud-bigquery, google-cloud-bigtable, google-cloud-datastore, google-cloud-spanner -google-cloud-datastore==1.15.3 # via apache-beam -google-cloud-dlp==1.0.0 # via apache-beam -google-cloud-language==1.3.0 # via apache-beam -google-cloud-pubsub==1.7.0 # via apache-beam -google-cloud-recommendations-ai==0.2.0 # via apache-beam -google-cloud-spanner==1.19.1 # via apache-beam -google-cloud-videointelligence==1.16.1 # via apache-beam 
-google-cloud-vision==1.0.0 # via apache-beam -google-crc32c==1.3.0 # via google-resumable-media -google-pasta==0.2.0 # via tensorflow -google-resumable-media==1.3.3 # via google-cloud-bigquery -googleapis-common-protos[grpc]==1.54.0 # via google-api-core, grpc-google-iam-v1, tensorflow-metadata -grpc-google-iam-v1==0.12.3 # via google-cloud-bigtable, google-cloud-pubsub, google-cloud-spanner -grpcio-gcp==0.2.2 # via apache-beam, google-api-core -grpcio==1.34.1 # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, grpcio-gcp, tensorboard, tensorflow, tensorflow-serving-api -h5py==3.1.0 # via tensorflow -hdfs==2.6.0 # via apache-beam -httplib2==0.19.1 # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client -idna==3.3 # via requests -importlib-metadata==4.8.3 # via jsonschema, markdown -ipykernel==5.1.1 # via -r -, ipywidgets, notebook -ipython-genutils==0.2.0 # via ipywidgets, nbformat, notebook, traitlets -ipython==7.12.0 # via -r -, ipykernel, ipywidgets, tensorflow-model-analysis -ipywidgets==7.6.5 # via tensorflow-model-analysis -itables==0.1.0 # via -r - -jedi==0.18.1 # via ipython -jinja2==3.0.3 # via bokeh, nbconvert, notebook -joblib==0.14.1 # via scikit-learn, tensorflow-data-validation -jsonschema==3.2.0 # via nbformat -jupyter-client==5.3.5 # via -r -, ipykernel, notebook -jupyter-core==4.9.1 # via jupyter-client, nbconvert, nbformat, notebook -jupyterlab-widgets==1.0.2 # via ipywidgets -keras-nightly==2.5.0.dev2021032900 # via tensorflow -keras-preprocessing==1.1.2 # via tensorflow -libcst==0.3.23 # via google-cloud-bigquery-storage -markdown==3.3.6 # via tensorboard -markupsafe==2.0.1 # via jinja2 -mistune==0.8.4 # via -r -, nbconvert -mypy-extensions==0.4.3 # via typing-inspect -nbconvert==5.5.0 # via -r -, notebook -nbformat==4.4.0 # via -r -, ipywidgets, nbconvert, notebook -nest-asyncio==1.5.4 # via notebook -notebook==6.4.6 # via widgetsnbextension -numpy==1.19.5 # via apache-beam, bokeh, h5py, keras-preprocessing, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -oauth2client==4.1.3 # via apache-beam, google-apitools -oauthlib==3.1.1 # via requests-oauthlib -opt-einsum==3.3.0 # via tensorflow -orjson==3.6.1 # via apache-beam -packaging==21.3 # via bleach, bokeh, google-api-core, google-cloud-bigquery -pandas==1.1.5 # via itables, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -pandocfilters==1.5.0 # via nbconvert -parso==0.8.3 # via jedi -pexpect==4.8.0 # via ipython -pickleshare==0.7.5 # via ipython -pillow==8.4.0 # via bokeh -prometheus-client==0.12.0 # via notebook -prompt-toolkit==3.0.24 # via ipython -proto-plus==1.19.8 # via google-cloud-bigquery, google-cloud-bigquery-storage, google-cloud-recommendations-ai -protobuf==3.19.1 # via apache-beam, google-api-core, google-cloud-bigquery, googleapis-common-protos, proto-plus, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl -ptyprocess==0.7.0 # via pexpect, terminado -pyarrow==2.0.0 # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -pyasn1-modules==0.2.8 # via google-auth, oauth2client -pyasn1==0.4.8 # via oauth2client, pyasn1-modules, rsa -pycparser==2.21 # via cffi -pydot==1.4.2 # via apache-beam -pygments==2.10.0 # via ipython, nbconvert -pymongo==3.12.3 # via apache-beam -pyparsing==2.4.7 # via httplib2, packaging, pydot 
-pyrsistent==0.18.0 # via jsonschema -python-dateutil==2.8.2 # via apache-beam, bokeh, jupyter-client, pandas -pytz==2021.3 # via apache-beam, google-api-core, pandas -pyyaml==6.0 # via bokeh, libcst -pyzmq==22.3.0 # via jupyter-client, notebook -requests-oauthlib==1.3.0 # via google-auth-oauthlib -requests==2.26.0 # via apache-beam, gcsfs, google-api-core, google-cloud-bigquery, hdfs, requests-oauthlib, tensorboard -rsa==4.8 # via google-auth, oauth2client -scikit_learn==0.21.2 # via -r - -scipy==1.5.4 # via scikit-learn, tensorflow-model-analysis -send2trash==1.8.0 # via notebook -six==1.15.0 # via absl-py, astunparse, bleach, bokeh, fasteners, google-api-core, google-api-python-client, google-apitools, google-auth, google-auth-httplib2, google-cloud-core, google-pasta, google-resumable-media, grpcio, hdfs, jsonschema, keras-preprocessing, oauth2client, python-dateutil, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, traitlets -tensorboard-data-server==0.6.1 # via tensorboard -tensorboard-plugin-wit==1.8.0 # via tensorboard -tensorboard==2.7.0 # via tensorflow -tensorflow-data-validation==1.2.0 # via -r - -tensorflow-estimator==2.5.0 # via tensorflow -tensorflow-metadata==1.2.0 # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl -tensorflow-model-analysis==0.33.0 # via -r - -tensorflow-serving-api==2.5.1 # via -r -, tfx-bsl -tensorflow==2.5.1 # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl -termcolor==1.1.0 # via tensorflow -terminado==0.12.1 # via notebook -testpath==0.5.0 # via nbconvert -tfx-bsl==1.2.0 # via tensorflow-data-validation, tensorflow-model-analysis -tornado==6.1 # via -r -, bokeh, ipykernel, jupyter-client, notebook, terminado -traitlets==4.3.3 # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook -typing-extensions==3.7.4.3 # via apache-beam, argon2-cffi, importlib-metadata, libcst, tensorflow, typing-inspect -typing-inspect==0.7.1 # via libcst -uritemplate==3.0.1 # via google-api-python-client -urllib3==1.26.7 # via requests -wcwidth==0.2.5 # via prompt-toolkit -webencodings==0.5.1 # via bleach -werkzeug==2.0.2 # via tensorboard -wheel==0.37.1 # via astunparse, tensorboard, tensorflow -widgetsnbextension==3.5.2 # via ipywidgets -wrapt==1.12.1 # via tensorflow -zipp==3.6.0 # via importlib-metadata +absl-py==1.4.0 + # via + # tensorboard + # tensorflow + # tensorflow-data-validation + # tensorflow-metadata + # tensorflow-model-analysis + # tfx-bsl +apache-beam[gcp]==2.46.0 + # via + # tensorflow-data-validation + # tensorflow-model-analysis + # tfx-bsl +argon2-cffi==23.1.0 + # via + # jupyter-server + # notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +astunparse==1.6.3 + # via tensorflow +attrs==23.1.0 + # via + # jsonschema + # referencing +backcall==0.2.0 + # via ipython +bleach==6.0.0 + # via nbconvert +bokeh==1.2.0 + # via -r - +cachetools==4.2.4 + # via + # apache-beam + # google-auth +certifi==2023.7.22 + # via requests +cffi==1.15.1 + # via argon2-cffi-bindings +charset-normalizer==3.2.0 + # via requests +cloudpickle==2.2.1 + # via apache-beam +comm==0.1.4 + # via ipywidgets +crcmod==1.7 + # via apache-beam +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +dill==0.3.1.1 + # via apache-beam +docopt==0.6.2 + # via hdfs +entrypoints==0.4 + # via nbconvert +fastavro==1.8.3 + # via apache-beam +fasteners==0.19 + # via + # apache-beam + # google-apitools +flatbuffers==23.5.26 + # via 
tensorflow +gast==0.4.0 + # via tensorflow +gcsfs==0.2.3 + # via -r - +google-api-core[grpc]==2.11.1 + # via + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-bigtable + # google-cloud-core + # google-cloud-datastore + # google-cloud-dlp + # google-cloud-language + # google-cloud-pubsub + # google-cloud-pubsublite + # google-cloud-recommendations-ai + # google-cloud-spanner + # google-cloud-videointelligence + # google-cloud-vision +google-api-python-client==1.7.12 + # via + # -r - + # tfx-bsl +google-apitools==0.5.31 + # via apache-beam +google-auth==2.23.0 + # via + # apache-beam + # gcsfs + # google-api-core + # google-api-python-client + # google-auth-httplib2 + # google-auth-oauthlib + # google-cloud-core + # tensorboard +google-auth-httplib2==0.1.1 + # via + # apache-beam + # google-api-python-client +google-auth-oauthlib==0.4.6 + # via + # gcsfs + # tensorboard +google-cloud-bigquery==3.11.4 + # via apache-beam +google-cloud-bigquery-storage==2.16.2 + # via apache-beam +google-cloud-bigtable==1.7.3 + # via apache-beam +google-cloud-core==2.3.3 + # via + # apache-beam + # google-cloud-bigquery + # google-cloud-bigtable + # google-cloud-datastore + # google-cloud-spanner +google-cloud-datastore==1.15.5 + # via apache-beam +google-cloud-dlp==3.12.3 + # via apache-beam +google-cloud-language==1.3.2 + # via apache-beam +google-cloud-pubsub==2.18.4 + # via + # apache-beam + # google-cloud-pubsublite +google-cloud-pubsublite==1.8.3 + # via apache-beam +google-cloud-recommendations-ai==0.7.1 + # via apache-beam +google-cloud-spanner==3.40.1 + # via apache-beam +google-cloud-videointelligence==1.16.3 + # via apache-beam +google-cloud-vision==3.4.4 + # via apache-beam +google-crc32c==1.5.0 + # via google-resumable-media +google-pasta==0.2.0 + # via tensorflow +google-resumable-media==2.6.0 + # via google-cloud-bigquery +googleapis-common-protos[grpc]==1.60.0 + # via + # google-api-core + # grpc-google-iam-v1 + # grpcio-status + # tensorflow-metadata +grpc-google-iam-v1==0.12.6 + # via + # google-cloud-bigtable + # google-cloud-pubsub + # google-cloud-spanner +grpcio==1.58.0 + # via + # apache-beam + # google-api-core + # google-cloud-bigquery + # google-cloud-pubsub + # google-cloud-pubsublite + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # tensorboard + # tensorflow + # tensorflow-serving-api +grpcio-status==1.48.2 + # via + # google-api-core + # google-cloud-pubsub + # google-cloud-pubsublite +h5py==3.9.0 + # via tensorflow +hdfs==2.7.2 + # via apache-beam +httplib2==0.21.0 + # via + # apache-beam + # google-api-python-client + # google-apitools + # google-auth-httplib2 + # oauth2client +idna==3.4 + # via requests +importlib-metadata==6.8.0 + # via markdown +importlib-resources==6.1.0 + # via + # jsonschema + # jsonschema-specifications +ipykernel==5.1.1 + # via + # -r - + # notebook +ipython==7.12.0 + # via + # -r - + # ipykernel + # ipywidgets + # tensorflow-model-analysis +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.8.1 + # via tensorflow-model-analysis +itables==0.1.0 + # via -r - +jedi==0.19.0 + # via ipython +jinja2==2.11.3 + # via + # -r - + # bokeh + # nbconvert + # notebook +joblib==0.14.1 + # via + # scikit-learn + # tensorflow-data-validation +jsonschema==4.19.1 + # via nbformat +jsonschema-specifications==2023.7.1 + # via jsonschema +jupyter-client==5.3.5 + # via + # -r - + # ipykernel + # notebook +jupyter-core==5.3.1 + # via + # jupyter-client + # nbconvert + # nbformat + # 
notebook +jupyterlab-widgets==1.1.7 + # via ipywidgets +keras==2.10.0 + # via tensorflow +keras-preprocessing==1.1.2 + # via tensorflow +libclang==16.0.6 + # via tensorflow +markdown==3.4.4 + # via tensorboard +markupsafe==2.0.1 + # via + # -r - + # jinja2 +mistune==0.8.4 + # via + # -r - + # nbconvert +nbconvert==5.5.0 + # via + # -r - + # notebook +nbformat==4.4.0 + # via + # -r - + # nbconvert + # notebook +nest-asyncio==1.5.8 + # via notebook +notebook==6.4.13 + # via widgetsnbextension +numpy==1.24.4 + # via + # apache-beam + # bokeh + # h5py + # keras-preprocessing + # opt-einsum + # pandas + # pyarrow + # scikit-learn + # scipy + # tensorboard + # tensorflow + # tensorflow-data-validation + # tensorflow-model-analysis + # tfx-bsl +oauth2client==4.1.3 + # via google-apitools +oauthlib==3.2.2 + # via requests-oauthlib +objsize==0.6.1 + # via apache-beam +opt-einsum==3.3.0 + # via tensorflow +orjson==3.9.7 + # via apache-beam +overrides==6.5.0 + # via google-cloud-pubsublite +packaging==23.1 + # via + # bokeh + # google-cloud-bigquery + # tensorflow +pandas==1.5.3 + # via + # itables + # tensorflow-data-validation + # tensorflow-model-analysis + # tfx-bsl +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +pillow==10.0.1 + # via bokeh +pkgutil-resolve-name==1.3.10 + # via jsonschema +platformdirs==3.10.0 + # via jupyter-core +prometheus-client==0.17.1 + # via notebook +prompt-toolkit==3.0.39 + # via ipython +proto-plus==1.22.3 + # via + # apache-beam + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-dlp + # google-cloud-pubsub + # google-cloud-recommendations-ai + # google-cloud-spanner + # google-cloud-vision +protobuf==3.19.6 + # via + # apache-beam + # google-api-core + # google-cloud-bigquery + # google-cloud-bigquery-storage + # google-cloud-bigtable + # google-cloud-datastore + # google-cloud-dlp + # google-cloud-language + # google-cloud-pubsub + # google-cloud-recommendations-ai + # google-cloud-spanner + # google-cloud-videointelligence + # google-cloud-vision + # googleapis-common-protos + # grpc-google-iam-v1 + # grpcio-status + # proto-plus + # tensorboard + # tensorflow + # tensorflow-data-validation + # tensorflow-metadata + # tensorflow-model-analysis + # tensorflow-serving-api + # tfx-bsl +ptyprocess==0.7.0 + # via + # pexpect + # terminado +pyarrow==5.0.0 + # via + # apache-beam + # tensorflow-data-validation + # tensorflow-model-analysis + # tfx-bsl +pyasn1==0.5.0 + # via + # oauth2client + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via + # google-auth + # oauth2client +pycparser==2.21 + # via cffi +pydot==1.4.2 + # via apache-beam +pyfarmhash==0.3.2 + # via tensorflow-data-validation +pygments==2.16.1 + # via + # ipython + # nbconvert +pymongo==3.13.0 + # via apache-beam +pyparsing==3.1.1 + # via + # httplib2 + # pydot +python-dateutil==2.8.2 + # via + # apache-beam + # bokeh + # google-cloud-bigquery + # jupyter-client + # pandas +pytz==2023.3.post1 + # via + # apache-beam + # pandas +pyyaml==6.0.1 + # via bokeh +pyzmq==25.1.1 + # via + # jupyter-client + # notebook +referencing==0.30.2 + # via + # jsonschema + # jsonschema-specifications +regex==2023.8.8 + # via apache-beam +requests==2.31.0 + # via + # apache-beam + # gcsfs + # google-api-core + # google-cloud-bigquery + # hdfs + # requests-oauthlib + # tensorboard +requests-oauthlib==1.3.1 + # via google-auth-oauthlib +rpds-py==0.10.3 + # via + # jsonschema + # referencing +rsa==4.9 + # via + # 
google-auth + # oauth2client +scikit-learn==0.24.2 + # via -r - +scipy==1.10.1 + # via + # scikit-learn + # tensorflow-model-analysis +send2trash==1.8.2 + # via notebook +six==1.16.0 + # via + # astunparse + # bleach + # bokeh + # google-api-python-client + # google-apitools + # google-pasta + # hdfs + # keras-preprocessing + # oauth2client + # python-dateutil + # tensorflow + # tensorflow-data-validation + # tensorflow-model-analysis +sqlparse==0.4.4 + # via google-cloud-spanner +tensorboard==2.10.1 + # via tensorflow +tensorboard-data-server==0.6.1 + # via tensorboard +tensorboard-plugin-wit==1.8.1 + # via tensorboard +tensorflow==2.10.1 + # via + # -r - + # tensorflow-data-validation + # tensorflow-model-analysis + # tensorflow-serving-api + # tfx-bsl +tensorflow-data-validation==1.9.0 + # via -r - +tensorflow-estimator==2.10.0 + # via tensorflow +tensorflow-io-gcs-filesystem==0.34.0 + # via tensorflow +tensorflow-metadata==1.9.0 + # via + # -r - + # tensorflow-data-validation + # tensorflow-model-analysis + # tfx-bsl +tensorflow-model-analysis==0.40.0 + # via -r - +tensorflow-serving-api==2.10.1 + # via + # -r - + # tfx-bsl +termcolor==2.3.0 + # via tensorflow +terminado==0.17.1 + # via notebook +testpath==0.6.0 + # via nbconvert +tfx-bsl==1.9.0 + # via + # tensorflow-data-validation + # tensorflow-model-analysis +threadpoolctl==3.2.0 + # via scikit-learn +tornado==6.3.3 + # via + # -r - + # bokeh + # ipykernel + # jupyter-client + # notebook + # terminado +traitlets==5.10.0 + # via + # comm + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # nbconvert + # nbformat + # notebook +typing-extensions==4.8.0 + # via + # apache-beam + # tensorflow +uritemplate==3.0.1 + # via google-api-python-client +urllib3==1.26.16 + # via + # google-auth + # requests +wcwidth==0.2.6 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +werkzeug==2.1.2 + # via tensorboard +wheel==0.41.2 + # via + # astunparse + # tensorboard +widgetsnbextension==3.6.6 + # via ipywidgets +wrapt==1.15.0 + # via tensorflow +zipp==3.17.0 + # via + # importlib-metadata + # importlib-resources +zstandard==0.21.0 + # via apache-beam # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/backend/src/apiserver/visualization/update_requirements.sh b/backend/src/apiserver/visualization/update_requirements.sh index 5dfbd8f12f..a1d22db67a 100755 --- a/backend/src/apiserver/visualization/update_requirements.sh +++ b/backend/src/apiserver/visualization/update_requirements.sh @@ -1,9 +1,9 @@ #!/bin/bash # This image should be in sync with Dockerfile.visualization. -IMAGE="tensorflow/tensorflow:2.5.1" +IMAGE="tensorflow/tensorflow:2.10.1" # tensorflow/tfx default entrypoint is Apache BEAM, because Apache BEAM doesn't # support custom entrypoint for now. We need to override with --entrypoint "" # for other `docker run` usecase. # https://github.com/tensorflow/tfx/blob/master/tfx/tools/docker/Dockerfile#L71 -../../../update_requirements.sh $IMAGE requirements.txt +../../../../hack/update-requirements.sh $IMAGE requirements.txt diff --git a/backend/update_requirements.sh b/backend/update_requirements.sh new file mode 100755 index 0000000000..920940e909 --- /dev/null +++ b/backend/update_requirements.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# This image should be in sync with Dockerfile. 
+IMAGE="python:3.7" +../hack/update-requirements.sh $IMAGE requirements.txt diff --git a/hack/update-all-requirements.sh b/hack/update-all-requirements.sh index 8b49c0f434..0b4cd533ba 100755 --- a/hack/update-all-requirements.sh +++ b/hack/update-all-requirements.sh @@ -20,3 +20,5 @@ REPO_ROOT="${DIR}/.." cd "${REPO_ROOT}/backend/src/apiserver/visualization" && bash update_requirements.sh cd "${REPO_ROOT}/test/sample-test/hack" && bash update_requirements.sh +cd "${REPO_ROOT}/backend/metadata_writer" && bash update_requirements.sh +cd "${REPO_ROOT}/backend" && bash update_requirements.sh diff --git a/hack/update-requirements.sh b/hack/update-requirements.sh index 68361a3b3d..d7879ab26a 100755 --- a/hack/update-requirements.sh +++ b/hack/update-requirements.sh @@ -18,7 +18,7 @@ set -euo pipefail IMAGE=${1:-"python:3.7"} docker run -i --rm --entrypoint "" "$IMAGE" sh -c ' - python3 -m pip install pip setuptools --upgrade --quiet - python3 -m pip install pip-tools==5.4.0 --quiet - pip-compile --verbose --output-file - - + python3 -m pip install pip setuptools --quiet --upgrade + python3 -m pip install pip-tools==6.14.0 --quiet + pip-compile --resolver=backtracking --output-file - - ' From c5baf57db277580dc14307e646aaf9ff7961e10b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Sep 2023 23:40:32 +0000 Subject: [PATCH 167/253] chore(deps): bump github.com/emicklei/go-restful from 2.15.0+incompatible to 2.16.0+incompatible (#9348) Bumps [github.com/emicklei/go-restful](https://github.com/emicklei/go-restful) from 2.15.0+incompatible to 2.16.0+incompatible. - [Release notes](https://github.com/emicklei/go-restful/releases) - [Changelog](https://github.com/emicklei/go-restful/blob/v3/CHANGES.md) - [Commits](https://github.com/emicklei/go-restful/compare/v2.15.0...v2.16.0) --- updated-dependencies: - dependency-name: github.com/emicklei/go-restful dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- go.mod | 1 + go.sum | 2 ++ 2 files changed, 3 insertions(+) diff --git a/go.mod b/go.mod index 5a6fffb148..30743ae074 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/cenkalti/backoff v2.2.1+incompatible github.com/eapache/go-resiliency v1.2.0 github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a // indirect + github.com/emicklei/go-restful v2.16.0+incompatible // indirect github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5 // indirect github.com/fsnotify/fsnotify v1.5.1 github.com/go-openapi/errors v0.20.2 diff --git a/go.sum b/go.sum index d349781102..2f8a648c3e 100644 --- a/go.sum +++ b/go.sum @@ -361,6 +361,8 @@ github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.12.0+incompatible h1:SIvoTSbsMEwuM3dzFirLwKc4BH6VXP5CNf+G1FfJVr4= github.com/emicklei/go-restful v2.12.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.16.0+incompatible h1:rgqiKNjTnFQA6kkhFe16D8epTksy9HQ1MyrbDXSdYhM= +github.com/emicklei/go-restful v2.16.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful/v3 v3.8.0 h1:eCZ8ulSerjdAiaNpF7GxXIE7ZCMo1moN1qX+S609eVw= github.com/emicklei/go-restful/v3 v3.8.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= From 623e3094466485319bf1bdbd2585017172bf492e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 22 Sep 2023 00:45:50 -0700 Subject: [PATCH 168/253] chore(components): fix import in proto/README PiperOrigin-RevId: 567545593 --- .../google_cloud_pipeline_components/proto/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/proto/README.md b/components/google-cloud/google_cloud_pipeline_components/proto/README.md index 6b1dce053a..2162abb80d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/proto/README.md +++ b/components/google-cloud/google_cloud_pipeline_components/proto/README.md @@ -14,7 +14,7 @@ pip install -U google-cloud-pipeline-components To write a resource as an output parameter ``` -from google_cloud_pipeline_components.experimental.proto.gcp_resources_pb2 import GcpResources +from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources from google.protobuf.json_format import MessageToJson dataflow_resources = GcpResources() From e8b8450e0a9501eca130b02d2cf2995b994d02c3 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 22 Sep 2023 09:55:49 -0700 Subject: [PATCH 169/253] feat(components): Add support for customizing evaluation_display_name in model evaluation pipelines PiperOrigin-RevId: 567648855 --- components/google-cloud/RELEASE.md | 1 + .../evaluation_llm_classification_pipeline.py | 4 +++- .../evaluation_llm_text_generation_pipeline.py | 5 +++-- .../v1/model_evaluation/error_analysis_pipeline.py | 11 ++++------- .../evaluated_annotation_pipeline.py | 3 ++- ..._automl_tabular_feature_attribution_pipeline.py | 14 ++++++++------ .../evaluation_automl_tabular_pipeline.py | 12 ++++++++---- .../evaluation_automl_unstructure_data_pipeline.py | 10 ++++++++-- 
.../evaluation_feature_attribution_pipeline.py | 10 ++++++++-- 9 files changed, 45 insertions(+), 25 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 1a98d48d10..560140a53d 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -5,6 +5,7 @@ * Add feature_selection_pipeline to preview.automl.tabular. * Bump supported KFP versions to kfp>=2.0.0b10,<=2.2.0 * Add `time_series_dense_encoder_forecasting_pipeline`, `learn_to_learn_forecasting_pipeline`, `sequence_to_sequence_forecasting_pipeline`, and `temporal_fusion_transformer_forecasting_pipeline` to `preview.automl.forecasting`. +* Add support for customizing evaluation display name on `v1` and `preview` `model_evaluation` pipelines. ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index da5888bd42..d32ebde7e1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -48,6 +48,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-llm-classification-pipeline-{{$.pipeline_job_uuid}}', ) -> NamedTuple( 'outputs', evaluation_metrics=ClassificationMetrics, @@ -77,6 +78,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. Returns: evaluation_metrics: ClassificationMetrics Artifact for LLM Text Classification. 
@@ -148,7 +150,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default model=get_vertex_model_task.outputs['artifact'], dataset_type=batch_predict_instances_format, dataset_paths=batch_predict_gcs_source_uris, - display_name=_PIPELINE_NAME, + display_name=evaluation_display_name, ) return outputs( diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index 3227664200..ee8f5ceaf9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -39,6 +39,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul service_account: str = '', network: str = '', encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-llm-text-generation-pipeline-{{$.pipeline_job_uuid}}', ) -> NamedTuple( 'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str ): @@ -61,6 +62,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code Service Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name, as in `myVPC`. To specify this field, you must have already configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. Returns: evaluation_metrics: Metrics Artifact for LLM Text Generation. 
@@ -104,7 +106,6 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul joined_predictions_gcs_source=batch_predict_task.outputs[ 'gcs_output_directory' ], - display_name=_PIPELINE_NAME, machine_type=machine_type, service_account=service_account, network=network, @@ -117,7 +118,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul problem_type=evaluation_task, dataset_type=batch_predict_predictions_format, dataset_paths=batch_predict_gcs_source_uris, - display_name=_PIPELINE_NAME, + display_name=evaluation_display_name, ) return outputs( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py index 9b6cba7866..43f09e4916 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/error_analysis_pipeline.py @@ -53,6 +53,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-vision-error-analysis-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): @@ -88,11 +89,11 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. 
""" # fmt: on - evaluation_display_name = 'automl-vision-error-analysis-pipeline' with dsl.Condition( ( @@ -207,9 +208,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataset_paths=dataset_preprocessor_task.outputs[ 'batch_prediction_storage_source' ], - display_name=( - f'{evaluation_display_name}-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}' - ), + display_name=evaluation_display_name, ) ModelImportEvaluatedAnnotationOp( model=get_model_task.outputs['model'], @@ -327,9 +326,7 @@ def vision_model_error_analysis_pipeline( # pylint: disable=dangerous-default-v dataset_paths=dataset_preprocessor_task.outputs[ 'batch_prediction_storage_source' ], - display_name=( - f'{evaluation_display_name}-{dsl.PIPELINE_JOB_ID_PLACEHOLDER}' - ), + display_name=evaluation_display_name, ) ModelImportEvaluatedAnnotationOp( model=get_model_task.outputs['model'], diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py index 4d33383c86..cfb08646d8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluated_annotation_pipeline.py @@ -48,6 +48,7 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-vision-evaluated-annotation-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): @@ -75,11 +76,11 @@ def evaluated_annotation_pipeline( # pylint: disable=dangerous-default-value dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. 
""" # fmt: off - evaluation_display_name = 'automl-vision-evaluated-annotation-pipeline' get_test_dataset_task = GetVertexDatasetOp( dataset_resource_name=test_dataset_resource_name diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py index 174c80b346..e8db28b4a3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_feature_attribution_pipeline.py @@ -48,6 +48,7 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic + evaluation_display_name: str = 'evaluation-automl-tabular-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, @@ -94,6 +95,7 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -107,9 +109,6 @@ def evaluation_automl_tabular_feature_attribution_classification_pipeline( # py evaluation_resource_name=str, ) - evaluation_display_name = ( - 'evaluation-automl-tabular-feature-attribution-pipeline' - ) get_model_task = GetVertexModelOp(model_name=model_name) # Run Batch Prediction. @@ -234,6 +233,7 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-tabular-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ) -> NamedTuple( 'outputs', @@ -272,6 +272,7 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. 
If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: A google.RegressionMetrics artifact. @@ -283,9 +284,6 @@ def evaluation_automl_tabular_feature_attribution_regression_pipeline( # pylint evaluation_resource_name=str, ) - evaluation_display_name = ( - 'evaluation-automl-tabular-feature-attribution-pipeline' - ) get_model_task = GetVertexModelOp(model_name=model_name) # Run Batch Prediction. @@ -409,6 +407,7 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-tabular-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ): # fmt: off @@ -447,6 +446,7 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. 
""" # fmt: on @@ -480,6 +480,7 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) @@ -510,5 +511,6 @@ def evaluation_automl_tabular_feature_attribution_pipeline( # pylint: disable=d dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py index 027d72c14b..fa49509dde 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_tabular_pipeline.py @@ -42,6 +42,7 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic + evaluation_display_name: str = 'evaluation-automl-tabular-pipeline-{{$.pipeline_job_uuid}}', dataflow_machine_type: str = 'n1-standard-4', dataflow_max_num_workers: int = 5, dataflow_disk_size_gb: int = 50, @@ -88,6 +89,7 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -101,8 +103,6 @@ def evaluation_automl_tabular_classification_pipeline( # pylint: disable=danger evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation-automl-tabular-pipeline' - # Get the Vertex AI Model. 
get_model_task = GetVertexModelOp(model_name=model_name) @@ -190,6 +190,7 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-tabular-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ) -> NamedTuple( 'outputs', @@ -224,6 +225,7 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: @@ -236,8 +238,6 @@ def evaluation_automl_tabular_regression_pipeline( # pylint: disable=dangerous- evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation-automl-tabular-pipeline' - # Get the Vertex AI Model. get_model_task = GetVertexModelOp(model_name=model_name) @@ -326,6 +326,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-tabular-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ): # fmt: off @@ -358,6 +359,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. 
""" # fmt: on @@ -388,6 +390,7 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) @@ -415,5 +418,6 @@ def evaluation_automl_tabular_pipeline( # pylint: disable=dangerous-default-val dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index 59fa225b10..34fc9ad764 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -53,6 +53,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-unstructured-data-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( @@ -90,6 +91,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -102,7 +104,6 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation_automl_unstructure_data_pipeline' get_model_task = GetVertexModelOp(model_name=model_name) # Remove the ground truth from the given GCS data. 
@@ -217,6 +218,7 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-unstructured-data-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ) -> NamedTuple( 'outputs', @@ -254,6 +256,7 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: A Tuple of google.RegressionMetrics artifact and the imported evaluation metrics resource name. @@ -265,7 +268,6 @@ def evaluation_automl_unstructure_data_regression_pipeline( # pylint: disable=d evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation_automl_unstructure_data_pipeline' get_model_task = GetVertexModelOp(model_name=model_name) # Remove the ground truth from the given GCS data. @@ -381,6 +383,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-automl-unstructured-data-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ): # fmt: off @@ -416,6 +419,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. 
""" # fmt: on @@ -448,6 +452,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) @@ -476,5 +481,6 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 3c0a630c6f..4e5a34b58b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -56,6 +56,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ) -> NamedTuple( @@ -96,6 +97,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. project: The GCP project that runs the pipeline components. Defaults to the project in which the PipelineJob is run. @@ -108,7 +110,6 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation-feature-attribution-pipeline' get_model_task = GetVertexModelOp(model_name=model_name) # Remove the ground truth from the given GCS or BQ data. 
@@ -263,6 +264,7 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ) -> NamedTuple( 'outputs', @@ -305,6 +307,7 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. Returns: @@ -317,7 +320,6 @@ def evaluation_feature_attribution_regression_pipeline( # pylint: disable=dange evaluation_resource_name=str, ) - evaluation_display_name = 'evaluation-feature-attribution-pipeline' get_model_task = GetVertexModelOp(model_name=model_name) # Remove the ground truth from the given GCS or BQ data. @@ -473,6 +475,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul dataflow_subnetwork: str = '', dataflow_use_public_ips: bool = True, encryption_spec_key_name: str = '', + evaluation_display_name: str = 'evaluation-feature-attribution-pipeline-{{$.pipeline_job_uuid}}', force_runner_mode: str = '', ): # fmt: off @@ -512,6 +515,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: Customer-managed encryption key options. If set, resources created by this pipeline will be encrypted with the provided encryption key. Has the form: `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource is created. + evaluation_display_name: The display name of the uploaded evaluation resource to the Vertex AI model. force_runner_mode: Indicate the runner mode to use forcely. Valid options are `Dataflow` and `DirectRunner`. 
""" # fmt: on @@ -547,6 +551,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) @@ -578,5 +583,6 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul dataflow_subnetwork=dataflow_subnetwork, dataflow_use_public_ips=dataflow_use_public_ips, encryption_spec_key_name=encryption_spec_key_name, + evaluation_display_name=evaluation_display_name, force_runner_mode=force_runner_mode, ) From b0cccfee9432d2e787ba1f74eb8beb906222bea8 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 22 Sep 2023 12:55:20 -0700 Subject: [PATCH 170/253] fix(components): include model version in upload model output artifact (fix) PiperOrigin-RevId: 567695541 --- components/google-cloud/RELEASE.md | 3 ++- .../endpoint/undeploy_model/remote_runner.py | 27 ++++++++++++++----- .../v1/model/delete_model/remote_runner.py | 19 +++++++------ .../v1/model/upload_model/remote_runner.py | 25 ++++++++--------- .../v1/model/delete_model/component.py | 2 +- .../v1/model/upload_model/component.py | 4 +-- 6 files changed, 50 insertions(+), 30 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 560140a53d..1eb742762b 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,11 +1,12 @@ ## Upcoming release * Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` - * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` * Add feature_selection_pipeline to preview.automl.tabular. * Bump supported KFP versions to kfp>=2.0.0b10,<=2.2.0 * Add `time_series_dense_encoder_forecasting_pipeline`, `learn_to_learn_forecasting_pipeline`, `sequence_to_sequence_forecasting_pipeline`, and `temporal_fusion_transformer_forecasting_pipeline` to `preview.automl.forecasting`. * Add support for customizing evaluation display name on `v1` and `preview` `model_evaluation` pipelines. +* Include model version ID in `v1.model.upload_model.ModelUploadOp`'s `VertexModel` output (key: `model`). The URI and metadata `resourceName` field in the outputted `VertexModel` now have `@` appended, corresponding to the model that was just created. Downstream components `DeleteModel` and `UndeployModel` will respect the model version if provided. 
+ ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/undeploy_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/undeploy_model/remote_runner.py index 65e79e3958..01218c1245 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/undeploy_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/endpoint/undeploy_model/remote_runner.py @@ -19,7 +19,6 @@ from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util - _MODEL_NAME_TEMPLATE = r'(projects/(?P.*)/locations/(?P.*)/models/(?P.*))' _ENDPOINT_NAME_TEMPLATE = r'(projects/(?P.*)/locations/(?P.*)/endpoints/(?P.*))' @@ -60,19 +59,35 @@ def undeploy_model( get_endpoint_remote_runner = lro_remote_runner.LroRemoteRunner(location) endpoint = get_endpoint_remote_runner.request(get_endpoint_url, '', 'get') + # may or may not contain a model version + full_model_name = undeploy_model_request['model'] + + if '@' in full_model_name: + model_name_no_version, model_version = full_model_name.rsplit('@') + else: + model_name_no_version = full_model_name + model_version = None + # Get the deployed_model_id deployed_model_id = '' - model_name = undeploy_model_request['model'] for deployed_model in endpoint['deployedModels']: - if deployed_model['model'] == model_name: + if deployed_model['model'] == model_name_no_version and ( + # undeploy if version is unspecified + model_version is None + # or version matches + or deployed_model['modelVersionId'] == model_version + ): deployed_model_id = deployed_model['id'] break + print('deployed_model_id', deployed_model_id) if not deployed_model_id: # TODO(ruifang) propagate the error. raise ValueError( - 'Model {} not found at endpoint {}.'.format(model_name, endpoint_name) + 'Model {} not found at endpoint {}.'.format( + full_model_name, endpoint_name + ) ) # Undeploy the model @@ -85,14 +100,14 @@ def undeploy_model( ] model_uri_pattern = re.compile(_MODEL_NAME_TEMPLATE) - match = model_uri_pattern.match(model_name) + match = model_uri_pattern.match(model_name_no_version) try: location = match.group('location') except AttributeError as err: # TODO(ruifang) propagate the error. raise ValueError( 'Invalid model name: {}. 
Expect: {}.'.format( - model_name, + full_model_name, 'projects/[project_id]/locations/[location]/models/[model_id]', ) ) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/delete_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/delete_model/remote_runner.py index 4798fbc630..b73b010d90 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/delete_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/delete_model/remote_runner.py @@ -14,6 +14,7 @@ import json import re + from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util @@ -22,12 +23,12 @@ def delete_model( - type, - project, - location, - payload, - gcp_resources, -): + type: str, + project: str, + location: str, + payload: str, + gcp_resources: str, +) -> None: """Delete model and poll the LongRunningOperator till it reaches a final state.""" # TODO(IronPan) temporarily remove the empty fields from the spec delete_model_request = json_util.recursive_remove_empty( @@ -49,13 +50,15 @@ def delete_model( ) api_endpoint = location + '-aiplatform.googleapis.com' vertex_uri_prefix = f'https://{api_endpoint}/v1/' - delete_model_url = f'{vertex_uri_prefix}{model_name}' + # vertex AI delete model API deletes the full model, not only the version + model_name_no_version = model_name.rsplit('@', 1)[0] + delete_model_url = f'{vertex_uri_prefix}{model_name_no_version}' try: remote_runner = lro_remote_runner.LroRemoteRunner(location) delete_model_lro = remote_runner.create_lro( delete_model_url, '', gcp_resources, 'delete' ) - delete_model_lro = remote_runner.poll_lro(lro=delete_model_lro) + remote_runner.poll_lro(lro=delete_model_lro) except (ConnectionError, RuntimeError) as err: error_util.exit_with_internal_error(err.args[0]) diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py index 5ea07e9a2b..3a88048805 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/model/upload_model/remote_runner.py @@ -13,7 +13,7 @@ # limitations under the License. 
import json -from typing import Optional +from typing import Any, Dict, Optional from google_cloud_pipeline_components.container.utils import artifact_utils from google_cloud_pipeline_components.container.v1.gcp_launcher import lro_remote_runner @@ -22,7 +22,6 @@ from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import json_util from google_cloud_pipeline_components.types.artifact_types import VertexModel - ARTIFACT_PROPERTY_KEY_UNMANAGED_CONTAINER_MODEL = 'unmanaged_container_model' API_KEY_PREDICT_SCHEMATA = 'predict_schemata' API_KEY_CONTAINER_SPEC = 'container_spec' @@ -30,7 +29,9 @@ _LABELS_PAYLOAD_KEY = 'labels' -def append_unmanaged_model_artifact_into_payload(executor_input, model_spec): +def append_unmanaged_model_artifact_into_payload( + executor_input: str, model_spec: Dict[str, Any] +) -> Dict[str, Any]: artifact = ( json.loads(executor_input) .get('inputs', {}) @@ -54,14 +55,14 @@ def append_unmanaged_model_artifact_into_payload(executor_input, model_spec): def upload_model( - type, - project, - location, - payload, - gcp_resources, - executor_input, + type: str, + project: str, + location: str, + payload: str, + gcp_resources: str, + executor_input: str, parent_model_name: Optional[str] = None, -): +) -> None: """Upload model and poll the LongRunningOperator till it reaches a final state.""" api_endpoint = location + '-aiplatform.googleapis.com' vertex_uri_prefix = f'https://{api_endpoint}/v1/' @@ -100,9 +101,9 @@ def upload_model( ) upload_model_lro = remote_runner.poll_lro(lro=upload_model_lro) model_resource_name = upload_model_lro['response']['model'] - if 'model_version_id' in upload_model_lro['response']: + if 'modelVersionId' in upload_model_lro['response']: model_resource_name += ( - f'@{upload_model_lro["response"]["model_version_id"]}' + f'@{upload_model_lro["response"]["modelVersionId"]}' ) vertex_model = VertexModel.create( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py index 157a14f06d..480c73c749 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/delete_model/component.py @@ -21,7 +21,7 @@ @dsl.container_component def model_delete(model: Input[VertexModel], gcp_resources: dsl.OutputPath(str)): # fmt: off - """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models). See the [Model delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) method for more information. + """[Deletes](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models). See the [Model delete](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete) method for more information. Note that the full model is deleted, NOT only the model version. Args: model: The name of the Model resource to be deleted. Format: `projects/{project}/locations/{location}/models/{model}`. [More information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/delete#path-parameters). 
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py index 5ab4a29911..7f9397b80a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model/upload_model/component.py @@ -44,7 +44,7 @@ def model_upload( project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): # fmt: off - """[Uploads](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) and returns a Model artifact representing the uploaded Model resource. See [Model upload](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) method for more information. + """[Uploads](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) a Google Cloud Vertex [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models) and returns a Model artifact representing the uploaded Model resource, with a tag for the particular version. See [Model upload](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload) method for more information. Args: location: Optional location to upload this Model to. If not set, defaults to `us-central1`. @@ -65,7 +65,7 @@ def model_upload( project: Project to upload this Model to. Defaults to the project in which the PipelineJob is run. Returns: - model: Artifact tracking the created Model. + model: Artifact tracking the created Model version. gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the upload Model's long-running operation. 
""" # fmt: on From 9fe2dced4630d5a71ac9c5e844d96a7e6ca4953c Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Fri, 22 Sep 2023 13:43:44 -0700 Subject: [PATCH 171/253] chore: fix go mod file (#10023) --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index 2f8a648c3e..24aa560db5 100644 --- a/go.sum +++ b/go.sum @@ -359,7 +359,6 @@ github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a h1:A4wNiqeKqU56Zht github.com/elazarl/goproxy v0.0.0-20181111060418-2ce16c963a8a/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= -github.com/emicklei/go-restful v2.12.0+incompatible h1:SIvoTSbsMEwuM3dzFirLwKc4BH6VXP5CNf+G1FfJVr4= github.com/emicklei/go-restful v2.12.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.16.0+incompatible h1:rgqiKNjTnFQA6kkhFe16D8epTksy9HQ1MyrbDXSdYhM= github.com/emicklei/go-restful v2.16.0+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= From 2ef5afe0bbfb33dad4fabdf1982f4058921ed8da Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 22 Sep 2023 14:32:57 -0700 Subject: [PATCH 172/253] chore(sdk): release KFP SDK 2.3.0 (#10024) --- docs/conf.py | 9 ++++++++- sdk/RELEASE.md | 10 ++++++++++ sdk/python/kfp/__init__.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index faff716cd7..253fae4000 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -132,12 +132,19 @@ True, 'version_info': [ # need to use the sdk- prefix to avoid conflict with the BE's GitHub release tags + { + 'version': + 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.3.0/', + 'title': + '2.3.0', + 'aliases': ['stable'], + }, { 'version': 'https://kubeflow-pipelines.readthedocs.io/en/sdk-2.2.0/', 'title': '2.2.0', - 'aliases': ['stable'], + 'aliases': [], }, { 'version': diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 1d9e328fda..3f171205cd 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -1,6 +1,16 @@ # Current Version (in development) +## Features + +## Breaking changes + +## Deprecations + +## Bug fixes and other changes + +## Documentation updates +# 2.3.0 ## Features * Support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` [\#10010](https://github.com/kubeflow/pipelines/pull/10010) diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 926df57d4a..74d0332f3b 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -16,7 +16,7 @@ # https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages __path__ = __import__('pkgutil').extend_path(__path__, __name__) -__version__ = '2.2.0' +__version__ = '2.3.0' TYPE_CHECK = True From 4003e562713bd04fa94387d8b53dfbe3cf31cb12 Mon Sep 17 00:00:00 2001 From: Vadim Reider Date: Sat, 23 Sep 2023 00:32:57 +0200 Subject: [PATCH 173/253] fix(backend): OutPutPath dir creation mode Fixes #7629 (#9946) --- backend/src/v2/component/launcher_v2.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/v2/component/launcher_v2.go b/backend/src/v2/component/launcher_v2.go index e0f5060114..92a951c1c1 100644 --- a/backend/src/v2/component/launcher_v2.go +++ b/backend/src/v2/component/launcher_v2.go @@ -714,7 +714,7 @@ func localPathForURI(uri string) (string, error) { func 
prepareOutputFolders(executorInput *pipelinespec.ExecutorInput) error { for name, parameter := range executorInput.GetOutputs().GetParameters() { dir := filepath.Dir(parameter.OutputFile) - if err := os.MkdirAll(dir, 0644); err != nil { + if err := os.MkdirAll(dir, 0755); err != nil { return fmt.Errorf("failed to create directory %q for output parameter %q: %w", dir, name, err) } } @@ -730,7 +730,7 @@ func prepareOutputFolders(executorInput *pipelinespec.ExecutorInput) error { return fmt.Errorf("failed to generate local storage path for output artifact %q: %w", name, err) } - if err := os.MkdirAll(filepath.Dir(localPath), 0644); err != nil { + if err := os.MkdirAll(filepath.Dir(localPath), 0755); err != nil { return fmt.Errorf("unable to create directory %q for output artifact %q: %w", filepath.Dir(localPath), name, err) } } From dcaafeee8b98e2733444455e7117b628f017422d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Sat, 23 Sep 2023 20:59:58 +0200 Subject: [PATCH 174/253] fix(backend): Sync scheduled workflows v1 if APIVersion and Kind are missing. Fixes #9809 (#9968) --- .../persistence/client/scheduled_workflow_client.go | 6 ++++++ backend/src/common/util/scheduled_workflow.go | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/src/agent/persistence/client/scheduled_workflow_client.go b/backend/src/agent/persistence/client/scheduled_workflow_client.go index bf8b2acb1c..3fe4090d45 100644 --- a/backend/src/agent/persistence/client/scheduled_workflow_client.go +++ b/backend/src/agent/persistence/client/scheduled_workflow_client.go @@ -62,5 +62,11 @@ func (c *ScheduledWorkflowClient) Get(namespace string, name string) ( "Error retrieving scheduled workflow (%v) in namespace (%v): %v", name, namespace, err) } + // If the APIVersion and Kind are not set then set them to the default values. + if schedule.APIVersion == "" && schedule.Kind == "" { + schedule.Kind = util.SwfKind + schedule.APIVersion = util.ApiVersionV1 + } + return util.NewScheduledWorkflow(schedule), nil } diff --git a/backend/src/common/util/scheduled_workflow.go b/backend/src/common/util/scheduled_workflow.go index 07b292d719..caa25dd3cb 100644 --- a/backend/src/common/util/scheduled_workflow.go +++ b/backend/src/common/util/scheduled_workflow.go @@ -31,9 +31,9 @@ const ( SWFlegacy ScheduledWorkflowType = "legacy" SWFunknown ScheduledWorkflowType = "Unknown" - apiVersionV1 = "kubeflow.org/v1beta1" - apiVersionV2 = "kubeflow.org/v2beta1" - swfKind = "ScheduledWorkflow" + ApiVersionV1 = "kubeflow.org/v1beta1" + ApiVersionV2 = "kubeflow.org/v2beta1" + SwfKind = "ScheduledWorkflow" ) // ScheduledWorkflow is a type to help manipulate ScheduledWorkflow objects. 
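The 0644 -> 0755 change in launcher_v2.go above comes down to the directory execute (search) bit: a directory created with mode 0644 can have its entries listed by the owner, but no file inside it can be created or opened, so a non-root step cannot write the output parameters and artifacts the launcher is preparing folders for. A minimal Python sketch of the same effect, illustrative only and not part of the patch, assuming a POSIX filesystem, a non-root user, and a default umask:

    import os
    import tempfile

    root = tempfile.mkdtemp()

    bad_dir = os.path.join(root, 'outputs_0644')
    os.makedirs(bad_dir, mode=0o644)   # rw-r--r--: execute bit missing on the directory
    try:
        with open(os.path.join(bad_dir, 'result.txt'), 'w') as f:
            f.write('never reached')
    except PermissionError as err:
        print('0644 directory is unusable:', err)

    good_dir = os.path.join(root, 'outputs_0755')
    os.makedirs(good_dir, mode=0o755)  # rwxr-xr-x: files can be created inside
    with open(os.path.join(good_dir, 'result.txt'), 'w') as f:
        f.write('ok')

The os.MkdirAll(dir, 0755) calls in the Go launcher apply the same reasoning to the output parameter and artifact directories.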
@@ -160,9 +160,9 @@ func (s *ScheduledWorkflow) ToStringForStore() string { } func (s *ScheduledWorkflow) GetVersion() ScheduledWorkflowType { - if strings.HasPrefix(s.APIVersion, apiVersionV1) && s.Kind == swfKind { + if strings.HasPrefix(s.APIVersion, ApiVersionV1) && s.Kind == SwfKind { return SWFv1 - } else if strings.HasPrefix(s.APIVersion, apiVersionV2) && s.Kind == swfKind { + } else if strings.HasPrefix(s.APIVersion, ApiVersionV2) && s.Kind == SwfKind { return SWFv2 } else if s.APIVersion == "" && s.Kind == "" { return SWFlegacy From a76e385b20b30f9a974139bc4d6d04f8517441f2 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 25 Sep 2023 09:48:14 -0700 Subject: [PATCH 175/253] feat(components): Implement the chunking component PiperOrigin-RevId: 568246828 --- .../model_evaluation/__init__.py | 2 + .../model_evaluation/chunking/__init__.py | 14 +++ .../model_evaluation/chunking/component.py | 90 +++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index fe52d91dcc..04cbe28d46 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -15,6 +15,7 @@ import os +from google_cloud_pipeline_components._implementation.model_evaluation.chunking.component import chunking as ChunkingOp from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation.dataset_preprocessor.component import dataset_preprocessor_error_analysis as EvaluationDatasetPreprocessorOp from google_cloud_pipeline_components._implementation.model_evaluation.error_analysis_annotation.component import error_analysis_annotation as ErrorAnalysisAnnotationOp @@ -38,6 +39,7 @@ __all__ = [ 'evaluation_llm_safety_bias_pipeline', 'evaluation_llm_embedding_pipeline', + 'ChunkingOp', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', 'ErrorAnalysisAnnotationOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/__init__.py new file mode 100644 index 0000000000..34890407db --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation Chunking Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py new file mode 100644 index 0000000000..79e8b7932c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py @@ -0,0 +1,90 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Embedding eval chunking component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' + + +@container_component +def chunking( + gcp_resources: OutputPath(str), + project: str, + location: str, + input_text_gcs_dir: str, + embedding_bq_uri: str, + display_name: str = 'chunking', + machine_type: str = 'n1-standard-8', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Chunk files in GCS and write to BigQuery table. + + Args: + project: The GCP project that runs the pipeline component. + location: The GCP region that runs the pipeline component. + input_text_gcs_dir: the GCS directory containing the files to chunk. DO NOT + include '/' at the end of the path. + embedding_bq_uri: The BigQuery table URI where the component will write + chunks to. + display_name: The name of the chunking job/component. + machine_type: The machine type of this custom job. + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. + network: The full name of the Compute Engine network to which the job should + be peered. For example, projects/12345/global/networks/myVPC. Format is of + the form projects/{project}/global/networks/{network}. Where {project} is + a project number, as in 12345, and {network} is a network name. Private + services access must already be configured for the network. If left + unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + customJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--process_chunking={True}', + f'--project={project}', + f'--location={location}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--input_text_gcs_dir={input_text_gcs_dir}', + f'--embedding_bq_uri={embedding_bq_uri}', + f'--gcp_resources={gcp_resources}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) From 173be837d118505a9753c6593a51939d6d72dfc2 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 25 Sep 2023 10:14:24 -0700 Subject: [PATCH 176/253] chore(components): bump GCPC's KFP SDK upper bound to 2.3.0 PiperOrigin-RevId: 568254514 --- components/google-cloud/RELEASE.md | 1 + components/google-cloud/setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 1eb742762b..619073b3b9 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -6,6 +6,7 @@ * Add `time_series_dense_encoder_forecasting_pipeline`, `learn_to_learn_forecasting_pipeline`, `sequence_to_sequence_forecasting_pipeline`, and `temporal_fusion_transformer_forecasting_pipeline` to `preview.automl.forecasting`. * Add support for customizing evaluation display name on `v1` and `preview` `model_evaluation` pipelines. * Include model version ID in `v1.model.upload_model.ModelUploadOp`'s `VertexModel` output (key: `model`). The URI and metadata `resourceName` field in the outputted `VertexModel` now have `@` appended, corresponding to the model that was just created. Downstream components `DeleteModel` and `UndeployModel` will respect the model version if provided. 
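To make the new component concrete, here is a hypothetical sketch of wiring the ChunkingOp introduced in the chunking patch above into a pipeline. It is not part of the patch: the project, region, bucket, and BigQuery table names are placeholders, and the compile call at the end only shows the component producing a deployable pipeline spec.

    from kfp import compiler, dsl

    # Internal path, as exported by the model_evaluation __init__.py change above.
    from google_cloud_pipeline_components._implementation.model_evaluation import ChunkingOp


    @dsl.pipeline(name='chunking-demo')
    def chunking_pipeline(project: str = 'my-project', location: str = 'us-central1'):
        # Chunk every file under the GCS prefix and write the chunk rows to BigQuery.
        ChunkingOp(
            project=project,
            location=location,
            input_text_gcs_dir='gs://my-bucket/raw-docs',         # no trailing '/'
            embedding_bq_uri='bq://my-project.my_dataset.chunks',
            display_name='chunking',
            machine_type='n1-standard-8',
        )


    if __name__ == '__main__':
        compiler.Compiler().compile(chunking_pipeline, 'chunking_pipeline.yaml')

At runtime the task launches a Vertex AI custom job that reads the files under input_text_gcs_dir and writes chunks to embedding_bq_uri, with gcp_resources emitted as an output for tracking the job.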
+* Bump KFP SDK upper bound to 2.3.0 ## Release 2.3.1 diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index adfc020872..aa4b5d1c9e 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -82,7 +82,7 @@ # Pin google-api-core version for the bug fixing in 1.31.5 # https://github.com/googleapis/python-api-core/releases/tag/v1.31.5 "google-api-core>=1.31.5,<3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "kfp>=2.0.0b10,<=2.2.0", + "kfp>=2.0.0b10,<=2.3.0", "google-cloud-aiplatform>=1.14.0,<2", "Jinja2==3.1.2", ], From 45fe8e86583646143d1685d9e04d887ff27440ee Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 25 Sep 2023 11:48:49 -0700 Subject: [PATCH 177/253] feat(components): Use 64 v3 TPUs for llm pipelines PiperOrigin-RevId: 568282755 --- .../_implementation/llm/function_based.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index b4da8a350a..8f27dcb94b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -57,7 +57,7 @@ def resolve_machine_spec( return outputs( machine_type='cloud-tpu', accelerator_type='TPU_V3', - accelerator_count=32, + accelerator_count=64, ) elif location in gpu_regions: return outputs( From a070006a55bd1747d385df3d5f20d88d10921d61 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 28 Sep 2023 16:11:35 -0700 Subject: [PATCH 178/253] chore(components): release GCPC SDK 2.4.0 PiperOrigin-RevId: 569317711 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 4 +++- components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index d162ebe3c0..a949aa268c 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.3.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.4.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. 
ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 619073b3b9..381fab8de3 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,4 +1,6 @@ ## Upcoming release + +## Release 2.4.0 * Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` * Fix use of `encryption_spec_key_name` in `v1.custom_job.CustomTrainingJobOp` and `v1.custom_job.create_custom_training_job_from_component` * Add feature_selection_pipeline to preview.automl.tabular. @@ -7,7 +9,7 @@ * Add support for customizing evaluation display name on `v1` and `preview` `model_evaluation` pipelines. * Include model version ID in `v1.model.upload_model.ModelUploadOp`'s `VertexModel` output (key: `model`). The URI and metadata `resourceName` field in the outputted `VertexModel` now have `@` appended, corresponding to the model that was just created. Downstream components `DeleteModel` and `UndeployModel` will respect the model version if provided. * Bump KFP SDK upper bound to 2.3.0 - +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) ## Release 2.3.1 * Make LLM pipelines compatible with KFP SDK 2.1.3 diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index f311cc5692..cf299b70ad 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.4.0", + "title": "2.4.0", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.3.1", "title": "2.3.1", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 92721a9dc9..2b07bb0173 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. """Google Cloud Pipeline Components version.""" -__version__ = "2.3.1" +__version__ = "2.4.0" From 4fd1c02fc0a17d4d1272dde69d81c0bbb1fe18b4 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 2 Oct 2023 08:13:05 -0700 Subject: [PATCH 179/253] fix(components): Disable caching for LLM pipeline components that store temporary artifacts PiperOrigin-RevId: 570070284 --- components/google-cloud/RELEASE.md | 1 + .../preview/llm/infer/component.py | 30 ++-- .../preview/llm/rlhf/component.py | 170 ++++++++++-------- 3 files changed, 112 insertions(+), 89 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 381fab8de3..e166fc215b 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,4 +1,5 @@ ## Upcoming release +* Disable caching for LLM pipeline tasks that store temporary artifacts. 
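For readers unfamiliar with task-level caching in KFP, a minimal sketch of the pattern this change applies, shown with a stand-in component rather than the real GCPC importer, reward model trainer, and reinforcer tasks edited below:

    from kfp import dsl


    @dsl.component
    def produce_temporary_artifact(out_path: dsl.OutputPath(str)):
        # Stands in for a step whose output is written to a temporary location.
        with open(out_path, 'w') as f:
            f.write('intermediate data')


    @dsl.pipeline(name='no-cache-demo')
    def demo_pipeline():
        task = produce_temporary_artifact()
        # Re-run the step on every pipeline execution instead of reusing a cached
        # result whose backing artifact may already have been cleaned up.
        task.set_caching_options(False)

Disabling the cache trades some recomputation for correctness: a cache hit would otherwise skip the step and hand downstream tasks references to temporary artifacts that may no longer exist.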
## Release 2.4.0 * Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index 8dfc65d1c4..0096b89796 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -72,19 +72,23 @@ def infer_pipeline( prompt_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_importer', ).set_display_name('PromptDatasetImageUriResolver') - prompt_dataset_importer = private_text_importer.PrivateTextImporter( - project=project, - location=location, - input_text=prompt_dataset, - inputs_field_name=prompt_column, - targets_field_name='', # ignore targets_field_name - output_split_name=env.TRAIN_SPLIT, - large_model_reference=reference_model_metadata.outputs[ - 'large_model_reference' - ], - image_uri=prompt_dataset_image_uri.output, - instruction=instruction, - ).set_display_name('PromptDatasetImporter') + prompt_dataset_importer = ( + private_text_importer.PrivateTextImporter( + project=project, + location=location, + input_text=prompt_dataset, + inputs_field_name=prompt_column, + targets_field_name='', # ignore targets_field_name + output_split_name=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + image_uri=prompt_dataset_image_uri.output, + instruction=instruction, + ) + .set_display_name('PromptDatasetImporter') + .set_caching_options(False) + ) bulk_inferrer_image_uri = function_based.resolve_private_image_uri( image_name='infer', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index eef19e6d96..33f8b48504 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -99,20 +99,24 @@ def rlhf_pipeline( prompt_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_importer' ).set_display_name('PromptDatasetImageUriResolver') - prompt_dataset_importer = private_text_importer.PrivateTextImporter( - project=project, - location=location, - input_text=prompt_dataset, - inputs_field_name=prompt_column, - # Target field name does not matter because this field is not used. - targets_field_name='non_existent_targets_field_name', - output_split_name=env.TRAIN_SPLIT, - large_model_reference=reference_model_metadata.outputs[ - 'large_model_reference' - ], - image_uri=prompt_dataset_image_uri.output, - instruction=instruction, - ).set_display_name('PromptDatasetImporter') + prompt_dataset_importer = ( + private_text_importer.PrivateTextImporter( + project=project, + location=location, + input_text=prompt_dataset, + inputs_field_name=prompt_column, + # Target field name does not matter because this field is not used. 
+ targets_field_name='non_existent_targets_field_name', + output_split_name=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + image_uri=prompt_dataset_image_uri.output, + instruction=instruction, + ) + .set_display_name('PromptDatasetImporter') + .set_caching_options(False) + ) preference_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_comparison_importer' @@ -120,21 +124,23 @@ def rlhf_pipeline( comma_separated_candidates_field_names = ( function_based.convert_to_delimited_string(items=candidate_columns) ) - preference_dataset_importer = private_text_comparison_importer.PrivateTextComparisonImporter( - project=project, - location=location, - input_text=preference_dataset, - inputs_field_name=prompt_column, - comma_separated_candidates_field_names=comma_separated_candidates_field_names.output, - choice_field_name=choice_column, - split=env.TRAIN_SPLIT, - large_model_reference=reference_model_metadata.outputs[ - 'reward_model_reference' - ], - image_uri=preference_dataset_image_uri.output, - instruction=instruction, - ).set_display_name( - 'PreferenceDatasetImporter' + preference_dataset_importer = ( + private_text_comparison_importer.PrivateTextComparisonImporter( + project=project, + location=location, + input_text=preference_dataset, + inputs_field_name=prompt_column, + comma_separated_candidates_field_names=comma_separated_candidates_field_names.output, + choice_field_name=choice_column, + split=env.TRAIN_SPLIT, + large_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + image_uri=preference_dataset_image_uri.output, + instruction=instruction, + ) + .set_display_name('PreferenceDatasetImporter') + .set_caching_options(False) ) reward_model_image_uri = function_based.resolve_private_image_uri( @@ -142,59 +148,71 @@ def rlhf_pipeline( accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], ).set_display_name('RewardModelImageUriResolver') - reward_model = reward_model_trainer.RewardModelTrainer( - project=project, - location=location, - input_model_path=reference_model_metadata.outputs['reward_model_path'], - input_dataset_path=preference_dataset_importer.outputs[ - 'output_dataset_path' - ], - train_steps=reward_model_train_steps, - accelerator_type=machine_spec.outputs['accelerator_type'], - accelerator_count=machine_spec.outputs['accelerator_count'], - large_model_reference=reference_model_metadata.outputs[ - 'reward_model_reference' - ], - machine_type=machine_spec.outputs['machine_type'], - image_uri=reward_model_image_uri.output, - inputs_sequence_length=prompt_sequence_length, - targets_sequence_length=target_sequence_length, - batch_size=batch_size, - learning_rate_multiplier=reward_model_learning_rate_multiplier, - lora_dim=reward_model_lora_dim, - ).set_display_name('RewardModelTrainer') + reward_model = ( + reward_model_trainer.RewardModelTrainer( + project=project, + location=location, + input_model_path=reference_model_metadata.outputs[ + 'reward_model_path' + ], + input_dataset_path=preference_dataset_importer.outputs[ + 'output_dataset_path' + ], + train_steps=reward_model_train_steps, + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + large_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + machine_type=machine_spec.outputs['machine_type'], + image_uri=reward_model_image_uri.output, + 
inputs_sequence_length=prompt_sequence_length, + targets_sequence_length=target_sequence_length, + batch_size=batch_size, + learning_rate_multiplier=reward_model_learning_rate_multiplier, + lora_dim=reward_model_lora_dim, + ) + .set_display_name('RewardModelTrainer') + .set_caching_options(False) + ) rl_image_uri = function_based.resolve_private_image_uri( image_name='reinforcer', accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], ).set_display_name('ReinforcerImageUriResolver') - rl_model = reinforcer.Reinforcer( - project=project, - location=location, - input_reference_model_path=reference_model_metadata.outputs[ - 'reference_model_path' - ], - input_reward_model_path=reward_model.outputs['output_model_path'], - input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'], - train_steps=reinforcement_learning_train_steps, - accelerator_type=machine_spec.outputs['accelerator_type'], - accelerator_count=machine_spec.outputs['accelerator_count'], - large_model_reference=reference_model_metadata.outputs[ - 'large_model_reference' - ], - reward_model_reference=reference_model_metadata.outputs[ - 'reward_model_reference' - ], - machine_type=machine_spec.outputs['machine_type'], - image_uri=rl_image_uri.output, - inputs_sequence_length=prompt_sequence_length, - targets_sequence_length=target_sequence_length, - batch_size=batch_size, - learning_rate_multiplier=reinforcement_learning_rate_multiplier, - kl_coeff=kl_coeff, - lora_dim=policy_model_lora_dim, - ).set_display_name('Reinforcer') + rl_model = ( + reinforcer.Reinforcer( + project=project, + location=location, + input_reference_model_path=reference_model_metadata.outputs[ + 'reference_model_path' + ], + input_reward_model_path=reward_model.outputs['output_model_path'], + input_dataset_path=prompt_dataset_importer.outputs[ + 'imported_data_path' + ], + train_steps=reinforcement_learning_train_steps, + accelerator_type=machine_spec.outputs['accelerator_type'], + accelerator_count=machine_spec.outputs['accelerator_count'], + large_model_reference=reference_model_metadata.outputs[ + 'large_model_reference' + ], + reward_model_reference=reference_model_metadata.outputs[ + 'reward_model_reference' + ], + machine_type=machine_spec.outputs['machine_type'], + image_uri=rl_image_uri.output, + inputs_sequence_length=prompt_sequence_length, + targets_sequence_length=target_sequence_length, + batch_size=batch_size, + learning_rate_multiplier=reinforcement_learning_rate_multiplier, + kl_coeff=kl_coeff, + lora_dim=policy_model_lora_dim, + ) + .set_display_name('Reinforcer') + .set_caching_options(False) + ) should_perform_inference = function_based.value_exists(value=eval_dataset) with kfp.dsl.Condition( From cc9d7e79708d1d8f5a66379c2a33ec0aea3f4808 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 2 Oct 2023 15:01:26 -0700 Subject: [PATCH 180/253] chore(components): fix GCPC readthedocs dropdown versions PiperOrigin-RevId: 570190000 --- components/google-cloud/docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/docs/source/conf.py b/components/google-cloud/docs/source/conf.py index 3f2f7de1c4..a12613d28f 100644 --- a/components/google-cloud/docs/source/conf.py +++ b/components/google-cloud/docs/source/conf.py @@ -193,7 +193,7 @@ def __getitem__(self, type_) -> str: }], 'font': {'text': 'Open Sans'}, 'version_dropdown': True, - 'version_json': 
'https://raw.githubusercontent.com/kubeflow/pipelines/test-gcpc-dropdown/versions.json', + 'version_json': 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/google-cloud/docs/source/versions.json', # "toc_title_is_page_title": True, } # Add any paths that contain templates here, relative to this directory. From 3581146c5d3d37c08f17eef6032b91af499b64b5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 2 Oct 2023 16:38:08 -0700 Subject: [PATCH 181/253] chore(components): add `build.os` field to readthedocs config PiperOrigin-RevId: 570215323 --- components/google-cloud/docs/.readthedocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/google-cloud/docs/.readthedocs.yml b/components/google-cloud/docs/.readthedocs.yml index 2febfec5c1..028312faf7 100644 --- a/components/google-cloud/docs/.readthedocs.yml +++ b/components/google-cloud/docs/.readthedocs.yml @@ -9,3 +9,5 @@ python: path: components/google-cloud extra_requirements: - docs +build: + os: ubuntu-22.04 \ No newline at end of file From 42df5e1301de4300c2b84205ef22c786f791d4c3 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 2 Oct 2023 17:02:57 -0700 Subject: [PATCH 182/253] fix(components): Fix the feature transform engine arguments PiperOrigin-RevId: 570221353 --- components/google-cloud/RELEASE.md | 1 + .../forecasting/forecasting_ensemble.py | 3 +- .../forecasting/forecasting_stage_1_tuner.py | 5 +- .../forecasting/forecasting_stage_2_tuner.py | 5 +- .../learn_to_learn_forecasting_pipeline.yaml | 758 ++++----- ...ence_to_sequence_forecasting_pipeline.yaml | 760 ++++----- ...sion_transformer_forecasting_pipeline.yaml | 760 ++++----- ...es_dense_encoder_forecasting_pipeline.yaml | 758 ++++----- .../preview/automl/tabular/__init__.py | 1 + .../tabular/auto_feature_engineering.py | 3 +- ...ml_tabular_feature_selection_pipeline.yaml | 1312 ++++++++-------- .../tabular/automl_tabular_v2_pipeline.yaml | 1380 ++++++++--------- ...illation_stage_feature_transform_engine.py | 4 +- .../automl/tabular/feature_selection.py | 5 +- .../tabular/feature_selection_pipeline.yaml | 99 +- .../tabular/feature_transform_engine.py | 68 +- .../tabnet_hyperparameter_tuning_job.py | 5 +- ...et_hyperparameter_tuning_job_pipeline.yaml | 379 +++-- .../preview/automl/tabular/tabnet_trainer.py | 5 +- .../tabular/tabnet_trainer_pipeline.yaml | 371 ++--- .../preview/automl/tabular/utils.py | 97 +- ...wide_and_deep_hyperparameter_tuning_job.py | 5 +- ...ep_hyperparameter_tuning_job_pipeline.yaml | 328 ++-- .../automl/tabular/wide_and_deep_trainer.py | 5 +- .../wide_and_deep_trainer_pipeline.yaml | 322 ++-- .../xgboost_hyperparameter_tuning_job.py | 1 + ...st_hyperparameter_tuning_job_pipeline.yaml | 330 ++-- .../preview/automl/tabular/xgboost_trainer.py | 1 + .../tabular/xgboost_trainer_pipeline.yaml | 322 ++-- .../v1/automl/forecasting/__init__.py | 1 + .../bqml_arima_predict_pipeline.yaml | 53 +- .../bqml_arima_train_pipeline.yaml | 234 ++- .../forecasting/prophet_predict_pipeline.yaml | 233 +-- .../v1/automl/forecasting/prophet_trainer.py | 8 +- .../forecasting/prophet_trainer_pipeline.yaml | 282 ++-- .../v1/automl/forecasting/utils.py | 12 +- .../v1/automl/tabular/__init__.py | 1 + .../tabular/automl_tabular_pipeline.yaml | 1308 ++++++++-------- .../v1/automl/tabular/cv_trainer.py | 5 +- .../v1/automl/tabular/ensemble.py | 5 +- .../v1/automl/tabular/finalizer.py | 3 +- .../v1/automl/tabular/infra_validator.py | 3 +- .../automl/tabular/split_materialized_data.py | 3 +- .../v1/automl/tabular/stage_1_tuner.py | 5 +- 
.../automl/tabular/stats_and_example_gen.py | 5 +- .../training_configurator_and_validator.py | 3 +- .../v1/automl/tabular/transform.py | 5 +- .../v1/automl/tabular/utils.py | 59 +- 48 files changed, 4750 insertions(+), 5571 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index e166fc215b..35d7e2d22b 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,6 @@ ## Upcoming release * Disable caching for LLM pipeline tasks that store temporary artifacts. +* Fix the mismatched arguments in 2.4.0 for the Feature Transform Engine component. ## Release 2.4.0 * Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py index 8782c6d880..d683aeceef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_ensemble.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Forecasting Ensemble component spec.""" from typing import Optional @@ -71,7 +72,7 @@ def automl_forecasting_ensemble( # fmt: on job_id = dsl.PIPELINE_JOB_ID_PLACEHOLDER task_id = dsl.PIPELINE_TASK_ID_PLACEHOLDER - image_uri = 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325' + image_uri = 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125' display_name = f'automl-forecasting-ensemble-{job_id}-{task_id}' error_file_path = f'{root_dir}/{job_id}/{task_id}/error.pb' diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py index ea34e5bcb6..aaef5f438d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_1_tuner.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """AutoML Forecasting Stage 1 Tuner component spec.""" from typing import Optional @@ -98,14 +99,14 @@ def automl_forecasting_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125', '", "args": ["forecasting_mp_l2l_stage_1_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125', '", "--reduce_search_space_mode=', reduce_search_space_mode, f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py index 361dce3d06..f33a2be92d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/forecasting_stage_2_tuner.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Forecasting Stage 2 Tuner component spec.""" from typing import Optional @@ -96,14 +97,14 @@ def automl_forecasting_stage_2_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125', '", "args": ["forecasting_mp_l2l_stage_2_tuner', '", "--region=', location, '", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125', f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}', '", "--training_base_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml index db07733ed7..ea5e0f0de2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/learn_to_learn_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: learn-to-learn-forecasting -# Description: Train a model using Tabular Workflows for Learn to Learn Forecasting pipelines. +# Description: The AutoML Forecasting pipeline. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] @@ -663,7 +663,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -955,7 +955,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -976,6 +976,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1030,7 +1033,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1070,7 +1073,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1211,7 +1214,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri inputDefinitions: @@ -1389,7 +1392,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -1670,7 +1673,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1691,6 +1694,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1745,7 +1751,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1785,7 +1791,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1926,7 +1932,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri-2 inputDefinitions: @@ -2576,118 +2582,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". 
For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2711,118 +2661,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' 
isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -3006,61 +2900,6 @@ components: \ output_column: Name of our output feature." 
isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -4059,7 +3898,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4102,7 +3941,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4110,25 +3949,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4154,7 +3993,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4170,9 +4009,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4231,47 +4070,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). 
+ + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4306,11 +4149,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4319,38 +4162,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. 
The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4361,14 +4209,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4388,9 +4236,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4428,7 +4274,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4436,7 +4282,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4447,13 +4293,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -4521,7 +4373,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4564,7 +4416,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4572,25 +4424,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4616,7 +4468,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4632,9 +4484,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4693,47 +4545,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4768,11 +4624,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4781,38 +4637,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. 
+ + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4823,14 +4684,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4850,9 +4711,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4890,7 +4749,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4898,7 +4757,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4909,13 +4768,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5192,9 +5057,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -5216,9 +5089,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5238,24 +5111,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5278,7 +5166,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -5287,6 +5177,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -5297,9 +5189,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' 
isOptional: true explanation: artifactType: @@ -5321,9 +5221,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5343,24 +5243,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5383,7 +5298,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -5392,6 +5309,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -5887,7 +5806,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5921,7 +5840,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5956,11 +5875,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5999,11 +5918,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6042,7 +5961,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6064,7 +5983,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6126,7 +6045,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6187,6 +6106,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6202,7 +6123,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6215,8 +6136,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6226,7 +6147,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -6238,6 +6159,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6253,7 +6176,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6266,8 +6189,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6277,7 +6200,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-transform-engine: container: args: @@ -6362,8 +6285,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - 
--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6379,15 +6302,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6403,7 +6319,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6437,7 +6353,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6471,7 +6387,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6506,7 +6422,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6541,7 +6457,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6557,10 +6473,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6577,7 +6493,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6593,10 +6509,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6613,7 +6529,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6629,7 +6545,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-predictions-column-2: container: args: @@ -6642,7 +6558,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6658,7 +6574,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-importer: importer: artifactUri: @@ -6798,7 +6714,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -6847,7 +6763,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-forecasting: container: args: @@ -7000,6 +6916,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7018,12 +6936,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -7044,6 +6964,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7062,12 +6984,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - 
google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -7138,7 +7062,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7188,7 +7112,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -7234,7 +7158,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-string-not-empty: container: args: @@ -7247,7 +7171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7277,7 +7201,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7300,7 +7224,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -7313,7 +7237,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7336,7 +7260,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -7381,7 +7305,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The AutoML Forecasting pipeline. name: learn-to-learn-forecasting @@ -7976,4 +7900,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml index a56d98b784..a15f89d4f5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/sequence_to_sequence_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: sequence-to-sequence-forecasting -# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. Seq2seq is a simple model, and can be trained very fast, but accuracy is not its strength +# Description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] @@ -661,7 +661,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -949,7 +949,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -970,6 +970,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1025,7 +1028,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1065,7 +1068,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1206,7 +1209,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri inputDefinitions: @@ -1382,7 +1385,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -1659,7 +1662,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1680,6 +1683,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1735,7 +1741,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1775,7 +1781,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1916,7 +1922,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri-2 inputDefinitions: @@ -2369,7 +2375,7 @@ components: componentInputParameter: pipelinechannel--context_window enable_probabilistic_inference: runtimeValue: - constant: 0.0 + constant: false forecast_horizon: componentInputParameter: pipelinechannel--forecast_horizon forecasting_model_type: @@ -2558,118 +2564,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' 
isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. 
+ defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2693,118 +2643,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' 
isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2988,61 +2882,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -4041,7 +3880,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4084,7 +3923,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4092,25 +3931,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4136,7 +3975,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4152,9 +3991,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4213,47 +4052,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. 
May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4288,11 +4131,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4301,38 +4144,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. 
Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4343,14 +4191,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4370,9 +4218,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4410,7 +4256,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4418,7 +4264,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4429,13 +4275,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -4503,7 +4355,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4546,7 +4398,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4554,25 +4406,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4598,7 +4450,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4614,9 +4466,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4675,47 +4527,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4750,11 +4606,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4763,38 +4619,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. 
+ + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4805,14 +4666,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4832,9 +4693,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4872,7 +4731,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4880,7 +4739,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4891,13 +4750,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5174,9 +5039,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -5198,9 +5071,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5220,24 +5093,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5260,7 +5148,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -5269,6 +5159,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -5279,9 +5171,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' 
isOptional: true explanation: artifactType: @@ -5303,9 +5203,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5325,24 +5225,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5365,7 +5280,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -5374,6 +5291,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -5869,7 +5788,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5903,7 +5822,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5938,11 +5857,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5981,11 +5900,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6024,7 +5943,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6046,7 +5965,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6108,7 +6027,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6169,6 +6088,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6184,7 +6105,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6197,8 +6118,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6208,7 +6129,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -6220,6 +6141,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6235,7 +6158,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6248,8 +6171,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6259,7 +6182,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-transform-engine: container: args: @@ -6344,8 +6267,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - 
--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6361,15 +6284,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6385,7 +6301,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6419,7 +6335,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6453,7 +6369,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6488,7 +6404,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6523,7 +6439,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6539,10 +6455,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6559,7 +6475,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6575,10 +6491,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6595,7 +6511,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6611,7 +6527,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-predictions-column-2: container: args: @@ -6624,7 +6540,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6640,7 +6556,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-importer: importer: artifactUri: @@ -6780,7 +6696,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -6829,7 +6745,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-forecasting: container: args: @@ -6982,6 +6898,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7000,12 +6918,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -7026,6 +6946,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7044,12 +6966,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - 
google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -7120,7 +7044,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7170,7 +7094,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -7216,7 +7140,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-string-not-empty: container: args: @@ -7229,7 +7153,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7259,7 +7183,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7282,7 +7206,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -7295,7 +7219,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7318,7 +7242,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -7363,7 +7287,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The Sequence to Sequence (Seq2Seq) Forecasting pipeline. name: sequence-to-sequence-forecasting @@ -7935,4 +7859,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml index 073c9cc94f..87b9f8f710 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/temporal_fusion_transformer_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: temporal-fusion-transformer-forecasting -# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. The TFT model can produce the feature importance using the built-in mask for each feature, which leads to a very cheap prediction explanability overhead. With this model, the user won't need to explicitly enable the explanability support during serving to get the feature importance for each feature column. +# Description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] @@ -661,7 +661,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true selected_trials: runtimeValue: constant: 1.0 @@ -948,7 +948,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -969,6 +969,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1024,7 +1027,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1064,7 +1067,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1205,7 +1208,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri inputDefinitions: @@ -1382,7 +1385,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false selected_trials: runtimeValue: constant: 1.0 @@ -1658,7 +1661,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1679,6 +1682,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1734,7 +1740,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1774,7 +1780,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1915,7 +1921,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri-2 inputDefinitions: @@ -2364,7 +2370,7 @@ components: componentInputParameter: pipelinechannel--context_window enable_probabilistic_inference: runtimeValue: - constant: 0.0 + constant: false forecast_horizon: componentInputParameter: pipelinechannel--forecast_horizon forecasting_model_type: @@ -2551,118 +2557,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' 
isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. 
+ defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2686,118 +2636,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' 
isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2981,61 +2875,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -4034,7 +3873,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4077,7 +3916,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4085,25 +3924,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. 
The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4129,7 +3968,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4145,9 +3984,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4206,47 +4045,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. 
May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4281,11 +4124,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4294,38 +4137,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. 
Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4336,14 +4184,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4363,9 +4211,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4403,7 +4249,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4411,7 +4257,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4422,13 +4268,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -4496,7 +4348,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4539,7 +4391,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4547,25 +4399,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4591,7 +4443,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4607,9 +4459,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4668,47 +4520,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4743,11 +4599,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4756,38 +4612,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. 
+ + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4798,14 +4659,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4825,9 +4686,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4865,7 +4724,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4873,7 +4732,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4884,13 +4743,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5167,9 +5032,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -5191,9 +5064,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5213,24 +5086,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5253,7 +5141,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -5262,6 +5152,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -5272,9 +5164,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' 
isOptional: true explanation: artifactType: @@ -5296,9 +5196,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5318,24 +5218,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5358,7 +5273,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -5367,6 +5284,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -5862,7 +5781,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5896,7 +5815,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5931,11 +5850,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5974,11 +5893,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6017,7 +5936,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6039,7 +5958,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6101,7 +6020,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6162,6 +6081,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6177,7 +6098,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6190,8 +6111,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6201,7 +6122,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -6213,6 +6134,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6228,7 +6151,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6241,8 +6164,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6252,7 +6175,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-transform-engine: container: args: @@ -6337,8 +6260,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - 
--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6354,15 +6277,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6378,7 +6294,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6412,7 +6328,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6446,7 +6362,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6481,7 +6397,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6516,7 +6432,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6532,10 +6448,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6552,7 +6468,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6568,10 +6484,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6588,7 +6504,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6604,7 +6520,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-predictions-column-2: container: args: @@ -6617,7 +6533,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6633,7 +6549,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-importer: importer: artifactUri: @@ -6773,7 +6689,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -6822,7 +6738,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-forecasting: container: args: @@ -6975,6 +6891,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -6993,12 +6911,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -7019,6 +6939,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7037,12 +6959,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - 
google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -7113,7 +7037,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7163,7 +7087,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -7209,7 +7133,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-string-not-empty: container: args: @@ -7222,7 +7146,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7252,7 +7176,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7275,7 +7199,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -7288,7 +7212,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7311,7 +7235,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -7356,7 +7280,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The Temporal Fusion Transformer (TFT) Forecasting pipeline. name: temporal-fusion-transformer-forecasting @@ -7921,4 +7845,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml index 5d23d21761..50d25236fa 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/forecasting/time_series_dense_encoder_forecasting_pipeline.yaml @@ -1,6 +1,6 @@ # PIPELINE DEFINITION # Name: time-series-dense-encoder-forecasting -# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. TiDE is a new model type in Vertex Forecasting and has the best training and inference performance while not sacrificing any model quality. +# Description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. 
# Inputs: # available_at_forecast_columns: list # context_window: int [Default: 0.0] @@ -663,7 +663,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -955,7 +955,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -976,6 +976,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1030,7 +1033,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1070,7 +1073,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1211,7 +1214,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri inputDefinitions: @@ -1389,7 +1392,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false selected_trials: componentInputParameter: pipelinechannel--num_selected_trials stage_1_num_parallel_trials: @@ -1670,7 +1673,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1691,6 +1694,9 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + runtimeValue: + constant: forecasting project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1745,7 +1751,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1785,7 +1791,7 @@ components: componentInputParameter: pipelinechannel--encryption_spec_key_name generate_explanation: runtimeValue: - constant: 0.0 + constant: false instances_format: runtimeValue: constant: bigquery @@ -1926,7 +1932,7 @@ components: parameters: use_bq_prefix: runtimeValue: - constant: 1.0 + constant: true taskInfo: name: table-to-uri-2 inputDefinitions: @@ -2576,118 +2582,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". 
For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2711,118 +2661,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' 
isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -3006,61 +2900,6 @@ components: \ output_column: Name of our output feature." 
isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -4059,7 +3898,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4102,7 +3941,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4110,25 +3949,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4154,7 +3993,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4170,9 +4009,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4231,47 +4070,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). 
+ + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4306,11 +4149,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4319,38 +4162,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. 
The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4361,14 +4209,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4388,9 +4236,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4428,7 +4274,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4436,7 +4282,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4447,13 +4293,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -4521,7 +4373,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -4564,7 +4416,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -4572,25 +4424,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. 
If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -4616,7 +4468,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -4632,9 +4484,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4693,47 +4545,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. 
For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -4768,11 +4624,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -4781,38 +4637,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. 
+ + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -4823,14 +4684,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -4850,9 +4711,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -4890,7 +4749,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -4898,7 +4757,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -4909,13 +4768,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5192,9 +5057,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -5216,9 +5089,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5238,24 +5111,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5278,7 +5166,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -5287,6 +5177,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -5297,9 +5189,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' 
isOptional: true explanation: artifactType: @@ -5321,9 +5221,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -5343,24 +5243,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -5383,7 +5298,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -5392,6 +5309,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -5887,7 +5806,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5921,7 +5840,7 @@ deploymentSpec: - '{"display_name": "automl-forecasting-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", "encryption_spec": {"kms_key_name": "{{$.inputs.parameters[''encryption_spec_key_name'']}}"}, "job_spec": {"worker_pool_specs": [{"replica_count": 1, "machine_spec": - {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + {"machine_type": "n1-highmem-8"}, "container_spec": {"image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "args": ["forecasting_mp_ensemble", "--transform_output_path={{$.inputs.artifacts[''transform_output''].uri}}", "--error_file_path={{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb", "--metadata_path={{$.inputs.artifacts[''metadata''].uri}}", "--tuning_result_input_path={{$.inputs.artifacts[''tuning_result_input''].uri}}", @@ -5956,11 +5875,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_1_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", @@ -5999,11 +5918,11 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, 
\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"args\": [\"forecasting_mp_l2l_stage_2_tuner", "\", \"--region=", "{{$.inputs.parameters[''location'']}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--training_docker_uri=", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20230910_1325", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/forecasting-training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}", "\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train", "\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -6042,7 +5961,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -6064,7 +5983,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6126,7 +6045,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6187,6 +6106,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6202,7 +6123,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6215,8 +6136,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6226,7 +6147,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -6238,6 +6159,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -6253,7 +6176,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -6266,8 +6189,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -6277,7 +6200,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-transform-engine: container: args: @@ -6362,8 +6285,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - 
--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -6379,15 +6302,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -6403,7 +6319,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6437,7 +6353,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6471,7 +6387,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6506,7 +6422,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6541,7 +6457,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6557,10 +6473,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6577,7 +6493,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6593,10 +6509,10 @@ deploymentSpec: Returns the prediction image corresponding to the given model type.\"\"\"\ \n # Keys come from AutoMlTimeSeriesForecastingTrainSpec.\n # The URIs\ \ must be hardcoded without any breaks in the code so string\n # replacement\ - \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20230910_1325',\n\ - \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20230910_1325',\n\ - \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20230910_1325',\n\ - \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20230910_1325',\n\ + \ will work correctly.\n images = {\n 'l2l': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-l2l:20231002_0125',\n\ + \ 'seq2seq': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-seq2seq:20231002_0125',\n\ + \ 'tft': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tft:20231002_0125',\n\ + \ 'tide': 'us-docker.pkg.dev/vertex-ai/automl-tabular/forecasting-prediction-server-tide:20231002_0125',\n\ \ }\n if model_type not in images:\n raise ValueError(\n f'Invalid\ \ forecasting model type: {model_type}. Valid options are: '\n f'{images.keys()}.'\n\ \ )\n return images[model_type]\n\n" @@ -6613,7 +6529,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6629,7 +6545,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-predictions-column-2: container: args: @@ -6642,7 +6558,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -6658,7 +6574,7 @@ deploymentSpec: \ str) -> str:\n \"\"\"Generates the BP output's target column name.\"\"\ \"\n if forecasting_type == 'quantile':\n return f'predicted_{target_column}.quantile_predictions'\n\ \ return f'predicted_{target_column}.value'\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-importer: importer: artifactUri: @@ -6798,7 +6714,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -6847,7 +6763,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-forecasting: container: args: @@ -7000,6 +6916,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7018,12 +6936,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -7044,6 +6964,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -7062,12 +6984,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - 
google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -7138,7 +7062,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7188,7 +7112,7 @@ deploymentSpec: \ 'model_display_name',\n 'transformations',\n ],\n\ \ )(\n data_source_csv_filenames,\n data_source_bigquery_table_path,\n\ \ model_display_name,\n transformations,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -7234,7 +7158,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-string-not-empty: container: args: @@ -7247,7 +7171,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7277,7 +7201,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7300,7 +7224,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -7313,7 +7237,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -7336,7 +7260,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -7381,7 +7305,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The Timeseries Dense Encoder (TiDE) Forecasting pipeline. name: time-series-dense-encoder-forecasting @@ -7976,4 +7900,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py index 39db8e0e17..4268da69ff 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """Preview AutoML tabular components.""" import os diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py index d23a97d116..b6d87ac94f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/auto_feature_engineering.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """Auto Feature Engineering component spec.""" from typing import Optional @@ -64,7 +65,7 @@ def automated_feature_engineering( ' 1, "machine_spec": {"machine_type": "n1-standard-16"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["feature_engineering", "--project=', project, '", "--location=', location, '", "--data_source_bigquery_table_path=', data_source_bigquery_table_path, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml index 134953ae7c..ed45c6e92c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_feature_selection_pipeline.yaml @@ -1401,7 +1401,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -1677,7 +1677,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1698,6 +1698,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1724,7 +1726,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -2149,7 +2151,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -2734,7 +2736,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -2755,6 +2757,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -2781,7 +2785,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -3114,7 +3118,7 @@ components: componentInputParameter: pipelinechannel--root_dir run_distillation: runtimeValue: - constant: 1.0 + constant: true single_run_max_secs: componentInputParameter: 
pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs worker_pool_specs_override_json: @@ -3554,7 +3558,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-3 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -3575,6 +3579,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -3601,7 +3607,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -4523,118 +4529,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. 
If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4658,118 +4608,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' 
isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4793,118 +4687,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' 
isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -5303,7 +5141,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -5346,7 +5184,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5354,25 +5192,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5398,7 +5236,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -5414,9 +5252,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5475,47 +5313,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -5550,11 +5392,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5563,38 +5405,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -5605,14 +5452,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -5632,9 +5479,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -5672,7 +5517,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -5680,7 +5525,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -5691,13 +5536,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5765,7 +5616,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -5808,7 +5659,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5816,25 +5667,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5860,7 +5711,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -5876,9 +5727,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5937,47 +5788,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6012,11 +5867,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6025,38 +5880,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6067,14 +5927,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -6094,9 +5954,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6134,7 +5992,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6142,7 +6000,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6153,13 +6011,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -6227,7 +6091,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6270,7 +6134,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6278,25 +6142,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6322,7 +6186,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -6338,9 +6202,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6399,47 +6263,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6474,11 +6342,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6487,38 +6355,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6529,14 +6402,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -6556,9 +6429,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6596,7 +6467,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6604,7 +6475,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6615,13 +6486,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -6689,7 +6566,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6732,7 +6609,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6740,25 +6617,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6784,7 +6661,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -6800,9 +6677,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6861,47 +6738,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6936,11 +6817,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6949,38 +6830,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6991,14 +6877,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7018,9 +6904,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7058,7 +6942,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -7066,7 +6950,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -7077,13 +6961,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7151,7 +7041,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -7194,7 +7084,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7202,25 +7092,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7246,7 +7136,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -7262,9 +7152,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7323,47 +7213,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -7398,11 +7292,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7411,38 +7305,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -7453,14 +7352,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7480,9 +7379,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7520,7 +7417,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
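As a point of reference for the batch-prediction parameters documented above, the following is a minimal sketch of how they are typically set from the KFP Python SDK. It is not part of this patch: the bucket paths and display name are placeholders, the GCPC import path is assumed, and a real run would also need a model or unmanaged_container_model input, which is omitted here.

from kfp import compiler, dsl
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp

@dsl.pipeline(name='batch-predict-sketch')
def batch_predict_sketch(location: str = 'us-central1'):
    # project is omitted on purpose: as the hunk just below shows, it now
    # defaults to the project in which the PipelineJob runs.
    ModelBatchPredictOp(
        location=location,
        job_display_name='batch-predict-sketch',                    # placeholder
        gcs_source_uris=['gs://example-bucket/instances/*.jsonl'],  # must match instances_format
        instances_format='jsonl',      # one of the Model's supportedInputStorageFormats
        predictions_format='jsonl',    # one of the Model's supportedOutputStorageFormats
        gcs_destination_output_uri_prefix='gs://example-bucket/predictions',
        machine_type='n1-standard-4',  # switches the job to dedicated resources
        max_replica_count=10,          # only used when machine_type is set
    )

compiler.Compiler().compile(batch_predict_sketch, 'batch_predict_sketch.yaml')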
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -7528,7 +7425,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -7539,13 +7436,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7864,9 +7767,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -7888,9 +7799,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -7910,24 +7821,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' 
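The model-evaluation-import component above now documents one metrics artifact per problem type and gains an embedding_metrics input; the hunks that follow also add an evaluation_resource_name string output next to gcp_resources. The sketch below only illustrates the KFP wiring for that new output, using a hypothetical stand-in component in place of the real GCPC launcher; the resource name it returns is a placeholder.

from kfp import dsl

@dsl.component
def evaluation_import_stand_in() -> str:
    # Placeholder standing in for the imported ModelEvaluation resource name.
    return 'projects/PROJECT/locations/LOCATION/models/MODEL/evaluations/EVALUATION'

@dsl.component
def record_evaluation(evaluation_resource_name: str):
    print('Imported model evaluation:', evaluation_resource_name)

@dsl.pipeline(name='evaluation-import-sketch')
def evaluation_import_sketch():
    import_task = evaluation_import_stand_in()
    # With the real component the hand-off would read
    # import_task.outputs['evaluation_resource_name'] instead of .output.
    record_evaluation(evaluation_resource_name=import_task.output)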
isOptional: true parameters: dataset_path: @@ -7950,7 +7876,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -7959,6 +7887,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -7969,9 +7899,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -7993,9 +7931,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -8015,24 +7953,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -8055,7 +8008,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -8064,6 +8019,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-3: @@ -8074,9 +8031,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -8098,9 +8063,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -8120,24 +8085,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -8160,7 +8140,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -8169,6 +8151,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -8811,9 +8795,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8854,9 +8838,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8897,7 +8881,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8909,7 +8893,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", 
"us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8938,7 +8922,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8950,7 +8934,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8979,7 +8963,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8991,7 +8975,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -9020,7 +9004,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -9035,7 +9019,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9044,7 +9028,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9053,7 +9037,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -9073,9 +9057,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9120,9 +9104,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", 
\"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -9167,7 +9151,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9188,7 +9172,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -9219,7 +9203,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9240,7 +9224,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_disk_size_gb=", 
"{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -9267,7 +9251,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9295,7 +9279,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9323,7 +9307,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9351,7 +9335,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9449,7 +9433,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9547,7 +9531,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9580,6 +9564,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9595,7 +9581,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9608,8 +9594,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9619,7 +9605,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -9631,6 +9617,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9646,7 +9634,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9659,8 +9647,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9670,7 +9658,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-3: container: args: @@ -9682,6 +9670,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9697,7 +9687,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - 
'{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9710,8 +9700,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9721,7 +9711,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-importer: importer: artifactUri: @@ -9741,7 +9731,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9936,7 +9926,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -9985,7 +9975,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-3: container: args: @@ -10034,7 +10024,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-4: container: args: @@ -10083,7 +10073,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-5: container: args: @@ -10132,7 +10122,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -10348,6 +10338,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10366,12 +10358,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - 
'{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -10392,6 +10386,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10410,12 +10406,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-3: container: args: @@ -10436,6 +10434,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10454,12 +10454,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -10559,7 +10561,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10615,7 +10617,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10649,7 +10651,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10683,7 +10685,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10717,7 +10719,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"feature_selection\", \"--data_source=", "{{$.inputs.artifacts[''data_source''].uri}}", "\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", "\", \"--prediction_type=", "{{$.inputs.parameters[''prediction_type'']}}", @@ -10730,7 +10732,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", @@ -10763,7 +10765,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", 
\\\"optimization_objective\\\": @@ -10796,7 +10798,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", @@ -10834,7 +10836,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10870,7 +10872,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -11483,4 +11485,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml index aa56e2832e..6b9892f263 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/automl_tabular_v2_pipeline.yaml @@ -1160,7 +1160,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -1491,7 +1491,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1512,6 +1512,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1538,7 +1540,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -1976,7 +1978,7 @@ components: componentInputParameter: 
pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -2670,7 +2672,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -2691,6 +2693,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -2717,7 +2721,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -3049,7 +3053,7 @@ components: componentInputParameter: pipelinechannel--root_dir run_distillation: runtimeValue: - constant: 1.0 + constant: true single_run_max_secs: componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs worker_pool_specs_override_json: @@ -3495,7 +3499,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-3 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -3516,6 +3520,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -3542,7 +3548,7 @@ components: componentInputParameter: pipelinechannel--root_dir generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: bigquery @@ -4519,118 +4525,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. 
If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4654,118 +4604,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' 
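The feature-attribution component specs in this hunk all change their caller-facing interface the same way: dataflow_disk_size becomes dataflow_disk_size_gb, the boolean force_direct_runner flag is replaced by a string force_runner_mode, a problem_type parameter is added (fed from the pipeline's prediction_type), and project becomes optional, defaulting to the project running the PipelineJob. A hedged sketch of what the rename means for a caller follows; the component is loaded from a hypothetical local spec file rather than the real GCPC module, and the bucket path is a placeholder.

from kfp import components, dsl

# Hypothetical path to a compiled feature-attribution component spec.
feature_attribution_op = components.load_component_from_file('feature_attribution.yaml')

@dsl.pipeline(name='feature-attribution-sketch')
def feature_attribution_sketch(predictions_dir: str = 'gs://example-bucket/predictions'):
    predictions = dsl.importer(
        artifact_uri=predictions_dir,
        artifact_class=dsl.Artifact,
        reimport=False,
    )
    feature_attribution_op(
        problem_type='classification',  # newly threaded through from prediction_type
        predictions_format='jsonl',
        predictions_gcs_source=predictions.output,
        dataflow_disk_size_gb=50,       # previously dataflow_disk_size
        force_runner_mode='',           # previously the boolean force_direct_runner
        # project is omitted: it now defaults to the project running the PipelineJob.
    )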
isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4789,118 +4683,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' 
isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -5084,61 +4922,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' 
- isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -6242,7 +6025,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6285,7 +6068,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6293,25 +6076,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6337,7 +6120,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6353,9 +6136,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. 
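The hunks above rename several evaluation-component parameters (dataflow_disk_size becomes dataflow_disk_size_gb, the boolean force_direct_runner becomes the string-valued force_runner_mode), add a problem_type input with no default, and give project a default of '{{$.pipeline_google_cloud_project_id}}'. The sketch below is illustrative only: it assumes a component YAML exposing exactly the parameter names shown in this diff (the file path, pipeline name, and literal values are placeholders, and the real component may require additional inputs not visible here), and it uses kfp.components.load_component_from_file rather than assuming any particular google-cloud-pipeline-components import path.

    from kfp import components, dsl

    # Placeholder path: any component YAML exposing the parameter names shown above.
    feature_attribution_op = components.load_component_from_file(
        'feature_attribution_component.yaml')

    @dsl.pipeline(name='feature-attribution-example')
    def evaluation_pipeline():
        feature_attribution_op(
            # Renamed in this patch: dataflow_disk_size -> dataflow_disk_size_gb.
            dataflow_disk_size_gb=50,
            dataflow_machine_type='n1-standard-4',
            # Renamed from the boolean force_direct_runner; '' keeps the
            # component's default runner behavior.
            force_runner_mode='',
            # New input with no default, so callers must supply it; the value
            # here is illustrative.
            problem_type='classification',
            location='us-central1',
            predictions_format='jsonl',
            # project is omitted on purpose: it now defaults to the project in
            # which the PipelineJob runs.
        )

Presumably the switch from a boolean flag to a string-valued force_runner_mode leaves room for more than two runner modes, which would explain dropping force_direct_runner rather than keeping both.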
The input must be JSONL with objects at each line, CSV, BigQuery @@ -6414,47 +6197,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6489,11 +6276,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6502,38 +6289,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. 
--)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6544,14 +6336,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -6571,9 +6363,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6611,7 +6401,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6619,7 +6409,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' 
isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6630,13 +6420,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -6704,7 +6500,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6747,7 +6543,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6755,25 +6551,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6799,7 +6595,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -6815,9 +6611,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. 
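The same pattern of cleanups runs through the batch-prediction component above: double-backtick RST markup becomes single-backtick Markdown, restated defaults are dropped from descriptions, square-bracket cross references become real links, and project becomes optional with a default of '{{$.pipeline_google_cloud_project_id}}'. A hedged sketch of a caller relying on that new default follows; as before the YAML path and literal values are placeholders, only inputs visible in this diff are used, and the component's model / unmanaged_container_model artifact inputs are omitted for brevity.

    from kfp import components, dsl

    # Placeholder path for a batch-prediction component with the inputs shown above.
    batch_predict_op = components.load_component_from_file('model_batch_predict.yaml')

    @dsl.pipeline(name='batch-predict-example')
    def batch_predict_pipeline():
        batch_predict_op(
            job_display_name='example-batch-predict',
            # Must match instances_format; wildcards are allowed per the description.
            gcs_source_uris=['gs://my-bucket/instances/*.jsonl'],
            instances_format='jsonl',
            predictions_format='jsonl',
            location='us-central1',
            machine_type='n1-standard-4',
            max_replica_count=10,
            # project is omitted: after this change it defaults to the project in
            # which the PipelineJob runs ({{$.pipeline_google_cloud_project_id}}).
        )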
+ When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6876,47 +6672,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6951,11 +6751,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. 
The input must be JSONL with objects at each line, CSV, BigQuery @@ -6964,38 +6764,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -7006,14 +6811,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7033,9 +6838,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7073,7 +6876,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -7081,7 +6884,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -7092,13 +6895,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7166,7 +6975,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -7209,7 +7018,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7217,25 +7026,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7261,7 +7070,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -7277,9 +7086,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7338,47 +7147,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -7413,11 +7226,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7426,38 +7239,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -7468,14 +7286,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7495,9 +7313,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7535,7 +7351,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -7543,7 +7359,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -7554,13 +7370,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7628,7 +7450,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -7671,7 +7493,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7679,25 +7501,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7723,7 +7545,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -7739,9 +7561,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7800,47 +7622,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -7875,11 +7701,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7888,38 +7714,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -7930,14 +7761,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7957,9 +7788,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7997,7 +7826,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -8005,7 +7834,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -8016,13 +7845,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -8090,7 +7925,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -8133,7 +7968,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -8141,25 +7976,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -8185,7 +8020,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -8201,9 +8036,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -8262,47 +8097,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -8337,11 +8176,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -8350,38 +8189,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -8392,14 +8236,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -8419,9 +8263,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -8459,7 +8301,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -8467,7 +8309,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -8478,13 +8320,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -8803,9 +8651,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -8827,9 +8683,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -8849,24 +8705,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' 
isOptional: true parameters: dataset_path: @@ -8889,7 +8760,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -8898,6 +8771,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -8908,9 +8783,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -8932,9 +8815,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -8954,24 +8837,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -8994,7 +8892,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -9003,6 +8903,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-3: @@ -9013,9 +8915,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -9037,9 +8947,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -9059,24 +8969,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -9099,7 +9024,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -9108,6 +9035,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -9871,9 +9800,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -9914,9 +9843,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -9957,7 +9886,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -9969,7 +9898,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", 
"us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -9998,7 +9927,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -10010,7 +9939,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -10039,7 +9968,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -10051,7 +9980,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -10080,7 +10009,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -10095,7 +10024,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -10104,7 +10033,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -10113,7 +10042,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -10133,9 +10062,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -10180,9 +10109,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", 
\"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -10223,7 +10152,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10251,7 +10180,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10279,7 +10208,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10307,7 +10236,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10405,7 +10334,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10524,14 +10453,14 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' - '{"Concat": ["--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}"]}' - '{"Concat": ["--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - '{"Concat": ["--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -10546,6 +10475,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -10561,7 +10492,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -10574,8 +10505,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -10585,7 +10516,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -10597,6 +10528,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -10612,7 +10545,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - 
'{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -10625,8 +10558,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -10636,7 +10569,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-3: container: args: @@ -10648,6 +10581,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -10663,7 +10598,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -10676,8 +10611,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -10687,7 +10622,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-transform-engine: container: args: @@ -10772,8 +10707,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -10789,15 +10724,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", 
"{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -10813,7 +10741,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10851,7 +10779,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10889,7 +10817,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10923,7 +10851,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10957,7 +10885,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10983,7 +10911,7 @@ deploymentSpec: \ )\n\n return collections.namedtuple(\n 'Outputs',\n [\n \ \ 'transform_config_path',\n ],\n )(\n transform_config_path,\n\ \ )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-importer: importer: artifactUri: @@ -11003,7 +10931,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -11198,7 +11126,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -11247,7 +11175,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-3: container: args: @@ -11296,7 +11224,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-4: container: args: @@ -11345,7 +11273,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-5: container: args: @@ -11394,7 +11322,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -11610,6 +11538,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -11628,12 +11558,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - 
google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -11654,6 +11586,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -11672,12 +11606,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-3: container: args: @@ -11698,6 +11634,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -11716,12 +11654,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -11821,7 +11761,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -11865,7 +11805,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -11911,7 +11851,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-split-materialized-data-2: container: args: @@ -11957,7 +11897,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-string-not-empty: container: args: @@ -11970,7 +11910,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -12032,7 +11972,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-training-configurator-and-validator-2: container: args: @@ -12077,7 +12017,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The AutoML Tabular pipeline v2. 
name: automl-tabular-v2 @@ -12758,4 +12698,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py index eda3503d90..cb17bb7073 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/distillation_stage_feature_transform_engine.py @@ -75,7 +75,7 @@ def distillation_stage_feature_transform_engine( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125', command=[], args=[ 'distillation_stage_feature_transform_engine', @@ -183,7 +183,7 @@ def distillation_stage_feature_transform_engine( dataflow_machine_type, ] ), - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', dsl.ConcatPlaceholder( items=[ '--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py index 5478275e09..0bcd1a8915 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """AutoML Feature Ranking and Selection component spec.""" from typing import Optional @@ -99,7 +100,7 @@ def tabular_feature_ranking_and_selection( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["feature_selection", "--data_source=', data_source.uri, '", "--target_column=', @@ -136,7 +137,7 @@ def tabular_feature_ranking_and_selection( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml index 435d8dae31..6934ea26e7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_selection_pipeline.yaml @@ -202,61 +202,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. 
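The deploymentSpec hunk that follows rewrites the compiled '{"Concat": [...]}' argument entries and container image tags that the Python component sources in this patch (feature_transform_engine.py, feature_selection.py) generate. For reference, below is a minimal sketch of how a KFP container component built with dsl.ContainerSpec and dsl.ConcatPlaceholder produces entries of that shape. It is illustrative only, not part of the patch or the GCPC sources: the component name, image URI, and flags are hypothetical, and it assumes a kfp>=2.0 SDK.

# Sketch only: a hypothetical component mirroring the ConcatPlaceholder pattern
# used by the GCPC tabular components edited in this patch. Not part of the diff.
from kfp import compiler, dsl


@dsl.container_component
def demo_transform_engine(
    root_dir: str,
    dataflow_machine_type: str = 'n1-standard-16',
    encryption_spec_key_name: str = '',
):
    # Each ConcatPlaceholder compiles into a deploymentSpec arg such as
    # '{"Concat": ["--root_dir=", "{{$.inputs.parameters[''root_dir'']}}"]}',
    # which is the structure being rewritten in the surrounding YAML hunks.
    return dsl.ContainerSpec(
        image='us-docker.pkg.dev/example-project/demo/transform-engine:latest',
        command=[],
        args=[
            'demo_transform_engine',
            dsl.ConcatPlaceholder(items=['--root_dir=', root_dir]),
            dsl.ConcatPlaceholder(
                items=['--dataflow_machine_type=', dataflow_machine_type]),
            dsl.ConcatPlaceholder(
                items=['--encryption_spec_key_name=', encryption_spec_key_name]),
        ],
    )


if __name__ == '__main__':
    # Compiling the component emits IR YAML with a deploymentSpec section
    # analogous to (but much smaller than) the ones edited in this file.
    compiler.Compiler().compile(demo_transform_engine, 'demo_component.yaml')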
@@ -1224,8 +1169,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -1241,15 +1186,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -1297,7 +1235,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: Defines pipeline for feature transform engine component. 
name: feature-selection @@ -1333,27 +1271,6 @@ root: componentInputParameter: dataset_level_custom_transformation_definitions dataset_level_transformations: componentInputParameter: dataset_level_transformations - embedding_batch_prediction_accelerator_count: - runtimeValue: - constant: -1.0 - embedding_batch_prediction_accelerator_type: - runtimeValue: - constant: accelerator_type_unspecified - embedding_batch_prediction_batch_size: - runtimeValue: - constant: -1.0 - embedding_batch_prediction_machine_type: - runtimeValue: - constant: '' - embedding_batch_prediction_max_replica_count: - runtimeValue: - constant: -1.0 - embedding_batch_prediction_starting_replica_count: - runtimeValue: - constant: -1.0 - embedding_prediction_server_docker_uri: - runtimeValue: - constant: '' encryption_spec_key_name: componentInputParameter: encryption_spec_key_name feature_selection_algorithm: @@ -1477,7 +1394,7 @@ root: constant: -1.0 enable_probabilistic_inference: runtimeValue: - constant: 0.0 + constant: false forecast_horizon: runtimeValue: constant: -1.0 @@ -1493,10 +1410,10 @@ root: componentInputParameter: prediction_type run_distill: runtimeValue: - constant: 0.0 + constant: false run_evaluation: runtimeValue: - constant: 0.0 + constant: false split_example_counts: taskOutputParameter: outputParameterKey: split_example_counts @@ -1635,4 +1552,4 @@ root: isOptional: true parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py index 48741880e4..6316f5de08 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/feature_transform_engine.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Feature Transform Engine component spec.""" from typing import Optional @@ -91,15 +92,6 @@ def feature_transform_engine( group_total_weight: float = 0.0, temporal_total_weight: float = 0.0, group_temporal_total_weight: float = 0.0, - embedding_prediction_server_docker_uri: Optional[str] = '', - embedding_batch_prediction_machine_type: Optional[str] = '', - embedding_batch_prediction_accelerator_type: Optional[ - str - ] = 'accelerator_type_unspecified', - embedding_batch_prediction_accelerator_count: Optional[int] = -1, - embedding_batch_prediction_starting_replica_count: Optional[int] = -1, - embedding_batch_prediction_max_replica_count: Optional[int] = -1, - embedding_batch_prediction_batch_size: Optional[int] = -1, ): # fmt: off """Transforms raw data to engineered features. @@ -292,14 +284,8 @@ def feature_transform_engine( dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. dataflow_service_account: Custom service account to run Dataflow jobs. encryption_spec_key_name: Customer-managed encryption key. - autodetect_csv_schema: If True, infers the column types when importing CSVs into BigQuery. - embedding_prediction_server_docker_uri: The docker image inside which to run the embedding models to generate embeddings. 
- embedding_batch_prediction_machine_type: The machine type to be used to run the embedding batch prediction job. If not provided, `n1-highmem-32` will be used. For more details, see: https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types - embedding_batch_prediction_accelerator_type: The accelerator type to use to generate embeddings. If not provided, no accelerator is used. More details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype - embedding_batch_prediction_accelerator_count: The number of accelerators to use to generate the embeddings. Default is 0. - embedding_batch_prediction_starting_replica_count: The starting replica count for embedding batch prediction job. Default = 20. - embedding_batch_prediction_max_replica_count: The max replica count for embedding batch prediction job. Default = 50. - embedding_batch_prediction_batch_size: The batch size for embedding batch prediction job. Default = 1024. + autodetect_csv_schema: If True, infers the column types + when importing CSVs into BigQuery. Returns: dataset_stats: The stats of the dataset. @@ -322,7 +308,7 @@ def feature_transform_engine( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125', command=[], args=[ 'feature_transform_engine', @@ -651,8 +637,8 @@ def feature_transform_engine( dsl.ConcatPlaceholder( items=['--dataflow_machine_type=', dataflow_machine_type] ), - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', - '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', + '--feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125', dsl.ConcatPlaceholder( items=['--dataflow_disk_size_gb=', dataflow_disk_size_gb] ), @@ -702,48 +688,6 @@ def feature_transform_engine( ] ), ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_prediction_server_docker_uri=', - embedding_prediction_server_docker_uri, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_machine_type=', - embedding_batch_prediction_machine_type, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_accelerator_type=', - embedding_batch_prediction_accelerator_type, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_accelerator_count=', - embedding_batch_prediction_accelerator_count, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_starting_replica_count=', - embedding_batch_prediction_starting_replica_count, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_max_replica_count=', - embedding_batch_prediction_max_replica_count, - ] - ), - dsl.ConcatPlaceholder( - items=[ - '--embedding_batch_prediction_batch_size=', - embedding_batch_prediction_batch_size, - ] - ), dsl.ConcatPlaceholder( items=[ '--encryption_spec_key_name=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py index 1ed5fdd75d..fdcf4b357c 100644 --- 
a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Tabnet Hyperparameter Tuning component spec.""" from typing import Optional @@ -157,7 +158,7 @@ def tabnet_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20231002_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -165,7 +166,7 @@ def tabnet_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml index 1ff0ae13ee..afa6cf7f4c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.yaml @@ -991,61 +991,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' 
- isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1780,7 +1725,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -1823,7 +1768,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1831,25 +1776,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1875,7 +1820,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1891,9 +1836,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1952,47 +1897,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. 
If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -2027,11 +1976,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -2040,38 +1989,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. 
The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . 
For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -2082,14 +2036,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2109,9 +2063,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2149,7 +2101,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2157,7 +2109,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2168,13 +2120,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. 
+ + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2321,9 +2279,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -2345,9 +2311,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -2367,24 +2333,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -2407,7 +2388,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -2416,6 +2399,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -2426,28 +2411,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' 
+ description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2455,7 +2437,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2465,7 +2447,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2476,22 +2458,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2511,13 +2486,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2525,15 +2503,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. parameterType: STRING comp-parse-worker-pool-specs-override: executorLabel: exec-parse-worker-pool-specs-override @@ -3070,7 +3044,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3085,7 +3059,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3101,7 +3075,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3201,8 +3175,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3218,15 +3192,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3242,7 +3209,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3300,7 +3267,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-tabnet-study-spec-parameters: container: args: @@ -3313,7 +3280,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3816,7 +3783,7 @@ deploymentSpec: \ = ', '.join(extra_overrides)\n warnings.warn(\n f'The overrides\ \ {extra_override_str} were not found in the params and '\n 'will\ \ be ignored.'\n )\n\n return study_spec_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-model-batch-predict: container: args: @@ -3865,7 +3832,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3951,6 +3918,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -3969,12 +3938,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -3986,7 +3957,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", 
"{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3995,14 +3968,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-parse-worker-pool-specs-override: container: args: @@ -4015,7 +3988,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4070,7 +4043,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4114,7 +4087,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -4160,7 +4133,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-tabnet-hyperparameter-tuning-job: container: args: @@ -4188,11 +4161,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20231002_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", 
"{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", @@ -4261,7 +4234,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: The TabNet built-in algorithm HyperparameterTuningJob pipeline. name: automl-tabular-tabnet-hyperparameter-tuning-job @@ -4790,4 +4763,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py index db64853055..8ffa57a135 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Tabnet Trainer component spec.""" from typing import Optional @@ -164,7 +165,7 @@ def tabnet_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20231002_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -172,7 +173,7 @@ def tabnet_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml index 5a06291db8..6126b36af9 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/tabnet_trainer_pipeline.yaml @@ -1020,61 +1020,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' 
- isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1735,7 +1680,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -1778,7 +1723,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1786,25 +1731,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
isOptional: true @@ -1830,7 +1775,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1846,9 +1791,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1907,47 +1852,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -1982,11 +1931,11 @@ components: sent to the Model. 
- If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1995,38 +1944,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. 
The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -2037,14 +1991,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2064,9 +2018,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2104,7 +2056,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2112,7 +2064,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2123,13 +2075,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2276,9 +2234,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -2300,9 +2266,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' 
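The evaluation-import component above accepts one optional metrics artifact per problem type (classification, forecasting, the newly added embedding metrics, and so on) plus a generic `metrics` input that must be accompanied by `problem_type`. A minimal wiring sketch follows; the `ModelImportEvaluationOp` import path is an assumption (the op has lived in different GCPC subpackages across releases), and the artifact URIs and model resource name are placeholders.

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    # Import path is an assumption; ModelImportEvaluationOp has moved between
    # GCPC subpackages depending on the release.
    from google_cloud_pipeline_components._implementation.model_evaluation import (
        ModelImportEvaluationOp,
    )

    @dsl.pipeline(name='import-classification-eval')
    def import_eval_pipeline():
        # Bring an existing Vertex Model into the pipeline as an artifact.
        model = dsl.importer(
            artifact_uri='https://us-central1-aiplatform.googleapis.com/v1/projects/123/locations/us-central1/models/456',  # placeholder
            artifact_class=artifact_types.VertexModel,
            metadata={'resourceName': 'projects/123/locations/us-central1/models/456'},
        )
        # A previously computed metrics file, imported as a generic system.Metrics artifact.
        metrics = dsl.importer(
            artifact_uri='gs://my-bucket/eval/metrics.json',  # placeholder
            artifact_class=dsl.Metrics,
        )
        # The generic `metrics` input requires `problem_type`, per the parameter docs above.
        ModelImportEvaluationOp(
            model=model.output,
            metrics=metrics.output,
            problem_type='classification',
            display_name='tabular-eval-import',
        )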
isOptional: true metrics: artifactType: @@ -2322,24 +2288,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -2362,7 +2343,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -2371,6 +2354,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -2381,28 +2366,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. 
The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2410,7 +2392,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2420,7 +2402,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2431,22 +2413,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2466,13 +2441,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2480,15 +2458,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. 
For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. parameterType: STRING comp-parse-worker-pool-specs-override: executorLabel: exec-parse-worker-pool-specs-override @@ -3139,7 +3113,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3154,7 +3128,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -3170,7 +3144,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3270,8 +3244,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3287,15 +3261,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", 
"{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3347,7 +3314,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3433,6 +3400,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -3451,12 +3420,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -3468,7 +3439,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3477,14 +3450,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": 
{"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-parse-worker-pool-specs-override: container: args: @@ -3497,7 +3470,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3552,7 +3525,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3596,7 +3569,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -3642,7 +3615,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-tabnet-trainer: container: args: @@ -3660,11 +3633,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20230910_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:20231002_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -3751,7 +3724,7 @@ deploymentSpec: ["--temporal_total_weight=", 
"{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 pipelineInfo: description: 'Train a model using the Tabular Workflow for TabNet pipelines. @@ -4437,4 +4410,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py index f9634726db..97e6b370a1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/utils.py @@ -41,7 +41,8 @@ def _update_parameters( def _generate_model_display_name() -> str: """Automatically generates a model_display_name. - Returns: model_display_name. + Returns: + model_display_name. """ return f'tabular-workflow-model-{uuid.uuid4()}' @@ -149,10 +150,10 @@ def _get_default_pipeline_params( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -169,7 +170,7 @@ def _get_default_pipeline_params( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -224,7 +225,8 @@ def _get_default_pipeline_params( model_description: The description for the uploaded model. enable_fte: Whether to enable the Feature Transform Engine. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. 
""" if not study_spec_parameters_override: study_spec_parameters_override = [] @@ -513,10 +515,10 @@ def get_automl_tabular_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -533,7 +535,7 @@ def get_automl_tabular_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -585,7 +587,8 @@ def get_automl_tabular_pipeline_and_parameters( model_description: The description for the uploaded model. enable_fte: Whether to enable the Feature Transform Engine. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = _get_default_pipeline_params( project=project, @@ -763,10 +766,10 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -783,7 +786,7 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( transform component. 
dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -822,7 +825,8 @@ def get_automl_tabular_feature_selection_pipeline_and_parameters( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ model_display_name = ( model_display_name @@ -906,7 +910,8 @@ def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: Args: input_dict: The input json dictionary. - Returns: The encoded string used for parameter. + Returns: + The encoded string used for parameter. """ if not input_dict: return '' @@ -999,7 +1004,7 @@ def get_skip_architecture_search_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -1016,7 +1021,7 @@ def get_skip_architecture_search_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -1044,7 +1049,8 @@ def get_skip_architecture_search_pipeline_and_parameters( evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ return get_automl_tabular_pipeline_and_parameters( @@ -1275,7 +1281,7 @@ def get_wide_and_deep_trainer_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. 
evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1294,12 +1300,13 @@ def get_wide_and_deep_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -1566,7 +1573,7 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1585,12 +1592,13 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ warnings.warn( 'This method is deprecated. Please use' @@ -1842,7 +1850,7 @@ def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -1861,12 +1869,13 @@ def get_tabnet_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. 
dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2123,7 +2132,7 @@ def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -2142,12 +2151,13 @@ def get_wide_and_deep_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2451,7 +2461,7 @@ def get_tabnet_trainer_pipeline_and_parameters( transform component. worker_pool_specs_override: The dictionary for overriding training and evaluation worker pool specs. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. run_evaluation: Whether to run evaluation steps during training. evaluation_batch_predict_machine_type: The prediction server machine type for batch predict components during evaluation. @@ -2470,12 +2480,13 @@ def get_tabnet_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. 
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ if isinstance(tf_auto_transform_features, list): tf_auto_transform_features = {'auto': tf_auto_transform_features} @@ -2631,7 +2642,8 @@ def get_tabnet_study_spec_parameters_override( parameter is only used as a hint for the hyperparameter search space, unrelated to the real cost. - Returns: List of study_spec_parameters_override. + Returns: + List of study_spec_parameters_override. """ if dataset_size_bucket not in ['small', 'medium', 'large']: @@ -2675,7 +2687,8 @@ def _format_tabnet_regression_study_spec_parameters_override( parameter is only used as a hint for the hyperparameter search space, unrelated to the real cost. - Returns: List of study_spec_parameters_override for regression. + Returns: + List of study_spec_parameters_override for regression. """ # To get regression study_spec_parameters, we need to set @@ -2707,7 +2720,8 @@ def _format_tabnet_regression_study_spec_parameters_override( def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: """Get study_spec_parameters_override for a Wide & Deep hyperparameter tuning job. - Returns: List of study_spec_parameters_override. + Returns: + List of study_spec_parameters_override. """ param_path = os.path.join( pathlib.Path(__file__).parent.resolve(), @@ -2723,7 +2737,8 @@ def get_wide_and_deep_study_spec_parameters_override() -> List[Dict[str, Any]]: def get_xgboost_study_spec_parameters_override() -> List[Dict[str, Any]]: """Get study_spec_parameters_override for an XGBoost hyperparameter tuning job. - Returns: List of study_spec_parameters_override. + Returns: + List of study_spec_parameters_override. """ param_path = os.path.join( pathlib.Path(__file__).parent.resolve(), 'configs/xgboost_params.json' @@ -2949,12 +2964,13 @@ def get_xgboost_trainer_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = {} if isinstance(tf_auto_transform_features, list): @@ -3229,12 +3245,13 @@ def get_xgboost_hyperparameter_tuning_job_pipeline_and_parameters( dataflow_service_account: Custom service account to run dataflow jobs. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. 
Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = {} if isinstance(tf_auto_transform_features, list): diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py index 1aba4e1c56..2abc66bcc2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Wide and Deep Hyperparameter Tuning component spec.""" from typing import Optional @@ -157,7 +158,7 @@ def wide_and_deep_hyperparameter_tuning_job( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20231002_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -165,7 +166,7 @@ def wide_and_deep_hyperparameter_tuning_job( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', '", "--prediction_docker_uri_artifact_path=', prediction_docker_uri_output, '", "--baseline_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml index 65d9adf30a..f1c88723a8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.yaml @@ -943,61 +943,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. 
More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1718,7 +1663,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -1761,7 +1706,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1769,25 +1714,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1813,7 +1758,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -1829,9 +1774,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1890,47 +1835,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -1965,11 +1914,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1978,38 +1927,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -2020,14 +1974,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2047,9 +2001,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2087,7 +2039,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2095,7 +2047,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2106,13 +2058,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2259,28 +2217,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2288,7 +2243,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2298,7 +2253,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2309,22 +2264,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2344,13 +2292,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2358,15 +2309,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. 
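The `unmanaged_container_model` description above embeds a `dsl.importer` snippet showing how the artifact is produced. A minimal sketch of wiring that importer into the model-upload component is shown below; the `google_cloud_pipeline_components.v1.model.ModelUploadOp` import path and the display name are assumptions for illustration, not values taken from this spec:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.model import ModelUploadOp  # assumed module path

    @dsl.pipeline(name='upload-unmanaged-model')
    def upload_pipeline(project: str, location: str = 'us-central1'):
        # Import the trained model directory as an UnmanagedContainerModel artifact,
        # mirroring the importer snippet embedded in the component description.
        importer = dsl.importer(
            artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                'containerSpec': {
                    'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
                }
            },
        )
        # Upload the imported model; per the parameter descriptions, `project`
        # defaults to the PipelineJob's project and `location` to us-central1.
        ModelUploadOp(
            display_name='wide-and-deep-model',  # hypothetical display name
            unmanaged_container_model=importer.output,
            project=project,
            location=location,
        )

When compiled, the importer resolves the GCS directory into a `google.UnmanagedContainerModel` artifact that satisfies the `unmanaged_container_model` input documented above.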
parameterType: STRING comp-parse-worker-pool-specs-override: executorLabel: exec-parse-worker-pool-specs-override @@ -2903,7 +2850,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2918,7 +2865,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -2934,7 +2881,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3034,8 +2981,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3051,15 +2998,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": 
["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3075,7 +3015,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3133,7 +3073,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-wide-and-deep-study-spec-parameters: container: args: @@ -3146,7 +3086,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3266,7 +3206,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3343,7 +3283,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3352,14 +3294,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m 
- google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-parse-worker-pool-specs-override: container: args: @@ -3372,7 +3314,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3427,7 +3369,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3471,7 +3413,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -3517,7 +3459,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -3562,7 +3504,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-wide-and-deep-hyperparameter-tuning-job: container: args: @@ -3590,11 +3532,11 @@ deploymentSpec: ", \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20231002_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", 
\"--prediction_docker_uri_artifact_path=", "{{$.outputs.parameters[''prediction_docker_uri_output''].output_file}}", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", @@ -4147,4 +4089,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py index 5aac5303e0..ebee0eff06 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Wide and Deep Trainer component spec.""" from typing import Optional @@ -160,7 +161,7 @@ def wide_and_deep_trainer( ', "disk_spec": ', training_disk_spec, ', "container_spec": {"image_uri":"', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20231002_0125', '", "args": ["--target_column=', target_column, '", "--weight_column=', @@ -168,7 +169,7 @@ def wide_and_deep_trainer( '", "--model_type=', prediction_type, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', '", "--baseline_path=', instance_baseline.uri, '", "--metadata_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml index d7638c59f7..c4e5f204e1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/wide_and_deep_trainer_pipeline.yaml @@ -973,61 +973,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. 
For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1688,7 +1633,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -1731,7 +1676,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1739,25 +1684,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1783,7 +1728,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1799,9 +1744,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1860,47 +1805,51 @@ components: to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -1935,11 +1884,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1948,38 +1897,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. 
--)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -1990,14 +1944,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2017,9 +1971,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2057,7 +2009,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2065,7 +2017,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' 
isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2076,13 +2028,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2229,28 +2187,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2258,7 +2213,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2268,7 +2223,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2279,22 +2234,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2314,13 +2262,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2328,15 +2279,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. 
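The batch-prediction parameters documented above (`gcs_source_uris`, `instances_format`, `predictions_format`, the machine settings, and the optional `unmanaged_container_model` input) read more easily next to a call sketch. The module path and the `gcs_destination_output_uri_prefix` name below are assumptions based on the published GCPC v1 API rather than text from this spec:

    from kfp import dsl
    from google_cloud_pipeline_components.types import artifact_types
    from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp  # assumed module path

    @dsl.pipeline(name='wide-and-deep-batch-predict')
    def batch_predict_pipeline(project: str, location: str = 'us-central1'):
        # Either `model` or `unmanaged_container_model` must be supplied; here the
        # unmanaged artifact comes from an importer, as in the earlier sketch.
        importer = dsl.importer(
            artifact_uri='gs://my-bucket/wide-and-deep/model',  # hypothetical model directory
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={'containerSpec': {'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'}},
        )
        ModelBatchPredictOp(
            job_display_name='wide-and-deep-batch-predict',
            unmanaged_container_model=importer.output,
            gcs_source_uris=['gs://my-bucket/instances-*.jsonl'],  # may contain wildcards
            instances_format='jsonl',      # must be one of the Model's supportedInputStorageFormats
            predictions_format='jsonl',    # must be one of the Model's supportedOutputStorageFormats
            gcs_destination_output_uri_prefix='gs://my-bucket/predictions',  # assumed parameter name
            machine_type='n1-standard-8',  # enables the replica-count settings below
            starting_replica_count=1,
            max_replica_count=10,
            project=project,
            location=location,
        )

Per the descriptions above, `max_replica_count` and the other replica settings only take effect when `machine_type` is set.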
parameterType: STRING comp-parse-worker-pool-specs-override: executorLabel: exec-parse-worker-pool-specs-override @@ -2959,7 +2906,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2974,7 +2921,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -2990,7 +2937,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3090,8 +3037,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3107,15 +3054,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": 
["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3167,7 +3107,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3244,7 +3184,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3253,14 +3195,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-parse-worker-pool-specs-override: container: args: @@ -3273,7 +3215,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3328,7 +3270,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3372,7 +3314,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -3418,7 +3360,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -3463,7 +3405,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-wide-and-deep-trainer: container: args: @@ -3481,11 +3423,11 @@ deploymentSpec: "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"", "1", "\", \"machine_spec\": ", "{{$.inputs.parameters[''training_machine_spec'']}}", ", \"disk_spec\": ", "{{$.inputs.parameters[''training_disk_spec'']}}", - ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20230910_1325", + ", \"container_spec\": {\"image_uri\":\"", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:20231002_0125", "\", \"args\": [\"--target_column=", "{{$.inputs.parameters[''target_column'']}}", "\", \"--weight_column=", "{{$.inputs.parameters[''weight_column'']}}", "\", \"--model_type=", "{{$.inputs.parameters[''prediction_type'']}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", @@ -4183,4 +4125,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py index 6699703e1a..b5f3ed2357 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py 
+++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML XGBoost Hyperparameter Tuning component spec.""" from typing import Optional diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml index 93556638b1..79c07e0e19 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_hyperparameter_tuning_job_pipeline.yaml @@ -485,7 +485,7 @@ components: producerTask: generate-xgboost-hyperparameter-tuning-worker-pool-specs read_value_from_file: runtimeValue: - constant: 1.0 + constant: true study_spec_metric_goal: componentInputParameter: pipelinechannel--study_spec_metric_goal trials_dir: @@ -929,61 +929,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1827,7 +1772,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' 
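The `--parent_model_name` change above also illustrates the two KFP runtime placeholders these executor args are built from: `Concat` renders its items as one concatenated argv entry, while `IfPresent` emits its `Then` items only when the optional input was supplied, so switching `Then` from a `Concat` to a plain list passes the flag and its value as two separate arguments. A minimal sketch of how such placeholders are authored with the KFP v2 SDK (hypothetical component and launcher names, not the GCPC source):

from typing import Optional

from kfp import dsl


@dsl.container_component
def upload_wrapper(display_name: str, parent_model: Optional[str] = None):
    # ConcatPlaceholder renders its items as a single argv entry;
    # IfPresentPlaceholder emits its `then` items only when the optional
    # input was actually provided at runtime.
    return dsl.ContainerSpec(
        image='python:3.10',
        command=['python3', '-m', 'my_launcher'],  # hypothetical launcher module
        args=[
            dsl.ConcatPlaceholder(['--display_name=', display_name]),
            dsl.IfPresentPlaceholder(
                input_name='parent_model',
                then=['--parent_model_name', parent_model],
            ),
        ],
    )

Compiling a component along these lines yields the same '{"IfPresent": ...}' and '{"Concat": ...}' argument strings that appear in the deploymentSpec hunks above.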
isOptional: true unmanaged_container_model: artifactType: @@ -1870,7 +1815,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -1878,25 +1823,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -1922,7 +1867,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1938,9 +1883,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1999,47 +1944,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). 
These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -2074,11 +2023,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -2087,38 +2036,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. 
The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. 
+ + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -2129,14 +2083,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2156,9 +2110,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2196,7 +2148,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2204,7 +2156,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2215,13 +2167,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2368,28 +2326,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2397,7 +2352,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2407,7 +2362,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2418,22 +2373,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. 
[More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2453,13 +2401,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2467,15 +2418,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. parameterType: STRING comp-set-optional-inputs: executorLabel: exec-set-optional-inputs @@ -2878,7 +2825,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -2900,7 +2847,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3000,8 +2947,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3017,15 +2964,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3041,7 +2981,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3095,7 +3035,7 @@ deploymentSpec: \ return re.sub(r'^/gcs/', r'gs://', path)\n\n master_worker_pool_spec\ \ = {\n 'replica_count': 1,\n 'machine_spec': {\n 'machine_type':\ \ machine_type,\n },\n 'container_spec': {\n 'image_uri':\ - \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230910_1325',\n\ + \ 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20231002_0125',\n\ \ 'args': [\n f'--job_dir={get_gcs_path(job_dir)}',\n\ \ f'--instance_schema_path={get_gcs_path(instance_schema_uri)}',\n\ \ f'--prediction_schema_path={get_gcs_path(prediction_schema_uri)}',\n\ @@ -3108,7 +3048,7 @@ deploymentSpec: \ f'--baseline_path={get_gcs_path(instance_baseline)}',\n \ \ f'--eval_metric={eval_metric}',\n f'--disable_default_eval_metric={disable_default_eval_metric}',\n\ \ f'--seed={seed}',\n f'--seed_per_iteration={seed_per_iteration}',\n\ - \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230910_1325',\n\ + \ '--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20231002_0125',\n\ \ ],\n },\n }\n\n # Add optional arguments if set\n if\ \ weight_column:\n master_worker_pool_spec['container_spec']['args'].append(\n\ \ f'--weight_column={weight_column}'\n )\n\n # Add accelerator_type\ @@ -3140,7 +3080,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3198,7 +3138,7 @@ deploymentSpec: \ = {\n 'instanceSchemaUri': instance_schema_uri,\n 'predictionSchemaUri':\ \ prediction_schema_uri,\n }\n unmanaged_container_model.uri = os.path.join(\n\ \ trials_dir, 'trial_{}'.format(best_trial['id']), 'model'\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-prediction-type-for-xgboost: container: args: @@ -3211,7 +3151,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3246,7 +3186,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3665,7 +3605,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3742,7 +3682,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3751,14 +3693,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-set-optional-inputs: container: args: @@ -3771,7 +3713,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3815,7 +3757,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -3861,7 +3803,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -3906,7 +3848,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-xgboost-hyperparameter-tuning-job: container: args: @@ -4461,4 +4403,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py index e19059a303..582efa4dc3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML XGBoost Trainer component spec.""" from typing import Optional diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml index 4222aa3987..1f761644b8 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/tabular/xgboost_trainer_pipeline.yaml @@ -1029,61 +1029,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' 
- isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -2091,7 +2036,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -2134,7 +2079,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -2142,25 +2087,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' 
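Beyond the image and SDK version bumps, this patch gives the batch-predict and model-upload components a `project` default of `{{$.pipeline_google_cloud_project_id}}`, so the project is resolved from the running PipelineJob unless explicitly overridden. A caller-side sketch under that assumption (it relies on the `google_cloud_pipeline_components.v1` entry points `ModelUploadOp` and `ModelBatchPredictOp` plus `types.artifact_types`; display names, URIs, and formats are illustrative):

from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from kfp import dsl


@dsl.pipeline(name='upload-then-batch-predict')
def upload_then_batch_predict(model_dir: str, gcs_sources: list, gcs_destination: str):
    # Import a trained model directory as an UnmanagedContainerModel artifact,
    # mirroring the importer example embedded in the component docstring.
    unmanaged_model = dsl.importer(
        artifact_uri=model_dir,
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            'containerSpec': {
                'imageUri': 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'
            }
        },
    )
    # `project` is omitted in both calls below: with this change it defaults to
    # the project the PipelineJob runs in ({{$.pipeline_google_cloud_project_id}});
    # `location` keeps its us-central1 default.
    upload_task = ModelUploadOp(
        display_name='xgboost-model',
        unmanaged_container_model=unmanaged_model.output,
    )
    ModelBatchPredictOp(
        job_display_name='xgboost-batch-predict',
        model=upload_task.outputs['model'],
        gcs_source_uris=gcs_sources,
        gcs_destination_output_uri_prefix=gcs_destination,
        instances_format='jsonl',
        predictions_format='jsonl',
    )

The prediction-server image URI here is the one quoted in the docstring example; any serving container compatible with the exported model would do.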
isOptional: true @@ -2186,7 +2131,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2202,9 +2147,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -2263,47 +2208,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -2338,11 +2287,11 @@ components: sent to the Model. 
- If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -2351,38 +2300,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. 
The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -2393,14 +2347,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -2420,9 +2374,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -2460,7 +2412,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -2468,7 +2420,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -2479,13 +2431,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -2632,28 +2590,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. 
The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -2661,7 +2616,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -2671,7 +2626,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -2682,22 +2637,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -2717,13 +2665,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' 
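Both components surface their long-running operation through the `gcp_resources` string output, now documented as serialized JSON of the `gcp_resources` proto. A small downstream sketch for extracting a resource URI from that output (the `resources`/`resourceUri` shape is an assumption based on the GCPC proto README, and the component itself is hypothetical):

from kfp import dsl


@dsl.component(base_image='python:3.10')
def resolve_resource_uri(gcp_resources: str) -> str:
    # gcp_resources is the serialized JSON form of the GcpResources proto;
    # the {"resources": [{"resourceType": ..., "resourceUri": ...}]} shape
    # assumed here should be re-checked against the installed GCPC version.
    import json

    payload = json.loads(gcp_resources)
    return payload['resources'][0]['resourceUri']

In a pipeline this would be wired up as resolve_resource_uri(gcp_resources=upload_task.outputs['gcp_resources']).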
isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -2731,15 +2682,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. parameterType: STRING comp-set-optional-inputs: executorLabel: exec-set-optional-inputs @@ -3081,7 +3028,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -3103,7 +3050,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3203,8 +3150,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -3220,15 +3167,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 30.0 @@ -3244,7 +3184,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3354,10 +3294,10 @@ deploymentSpec: \ worker pool specs.\n \"\"\"\n import copy\n import collections\n import\ \ os\n import re\n\n def get_gcs_path(path):\n return re.sub(r'/gcs/',\ \ 'gs://', path)\n\n formatted_job_dir = get_gcs_path(job_dir)\n prediction_docker_uri\ - \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20230910_1325'\n\ + \ = (\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/xgboost-prediction-server:20231002_0125'\n\ \ )\n master_worker_pool_spec = {\n 'replica_count': 1,\n 'machine_spec':\ \ {\n 'machine_type': machine_type,\n },\n 'container_spec':\ - \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20230910_1325',\n\ + \ {\n 'image_uri': 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/xgboost-training:20231002_0125',\n\ \ 'args': [\n f'--job_dir={formatted_job_dir}',\n\ \ f'--target_column={target_column}',\n f'--objective={objective}',\n\ \ f'--training_data_path={get_gcs_path(materialized_train_split)}',\n\ @@ -3428,7 +3368,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3499,7 +3439,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -3576,7 +3516,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -3585,14 +3527,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-set-optional-inputs: container: args: @@ 
-3605,7 +3547,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3649,7 +3591,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-split-materialized-data: container: args: @@ -3695,7 +3637,7 @@ deploymentSpec: \ 'w') as f:\n f.write(file_patterns[0])\n\n with tf.io.gfile.GFile(materialized_eval_split,\ \ 'w') as f:\n f.write(file_patterns[1])\n\n with tf.io.gfile.GFile(materialized_test_split,\ \ 'w') as f:\n f.write(file_patterns[2])\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 exec-training-configurator-and-validator: container: args: @@ -3740,7 +3682,7 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-xgboost-trainer: container: args: @@ -4525,4 +4467,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py index 09583f8b5b..6dbcd85caf 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """GA AutoML forecasting components.""" from google_cloud_pipeline_components.v1.automl.forecasting.prophet_trainer import prophet_trainer as ProphetTrainerOp diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml index dee5297a75..820de13388 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_predict_pipeline.yaml @@ -120,7 +120,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. 
Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -235,7 +238,7 @@ components: producerTask: maybe-replace-with-default exists_ok: runtimeValue: - constant: 1.0 + constant: true location: taskOutputParameter: outputParameterKey: Output @@ -633,7 +636,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -655,7 +658,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-create-dataset-2: container: args: @@ -668,7 +671,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -690,7 +693,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -703,7 +706,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -724,7 +727,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-query-job: container: args: @@ -751,7 +754,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-build-job-configuration-query: container: args: @@ -764,7 +767,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -785,7 +788,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-first-valid: container: args: @@ -798,7 +801,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -815,7 +818,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-model-metadata: container: args: @@ -828,7 +831,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -854,7 +857,7 @@ deploymentSpec: \ 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n\ \ options.time_series_id_column,\n options.time_series_data_column,\n\ \ options.horizon,\n )\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-table-location: container: args: @@ -867,7 +870,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -890,7 +893,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-load-table-from-uri: container: args: @@ -903,7 +906,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -931,7 +934,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-maybe-replace-with-default: container: args: @@ -944,7 +947,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -959,7 +962,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-validate-inputs: container: args: @@ -972,7 +975,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1061,7 +1064,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 pipelineInfo: description: Forecasts using a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-prediction @@ -1081,7 +1084,7 @@ root: constant: tmp_{{$.pipeline_job_uuid}} delete_contents: runtimeValue: - constant: 1.0 + constant: true project: componentInputParameter: project taskInfo: @@ -1156,4 +1159,4 @@ root: description: The GCP project that runs the pipeline components. parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml index fd5886e218..64e19d3bab 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/bqml_arima_train_pipeline.yaml @@ -103,7 +103,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run BigQuery model creation job. 
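Throughout these hunks the BigQuery and Vertex components gain `{{$.pipeline_google_cloud_project_id}}` as the default for their `project` parameter, so a project only needs to be passed explicitly when it differs from the one running the PipelineJob. A rough sketch of what that defaulting pattern looks like at the component-authoring level follows; the component name and body are hypothetical, and only the placeholder string itself comes from the spec.

    from kfp import dsl

    @dsl.component
    def bq_helper(
        query: str,
        # Resolved by Vertex AI Pipelines at runtime to the project of the
        # enclosing PipelineJob, so callers may omit `project` entirely.
        project: str = '{{$.pipeline_google_cloud_project_id}}',
    ) -> str:
        # Placeholder logic only; a real component would submit `query` here.
        return f'would run {query!r} in project {project}'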
+ defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run BigQuery model creation job. Defaults to the + project in which the PipelineJob is run. + isOptional: true parameterType: STRING query: description: 'SQL query text to execute. Only standard SQL is @@ -246,7 +249,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -350,7 +356,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -454,7 +463,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -558,7 +570,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -662,7 +677,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -1369,7 +1387,7 @@ components: componentInputParameter: pipelinechannel--forecast_horizon forecast_horizon_off_by_one: runtimeValue: - constant: 1.0 + constant: true splits: runtimeValue: constant: @@ -1609,7 +1627,7 @@ components: producerTask: maybe-replace-with-default exists_ok: runtimeValue: - constant: 1.0 + constant: true location: taskOutputParameter: outputParameterKey: Output @@ -1771,7 +1789,7 @@ components: parameters: autodetect_csv_schema: runtimeValue: - constant: 1.0 + constant: true bigquery_staging_full_dataset_id: runtimeValue: constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-2-dataset_id'']}}' @@ -1781,7 +1799,7 @@ components: componentInputParameter: pipelinechannel--data_source_csv_filenames forecasting_apply_windowing: runtimeValue: - constant: 0.0 + constant: false forecasting_context_window: runtimeValue: constant: 0.0 @@ -2162,61 +2180,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' 
- isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -3554,7 +3517,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3576,7 +3539,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-create-dataset-2: container: args: @@ -3589,7 +3552,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3611,7 +3574,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-create-model-job: container: args: @@ -3637,7 +3600,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.create_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -3650,7 +3613,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3671,7 +3634,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-list-rows: container: args: @@ -3684,7 +3647,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3709,7 +3672,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-list-rows-2: container: args: @@ -3722,7 +3685,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3747,7 +3710,7 @@ deploymentSpec: \ metadata['datasetId'], metadata['tableId']]))\n result = []\n for row\ \ in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n\ \ return result\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-query-job: container: args: @@ -3774,7 +3737,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-query-job-2: container: args: @@ -3801,7 +3764,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-query-job-3: container: args: @@ -3828,7 +3791,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-query-job-4: container: args: @@ -3855,7 +3818,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-query-job-5: container: args: @@ -3882,7 +3845,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-build-job-configuration-query: container: args: @@ -3895,7 +3858,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3916,7 +3879,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-2: container: args: @@ -3929,7 +3892,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3950,7 +3913,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-3: container: args: @@ -3963,7 +3926,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -3984,7 +3947,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-4: container: args: @@ -3997,7 +3960,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4018,7 +3981,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-5: container: args: @@ -4031,7 +3994,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4052,7 +4015,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-6: container: args: @@ -4065,7 +4028,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4086,7 +4049,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-serialized-query-parameters: container: args: @@ -4099,7 +4062,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4163,7 +4126,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-serialized-query-parameters-2: container: args: @@ -4176,7 +4139,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4240,7 +4203,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-serialized-query-parameters-3: container: args: @@ -4253,7 +4216,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4317,7 +4280,7 @@ deploymentSpec: \ 'name': 'start_time',\n 'parameterType': {\n 'type':\ \ 'TIMESTAMP'\n },\n 'parameterValue': {\n 'value': start_time\n\ \ },\n })\n return query_parameters\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-cond: container: args: @@ -4330,7 +4293,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4345,7 +4308,7 @@ deploymentSpec: \ *\n\ndef cond(predicate: bool, true_str: str, false_str: str) -> str:\n\ \ \"\"\"Returns true_str if predicate is true, else false_str.\"\"\"\n\ \ return true_str if predicate else false_str\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-create-metrics-artifact: container: args: @@ -4358,7 +4321,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4377,7 +4340,7 @@ deploymentSpec: \ 'MAPE': 'meanAbsolutePercentageError',\n }\n metrics = {metric_name_map[k]:\ \ v for k, v in dict(metrics_rows[0]).items()}\n evaluation_metrics.metadata\ \ = metrics\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-feature-transform-engine: container: args: @@ -4462,8 +4425,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -4479,15 +4442,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", 
"{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-get-fte-suffix: container: args: @@ -4500,7 +4456,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4521,7 +4477,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-table-location: container: args: @@ -4534,7 +4490,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4557,7 +4513,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-value: container: args: @@ -4570,7 +4526,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4584,7 +4540,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef get_value(d: Dict[str, str], key: str) -> str:\n return d[key]\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-window-query-priority: container: args: @@ -4597,7 +4553,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4614,7 +4570,7 @@ deploymentSpec: \ depending on the window number.\"\"\"\n if int(window['window_number'])\ \ <= max_interactive:\n return 'INTERACTIVE'\n else:\n return 'BATCH'\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-maybe-replace-with-default: container: args: @@ -4627,7 +4583,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4642,7 +4598,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-query-with-retry: container: args: @@ -4655,7 +4611,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4696,7 +4652,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-query-with-retry-2: container: args: @@ -4709,7 +4665,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4750,7 +4706,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-query-with-retry-3: container: args: @@ -4763,7 +4719,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4804,7 +4760,7 @@ deploymentSpec: \ 'Query failed with %s. Retrying after %d seconds.', e, wait_time)\n\ \ time.sleep(wait_time)\n retry_count += 1\n return destination_uri\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri: container: args: @@ -4817,7 +4773,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4840,7 +4796,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -4853,7 +4809,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4876,7 +4832,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-validate-inputs: container: args: @@ -4889,7 +4845,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -4978,7 +4934,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-wrapped-in-list: container: args: @@ -4991,7 +4947,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -5005,7 +4961,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ \ in a list.\"\"\"\n return [value]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 pipelineInfo: description: Trains a BQML ARIMA_PLUS model. name: automl-tabular-bqml-arima-train @@ -5031,7 +4987,7 @@ root: constant: tmp_{{$.pipeline_job_uuid}} delete_contents: runtimeValue: - constant: 1.0 + constant: true project: componentInputParameter: project taskInfo: @@ -5237,4 +5193,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml index 896d227869..83e39834c4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_predict_pipeline.yaml @@ -104,7 +104,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -208,7 +211,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -858,7 +864,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -901,7 +907,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -909,25 +915,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. 
The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -953,7 +959,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -969,9 +975,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1030,47 +1036,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. 
May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -1105,11 +1115,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -1118,38 +1128,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. 
Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -1160,14 +1175,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." 
+ \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -1187,9 +1202,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -1227,7 +1240,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -1235,7 +1248,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -1246,13 +1259,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -1420,7 +1439,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1442,7 +1461,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -1455,7 +1474,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1476,7 +1495,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-query-job: container: args: @@ -1503,7 +1522,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-bigquery-query-job-2: container: args: @@ -1530,7 +1549,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-build-job-configuration-query: container: args: @@ -1543,7 +1562,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1564,7 +1583,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-build-job-configuration-query-2: container: args: @@ -1577,7 +1596,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1598,7 +1617,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-first-valid: container: args: @@ -1611,7 +1630,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1628,7 +1647,7 @@ deploymentSpec: \ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ \n for value in json.loads(values):\n if value:\n return value\n\ \ raise ValueError('No valid values.')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-table-location: container: args: @@ -1641,7 +1660,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1664,7 +1683,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-table-location-2: container: args: @@ -1677,7 +1696,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1700,7 +1719,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-load-table-from-uri: container: args: @@ -1713,7 +1732,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1741,7 +1760,7 @@ deploymentSpec: \ source_format=source_format)\n client.load_table_from_uri(\n source_uris=csv_list,\n\ \ destination=destination,\n project=project,\n location=location,\n\ \ job_config=job_config).result()\n return destination\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-make-vertex-model-artifact: container: args: @@ -1754,7 +1773,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1771,7 +1790,7 @@ deploymentSpec: Creates a google.VertexModel artifact.\"\"\"\n vertex_model.metadata =\ \ {'resourceName': model_resource_name}\n vertex_model.uri = (f'https://{location}-aiplatform.googleapis.com'\n\ \ f'/v1/{model_resource_name}')\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-maybe-replace-with-default: container: args: @@ -1784,7 +1803,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1799,7 +1818,7 @@ deploymentSpec: \ *\n\ndef maybe_replace_with_default(value: str, default: str = '') ->\ \ str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\ \n return default if not value else value\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-model-batch-predict: container: args: @@ -1848,7 +1867,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-table-to-uri: container: args: @@ -1861,7 +1880,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1884,7 +1903,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-table-to-uri-2: container: args: @@ -1897,7 +1916,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -1920,7 +1939,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-validate-inputs: container: args: @@ -1933,7 +1952,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2022,7 +2041,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 pipelineInfo: description: Creates a batch prediction using a Prophet model. name: prophet-predict @@ -2042,7 +2061,7 @@ root: constant: tmp_{{$.pipeline_job_uuid}} delete_contents: runtimeValue: - constant: 1.0 + constant: true project: componentInputParameter: project taskInfo: @@ -2147,4 +2166,4 @@ root: series.' parameterType: STRING schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py index 3844900ff8..c79a97c815 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """Prophet trainer component spec.""" from typing import Optional - from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel from kfp import dsl from kfp.dsl import Artifact @@ -108,17 +108,17 @@ def prophet_trainer( '"machine_spec": {"machine_type": "n1-standard-4"}, ', ( '"container_spec":' - ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", ' + ' {"image_uri":"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", ' ), '"args": ["prophet_trainer", "', ( f'--job_name=dataflow-{dsl.PIPELINE_JOB_NAME_PLACEHOLDER}", "' ), ( - '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", "' + '--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "' ), ( - '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230910_1325", "' + '--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20231002_0125", "' ), '--artifacts_dir=', root_dir, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml index 89d1ddb5f3..e0e311d4ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/prophet_trainer_pipeline.yaml @@ -123,7 +123,10 @@ components: isOptional: true parameterType: STRING project: - description: Project to run the BigQuery query job. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run the BigQuery query job. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING query: defaultValue: '' @@ -211,7 +214,7 @@ components: predictions_gcs_source: componentInputArtifact: pipelinechannel--prophet-trainer-evaluated_examples_directory parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -442,7 +445,7 @@ components: parameters: autodetect_csv_schema: runtimeValue: - constant: 1.0 + constant: true bigquery_staging_full_dataset_id: runtimeValue: constant: '{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-project_id'']}}.{{$.inputs.parameters[''pipelinechannel--bigquery-create-dataset-dataset_id'']}}' @@ -452,7 +455,7 @@ components: componentInputParameter: pipelinechannel--data_source_csv_filenames forecasting_apply_windowing: runtimeValue: - constant: 0.0 + constant: false forecasting_context_window: runtimeValue: constant: 0.0 @@ -935,61 +938,6 @@ components: \ output_column: Name of our output feature." isOptional: true parameterType: LIST - embedding_batch_prediction_accelerator_count: - defaultValue: -1.0 - description: 'The number of accelerators to - - use to generate the embeddings. Default is 0.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_accelerator_type: - defaultValue: accelerator_type_unspecified - description: 'The accelerator type to use to - - generate embeddings. If not provided, no accelerator is used. 
More - - details: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype' - isOptional: true - parameterType: STRING - embedding_batch_prediction_batch_size: - defaultValue: -1.0 - description: 'The batch size for embedding batch - - prediction job. Default = 1024.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_machine_type: - defaultValue: '' - description: 'The machine type to be - - used to run the embedding batch prediction job. If not provided, - - `n1-highmem-32` will be used. For more details, see: - - https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types' - isOptional: true - parameterType: STRING - embedding_batch_prediction_max_replica_count: - defaultValue: -1.0 - description: 'The max replica count for - - embedding batch prediction job. Default = 50.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_batch_prediction_starting_replica_count: - defaultValue: -1.0 - description: 'The starting replica count - - for embedding batch prediction job. Default = 20.' - isOptional: true - parameterType: NUMBER_INTEGER - embedding_prediction_server_docker_uri: - defaultValue: '' - description: 'The docker image inside which to - - run the embedding models to generate embeddings.' - isOptional: true - parameterType: STRING encryption_spec_key_name: defaultValue: '' description: Customer-managed encryption key. @@ -1678,11 +1626,12 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'The managed Vertex Model used for + description: 'The Vertex model used for evaluation. Must be located in the + same - predictions job, if using Vertex batch prediction. Must share the same + region as the location argument. It is used to set the default - location as the provided input argument `location`.' + configurations for AutoML and custom-trained models.' isOptional: true predictions_bigquery_source: artifactType: @@ -1709,36 +1658,36 @@ components: should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 description: 'The disk size (in GB) of the machine - executing the evaluation run. If not set, defaulted to `50`.' + executing the evaluation run.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 description: 'The machine type executing the - evaluation run. If not set, defaulted to `n1-standard-4`.' + evaluation run.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 description: 'The max number of workers - executing the evaluation run. If not set, defaulted to `25`.' + executing the evaluation run.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' description: 'Service account to run the - dataflow job. If not set, dataflow will use the default worker service + Dataflow job. If not set, Dataflow will use the default worker service account. For more details, see - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' + https://cloud.google.com/dataflow/docs/concepts/secURIty-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: @@ -1763,28 +1712,36 @@ components: defaultValue: 1.0 description: 'The number of workers executing the - evaluation run. If not set, defaulted to `10`.' + evaluation run.' 
isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: Customer-managed encryption key. + description: ' Customer-managed encryption key options. + + If set, resources created by this pipeline will be encrypted with the + + provided encryption key. Has the form: + + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. + + The key needs to be in the same region as where the compute resource is + + created.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of + force_runner_mode: + defaultValue: '' + description: 'Flag to choose Beam runner. Valid options are - launching a Dataflow job.' + `DirectRunner` and `Dataflow`.' isOptional: true - parameterType: BOOLEAN + parameterType: STRING ground_truth_bigquery_source: defaultValue: '' description: 'Required for custom tabular. - The BigQuery table uri representing where the ground truth is located. + The BigQuery table URI representing where the ground truth is located. Used to provide ground truth for each prediction instance when they are @@ -1797,16 +1754,14 @@ components: tabular data. The file format for the ground truth files. `jsonl`, - `csv`, and `bigquery` are the allowed formats. If not set, defaulted to - - `jsonl`.' + `csv`, and `bigquery` are the allowed formats.' isOptional: true parameterType: STRING ground_truth_gcs_source: defaultValue: [] description: 'Required for custom - tabular and non tabular data. The GCS uris representing where the ground + tabular and non tabular data. The GCS URIs representing where the ground truth is located. Used to provide ground truth for each prediction @@ -1817,20 +1772,16 @@ components: parameterType: LIST location: defaultValue: us-central1 - description: 'Location for running the evaluation. If not set, - - defaulted to `us-central1`.' + description: Location for running the evaluation. isOptional: true parameterType: STRING prediction_score_column: - defaultValue: '' + defaultValue: prediction.value description: 'The column name of the field containing batch prediction scores. Formatted to be able to find nested - columns, delimited by `.`. If not set, defaulted to `prediction.scores` - - for classification.' + columns, delimited by `.`.' isOptional: true parameterType: STRING predictions_format: @@ -1839,20 +1790,21 @@ components: prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' + formats, from Vertex Batch Prediction.' isOptional: true parameterType: STRING project: - description: Project to run evaluation container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to run evaluation container. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING target_field_name: - description: 'The full name path of the features target field + description: 'The target field''s name. Formatted to be able to find - in the predictions file. Formatted to be able to find nested columns, + nested columns, delimited by `.`. Prefixed with ''instance.'' on the - delimited by `.`. Alternatively referred to as the ground truth (or - - ground_truth_column) field.' + component for Vertex Batch Prediction.' 
parameterType: STRING outputDefinitions: artifacts: @@ -1860,12 +1812,12 @@ components: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'google.ClassificationMetrics representing the classification + description: '`google.RegressionMetrics` representing the regression evaluation metrics in GCS.' parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the dataflow + description: 'Serialized gcp_resources proto tracking the Dataflow job. For more details, see @@ -1879,28 +1831,25 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: 'An artifact of a model - - which to upload a new version to. Only specify this field when - - uploading a new version.' + description: An artifact of a model which to upload a new version to. Only + specify this field when uploading a new version. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models/upload#request-body) isOptional: true unmanaged_container_model: artifactType: schemaTitle: google.UnmanagedContainerModel schemaVersion: 0.0.1 - description: "The unmanaged container model to be uploaded. The model can\n\ - be passed from an upstream step, or imported via an importer.\n\nExamples::\n\ - \n from kfp.dsl import importer\n from\n google_cloud_pipeline_components.google_cloud_pipeline_components.types\n\ - \ import artifact_types\n\n importer_spec = importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ - \ artifact_class=artifact_types.UnmanagedContainerModel, metadata={\n\ - \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ - \ }\n })" + description: "The unmanaged container model to be uploaded. The Model can\ + \ be passed from an upstream step or imported via a KFP `dsl.importer`.\n\ + :Examples:\n ::\n\n from kfp import dsl\n from google_cloud_pipeline_components.google_cloud_pipeline_components.types\ + \ import artifact_types\n\n importer_spec = dsl.importer(\n artifact_uri='gs://managed-pipeline-gcpc-e2e-test/automl-tabular/model',\n\ + \ artifact_class=artifact_types.UnmanagedContainerModel,\n metadata={\n\ + \ 'containerSpec': { 'imageUri':\n 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod'\n\ + \ }\n })" isOptional: true parameters: description: defaultValue: '' - description: The description of the model. + description: The description of the Model. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model) isOptional: true parameterType: STRING display_name: @@ -1908,7 +1857,7 @@ components: can be up to 128 characters long and can be consist of any UTF-8 - characters.' + characters. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models#Model)' parameterType: STRING encryption_spec_key_name: defaultValue: '' @@ -1918,7 +1867,7 @@ components: Model will be secured by this key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -1929,22 +1878,15 @@ components: defaultValue: {} description: 'Metadata describing the Model''s - input and output for explanation. Both `explanation_metadata` and - - `explanation_parameters` must be passed together when used. 
For more - - details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + input and output for explanation. Both `explanation_metadata` and `explanation_parameters` + must be passed together when used. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata)' isOptional: true parameterType: STRUCT explanation_parameters: defaultValue: {} description: 'Parameters to configure - explaining for Model''s predictions. For more details, see - - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' + explaining for Model''s predictions. [More information.](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters)' isOptional: true parameterType: STRUCT labels: @@ -1964,13 +1906,16 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Optional location to upload this model to. If + description: 'Optional location to upload this Model to. If - not set, default to us-central1.' + not set, defaults to `us-central1`.' isOptional: true parameterType: STRING project: - description: Project to upload this model to. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to upload this Model to. Defaults to the project in + which the PipelineJob is run. + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -1978,15 +1923,11 @@ components: artifactType: schemaTitle: google.VertexModel schemaVersion: 0.0.1 - description: Artifact tracking the created model. + description: Artifact tracking the created Model. parameters: gcp_resources: - description: 'Serialized gcp_resources proto tracking the upload model''s - long - - running operation. For more details, see - - https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' + description: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) + which tracks the upload Model's long-running operation. parameterType: STRING comp-prophet-trainer: executorLabel: exec-prophet-trainer @@ -2231,7 +2172,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2253,7 +2194,7 @@ deploymentSpec: \ = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return\ \ collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n \ \ ref.project, ref.dataset_id)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-delete-dataset-with-prefix: container: args: @@ -2266,7 +2207,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2287,7 +2228,7 @@ deploymentSpec: \ if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n\ \ dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-bigquery-query-job: container: args: @@ -2314,7 +2255,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.bigquery.query_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-build-job-configuration-query: container: args: @@ -2327,7 +2268,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2348,7 +2289,7 @@ deploymentSpec: \ 'datasetId': dataset_id,\n 'tableId': table_id,\n }\n\ \ if write_disposition:\n config['write_disposition'] = write_disposition\n\ \ return config\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-feature-transform-engine: container: args: @@ -2433,8 +2374,8 @@ deploymentSpec: "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp"]}' - '{"Concat": ["--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}"]}' - '{"Concat": ["--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}"]}' - - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325 - - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + - --dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125 + - --feature_transform_engine_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 - '{"Concat": ["--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}"]}' - '{"Concat": ["--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}"]}' - '{"Concat": ["--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}"]}' @@ -2450,15 +2391,8 @@ deploymentSpec: ["--temporal_total_weight=", "{{$.inputs.parameters[''temporal_total_weight'']}}"]}}}' - '{"IfPresent": {"InputName": "group_temporal_total_weight", "Then": {"Concat": ["--group_temporal_total_weight=", "{{$.inputs.parameters[''group_temporal_total_weight'']}}"]}}}' - - '{"Concat": ["--embedding_prediction_server_docker_uri=", "{{$.inputs.parameters[''embedding_prediction_server_docker_uri'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_machine_type=", "{{$.inputs.parameters[''embedding_batch_prediction_machine_type'']}}"]}' - - 
'{"Concat": ["--embedding_batch_prediction_accelerator_type=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_type'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_accelerator_count=", "{{$.inputs.parameters[''embedding_batch_prediction_accelerator_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_starting_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_starting_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_max_replica_count=", "{{$.inputs.parameters[''embedding_batch_prediction_max_replica_count'']}}"]}' - - '{"Concat": ["--embedding_batch_prediction_batch_size=", "{{$.inputs.parameters[''embedding_batch_prediction_batch_size'']}}"]}' - '{"Concat": ["--encryption_spec_key_name=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}"]}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125 exec-get-fte-suffix: container: args: @@ -2471,7 +2405,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2492,7 +2426,7 @@ deploymentSpec: \ table.table_id.startswith(fte_table):\n return table.table_id[len(fte_table)\ \ + 1:]\n raise ValueError(\n f'No FTE output tables found in {bigquery_staging_full_dataset_id}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-get-table-location: container: args: @@ -2505,7 +2439,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2528,7 +2462,7 @@ deploymentSpec: \ if table.startswith('bq://'):\n table = table[len('bq://'):]\n elif\ \ table.startswith('bigquery://'):\n table = table[len('bigquery://'):]\n\ \ return client.get_table(table).location\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-model-evaluation-regression: container: args: @@ -2568,7 +2502,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -2581,8 +2515,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --output_metrics_gcs_path - '{{$.outputs.artifacts[''evaluation_metrics''].path}}' - --gcp_resources @@ -2592,7 +2526,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-model-upload: container: args: @@ -2604,7 +2538,9 @@ deploymentSpec: "\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", ", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", "}", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", - "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' + "\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"pipeline_job\": + \"", "projects/{{$.inputs.parameters[''project'']}}/locations/{{$.inputs.parameters[''location'']}}/pipelineJobs/{{$.pipeline_job_uuid}}", + "\"", "}"]}' - --project - '{{$.inputs.parameters[''project'']}}' - --location @@ -2613,14 +2549,14 @@ deploymentSpec: - '{{$.outputs.parameters[''gcp_resources''].output_file}}' - --executor_input - '{{$}}' - - '{"IfPresent": {"InputName": "parent_model", "Then": {"Concat": ["--parent_model_name - ", "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}}' + - '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", + "{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' command: - python3 - -u - -m - google_cloud_pipeline_components.container.v1.model.upload_model.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-prophet-trainer: container: args: @@ -2637,10 +2573,10 @@ deploymentSpec: ", "\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, ", "\"job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"1\", ", "\"machine_spec\": {\"machine_type\": \"n1-standard-4\"}, ", "\"container_spec\": - 
{\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325\", + {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125\", ", "\"args\": [\"prophet_trainer\", \"", "--job_name=dataflow-{{$.pipeline_job_name}}\", - \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325\", - \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20230910_1325\", + \"", "--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125\", + \"", "--prediction_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/fte-prediction-server:20231002_0125\", \"", "--artifacts_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/model/\", \"", "--evaluated_examples_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/eval/\", \"", "--region=", "{{$.inputs.parameters[''location'']}}", @@ -2681,7 +2617,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2704,7 +2640,7 @@ deploymentSpec: \ if use_bq_prefix:\n bq_uri = 'bq://' + bq_uri\n outputs.append(bq_uri)\n\ \ return collections.namedtuple(\n 'Outputs',\n ['project_id',\ \ 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-validate-inputs: container: args: @@ -2717,7 +2653,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2806,7 +2742,7 @@ deploymentSpec: \ raise ValueError(\n 'Granularity unit should be one of the\ \ following: '\n f'{valid_data_granularity_units}, got: {data_granularity_unit}.')\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-wrapped-in-list: container: args: @@ -2819,7 +2755,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -2833,7 +2769,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef wrapped_in_list(value: str) -> List[str]:\n \"\"\"Wraps a string\ \ in a list.\"\"\"\n return [value]\n\n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 pipelineInfo: description: Trains one Prophet model per time series. 
name: prophet-train @@ -2853,7 +2789,7 @@ root: constant: tmp_{{$.pipeline_job_uuid}} delete_contents: runtimeValue: - constant: 1.0 + constant: true project: componentInputParameter: project taskInfo: @@ -3111,4 +3047,4 @@ root: isOptional: true parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py index 0e025ff059..b69d5430a5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/forecasting/utils.py @@ -76,7 +76,8 @@ def get_bqml_arima_train_pipeline_and_parameters( but also the longest training runtime. run_evaluation: Whether to run evaluation steps during training. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -133,7 +134,8 @@ def get_bqml_arima_predict_pipeline_and_parameters( results. This will cause the batch prediction output to include explanations. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -235,7 +237,8 @@ def get_prophet_train_pipeline_and_parameters( addresses. run_evaluation: Whether to run evaluation steps during training. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, @@ -316,7 +319,8 @@ def get_prophet_prediction_pipeline_and_parameters( machine_type: The machine type used for batch prediction. max_num_workers: The max number of workers used for batch prediction. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = { 'project': project, diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py index 0bda943e91..840cd055f5 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """GA AutoML tabular components.""" import os diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml index 583bec59ef..421cda69d0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/automl_tabular_pipeline.yaml @@ -1389,7 +1389,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 1.0 + constant: true run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -1662,7 +1662,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -1683,6 +1683,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -1709,7 +1711,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -2130,7 +2132,7 @@ components: componentInputParameter: pipelinechannel--fast_testing is_skip_architecture_search: runtimeValue: - constant: 0.0 + constant: false run_distillation: componentInputParameter: pipelinechannel--run_distillation stage_1_num_parallel_trials: @@ -2709,7 +2711,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-2 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -2730,6 +2732,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: componentInputParameter: pipelinechannel--project taskInfo: @@ -2756,7 +2760,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -3089,7 +3093,7 @@ components: componentInputParameter: pipelinechannel--root_dir run_distillation: runtimeValue: - constant: 1.0 + constant: true single_run_max_secs: componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs worker_pool_specs_override_json: @@ -3529,7 +3533,7 @@ components: outputArtifactKey: gcs_output_directory producerTask: model-batch-explanation-3 parameters: - dataflow_disk_size: + dataflow_disk_size_gb: componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb dataflow_machine_type: componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type @@ -3550,6 +3554,8 @@ components: predictions_format: runtimeValue: constant: jsonl + problem_type: + componentInputParameter: pipelinechannel--prediction_type project: 
componentInputParameter: pipelinechannel--project taskInfo: @@ -3576,7 +3582,7 @@ components: componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json generate_explanation: runtimeValue: - constant: 1.0 + constant: true instances_format: runtimeValue: constant: tf-record @@ -4401,118 +4407,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' 
isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4536,118 +4486,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' 
+ force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -4671,118 +4565,62 @@ components: artifactType: schemaTitle: google.BQTable schemaVersion: 0.0.1 - description: 'BigQuery table - - with prediction or explanation data to be used for this evaluation. For - - prediction results, the table column should be named "predicted_*".' isOptional: true predictions_gcs_source: artifactType: schemaTitle: system.Artifact schemaVersion: 0.0.1 - description: 'An artifact with its - - URI pointing toward a GCS directory with prediction or explanation files - - to be used for this evaluation. For prediction results, the files should - - be named "prediction.results-*" or "predictions_". For explanation - - results, the files should be named "explanation.results-*".' isOptional: true parameters: - dataflow_disk_size: + dataflow_disk_size_gb: defaultValue: 50.0 - description: 'The disk size (in GB) of the machine - - executing the evaluation run. If not set, defaulted to `50`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_machine_type: defaultValue: n1-standard-4 - description: 'The machine type executing the - - evaluation run. If not set, defaulted to `n1-standard-4`.' isOptional: true parameterType: STRING dataflow_max_workers_num: defaultValue: 5.0 - description: 'The max number of workers - - executing the evaluation run. If not set, defaulted to `25`.' isOptional: true parameterType: NUMBER_INTEGER dataflow_service_account: defaultValue: '' - description: 'Service account to run the - - dataflow job. If not set, dataflow will use the default worker service - - account. For more details, see - - https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' isOptional: true parameterType: STRING dataflow_subnetwork: defaultValue: '' - description: 'Dataflow''s fully qualified subnetwork - - name, when empty the default subnetwork will be used. More details: - - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' isOptional: true parameterType: STRING dataflow_use_public_ips: defaultValue: true - description: 'Specifies whether Dataflow - - workers use public IP addresses.' isOptional: true parameterType: BOOLEAN dataflow_workers_num: defaultValue: 1.0 - description: 'The number of workers executing the - - evaluation run. If not set, defaulted to `10`.' isOptional: true parameterType: NUMBER_INTEGER encryption_spec_key_name: defaultValue: '' - description: 'Customer-managed encryption key - - for the Dataflow job. If this is set, then all resources created by the - - Dataflow job will be encrypted with the provided encryption key.' isOptional: true parameterType: STRING - force_direct_runner: - defaultValue: false - description: 'Flag to use Beam DirectRunner. 
If set to true, - - use Apache Beam DirectRunner to execute the task locally instead of - - launching a Dataflow job.' + force_runner_mode: + defaultValue: '' isOptional: true - parameterType: BOOLEAN + parameterType: STRING location: defaultValue: us-central1 - description: 'Location running feature attribution. If not - - set, defaulted to `us-central1`.' isOptional: true parameterType: STRING predictions_format: defaultValue: jsonl - description: 'The file format for the batch - - prediction results. `jsonl`, `csv`, and `bigquery` are the allowed - - formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' isOptional: true parameterType: STRING + problem_type: + parameterType: STRING project: - description: Project to run feature attribution container. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + isOptional: true parameterType: STRING outputDefinitions: artifacts: @@ -5181,7 +5019,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -5224,7 +5062,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5232,25 +5070,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5276,7 +5114,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5292,9 +5130,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5353,47 +5191,51 @@ components: to. In the given directory a new directory is created. 
Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -5428,11 +5270,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5441,38 +5283,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. 
Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. 
--)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -5483,14 +5330,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -5510,9 +5357,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -5550,7 +5395,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -5558,7 +5403,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' 
isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -5569,13 +5414,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -5643,7 +5494,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -5686,7 +5537,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -5694,25 +5545,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -5738,7 +5589,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. The key needs to be in the same region as where the compute resource @@ -5754,9 +5605,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. 
+ When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -5815,47 +5666,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -5890,11 +5745,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. + `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. 
The input must be JSONL with objects at each line, CSV, BigQuery @@ -5903,38 +5758,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -5945,14 +5805,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -5972,9 +5832,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6012,7 +5870,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6020,7 +5878,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6031,13 +5889,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -6105,7 +5969,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6148,7 +6012,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6156,25 +6020,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6200,7 +6064,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -6216,9 +6080,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6277,47 +6141,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6352,11 +6220,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6365,38 +6233,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6407,14 +6280,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -6434,9 +6307,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6474,7 +6345,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6482,7 +6353,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6493,13 +6364,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -6567,7 +6444,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -6610,7 +6487,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -6618,25 +6495,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -6662,7 +6539,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -6678,9 +6555,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6739,47 +6616,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -6814,11 +6695,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -6827,38 +6708,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -6869,14 +6755,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -6896,9 +6782,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -6936,7 +6820,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -6944,7 +6828,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -6955,13 +6839,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7029,7 +6919,7 @@ components: deployments of the Model and their resources. Either this or - unmanaged_container_model must be specified.' + `unmanaged_container_model` must be specified.' isOptional: true unmanaged_container_model: artifactType: @@ -7072,7 +6962,7 @@ components: the given project a new dataset is created with name - ``prediction__`` where is made + `prediction__` where is made BigQuery-dataset-name compatible (for example, most special characters @@ -7080,25 +6970,25 @@ components: "based on ISO-8601" format. In the dataset two tables will be created, - ``predictions``, and ``errors``. If the Model has both ``instance`` + `predictions`, and `errors`. If the Model has both `instance` - and ``prediction`` schemata defined then the tables have columns as + and `prediction` schemata defined then the tables have columns as - follows: The ``predictions`` table contains instances for which the + follows: The `predictions` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the - Model''s instance and prediction schemata. The ``errors`` table + Model''s instance and prediction schemata. The `errors` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" - column, which as values has ```google.rpc.Status`` `__ + column, which as values has [google.rpc.Status](Status) - represented as a STRUCT, and containing only ``code`` and + represented as a STRUCT, and containing only `code` and - ``message``. For more details about this output config, see + `message`. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true @@ -7124,7 +7014,7 @@ components: provided encryption key. Has the form: - ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. + `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`. 
The key needs to be in the same region as where the compute resource @@ -7140,9 +7030,9 @@ components: Excluded will be attached to the batch prediction output if - [key_field][] is not specified. + key_field is not specified. - When excluded_fields is populated, [included_fields][] must be empty. + When `excluded_fields` is populated, `included_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7201,47 +7091,51 @@ components: to. In the given directory a new directory is created. Its name is - ``prediction--``, where timestamp + `prediction--`, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files - ``predictions_0001.``, ``predictions_0002.``, + `predictions_0001.`, `predictions_0002.`, - ..., ``predictions_N.`` are created where ```` + ..., `predictions_N.` are created where `` - depends on chosen ``predictions_format``, and N may equal 0001 and + depends on chosen `predictions_format`, and N may equal 0001 and depends on the total number of successfully predicted instances. If - the Model has both ``instance`` and ``prediction`` schemata defined + the Model has both `instance` and `prediction` schemata defined then each such file contains predictions as per the - ``predictions_format``. If prediction for any instance failed + `predictions_format`. If prediction for any instance failed (partially or completely), then an additional - ``errors_0001.``, ``errors_0002.``,..., + `errors_0001.`, `errors_0002.`,..., - ``errors_N.`` files are created (N depends on total number + `errors_N.` files are created (N depends on total number of failed predictions). These files contain the failed instances, as - per their schema, followed by an additional ``error`` field which as + per their schema, followed by an additional `error` field which as - value has ``google.rpc.Status`` containing only ``code`` and + value has `google.rpc.Status` containing only `code` and - ``message`` fields. For more details about this output config, see + `message` fields. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' isOptional: true parameterType: STRING gcs_source_uris: defaultValue: [] - description: "Google Cloud Storage URI(-s) to your instances to run batch\ - \ prediction\non. They must match `instances_format`. May contain wildcards.\ - \ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ - For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'Google Cloud Storage URI(-s) to your instances to run batch + prediction + + on. They must match `instances_format`. May contain wildcards. For more + + information on wildcards, see [WildcardNames](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames). + + For more details about this input config, see [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).' isOptional: true parameterType: LIST generate_explanation: @@ -7276,11 +7170,11 @@ components: sent to the Model. - If [instance_type][] is `array`, the order of field names in + If `instance_type` is `array`, the order of field names in - included_fields also determines the order of the values in the array. 
+ `included_fields` also determines the order of the values in the array. - When included_fields is populated, [excluded_fields][] must be empty. + When `included_fields` is populated, `excluded_fields` must be empty. The input must be JSONL with objects at each line, CSV, BigQuery @@ -7289,38 +7183,43 @@ components: parameterType: LIST instance_type: defaultValue: '' - description: "The format of the instance that the Model accepts. Vertex\ - \ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ - to the specified format.\nSupported values are:\n** `object`: Each input\ - \ is converted to JSON object format.\n* For `bigquery`, each row is converted\ - \ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ - \ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ - ** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ - \ each row is converted to an array. The order\n of columns is determined\ - \ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ - \ [included_fields][] must be populated for specifying field orders.\n\ - * For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ - \ must be populated for specifying field orders.\n* Does not apply to\ - \ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ - \ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ - \ and `csv`, the behavior is the same as `array`. The\n order of columns\ - \ is the same as defined in the file or table, unless\n [included_fields][]\ - \ is populated.\n * For `jsonl`, the prediction instance format is determined\ - \ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ - \ each record will be converted to\n an object in the format of `{\"\ - b64\": }`, where `` is\n the Base64-encoded string of\ - \ the content of the record.\n * For `file-list`, each file in the list\ - \ will be converted to an\n object in the format of `{\"b64\": }`,\ + description: "The format of the instance that the Model\naccepts. Vertex\ + \ AI will convert compatible\n[InstancesFormat](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\n\ + to the specified format. Supported values are:\n`object`: Each input is\ + \ converted to JSON object format.\n * For `bigquery`, each row is converted\ + \ to an object.\n * For `jsonl`, each line of the JSONL input must be\ + \ an object.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\ + \ `tf-record-gzip`.\n`array`: Each input is converted to JSON array format.\n\ + \ * For `bigquery`, each row is converted to an array. The order\n \ + \ of columns is determined by the BigQuery column order, unless\n \ + \ [included_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig)\ + \ is populated.\n `included_fields` must be populated for specifying\ + \ field orders.\n * For `jsonl`, if each line of the JSONL input is an\ + \ object,\n `included_fields` must be populated for specifying field\ + \ orders.\n * Does not apply to `csv`, `file-list`, `tf-record`, or\n\ + \ `tf-record-gzip`.\nIf not specified, Vertex AI converts the batch\ + \ prediction input as\nfollows:\n * For `bigquery` and `csv`, the behavior\ + \ is the same as `array`. 
The\n order of columns is the same as defined\ + \ in the file or table, unless\n included_fields is populated.\n * For\ + \ `jsonl`, the prediction instance format is determined by\n each line\ + \ of the input.\n * For `tf-record`/`tf-record-gzip`, each record will\ + \ be converted to\n an object in the format of `{\"b64\": }`,\ \ where `` is\n the Base64-encoded string of the content of the\ - \ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ - \ Base64 is not for this field. --)" + \ record.\n * For `file-list`, each file in the list will be converted\ + \ to an\n object in the format of `{\"b64\": }`, where ``\ + \ is\n the Base64-encoded string of the content of the file." isOptional: true parameterType: STRING instances_format: defaultValue: jsonl - description: "The format in which instances are\ngiven, must be one of the\ - \ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ - . For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." + description: 'The format in which instances are + + given, must be one of the [Model](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.models)''s + supportedInputStorageFormats. + + For more details about this input config, see + + [InputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.)' isOptional: true parameterType: STRING job_display_name: @@ -7331,14 +7230,14 @@ components: description: "The name of the field that is considered as a key.\nThe values\ \ identified by the key field is not included in the\ntransformed instances\ \ that is sent to the Model. This is similar to\nspecifying this name\ - \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ - \ output will not include the instances. Instead the\noutput will only\ - \ include the value of the key field, in a field named\n`key` in the output:\n\ - \ * For `jsonl` output format, the output will have a `key` field\n \ - \ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ - \ the output will have have a `key`\n column instead of the instance\ - \ feature columns.\nThe input must be JSONL with objects at each line,\ - \ CSV, BigQuery\nor TfRecord." + \ of the field in [excluded_fields](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig).\ + \ In addition,\nthe batch prediction output will not include the instances.\ + \ Instead the\noutput will only include the value of the key field, in\ + \ a field named\n`key` in the output:\n * For `jsonl` output format, the\ + \ output will have a `key` field\n instead of the `instance` field.\n\ + \ * For `csv`/`bigquery` output format, the output will have have a `key`\n\ + \ column instead of the instance feature columns.\nThe input must be\ + \ JSONL with objects at each line, CSV, BigQuery\nor TfRecord." isOptional: true parameterType: STRING labels: @@ -7358,9 +7257,7 @@ components: parameterType: STRUCT location: defaultValue: us-central1 - description: 'Location for creating the BatchPredictionJob. - - If not set, default to us-central1.' + description: Location for creating the BatchPredictionJob. isOptional: true parameterType: STRING machine_type: @@ -7398,7 +7295,7 @@ components: batch not fitting in a machine''s memory, and the whole operation will - fail. The default value is 4.' + fail.' 
isOptional: true parameterType: NUMBER_INTEGER max_replica_count: @@ -7406,7 +7303,7 @@ components: description: 'The maximum number of machine replicas the batch operation may be scaled - to. Only used if `machine_type` is set. Default is 10.' + to. Only used if `machine_type` is set.' isOptional: true parameterType: NUMBER_INTEGER model_parameters: @@ -7417,13 +7314,19 @@ components: parameterType: STRUCT predictions_format: defaultValue: jsonl - description: "The format in which Vertex AI gives the predictions. Must\ - \ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ - \ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." + description: 'The format in which Vertex AI gives the predictions. Must + be one of the + + Model''s supportedOutputStorageFormats. + + For more details about this output config, see [OutputConfig](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig).' isOptional: true parameterType: STRING project: - description: Project to create the BatchPredictionJob. + defaultValue: '{{$.pipeline_google_cloud_project_id}}' + description: Project to create the BatchPredictionJob. Defaults to the project + in which the PipelineJob is run. + isOptional: true parameterType: STRING starting_replica_count: defaultValue: 0.0 @@ -7742,9 +7645,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from - classification evaluation component.' + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the + + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -7766,9 +7677,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -7788,24 +7699,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' 
isOptional: true parameters: dataset_path: @@ -7828,7 +7754,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. Must be provided when `metrics` is @@ -7837,6 +7765,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-2: @@ -7847,9 +7777,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -7871,9 +7809,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -7893,24 +7831,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -7933,7 +7886,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -7942,6 +7897,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-evaluation-import-3: @@ -7952,9 +7909,17 @@ components: artifactType: schemaTitle: google.ClassificationMetrics schemaVersion: 0.0.1 - description: 'Path of classification metrics generated from the + description: 'google.ClassificationMetrics artifact generated from + + the ModelEvaluationClassificationOp component.' + isOptional: true + embedding_metrics: + artifactType: + schemaTitle: system.Metrics + schemaVersion: 0.0.1 + description: 'The embedding metrics artifact generated from the - classification evaluation component.' + embedding retrieval metrics component.' isOptional: true explanation: artifactType: @@ -7976,9 +7941,9 @@ components: artifactType: schemaTitle: google.ForecastingMetrics schemaVersion: 0.0.1 - description: 'Path of forecasting metrics generated from the + description: 'google.ForecastingMetrics artifact generated from - forecasting evaluation component.' + the ModelEvaluationForecastingOp component.' isOptional: true metrics: artifactType: @@ -7998,24 +7963,39 @@ components: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.QuestionAnsweringMetrics.' isOptional: true regression_metrics: artifactType: schemaTitle: google.RegressionMetrics schemaVersion: 0.0.1 - description: 'Path of regression metrics generated from the regression + description: 'google.ClassificationMetrics artifact generated from - evaluation component.' + the ModelEvaluationRegressionOp component.' isOptional: true summarization_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.SummarizationMetrics.' isOptional: true text_generation_metrics: artifactType: schemaTitle: system.Metrics schemaVersion: 0.0.1 + description: 'system.Metrics artifact generated from + + the LLMEvaluationTextGenerationOp component. Subject to change to + + google.TextGenerationMetrics.' isOptional: true parameters: dataset_path: @@ -8038,7 +8018,9 @@ components: problem_type: description: 'The problem type of the metrics being imported to the - VertexModel. `classification`, `regression`, and `forecasting` are the + VertexModel. `classification`, `regression`, `forecasting`, + + `text-generation`, `question-answering`, and `summarization` are the currently supported problem types. 
Must be provided when `metrics` is @@ -8047,6 +8029,8 @@ components: parameterType: STRING outputDefinitions: parameters: + evaluation_resource_name: + parameterType: STRING gcp_resources: parameterType: STRING comp-model-upload: @@ -8582,9 +8566,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8625,9 +8609,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", @@ -8668,7 +8652,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8680,7 +8664,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", 
"us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8709,7 +8693,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8721,7 +8705,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", "{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8750,7 +8734,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", "\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", \"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", @@ -8762,7 +8746,7 @@ deploymentSpec: "\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", "\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", - \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325", + \"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125", "\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", 
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", "{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", @@ -8791,7 +8775,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' @@ -8806,7 +8790,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8815,7 +8799,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8824,7 +8808,7 @@ deploymentSpec: args: - --executor_input - '{{$}}' - image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125 resources: cpuLimit: 8.0 memoryLimit: 52.0 @@ -8844,9 +8828,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8891,9 +8875,9 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", - "\", 
\"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", + "\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", "\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", "\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", @@ -8938,7 +8922,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -8959,7 +8943,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -8990,7 +8974,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", "{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", @@ -9011,7 +8995,7 @@ deploymentSpec: \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_disk_size_gb=", 
"{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", "\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", @@ -9038,7 +9022,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9066,7 +9050,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9094,7 +9078,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9122,7 +9106,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9220,7 +9204,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9317,6 +9301,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9332,7 +9318,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9345,8 +9331,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9356,7 +9342,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-2: container: args: @@ -9368,6 +9354,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9383,7 +9371,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - '{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9396,8 +9384,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9407,7 +9395,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-feature-attribution-3: container: args: @@ -9419,6 +9407,8 @@ deploymentSpec: - '{{$.inputs.parameters[''project'']}}' - --location - '{{$.inputs.parameters[''location'']}}' + - --problem_type + - '{{$.inputs.parameters[''problem_type'']}}' - --root_dir - '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - --batch_prediction_format @@ -9434,7 +9424,7 @@ deploymentSpec: - --dataflow_service_account - '{{$.inputs.parameters[''dataflow_service_account'']}}' - --dataflow_disk_size - - '{{$.inputs.parameters[''dataflow_disk_size'']}}' + - 
'{{$.inputs.parameters[''dataflow_disk_size_gb'']}}' - --dataflow_machine_type - '{{$.inputs.parameters[''dataflow_machine_type'']}}' - --dataflow_workers_num @@ -9447,8 +9437,8 @@ deploymentSpec: - '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' - --kms_key_name - '{{$.inputs.parameters[''encryption_spec_key_name'']}}' - - --force_direct_runner - - '{{$.inputs.parameters[''force_direct_runner'']}}' + - --force_runner_mode + - '{{$.inputs.parameters[''force_runner_mode'']}}' - --gcs_output_path - '{{$.outputs.artifacts[''feature_attributions''].path}}' - --gcp_resources @@ -9458,7 +9448,7 @@ deploymentSpec: command: - python3 - /main.py - image: gcr.io/ml-pipeline/model-evaluation:v0.9 + image: gcr.io/ml-pipeline/model-evaluation:v0.9.2 exec-importer: importer: artifactUri: @@ -9478,7 +9468,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -9673,7 +9663,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-2: container: args: @@ -9722,7 +9712,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-3: container: args: @@ -9771,7 +9761,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-4: container: args: @@ -9820,7 +9810,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-batch-predict-5: container: args: @@ -9869,7 +9859,7 @@ deploymentSpec: - -u - -m - google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation: container: args: @@ -10085,6 +10075,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10103,12 +10095,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' 
command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-2: container: args: @@ -10129,6 +10123,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10147,12 +10143,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-evaluation-import-3: container: args: @@ -10173,6 +10171,8 @@ deploymentSpec: "{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", "{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' + - '{"IfPresent": {"InputName": "embedding_metrics", "Then": ["--embedding_metrics", + "{{$.inputs.artifacts[''embedding_metrics''].uri}}"]}}' - '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", "{{$.inputs.parameters[''problem_type'']}}"]}}' - --display_name @@ -10191,12 +10191,14 @@ deploymentSpec: - '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' - --gcp_resources - '{{$.outputs.parameters[''gcp_resources''].output_file}}' + - --evaluation_resource_name + - '{{$.outputs.parameters[''evaluation_resource_name''].output_file}}' command: - python3 - -u - -m - - google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation - image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 + - google_cloud_pipeline_components.container._implementation.model_evaluation.import_model_evaluation + image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.3.1 exec-model-upload: container: args: @@ -10296,7 +10298,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10330,7 +10332,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10364,7 +10366,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10408,7 +10410,7 @@ deploymentSpec: \ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ \ data_source_bigquery_table_path,\n model_display_name,\n )\n\ \n" - image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230910_1325 + image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20231002_0125 exec-string-not-empty: container: args: @@ -10421,7 +10423,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10455,7 +10457,7 @@ deploymentSpec: \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", - "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325", "\", + "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125", "\", \"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": \\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": \\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": @@ -10488,7 +10490,7 @@ deploymentSpec: \"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", - "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325", + "\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125", "\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", "\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", "\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", @@ -10526,7 +10528,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -10562,7 +10564,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-rc.2'\ \ && \"$0\" \"$@\"\n" - sh - -ec @@ -11207,4 +11209,4 @@ root: schemaTitle: system.Metrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 -sdkVersion: kfp-2.0.0-beta.17 +sdkVersion: kfp-2.0.0-rc.2 diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py index ffb9afb4a7..fc415969ff 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/cv_trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Tabular Cross Validation Trainer component spec.""" from typing import Optional @@ -98,11 +99,11 @@ def automl_tabular_cv_trainer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["l2l_cv_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', ( f'", "--component_id={dsl.PIPELINE_TASK_ID_PLACEHOLDER}",' ' "--training_base_dir=' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py index 5bcd942ee9..453050dcb7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/ensemble.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """AutoML Tabular Ensemble component spec.""" from typing import Optional @@ -105,7 +106,7 @@ def automl_tabular_ensemble( ' 1, "machine_spec": {"machine_type": "n1-highmem-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["ensemble", "--transform_output_path=', transform_output.uri, '", "--model_output_path=', @@ -136,7 +137,7 @@ def automl_tabular_ensemble( '", "--warmup_data=', warmup_data.uri, '", "--prediction_docker_uri=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', '", "--model_path=', model.uri, '", "--custom_model_path=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py index 6bd420b1a0..d272b5be4e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/finalizer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Pipeline Finalizer component spec.""" from typing import Optional @@ -71,7 +72,7 @@ def automl_tabular_finalizer( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["cancel_l2l_tuner", "--error_file_path=', root_dir, ( diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py index fd2fd96b10..ef9c5b38f7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/infra_validator.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Infra Validator component spec.""" from google_cloud_pipeline_components.types.artifact_types import UnmanagedContainerModel @@ -31,7 +32,7 @@ def automl_tabular_infra_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230910_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20231002_0125', command=[], args=['--executor_input', '{{$}}'], ) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py index 8f9b8dca8c..90ab7709f0 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/split_materialized_data.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + """AutoML Split Materialized Data component spec.""" from kfp import dsl @@ -51,7 +52,7 @@ def split_materialized_data( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', command=[ 'sh', '-ec', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py index 1664e75443..21dd5d48f2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stage_1_tuner.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Tabular Stage 1 Tuner component spec.""" from typing import Optional @@ -109,11 +110,11 @@ def automl_tabular_stage_1_tuner( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["l2l_stage_1_tuner", "--transform_output_path=', transform_output.uri, '", "--training_docker_uri=', - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "--feature_selection_result_path=', feature_ranking.uri, '", "--disable_early_stopping=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py index d3794ea3d4..7f1aee2194 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/stats_and_example_gen.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """AutoML Stats and Example Generation component spec.""" from typing import Optional @@ -138,7 +139,7 @@ def tabular_stats_and_example_gen( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', '", "args": ["stats_generator",', '"--train_spec={\\"prediction_type\\": \\"', prediction_type, @@ -217,7 +218,7 @@ def tabular_stats_and_example_gen( ), dataflow_max_num_workers, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_disk_size_gb=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py index 6ec30c3b44..13f5fda4ee 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/training_configurator_and_validator.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """AutoML Training Configurator and Validator component spec.""" from typing import Optional @@ -96,7 +97,7 @@ def training_configurator_and_validator( # fmt: on return dsl.ContainerSpec( - image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20230910_1325', + image='us-docker.pkg.dev/vertex-ai/automl-tabular/feature-transform-engine:20231002_0125', command=[], args=[ 'training_configurator_and_validator', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py index 8b1a366956..3565816924 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/transform.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ """AutoML Transform component spec.""" from typing import Optional @@ -108,7 +109,7 @@ def automl_tabular_transform( ' 1, "machine_spec": {"machine_type": "n1-standard-8"},' ' "container_spec": {"image_uri":"' ), - 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230910_1325', + 'us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20231002_0125', ( '", "args": ["transform", "--is_mp=true",' ' "--transform_output_artifact_path=' @@ -167,7 +168,7 @@ def automl_tabular_transform( '", "--dataflow_machine_type=', dataflow_machine_type, '", "--dataflow_worker_container_image=', - 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230910_1325', + 'us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20231002_0125', '", "--dataflow_disk_size_gb=', dataflow_disk_size_gb, '", "--dataflow_subnetwork_fully_qualified=', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py index 21221e5bd1..6889db79ae 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/tabular/utils.py @@ -135,10 +135,10 @@ def _get_default_pipeline_params( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -155,7 +155,7 @@ def _get_default_pipeline_params( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -209,7 +209,8 @@ def _get_default_pipeline_params( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ if not study_spec_parameters_override: study_spec_parameters_override = [] @@ -503,10 +504,10 @@ def get_automl_tabular_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. 
stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -523,7 +524,7 @@ def get_automl_tabular_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -574,7 +575,8 @@ def get_automl_tabular_pipeline_and_parameters( model_display_name: The display name of the uploaded Vertex model. model_description: The description for the uploaded model. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ parameter_values = _get_default_pipeline_params( project=project, @@ -666,7 +668,8 @@ def input_dictionary_to_parameter(input_dict: Optional[Dict[str, Any]]) -> str: Args: input_dict: The input json dictionary. - Returns: The encoded string used for parameter. + Returns: + The encoded string used for parameter. """ if not input_dict: return '' @@ -739,10 +742,10 @@ def get_skip_evaluation_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. 
stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -759,13 +762,14 @@ def get_skip_evaluation_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. additional_experiments: Use this field to config private preview features. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ return get_default_pipeline_and_parameters( project=project, @@ -880,10 +884,10 @@ def get_default_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -900,7 +904,7 @@ def get_default_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -927,7 +931,8 @@ def get_default_pipeline_and_parameters( distill_batch_predict_max_replica_count: The max number of prediction server for batch predict component in the model distillation. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ warnings.warn( 'This method is deprecated,' @@ -1179,7 +1184,7 @@ def get_skip_architecture_search_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. 
+ https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -1196,7 +1201,7 @@ def get_skip_architecture_search_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. @@ -1224,7 +1229,8 @@ def get_skip_architecture_search_pipeline_and_parameters( evaluation_dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation components. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ return get_automl_tabular_pipeline_and_parameters( @@ -1321,9 +1327,7 @@ def get_distill_skip_evaluation_pipeline_and_parameters( distill_batch_predict_starting_replica_count: int = 25, distill_batch_predict_max_replica_count: int = 25, ) -> Tuple[str, Dict[str, Any]]: - """Get the AutoML Tabular training pipeline that distill and skips. - - evaluation. + """Get the AutoML Tabular training pipeline that distill and skips evaluation. Args: project: The GCP project that runs the pipeline components. @@ -1356,10 +1360,10 @@ def get_distill_skip_evaluation_pipeline_and_parameters( is "maximize-recall-at-precision". Must be between 0 and 1, inclusive. stage_1_tuner_worker_pool_specs_override: The dictionary for overriding. stage 1 tuner worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. cv_trainer_worker_pool_specs_override: The dictionary for overriding stage cv trainer worker pool spec. The dictionary should be of format - https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. + https://github.com/googleapis/googleapis/blob/4e836c7c257e3e20b1de14d470993a2b1f4736a8/google/cloud/aiplatform/v1beta1/custom_job.proto#L172. export_additional_model_without_custom_ops: Whether to export additional model without custom TensorFlow operators. stats_and_example_gen_dataflow_machine_type: The dataflow machine type for @@ -1376,7 +1380,7 @@ def get_distill_skip_evaluation_pipeline_and_parameters( transform component. dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when empty the default subnetwork will be used. Example: - https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications dataflow_use_public_ips: Specifies whether Dataflow workers use public IP addresses. encryption_spec_key_name: The KMS key name. 
@@ -1388,7 +1392,8 @@ def get_distill_skip_evaluation_pipeline_and_parameters( distill_batch_predict_max_replica_count: The max number of prediction server for batch predict component in the model distillation. - Returns: Tuple of pipeline_definition_path and parameter_values. + Returns: + Tuple of pipeline_definition_path and parameter_values. """ warnings.warn( 'Depreciated. Please use get_automl_tabular_pipeline_and_parameters.' From 3886ae8637f59e54c9eefd6f8842828c9a3b235d Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 2 Oct 2023 17:24:17 -0700 Subject: [PATCH 183/253] chore(components): release GCPC SDK 2.4.1 PiperOrigin-RevId: 570225887 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 3 +++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index a949aa268c..6e45b90e6e 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -38,7 +38,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.4.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.4.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 35d7e2d22b..07250f9356 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,6 +1,9 @@ ## Upcoming release + +## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. * Fix the mismatched arguments in 2.4.0 for the Feature Transform Engine component. +* Apply latest GCPC image vulnerability resolutions (base OS and software updates). 
## Release 2.4.0 * Add support for running tasks on a `PersistentResource` (see [CustomJobSpec](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/CustomJobSpec)) via `persistent_resource_id` parameter on `preview.custom_job.CustomTrainingJobOp` and `preview.custom_job.create_custom_training_job_from_component` diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index cf299b70ad..7e03867497 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.4.1", + "title": "2.4.1", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.4.0", "title": "2.4.0", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 2b07bb0173..737994ce38 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. """Google Cloud Pipeline Components version.""" -__version__ = "2.4.0" +__version__ = "2.4.1" From f55ec3f74f4a300cc84a014b58c4ae83224e47fe Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 3 Oct 2023 14:22:30 -0700 Subject: [PATCH 184/253] docs(components): migrate to `build.os` (additional changes) PiperOrigin-RevId: 570496144 --- components/google-cloud/docs/.readthedocs.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/docs/.readthedocs.yml b/components/google-cloud/docs/.readthedocs.yml index 028312faf7..dc99a9d93a 100644 --- a/components/google-cloud/docs/.readthedocs.yml +++ b/components/google-cloud/docs/.readthedocs.yml @@ -3,11 +3,12 @@ version: 2 sphinx: configuration: components/google-cloud/docs/source/conf.py python: - version: 3.7 install: - method: pip path: components/google-cloud extra_requirements: - docs build: - os: ubuntu-22.04 \ No newline at end of file + os: ubuntu-22.04 + tools: + python: "3.7" \ No newline at end of file From b83b9a4644b3639c910015d5b583b9f10e71e46b Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 4 Oct 2023 10:42:12 -0700 Subject: [PATCH 185/253] docs(components): fix preview.custom_job.create_custom_training_job_from_component reference docs PiperOrigin-RevId: 570738849 --- .../preview/custom_job/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py index 73849ed29a..9651cc8467 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py @@ -75,7 +75,7 @@ def create_custom_training_job_from_component( This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. 
Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. - Args: + Args: component_spec: A KFP component. display_name: The name of the CustomJob. If not provided the component's name will be used instead. replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) @@ -97,7 +97,7 @@ def create_custom_training_job_from_component( labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) - Returns: + Returns: A KFP component with CustomJob specification applied. """ # fmt: on From 07156ae8a6c59b378de3a8e960cd7c703130037a Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 4 Oct 2023 12:23:09 -0700 Subject: [PATCH 186/253] fix(components): Minor update for chunking parameter name PiperOrigin-RevId: 570768999 --- .../_implementation/model_evaluation/chunking/component.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py index 79e8b7932c..291f480b9c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py @@ -29,7 +29,7 @@ def chunking( project: str, location: str, input_text_gcs_dir: str, - embedding_bq_uri: str, + output_bq_destination: str, display_name: str = 'chunking', machine_type: str = 'n1-standard-8', service_account: str = '', @@ -43,7 +43,7 @@ def chunking( location: The GCP region that runs the pipeline component. input_text_gcs_dir: the GCS directory containing the files to chunk. DO NOT include '/' at the end of the path. - embedding_bq_uri: The BigQuery table URI where the component will write + output_bq_destination: The BigQuery table URI where the component will write chunks to. display_name: The name of the chunking job/component. machine_type: The machine type of this custom job. 
@@ -78,7 +78,7 @@ def chunking( f'--location={location}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--input_text_gcs_dir={input_text_gcs_dir}', - f'--embedding_bq_uri={embedding_bq_uri}', + f'--output_bq_destination={output_bq_destination}', f'--gcp_resources={gcp_resources}', '--executor_input={{$.json_escape[1]}}', ], From 87005cc0157bb55b7e3f38d15a04bbce9aa139a4 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 4 Oct 2023 19:45:15 -0700 Subject: [PATCH 187/253] chore(sdk): wrap conditional branch groups in outer pipeline (#10034) --- sdk/python/kfp/compiler/compiler_test.py | 104 +++-- .../kfp/compiler/pipeline_spec_builder.py | 14 + sdk/python/kfp/dsl/tasks_group.py | 79 +++- .../test_data/pipelines/if_elif_else.yaml | 104 ++--- .../pipelines/if_elif_else_complex.py | 7 +- .../pipelines/if_elif_else_complex.yaml | 394 ++++++++++-------- sdk/python/test_data/pipelines/if_else.yaml | 68 +-- 7 files changed, 483 insertions(+), 287 deletions(-) diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index dcf68f17c5..a8b0f37215 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -4355,13 +4355,15 @@ def flip_coin_pipeline(): print_and_return(text='Got tails!') self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-2'] .trigger_policy.condition, "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" ) self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-3'] .trigger_policy.condition, "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads')" ) @@ -4379,18 +4381,21 @@ def flip_coin_pipeline(): print_and_return(text='Draw!') self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-2'] .trigger_policy.condition, "inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads'" ) self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-3'] .trigger_policy.condition, "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails'" ) self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-3'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-4'] .trigger_policy.condition, "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails')" ) @@ -4410,23 +4415,23 @@ def int_to_string(): print_and_return(text='Got three!') self.assertEqual( - int_to_string.pipeline_spec.root.dag.tasks['condition-1'] - .trigger_policy.condition, + int_to_string.pipeline_spec.components['comp-condition-branches-1'] + .dag.tasks['condition-2'].trigger_policy.condition, "int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0" ) self.assertEqual( - int_to_string.pipeline_spec.root.dag.tasks['condition-2'] - .trigger_policy.condition, + 
int_to_string.pipeline_spec.components['comp-condition-branches-1'] + .dag.tasks['condition-3'].trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1" ) self.assertEqual( - int_to_string.pipeline_spec.root.dag.tasks['condition-3'] - .trigger_policy.condition, + int_to_string.pipeline_spec.components['comp-condition-branches-1'] + .dag.tasks['condition-4'].trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2" ) self.assertEqual( - int_to_string.pipeline_spec.root.dag.tasks['condition-4'] - .trigger_policy.condition, + int_to_string.pipeline_spec.components['comp-condition-branches-1'] + .dag.tasks['condition-5'].trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2)" ) @@ -4452,39 +4457,77 @@ def flip_coin_pipeline(confirm: bool): with dsl.Else(): print_and_return(text='Got three!') - # top level conditions + # tests that the pipeline wrapper works well with multiple if/elif/else + with dsl.ParallelFor(['Game #1', 'Game #2']) as game_no: + heads_task = flip_coin() + with dsl.If(heads_task.output == 'heads'): + print_and_return(text=game_no) + print_and_return(text='Got heads!') + with dsl.Else(): + print_and_return(text=game_no) + print_and_return(text='Got tail!') + + # first group + ## top level conditions + ### if self.assertEqual( flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] .trigger_policy.condition, "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" ) - # second level nested conditions + ## second level nested conditions + ### if self.assertEqual( - flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag - .tasks['condition-2'].trigger_policy.condition, + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-2'].dag.tasks['condition-3'] + .trigger_policy.condition, "int(inputs.parameter_values[\'pipelinechannel--int-zero-through-three-Output\']) == 0" ) + ### elif self.assertEqual( - flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag - .tasks['condition-3'].trigger_policy.condition, + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-2'].dag.tasks['condition-4'] + .trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1" ) + ### elif #2 self.assertEqual( - flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag - .tasks['condition-5'].trigger_policy.condition, + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-2'].dag.tasks['condition-6'] + .trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2" ) + ### else self.assertEqual( - 
flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag - .tasks['condition-6'].trigger_policy.condition, + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-2'].dag.tasks['condition-7'] + .trigger_policy.condition, "!(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 0) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 1) && !(int(inputs.parameter_values['pipelinechannel--int-zero-through-three-Output']) == 2)" ) - # third level nested conditions + ## third level nested conditions + ### if self.assertEqual( - flip_coin_pipeline.pipeline_spec.components['comp-condition-3'].dag - .tasks['condition-4'].trigger_policy.condition, + flip_coin_pipeline.pipeline_spec.components['comp-condition-4'].dag + .tasks['condition-5'].trigger_policy.condition, "inputs.parameter_values['pipelinechannel--confirm'] == true") + # second group + + ## if + self.assertEqual( + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-10'].dag.tasks['condition-11'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'heads'" + ) + ## elif + self.assertEqual( + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-10'].dag.tasks['condition-12'] + .trigger_policy.condition, + "!(inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'heads')" + ) + def test_multiple_ifs_permitted(self): @dsl.pipeline @@ -4589,17 +4632,20 @@ def flip_coin_pipeline(): text=f'Coin three result: {flip_coin_task_3.output}') self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-1'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-2'] .trigger_policy.condition, "inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads'" ) self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-2'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-3'] .trigger_policy.condition, "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'tails'" ) self.assertEqual( - flip_coin_pipeline.pipeline_spec.root.dag.tasks['condition-3'] + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].dag.tasks['condition-4'] .trigger_policy.condition, "!(inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--flip-coin-2-Output'] == 'tails')" ) diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 3e242892d3..e6083d8ba9 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -1286,6 +1286,20 @@ def build_spec_by_group( is_parent_component_root=is_parent_component_root, ) + # handles the conditional group wrapping only + elif isinstance(subgroup, tasks_group.ConditionBranches): + subgroup_component_spec = build_component_spec_for_group( + input_pipeline_channels=subgroup_input_channels, + output_pipeline_channels={}, + ) + + subgroup_task_spec = build_task_spec_for_group( + group=subgroup, + pipeline_channels=subgroup_input_channels, + tasks_in_current_dag=tasks_in_current_dag, + is_parent_component_root=is_parent_component_root, + ) + else: raise RuntimeError( f'Unexpected task/group type: Got {subgroup} of type ' diff --git 
a/sdk/python/kfp/dsl/tasks_group.py b/sdk/python/kfp/dsl/tasks_group.py index 6bf6b63cc0..2d4bb8d693 100644 --- a/sdk/python/kfp/dsl/tasks_group.py +++ b/sdk/python/kfp/dsl/tasks_group.py @@ -28,6 +28,7 @@ class TasksGroupType(str, enum.Enum): """Types of TasksGroup.""" PIPELINE = 'pipeline' CONDITION = 'condition' + CONDITION_BRANCHES = 'condition-branches' FOR_LOOP = 'for-loop' EXIT_HANDLER = 'exit-handler' @@ -140,6 +141,16 @@ def __init__( self.exit_task = exit_task +class ConditionBranches(TasksGroup): + + def __init__(self) -> None: + super().__init__( + group_type=TasksGroupType.CONDITION_BRANCHES, + name=None, + is_root=False, + ) + + class _ConditionBase(TasksGroup): """Parent class for condition control flow context managers (Condition, If, Elif, Else). @@ -263,6 +274,18 @@ def __init__( name=name, ) + def __enter__(self): + if not pipeline_context.Pipeline.get_default_pipeline(): + raise ValueError('Default pipeline not defined.') + + pipeline = pipeline_context.Pipeline.get_default_pipeline() + + maybe_make_and_insert_conditional_branches_group(pipeline) + + self._make_name_unique() + pipeline.push_tasks_group(self) + return self + class Else(_ConditionBase): """A class for creating a conditional control flow "else" block within a @@ -293,7 +316,9 @@ def __init__( prev_cond = pipeline_context.Pipeline.get_default_pipeline( ).get_last_tasks_group() - if isinstance(prev_cond, Else): + # if it immediately follows as TasksGroup, this is because it immediately + # follows Else in the user code and we wrap Else in a TasksGroup + if isinstance(prev_cond, ConditionBranches): # prefer pushing toward dsl.If rather than dsl.Condition for syntactic consistency with the if-elif-else keywords in Python raise InvalidControlFlowException( 'Cannot use dsl.Else following another dsl.Else. dsl.Else can only be used following an upstream dsl.If or dsl.Elif.' 
@@ -309,6 +334,58 @@ def __init__( name=name, ) + def __enter__(self): + if not pipeline_context.Pipeline.get_default_pipeline(): + raise ValueError('Default pipeline not defined.') + + pipeline = pipeline_context.Pipeline.get_default_pipeline() + + maybe_make_and_insert_conditional_branches_group(pipeline) + + self._make_name_unique() + pipeline.push_tasks_group(self) + return self + + def __exit__(self, *unused_args): + pipeline = pipeline_context.Pipeline.get_default_pipeline() + pipeline.pop_tasks_group() + + # since this is an else, also pop off the parent dag for conditional branches + # this parent TasksGroup is not a context manager, so we simulate its + # __exit__ call with this + pipeline.pop_tasks_group() + + +def maybe_make_and_insert_conditional_branches_group( + pipeline: 'pipeline_context.Pipeline') -> None: + + already_has_pipeline_wrapper = isinstance( + pipeline.get_last_tasks_group(), + Elif, + ) + if already_has_pipeline_wrapper: + return + + condition_wrapper_group = ConditionBranches() + condition_wrapper_group._make_name_unique() + + # swap outer and inner group ids so that numbering stays sequentially consistent with how such hypothetical code would be authored + def swap_group_ids(parent: TasksGroup, cond: TasksGroup): + parent_name, parent_id = parent.name.rsplit('-', 1) + cond_name, cond_id = cond.name.split('-') + cond.name = f'{cond_name}-{parent_id}' + parent.name = f'{parent_name}-{cond_id}' + + # replace last pushed group (If or Elif) with condition group + last_pushed_group = pipeline.groups[-1].groups.pop() + swap_group_ids(condition_wrapper_group, last_pushed_group) + pipeline.push_tasks_group(condition_wrapper_group) + + # then repush (__enter__) and pop (__exit__) the last pushed group + # before the wrapper to emulate re-entering and exiting its context + pipeline.push_tasks_group(last_pushed_group) + pipeline.pop_tasks_group() + class InvalidControlFlowException(Exception): pass diff --git a/sdk/python/test_data/pipelines/if_elif_else.yaml b/sdk/python/test_data/pipelines/if_elif_else.yaml index a222a43d73..3887ce09a9 100644 --- a/sdk/python/test_data/pipelines/if_elif_else.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else.yaml @@ -1,7 +1,7 @@ # PIPELINE DEFINITION # Name: roll-die-pipeline components: - comp-condition-1: + comp-condition-2: dag: tasks: print-and-return: @@ -20,7 +20,7 @@ components: parameters: pipelinechannel--flip-three-sided-die-Output: parameterType: STRING - comp-condition-2: + comp-condition-3: dag: tasks: print-and-return-2: @@ -39,7 +39,7 @@ components: parameters: pipelinechannel--flip-three-sided-die-Output: parameterType: STRING - comp-condition-3: + comp-condition-4: dag: tasks: print-and-return-3: @@ -58,6 +58,51 @@ components: parameters: pipelinechannel--flip-three-sided-die-Output: parameterType: STRING + comp-condition-branches-1: + dag: + tasks: + condition-2: + componentRef: + name: comp-condition-2 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-2 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-three-sided-die-Output'] + == 'heads' + condition-3: + componentRef: + name: comp-condition-3 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-3 + triggerPolicy: + condition: 
'!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails''' + condition-4: + componentRef: + name: comp-condition-4 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-4 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && !(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails'')' + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING comp-flip-three-sided-die: executorLabel: exec-flip-three-sided-die outputDefinitions: @@ -108,7 +153,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -139,7 +184,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -168,7 +213,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -197,7 +242,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -219,42 +264,9 @@ pipelineInfo: root: dag: tasks: - condition-1: - componentRef: - name: comp-condition-1 - dependentTasks: - - flip-three-sided-die - inputs: - parameters: - pipelinechannel--flip-three-sided-die-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: flip-three-sided-die - taskInfo: - name: condition-1 - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--flip-three-sided-die-Output'] - == 'heads' - condition-2: - componentRef: - name: comp-condition-2 - dependentTasks: - - flip-three-sided-die - inputs: - parameters: - pipelinechannel--flip-three-sided-die-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: flip-three-sided-die - taskInfo: - name: condition-2 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''heads'') && inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''tails''' - condition-3: + condition-branches-1: componentRef: - name: comp-condition-3 + name: comp-condition-branches-1 dependentTasks: - flip-three-sided-die inputs: @@ -264,11 +276,7 @@ root: outputParameterKey: Output producerTask: flip-three-sided-die taskInfo: - name: condition-3 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''heads'') && !(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''tails'')' + name: condition-branches-1 flip-three-sided-die: cachingOptions: enableCache: true @@ -277,4 +285,4 @@ root: taskInfo: name: flip-three-sided-die schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.py b/sdk/python/test_data/pipelines/if_elif_else_complex.py index 42623cb508..45efe58cd2 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.py +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.py @@ -39,6 +39,11 @@ def print_strings(strings: List[str]): print(strings) +@dsl.component +def print_ints(ints: List[int]): + print(ints) + + @dsl.pipeline def lucky_number_pipeline(add_drumroll: bool = True, repeat_if_lucky_number: bool = True, @@ -77,7 +82,7 @@ def lucky_number_pipeline(add_drumroll: bool = True, text='Announcing again: Got the lucky number 5000! A one in 10,000 chance.' 
) - print_strings(strings=dsl.Collected(even_or_odd_task.output)) + print_ints(ints=dsl.Collected(int_task.output)) if __name__ == '__main__': diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml index ca7f09b1a1..0726ea48e0 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml @@ -5,12 +5,54 @@ # repeat_if_lucky_number: bool [Default: True] # trials: list [Default: [1.0, 2.0, 3.0]] components: - comp-condition-10: + comp-condition-11: dag: tasks: - condition-11: + print-and-return-4: + cachingOptions: + enableCache: true componentRef: - name: comp-condition-11 + name: comp-print-and-return-4 + inputs: + parameters: + text: + runtimeValue: + constant: Got a high even number! + taskInfo: + name: print-and-return-4 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-2-Output: + parameterType: STRING + comp-condition-12: + dag: + tasks: + print-and-return-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-5 + inputs: + parameters: + text: + runtimeValue: + constant: Got a high odd number! + taskInfo: + name: print-and-return-5 + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-2-Output: + parameterType: STRING + comp-condition-13: + dag: + tasks: + condition-14: + componentRef: + name: comp-condition-14 inputs: parameters: pipelinechannel--int-0-to-9999-Output: @@ -18,7 +60,7 @@ components: pipelinechannel--repeat_if_lucky_number: componentInputParameter: pipelinechannel--repeat_if_lucky_number taskInfo: - name: condition-11 + name: condition-14 triggerPolicy: condition: inputs.parameter_values['pipelinechannel--repeat_if_lucky_number'] == true @@ -41,12 +83,12 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--repeat_if_lucky_number: parameterType: BOOLEAN - comp-condition-11: + comp-condition-14: dag: tasks: - for-loop-13: + for-loop-16: componentRef: - name: comp-for-loop-13 + name: comp-for-loop-16 inputs: parameters: pipelinechannel--int-0-to-9999-Output: @@ -54,11 +96,11 @@ components: pipelinechannel--repeat_if_lucky_number: componentInputParameter: pipelinechannel--repeat_if_lucky_number parameterIterator: - itemInput: pipelinechannel--loop-item-param-12 + itemInput: pipelinechannel--loop-item-param-15 items: raw: '[1, 2]' taskInfo: - name: for-loop-13 + name: for-loop-16 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: @@ -109,61 +151,7 @@ components: parameterType: BOOLEAN pipelinechannel--trials-loop-item: parameterType: NUMBER_INTEGER - comp-condition-4: - dag: - tasks: - condition-5: - componentRef: - name: comp-condition-5 - dependentTasks: - - is-even-or-odd - inputs: - parameters: - pipelinechannel--int-0-to-9999-Output: - componentInputParameter: pipelinechannel--int-0-to-9999-Output - pipelinechannel--is-even-or-odd-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: is-even-or-odd - taskInfo: - name: condition-5 - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-Output'] - == 'even' - condition-6: - componentRef: - name: comp-condition-6 - dependentTasks: - - is-even-or-odd - inputs: - parameters: - pipelinechannel--int-0-to-9999-Output: - componentInputParameter: pipelinechannel--int-0-to-9999-Output - 
pipelinechannel--is-even-or-odd-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: is-even-or-odd - taskInfo: - name: condition-6 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-Output''] - == ''even'')' - is-even-or-odd: - cachingOptions: - enableCache: true - componentRef: - name: comp-is-even-or-odd - inputs: - parameters: - num: - componentInputParameter: pipelinechannel--int-0-to-9999-Output - taskInfo: - name: is-even-or-odd - inputDefinitions: - parameters: - pipelinechannel--int-0-to-9999-Output: - parameterType: NUMBER_INTEGER - comp-condition-5: + comp-condition-6: dag: tasks: print-and-return-2: @@ -184,7 +172,7 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-Output: parameterType: STRING - comp-condition-6: + comp-condition-7: dag: tasks: print-and-return-3: @@ -205,36 +193,45 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-Output: parameterType: STRING - comp-condition-7: + comp-condition-8: dag: - outputs: - parameters: - pipelinechannel--is-even-or-odd-2-Output: - valueFromParameter: - outputParameterKey: Output - producerSubtask: is-even-or-odd-2 tasks: - condition-8: + condition-branches-5: componentRef: - name: comp-condition-8 + name: comp-condition-branches-5 dependentTasks: - - is-even-or-odd-2 + - is-even-or-odd inputs: parameters: pipelinechannel--int-0-to-9999-Output: componentInputParameter: pipelinechannel--int-0-to-9999-Output - pipelinechannel--is-even-or-odd-2-Output: + pipelinechannel--is-even-or-odd-Output: taskOutputParameter: outputParameterKey: Output - producerTask: is-even-or-odd-2 + producerTask: is-even-or-odd taskInfo: - name: condition-8 - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-2-Output'] - == 'even' - condition-9: + name: condition-branches-5 + is-even-or-odd: + cachingOptions: + enableCache: true componentRef: - name: comp-condition-9 + name: comp-is-even-or-odd + inputs: + parameters: + num: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + taskInfo: + name: is-even-or-odd + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + comp-condition-9: + dag: + tasks: + condition-branches-10: + componentRef: + name: comp-condition-branches-10 dependentTasks: - is-even-or-odd-2 inputs: @@ -246,10 +243,7 @@ components: outputParameterKey: Output producerTask: is-even-or-odd-2 taskInfo: - name: condition-9 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-2-Output''] - == ''even'')' + name: condition-branches-10 is-even-or-odd-2: cachingOptions: enableCache: true @@ -265,80 +259,138 @@ components: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER - outputDefinitions: - parameters: - pipelinechannel--is-even-or-odd-2-Output: - parameterType: NUMBER_INTEGER - comp-condition-8: + comp-condition-branches-10: dag: tasks: - print-and-return-4: - cachingOptions: - enableCache: true + condition-11: componentRef: - name: comp-print-and-return-4 + name: comp-condition-11 inputs: parameters: - text: - runtimeValue: - constant: Got a high even number! 
+ pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-2-Output: + componentInputParameter: pipelinechannel--is-even-or-odd-2-Output taskInfo: - name: print-and-return-4 + name: condition-11 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-2-Output'] + == 'even' + condition-12: + componentRef: + name: comp-condition-12 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-2-Output: + componentInputParameter: pipelinechannel--is-even-or-odd-2-Output + taskInfo: + name: condition-12 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-2-Output''] + == ''even'')' inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-2-Output: parameterType: STRING - comp-condition-9: + comp-condition-branches-4: dag: tasks: - print-and-return-5: - cachingOptions: - enableCache: true + condition-13: componentRef: - name: comp-print-and-return-5 + name: comp-condition-13 inputs: parameters: - text: - runtimeValue: - constant: Got a high odd number! + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: pipelinechannel--repeat_if_lucky_number taskInfo: - name: print-and-return-5 + name: condition-13 + triggerPolicy: + condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + < 5000) && !(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + > 5000)' + condition-8: + componentRef: + name: comp-condition-8 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + taskInfo: + name: condition-8 + triggerPolicy: + condition: int(inputs.parameter_values['pipelinechannel--int-0-to-9999-Output']) + < 5000 + condition-9: + componentRef: + name: comp-condition-9 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + taskInfo: + name: condition-9 + triggerPolicy: + condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + < 5000) && int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) + > 5000' inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER - pipelinechannel--is-even-or-odd-2-Output: - parameterType: STRING - comp-for-loop-1: + pipelinechannel--repeat_if_lucky_number: + parameterType: BOOLEAN + comp-condition-branches-5: dag: - outputs: - parameters: - pipelinechannel--is-even-or-odd-2-Output: - valueFromParameter: - outputParameterKey: pipelinechannel--is-even-or-odd-2-Output - producerSubtask: condition-7 tasks: - condition-10: + condition-6: componentRef: - name: comp-condition-10 - dependentTasks: - - int-0-to-9999 + name: comp-condition-6 inputs: parameters: pipelinechannel--int-0-to-9999-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: int-0-to-9999 - pipelinechannel--repeat_if_lucky_number: - componentInputParameter: pipelinechannel--repeat_if_lucky_number + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-Output: + componentInputParameter: pipelinechannel--is-even-or-odd-Output 
taskInfo: - name: condition-10 + name: condition-6 triggerPolicy: - condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) - < 5000) && !(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) - > 5000)' + condition: inputs.parameter_values['pipelinechannel--is-even-or-odd-Output'] + == 'even' + condition-7: + componentRef: + name: comp-condition-7 + inputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + componentInputParameter: pipelinechannel--int-0-to-9999-Output + pipelinechannel--is-even-or-odd-Output: + componentInputParameter: pipelinechannel--is-even-or-odd-Output + taskInfo: + name: condition-7 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--is-even-or-odd-Output''] + == ''even'')' + inputDefinitions: + parameters: + pipelinechannel--int-0-to-9999-Output: + parameterType: NUMBER_INTEGER + pipelinechannel--is-even-or-odd-Output: + parameterType: STRING + comp-for-loop-1: + dag: + outputs: + parameters: + pipelinechannel--int-0-to-9999-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: int-0-to-9999 + tasks: condition-2: componentRef: name: comp-condition-2 @@ -353,25 +405,9 @@ components: triggerPolicy: condition: inputs.parameter_values['pipelinechannel--add_drumroll'] == true - condition-4: - componentRef: - name: comp-condition-4 - dependentTasks: - - int-0-to-9999 - inputs: - parameters: - pipelinechannel--int-0-to-9999-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: int-0-to-9999 - taskInfo: - name: condition-4 - triggerPolicy: - condition: int(inputs.parameter_values['pipelinechannel--int-0-to-9999-Output']) - < 5000 - condition-7: + condition-branches-4: componentRef: - name: comp-condition-7 + name: comp-condition-branches-4 dependentTasks: - int-0-to-9999 inputs: @@ -380,12 +416,10 @@ components: taskOutputParameter: outputParameterKey: Output producerTask: int-0-to-9999 + pipelinechannel--repeat_if_lucky_number: + componentInputParameter: pipelinechannel--repeat_if_lucky_number taskInfo: - name: condition-7 - triggerPolicy: - condition: '!(int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) - < 5000) && int(inputs.parameter_values[''pipelinechannel--int-0-to-9999-Output'']) - > 5000' + name: condition-branches-4 int-0-to-9999: cachingOptions: {} componentRef: @@ -404,9 +438,9 @@ components: parameterType: NUMBER_INTEGER outputDefinitions: parameters: - pipelinechannel--is-even-or-odd-2-Output: + pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER - comp-for-loop-13: + comp-for-loop-16: dag: tasks: print-and-return-7: @@ -426,7 +460,7 @@ components: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER - pipelinechannel--loop-item-param-12: + pipelinechannel--loop-item-param-15: parameterType: NUMBER_INTEGER pipelinechannel--repeat_if_lucky_number: parameterType: BOOLEAN @@ -526,11 +560,11 @@ components: parameters: Output: parameterType: STRING - comp-print-strings: - executorLabel: exec-print-strings + comp-print-ints: + executorLabel: exec-print-ints inputDefinitions: parameters: - strings: + ints: parameterType: LIST deploymentSpec: executors: @@ -546,7 +580,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -575,7 +609,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -604,7 +638,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -633,7 +667,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -662,7 +696,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -691,7 +725,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -720,7 +754,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -749,7 +783,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -778,7 +812,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -807,7 +841,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -824,19 +858,19 @@ deploymentSpec: \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ \ text\n\n" image: python:3.7 - exec-print-strings: + exec-print-ints: container: args: - --executor_input - '{{$}}' - --function_to_execute - - print_strings + - print_ints command: - sh - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -850,7 +884,7 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef print_strings(strings: List[str]):\n print(strings)\n\n" + \ *\n\ndef print_ints(ints: List[int]):\n print(ints)\n\n" image: python:3.7 pipelineInfo: name: lucky-number-pipeline @@ -874,21 +908,21 @@ root: inputParameter: pipelinechannel--trials taskInfo: name: for-loop-1 - print-strings: + print-ints: cachingOptions: enableCache: true componentRef: - name: comp-print-strings + name: comp-print-ints dependentTasks: - for-loop-1 inputs: parameters: - strings: + ints: taskOutputParameter: - outputParameterKey: pipelinechannel--is-even-or-odd-2-Output + outputParameterKey: pipelinechannel--int-0-to-9999-Output producerTask: for-loop-1 taskInfo: - name: print-strings + name: print-ints inputDefinitions: parameters: add_drumroll: @@ -907,4 +941,4 @@ root: isOptional: true parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/if_else.yaml b/sdk/python/test_data/pipelines/if_else.yaml index 02232f779f..bdd9a8d0cb 100644 --- a/sdk/python/test_data/pipelines/if_else.yaml +++ b/sdk/python/test_data/pipelines/if_else.yaml @@ -1,7 +1,7 @@ # PIPELINE DEFINITION # Name: flip-coin-pipeline components: - comp-condition-1: + comp-condition-2: dag: tasks: print-and-return: @@ -20,7 +20,7 @@ components: parameters: pipelinechannel--flip-coin-Output: parameterType: STRING - comp-condition-2: + 
comp-condition-3: dag: tasks: print-and-return-2: @@ -39,6 +39,37 @@ components: parameters: pipelinechannel--flip-coin-Output: parameterType: STRING + comp-condition-branches-1: + dag: + tasks: + condition-2: + componentRef: + name: comp-condition-2 + inputs: + parameters: + pipelinechannel--flip-coin-Output: + componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: condition-2 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] + == 'heads' + condition-3: + componentRef: + name: comp-condition-3 + inputs: + parameters: + pipelinechannel--flip-coin-Output: + componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: condition-3 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] + == ''heads'')' + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING comp-flip-coin: executorLabel: exec-flip-coin outputDefinitions: @@ -79,7 +110,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -108,7 +139,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -137,7 +168,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -159,25 +190,9 @@ pipelineInfo: root: dag: tasks: - condition-1: - componentRef: - name: comp-condition-1 - dependentTasks: - - flip-coin - inputs: - parameters: - pipelinechannel--flip-coin-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: flip-coin - taskInfo: - name: condition-1 - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] - == 'heads' - condition-2: + condition-branches-1: componentRef: - name: comp-condition-2 + name: comp-condition-branches-1 dependentTasks: - flip-coin inputs: @@ -187,10 +202,7 @@ root: outputParameterKey: Output producerTask: flip-coin taskInfo: - name: condition-2 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] - == ''heads'')' + name: condition-branches-1 flip-coin: cachingOptions: enableCache: true @@ -199,4 +211,4 @@ root: taskInfo: name: flip-coin schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 From 2fbabd9eb8b2dac8b4190468d71e30cd7cf96237 Mon Sep 17 00:00:00 2001 From: Tommy Li Date: Wed, 4 Oct 2023 23:14:15 -0700 Subject: [PATCH 188/253] chore(component): Update KServe component to support Kubeflow 1.8 (#10062) --- components/kserve/README.md | 2 +- components/kserve/component.yaml | 2 +- components/kserve/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/kserve/README.md b/components/kserve/README.md index 52775aa5c5..66f0e59b9f 100644 --- a/components/kserve/README.md +++ b/components/kserve/README.md @@ -4,7 +4,7 @@ Organization: KServe Organization Description: KServe is a highly scalable and standards based Model Inference Platform on Kubernetes for Trusted AI -Version information: KServe 0.10.1. Works for Kubeflow 1.7 +Version information: KServe 0.11.1. 
Works for Kubeflow 1.8 **Note:** To use the KServe 0.7.0 version of this component which runs on Kubeflow 1.5, then change the load_component_from_url in the usage section with the following YAML instead: ``` diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml index 2a352d96f8..4bdcaac7b5 100644 --- a/components/kserve/component.yaml +++ b/components/kserve/component.yaml @@ -22,7 +22,7 @@ outputs: - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} implementation: container: - image: quay.io/aipipeline/kserve-component:v0.10.1 + image: quay.io/aipipeline/kserve-component:v0.11.1 command: ['python'] args: [ -u, kservedeployer.py, diff --git a/components/kserve/requirements.txt b/components/kserve/requirements.txt index 2be51060b8..bdab9d8f8a 100644 --- a/components/kserve/requirements.txt +++ b/components/kserve/requirements.txt @@ -1,2 +1,2 @@ -kserve==0.10.1 +kserve==0.11.1 protobuf~=3.19.0 From 271d4ebfafa5a3fab7f100212fd14e1eb28421bd Mon Sep 17 00:00:00 2001 From: Kelli Belcher Date: Thu, 5 Oct 2023 15:35:15 -0700 Subject: [PATCH 189/253] Intel oneAPI XGBoost daal4py example pipeline (#10044) * Intel oneAPI XGBoost daal4py sample pipeline * Intel oneAPI XGBoost daal4py sample pipeline * Intel oneAPI XGBoost daal4py sample pipeline --- .../contrib/intel-oneapi-samples/README.md | 164 ++++++ .../intel-xgb-d4p-pipeline-roc-curve.png | Bin 0 -> 40353 bytes .../assets/intel-xgb-d4p-pipeline.png | Bin 0 -> 209443 bytes .../assets/logo-classicblue-800px.png | Bin 0 -> 17008 bytes .../intel_xgboost_daal4py_pipeline.py | 502 ++++++++++++++++++ 5 files changed, 666 insertions(+) create mode 100644 samples/contrib/intel-oneapi-samples/README.md create mode 100644 samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline-roc-curve.png create mode 100644 samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline.png create mode 100644 samples/contrib/intel-oneapi-samples/assets/logo-classicblue-800px.png create mode 100644 samples/contrib/intel-oneapi-samples/intel_xgboost_daal4py_pipeline.py diff --git a/samples/contrib/intel-oneapi-samples/README.md b/samples/contrib/intel-oneapi-samples/README.md new file mode 100644 index 0000000000..451a1f2787 --- /dev/null +++ b/samples/contrib/intel-oneapi-samples/README.md @@ -0,0 +1,164 @@ +

+![Intel Logo](assets/logo-classicblue-800px.png)

+ +# Intel® Optimized XGBoost daal4py Kubeflow Pipeline + +This example demonstrates how to optimize an XGBoost Kubeflow Pipeline using a sample +dataset to predict the probability of loan default. +The reference solution enables the use of the +[Intel® Optimization for XGBoost*](https://www.intel.com/content/www/us/en/developer/tools/oneapi/optimization-for-xgboost.html), +[Intel® oneAPI Data Analytics Library (Intel® oneDAL)](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html), +and [Intel® Extension for Scikit-Learn*](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) +to accelerate an end-to-end training and inference XGBoost pipeline. + +## Table of Contents +- [System Requirements](#system-requirements) +- [Overview](#pipeline-overview) +- [Pipeline Optimizations](#pipeline-optimizations) +- [Pipeline Parameters](#pipeline-parameters) +- [Pipeline Results](#pipeline-results) + +## System Requirements + +- Before running the code for the pipeline, please ensure you have downloaded and installed +[Kubeflow Pipelines SDK](https://v1-5-branch.kubeflow.org/docs/components/pipelines/sdk-v2/) +v2.0.1 or above. +- To attain the most performance benefits from the Intel software optimizations, deploy the +pipeline on a 3rd or 4th Generation [Intel® Xeon® Processor](https://www.intel.com/content/www/us/en/products/details/processors/xeon.html). + +## Pipeline Overview + +This pipeline is derived from the +[Loan Default Risk Prediction AI Reference Kit](https://github.com/oneapi-src/loan-default-risk-prediction). +The code has been enhanced through refactoring to achieve better modularity and suitability for +Kubeflow Pipelines. The credit risk data set used in the pipeline is obtained from +[Kaggle](https://www.kaggle.com/datasets/laotse/credit-risk-dataset)* +and synthetically augmented for testing and benchmarking purposes. Below is a graph of the full +XGBoost daal4py Kubeflow Pipeline. + +

+![Intel XGBoost daal4py Pipeline](assets/intel-xgb-d4p-pipeline.png)

+ +The pipeline consists of the following seven components: +- **Load data**: This component loads the dataset (`credit_risk_dataset.csv`) from the URL specified +in the pipeline run parameters and performs synthetic data augmentation. +- **Create training and test sets**: This component splits the data into training and test sets of an +approximately 75:25 split for model evaluation. +- **Preprocess features**: This component transforms the categorical features of the training and +test sets by using one-hot encoding, imputes missing values, and power-transforms numerical features. +- **Train XGBoost model**: This component trains an XGBoost model using the accelerations provided by +the Intel Optimizations for XGBoost. +- **Convert XGBoost model to daal4py**: This component converts the XGBoost model to an +inference-optimized daal4py classifier. +- **daal4py Inference**: This component computes predictions using the inference-optimized daal4py +classifier and evaluates model performance. It returns a summary of the precision, recall, and F1 +score for each class, as well as the area under the curve (AUC) and accuracy score of the model. +- **Plot ROC Curve**: This component performs model validation on the test data and generates a +graph of the receiver operating characteristic (ROC) curve. + +[Back to Table of Contents](#table-of-contents) + +## Pipeline Optimizations + +#### Enable the Intel Optimization for XGBoost + +The [XGBoost optimizations](https://www.intel.com/content/www/us/en/developer/tools/oneapi/optimization-for-xgboost.html) +for training and inference on CPUs are upstreamed into the open source XGBoost framework. +Ensure you are using the latest version of XGBoost to access the most Intel optimizations. +The following code sample is implemented in the `train_xgboost_model` component. + +``` +dtrain = xgb.DMatrix(X_train.values, y_train.values) + +# define model parameters +params = { + "objective": "binary:logistic", + "eval_metric": "logloss", + "nthread": 4, # num_cpu + "tree_method": "hist", + "learning_rate": 0.02, + "max_depth": 10, + "min_child_weight": 6, + "n_jobs": 4, # num_cpu, + "verbosity": 1} + +# train XGBoost model +clf = xgb.train(params = params, + dtrain = dtrain, + num_boost_round = 500) +``` + +#### Convert the Trained XGBoost Model to daal4py + +[daal4py](https://www.intel.com/content/www/us/en/developer/articles/guide/a-daal4py-introduction-and-getting-started-guide.html) +is the Python API of the oneAPI Data Analytics Library, oneDAL. daal4py helps to further +optimize model prediction, or inference, on CPUs. The following code demonstrates how to +convert a trained XGBoost model into daal4py format and calculate the predicted +classification results, implemented in the `convert_xgboost_to_daal4py` and `daal4py_inference` +components. + +``` +# convert XGBoost model to daal4py +daal_model = d4p.get_gbt_model_from_xgboost(clf) + + +# compute class labels and probabilities +daal_prediction = d4p.gbt_classification_prediction( + nClasses = 2, + resultsToEvaluate = "computeClassLabels|computeClassProbabilities" +).compute(X_test, daal_model) +``` + +#### Enable the Intel Extension for Scikit-Learn +The [Intel Extension for Scikit-Learn](https://www.intel.com/content/www/us/en/developer/tools/oneapi/scikit-learn.html) +provides CPU accelerations for many scikit-learn libraries. Below is an example +using the scikit-learn extension to accelerate the computation of the ROC curve. +The following code is implemented in the `plot_roc_curve` component. 
+ +``` +# call patch_sklearn() before importing scikit-learn libraries +from sklearnex import patch_sklearn +patch_sklearn() +from sklearn.metrics import roc_curve + + +# calculate the ROC curve using the CPU-accelerated version +fpr, tpr, thresholds = roc_curve( + y_true = prediction_data['y_test'], + y_score = prediction_data['y_prob'], + pos_label = 1) +``` + +[Back to Table of Contents](#table-of-contents) + +## Pipeline Parameters + +The XGBoost daal4py Kubeflow Pipeline consists of the following two parameters: +- `data_url`: The sample dataset can be downloaded from +[Kaggle](https://www.kaggle.com/datasets/laotse/credit-risk-dataset) +and hosted on a public URL of your choice. +- `data_size`: The recommended data size for the pipeline is 1 million. + +## Pipeline Results + +When the Pipeline tasks `daal4py-inference` and `plot-roc-curve` are finished running, +click on the Visualization tab of the `metrics` and `roc_curve_daal4py` artifacts to +view the model performance results. You should see a similar graph of the receiver +operating characteristic (ROC) curve as the one below. + +

+![ROC Curve](assets/intel-xgb-d4p-pipeline-roc-curve.png)
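+
+To reproduce these artifacts, compile `intel_xgboost_daal4py_pipeline.py` and submit a
+run with the two parameters described above. The snippet below is only a sketch: the
+KFP host, the compiled package file name, and the dataset URL are placeholders to
+replace with your own values.
+
+```
+# Sketch only: host, package file name, and data_url are placeholders.
+from kfp.client import Client
+
+client = Client(host='http://<your-kfp-endpoint>')
+client.create_run_from_pipeline_package(
+    'intel_xgboost_daal4py_pipeline.yaml',
+    arguments={
+        'data_url': 'https://<your-public-url>/credit_risk_dataset.csv',
+        'data_size': 1000000,
+    },
+)
+```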

+ +[Back to Table of Contents](#table-of-contents) + +## Next Steps + +Thanks for checking out this tutorial! If you would like to implement this reference solution +on a cloud service provider like AWS, Azure, or GCP, you can view the full deployment steps, +as well as additional Intel® Optimized Cloud Modules +[here](https://www.intel.com/content/www/us/en/developer/topic-technology/cloud-optimization.html). + +[Back to Table of Contents](#table-of-contents) \ No newline at end of file diff --git a/samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline-roc-curve.png b/samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline-roc-curve.png new file mode 100644 index 0000000000000000000000000000000000000000..73d0b530ac6abc47a648a92b8166b8eafbfc3dfd GIT binary patch literal 40353 zcmeFZXH-*dw>BC@1+gQbpn#zCCSAH9z4zWE6hR=;2~`C|q)Bf=M0)RHD2bI0Q9203 zLg)}lNFW5rw}Q{R_q)Hn|C~SP{5U&fFj$0S-DS3GUUS|n-pD}n96c*N2n0H(t)*rH z0-cBjfoO#2P66-q?=qYOejW2S(NqQ14PRdY{y5>TqOSr1HKv@|zfb+==|@`D{vZ%T z0QKkCyciJt2;tD1^>Q`_`3IdD^g zLW)G?&P}gJ?5uC*D%O{$_zt@U1vlYcMX(3q%IG(GS-`Y_*L&j^Ll6I6-k~eHcy#rc z<`Vs}qpLd?|37@ImXGnp7w0sC@}aKS%I{V6f2Z_*&nAekC-F9%ZN+BK(izki=-^-Q~It3T%I&JZ1H} z50Z$&)F_9>Lz#iEHsn4j_9KtC4SYN1AEjQHMXp2GY=(8u7Puh*q1Pao2#kBbvmI|? zzY~4B4x%Zf)Nq31g_VtP$32{7;Ej?_N(44F+cCUoo~lo;)>k02LK0|28c;)!1tUFw zeJDn7K@p44fhceP*};58M%;r4I+9i=F>%U!jq{P%uzBt%+zaK2@4&aMZ0~78vcIRW zk7x2*E}55C{Psk@o8>jU7RRf2kTeUQMxos6>c^Gw&6=*gv&yZ`8WD35)8x>O!}jl( z!g`l_q}T!P8a6f9YYz!tF;^n(5Qx~gp<>y4e%sBaO9by^gSoKcCO*nOSLC6@PT!E9 zIWcOqlI%GpE&#Im$1$7;uH4!(_`x7)c z!aWKPntVuE24mY-3vhXsND~MR2;>x@(2l+2@$NS}e5o*O!Q1_+NYkZ#*VUY#nC zhh^p)o`(1+!A&;@m6e4DbHD2#EM!|mg&-r2WHI<2#xj1E?BZ}k>XzZ*x-oopT_Q=5 zfZd%|PghY79Eems&M3HKKsrSaYInu+F#9Niy zGQ(?E`kQ&M*{^)F?6~6=`s}c55@%)C&bAguX7vv5EJk1@=6Txea>>C$3m#CMLtjU| zxnEycaKG87==DR*&D5}YB`Bn1mHUc*K|NeeDJbPq6HC=X&6!0=_U(Y}W6+?bhh>Km zee9 z;AnX1)s((sMNgglt*{VjuNeEEg_WNCAW+t)5hE@AYvfNHg%UezqRJH54u|v(U>}b$ zxp{I@5vw^rRi`}+5(bdrgKs>{;kO_7y_jYrWvan%FS`MYFW6hET5)Z@mR9TtRTJs2 z_q5?6goSLo8@eIWs~+{6>mCC&C}hQ|dba1}Ntf9jmP0Zok;6IS#34%f-5r+Wz(7gM zo;jM_vyjb)_eR1-W?vEZxoQ_4E~Vwm1xiM(T21IgO?WxhSI7C8?K(d*Xx5JkThx{f zXuQNMd{uSD141&_?_LGR1sOt^z0FQyfMvQ6UhXOGZ(lkHyYqQW4D_Dfnt{IZ*+^Am z3+{!kg9v491Cko-cwo>~Tfx*f*6%{~ObHSgDS6S1dmZ>|x>3eNeQ6PC6vyzxoc+Ow zrgu!ei10bEjMJjjoNpfRE`f6k{ss2Zz|BA zikl#4&a&iVAPcWAMK5jq9E26?Us0Nb{f1zW(eE#2EP68P57#(JA1M1;M^50NauL=2 z0t|P!Fb?VQPZ8FX zMDIMb`I3BSZONXuA6!9yeL5Yz4~-nft`QgzH=;tPa&d1JEmu6*e}^gGI2T-Tn|E@5 z`hlk+sriCTZ^88Ur8o!OAdCmYu=(X7oV-lr1wA$zP>r22&%mrh6KsFm7{0bi;a#=$ zQd;B2eY=2t`Rmt;tf^c<7HXC#OCmrDv);Vnf^X*S^l>2fx$r_%eY@VJfS48cZf#~U z8y^=%nFs;KZJb?(Kc91{U+EsFz6rW&pgL6l?ex0o4Xxckeg3XontMf*6@q zIsabKa{cq~M>qcWt-l-fZ#&Us$j2CJq!cmd*h_BCu2=G5w|xu7nICi^ZW!ijW|VJA zB9Mq#=%7M7v)|5=DbA=@p~VV{%yx~4ICgb6sIBuCxB&M;^g^B{E~zj0P$_H^Jclc2 z=#3Yx-Y_8HM2AKnx)~vAHh=eoF2q1xyf+61VL%K9bb!h0GCe_wSg}7b8@c?glvY~c zPp*t!=>8epg_PXy7N#GD>S#+RXIl-l*k>rUedqxVbAO-sP91NBOE8x3m}1<)BVv1z zn!|+C^_gX}sTN|<73zK8dw4O>v!&lepN&7!*84Dedaei28uKFOr9#V^sKM#*x4mKK z!yOO-KWK0a_}&t{@=L`+#oz;G{Cj%E!;?@--S~N*r?|qdRWx4v_fGRS$+5nUVdHDs zBohDq-gsItE_$6n99S#i3F{(72;+*s+b%@7uZ^zt?~$F&)a@xT@M`k`a+C6J%{G~S zH#{A6P9O`uewWlfzp1f$Ø?XXnX8+moHSqJIYlB*{m=K9@GLncgJH9Tm62&Nz-Ag zw4_(I6j!`so=-bIR(Tq(JL^}@@}fe3l_}3Fieh#(U0%BKgCKe@<`UNsQ8Dq4tCbRy zNS0yb8*K-O#f_m|rBQ@i5jl-qxYs?}xtf%TTah`@WZOLJhC>Vo4s-9I-chW7Dv|P$ zGRGx8U0)jCd8fb)u|V2-?;%;JyYj>CZ3Sbfj^ULv{a==7)rULoI^@rhH)N1fwH#kF zz-~7(n7kMW#$oVKj@>c-Colc<4ms=5UkATxEfmSuEQh&-EGKu~)q<{{ITMbZ7APLW zu9-fZhpC#4Y|w(Jk`e z(&udV(@9M`gtw5CT=|5O<1HRfUZ!=`yP-=x#<0%f7fnWRm7>JH$8f-Nfk2Py2p3{i 
zFZ9cpy}#6HTU8PmhnM|$$(*UU{P9Lad8aFu7o+I*B(Z9QG%`O>GCx&d zMk!n0fZQ%mSXlvUv?eRnmFMPW&w9(LYDB%BjmUq6kP5sux3k>2zSAF(RAtm(2(#vH z0pZuiBKi;H3)f8eQ}d2z%ex(hn{#jPwhdn7$0}_^GPA-BT~jE4I>H;cl0deG1tBNhOj+pA{G*u~zNWOiQ)Q7ve$x*n z)pPG4OGs!{b_)Gdvx{J3!#7tPI=`H4e2N=MxN;aw2nt+V11u!kbca7AxAP#p^->y4 z$)ch1tIMYq4h8niCy!v^_U4;HU!6knP>rv?li7)x?=3o6bE-ZXqup%+heq_wPZWrk z4rJJRZwyfiq*H{{Rong0@}j#l;km)dZgW0R>$)Z5I@Z5#s3!1ytST-6HhkH6(1tW} zYlzuZ)pIY0K3Zvxu(*<``fwR)aR8QndJ5KQbMo%bm|2RGs$oj#I22)B<%)YXcc+V< zN0(MDDr%LnvL(RKptk>{psAnA5IP`xXg5+NA2_KdE?S7)9 zFC=8im}_W{Y=8~Uur2jQ9O@oTcENViXt@%?stY&NFUm8CI*gU`%I)~Mtygty+r_H+ zHIK&#Nm~oi{&26QRU10m$X|*ZsLCPQFqMQb{mg^d14 zlOV{BU!1w4)Z9qOn656!c-(pD*Zg87KE4ko#-btohQe?(-F~@esyz!!c=NUsZO^|s zyKg=ll@*2GhRj%2*zXMvkxpV-ph{lK6ye-2c5CU&lkg+zq!c5Y&Tzdjnw#nEiU9b5V}V? zHNNT)r6+R~p_1k+VCjfLW8W6LaU8Vrgd)J`P+tuqbpKpke{S?Qq`ZIf|M0B}zI0a~num*<6!uin zh_JEQPc~4>wSLTuj73h^(I~vdCUg&z?>;!FKzHmN0ND}%srX`~)NiyL(*30`bd32J zo%gODefsgIucrT+N{$khBmg}6-5AxQ3($Erz7zlZ7Ig>ycPIYe*NMMY#4OC5GVuQW z>$B(2tsSgZ2~^frGNkkr?;LS<#Kj7u2g}PFeg_E6p|0>N{loG~JlSJ89qC70nf3YH zu>M$iY(|Q`iR&gNANswTIC~Q!|7kXN^yT$~bwU%ie9>oOR+%PNBLxm5|G|+94AxsW z5<(#e?kMoR1^O~nqDCVxpDBQS8AZRd zUg)nL?7AMP+=PsC_e(J)ijyE>he@MR7rGCIlPaeZlamHMQSe}G9@9`{ZL4)%C^;X) zB#ON2SQpxpUka}@h9&e|#(mw1y7ldW9`?vzuE!ZYkVYx+$1MoRKAfm@1o$nCWnZy! z*vRf92 zZUM!fSv@Sh*|(5UW*0b8I9g_|V{VTVgS$UjF(4^p(^&oKXUpuECJLJH3q#QRNpdj9 zK-X8WwiliwR3h>gIdtA7d_(hXzxoxsJA^3@!-4djMRwZX=u}5|;6Pb~-@zLiA*Pyz zxk-7EGd(#kKvHrav?A2gY z!ue+aL1!9u!SlLfr>TPkDxe2@fJ)&`)yfIux`J4@V`00NUeCVwEPUf@gt)Yl$dy;Y z$xw@c8{3~?gVOewse1238>4D{Pm?2udHrlGj0yfug*9<5*Yn!KN7yrU{bcJuZ~a#; zxGOiocnG?5k&A&CYtx^jEbc3}?>FJJX^r4f=uw=a`RncZ&aX>}ZGFO)>}j;jY%MMr zN~b%5m)wM=WVJ0NoAq&HhpL0bQAm#_E**2b0-IQ=IDoR`zw(UVToleno>Js8Tofn{ zxaybss+66U(Ckufm%h`53MUKunLCV6bPhg#ghNc&aZRpwex0%i(8}Q;Qb|J(9|1t2 z?rSBp13VBSbQK+3RZY~3O>;-ck#lk=L|}6aDY%z*As?(w!M_-3Xznp=ZbfuE7B-uvv^Z_jFD`(&#o^c1_g7AG>mhOb`M`G6zs?%CINE+<~mYJw2E-e(VZR3Xs!xh5+w+9onm#51$8wmz&iARuTU7)iN_U(RT3^z}c2 z{7$^Xg-X)XXZQA2&IK73L`o&)HKogpyCJ@>ULcL;-!YUcb#`r!%|8_33q~d{8nO-a z*E2Wv)_lmR(-IewzcXOdH#zL;w`xWVmXHn9V{`n8%G+2AB)sgam=ag*^U&|pKU07z zhvYksmsoPh-;y!B1}Rq<-V@8MVjn@qg~2k4r?K34JvP&DhkCfwsH{6B^;ViTY5qbK z*Gj;Qg4~xiLXPd!5Lq(8v&mX z>v8)sL&mPo+jFTAlc*;8v7yntL)QDB%qUliX-8BC`CZ6OJNIWY%8sbl!Flw)d9B4& z)q*X6&-abIx3G>~5rk5L@ZpiDH!Ri9hFqW2dq3!WWMjHfj)Wb+7m2S1@A{w->kwbl z1GB5*fb%G|q_GV-d3$4wTvY`n7WhF7h;C8xr^f$;b#FJs4(A6*JhK1d;?M*(S`dI^ zGOEh%#g$tajw18-w<@A8#5EylbAwlNOdPY7FKjTf6bFCxdh&8)$_V1~SN%(;ZI}SyWGcEgy!iJ)Tgpl}H_O)bq?1W|89 zvCW%9L-eD{zkEW&WzAUC$NqzC&oQP&U$0>D)CuV()hTn7AHkTh^2C8QgoPuA67D5p z*cbi+Bb*J%%v=4U9z|7;$E{zs#ojlXUB< zPp&-0su;_hR(&+f5cIn@3t5};Y zmm!c@uMcQp8yX$yHs{Pee2z7oQ7wwC@#QOFPmYg&J|EOEXJTen(uSYrWYlqUOl63V z**#FgKUYjjG1RiQu7tZArfQk#=;$=1v<)aYE5l!o3_rOA9+6Abet4Uw_ES#FDd3M- zX&&=Ox0#3Lu{oD+y%I=eV7YpUB{%0_e>M>-U)YuCibjZVKZ^gXUU&vC_Ss|eutB$f zfM$^$B!7d^ruTT!%SK#IFH;93qc-WjY}1l7-0^LH-CXvt{DzlE<$ATpo!RoXYpkoE zF-=Qnkq;z!z~IX!mX_Y^jT}-f_lg`&pIUQ2H@W_%9%6L-iOCkJbMX?n?_^F;aC#bKNC@SJNk)^laRV#HJNrW_|e9B1F_D{s|$X3l8#PRWlzb+_XRNh^XB!;^qGR zEs0<;j}=bKnsMIBnp)aQ>G}O@D`c?i3;!@8*78alLWF^kWwdh56g@)JKlB})O*zOQE!E#|@2T_Tkw#}jsH zRQ!%~Sk9aIpTu+JrBNZjx2BW%T)8dV+~UJhv>+-Ue#*V+jdnnt^6B8n{F^UzFJRkEpEZc(p(*yT*Z&>h-tzAA??MVvF-2a!PA8!$+nK+B2)mow zSutK(eFs;GwOAZDjPg{a6>S|?FswU)N}u3lJiZ=p)aynUTk|R5UEnUr=ih``+g;+B zC4W2Z+h~&6WgG4SW~g+=NReK0p2IfT=Vp;{n~AwlRaLjP!I}1wF?q_nIw8TdXI0Mp zzWKDwuggYv71YySr0BZBEd}}&ekoxdnq&05)f*2HYw=6k5C15@Lsr()Q>zN|&lFvB zGK^7`zQ^an=J2`p(>2|H(|u(Mp`!4Ona%1X*|YT~Q-8pAsf$baqOGvJWq$=bH|EgU zzQB3BB19!!HykQ#ZN@!NcFHnnHkVef}FAmB}1E_yk15zvn-zH5KFM_3cJ*8nqY{MNJJ|8uNG 
zNvw(!>#sN(okN_@NHoxk63ozYErHxAj?Py#GPb$pO)&L;ZieZ{UmI_^1xCUrN}~(y zC|Ujee!pY16>;WIeywjzP!wr_(SJgLoA;5Dl9T)2zki&Om8GMvuTN|qnC&V8SiUeW z#-`lTI8_b*wckzeT>sR51cVa;{!dF2nU^TwR}|J6a3G{q8M#$K|9n`8=Eh~`W zQRyEaoGK_+U0)+4nAP6T6|dPt5bIv(|D)i+R>)iXA?KBWKGQXJ3sZ%MgEtd2{}GK4 z(&T}3wWZ?c|FR!^F=C{+!jf0(*VDeK-Q{ssXJFS^qc6BFMbL?riv~Z$Td3&!de#U` z429z{q7cTJv8X_9LJ%Y`Xb{ii3cCdHUvG_#c5Sz`l_uqqh-%X?b*>s6f^>FVlg*QuzhGuXLcb6%D6 zeIZ?}*E5aLBe&4mcoYc?Skv!;SytG!Rx)cDrCfD|6%W^lB@QpbxvQf#7GMqX}Ie2a^lxMEltNzzZG1jw0>Q^P}Vbn--KeNFndQ?|IYD@D+Ao_ zaMuQ8ikQbSssQmvLE+I}j+tq<+2xfll4%Y=XWx;b12wE(FX=h(ra)5l_mI4Qx3zPM z(H^7-zS2**T7wE9FI)s}7DRY-jltg>Mm>@grYUBL=dKBZnw!?-d5(?MRQj5(3Z_|l ziR4c3SrcZUh49sJXKLKUQ$8@*}1dJSu zzWs07B;B85b}l{aS)ylt`p*`hyl?@g)rxsJ(vsOl#`xNKU2weZ!XPu)Be1lw-O5w2 ztUKD~+5R?K$r!Wk2cJQ)JExRx3xTj^6N(x&cs$8iRW)@l3Xq%*^-j&QkU_FIEj61Tc zmh*t>8C*^8b1{)gOa9{A4lz^ffe;uOyl6rFRo$W@z!! z!!0f!wt1_gVA+w__bongf7Qi=U5P#0QL#KvP?UnvA)t&$>v5GgG|hi5WJ#f9yQYQb z87>9;0ujiYl$62pkJ~%)sULD$hW&a?gNmEY%T4fQ{H zI&{gjo%v6{_79rUWlOILHfkhUrfS{dibOg>>UsWUY(~9ufT5j-2-W`F7OP~H*-0wl zmUNPI9xe~v-Cyo|=Mbp~^K1y;T=9YbQ0ESXZ@&r<=R!gFob~E1{>!QVjTo1Jr8!4F zc%UWiJ=0Q^sZtK~q#Q-sEQts&#XtYKm4ZDbp1<~c>o4P`Mo&Y_@MEr1OF?ecx(?8{ znhyTSnlf%mO#j7K0D3*8b>yJuRbF`B+Q5^xi8lX=<5{-+x4uI%*M`L|xUOi7rg+~c zhygNt3K%rsnAo)Zs|W{i%dLLn`>0BDMcGx4wa*-Bd*b=H5w2-7WxQy5l{P-J9=AH) z#{BW$odwk;N`C|_`O6-e+zfU$tE#RhxGG+xKSosJbCF265If;MWA&eYaX{vFUKYnb z7bohONju!1D|HN|xDkP5Yh(#DNK3jDQ%HU_S+Tt4G%d> z>$86)GG~X(j$)(Kty3i}m<$aFCg)x|HuVXA3vp}OYR&}q?*{ zV%A)%4u{|HZXT=6!~cDVB<=din=~{u;^N|OvUBPyDsDp>5B~}DEUfyrasPPS2~WMk zv*iI7Zb-4MXQPJgPmfvI3)L%hsP%`d<9OY}08^gjJ&I5NhFgK*jKL#ofotYpW_{Xx zZq?Rp#Qa;d0yc^YsjQ@U!s147#k`l>Q~l-YDgHvKspuf#&X1GK{_#7y(%H|^68K2K zdHv(L0XY8;wwIIdxRv_cJt{kJy3qsG^Xk9fTgX-K?M~VDs)NA73WxTZhogTjQ7YTt zAb23bc5cMZQi!Qk-Ne+P=9-dd%>ONLV8o)(Q-Q&8+f~k`rIGlGgMX?@L&~efjOX zvaylb-L0XhpD1?P2~G{I|7q<--3y|y&C6}|I+5EhCWaLHq069gOJ?1B=8Yk(Om0DL>`E4iRzl~t2VSYSYM9AFL(h@v7Dg~G}g_q%UB#x%MA z0)bBYaAl=xMi#sjjgl9D?f1XC9w9Dv^I|A|psQ(Z&ed-zZU8fiz0kXc!uMLE{MQCT zPXcAy_mQV)X}PbT*L8I*ZF&3F;7kn2i5t+u_guffCGIa7I^#&RT)1zjlzwlD5@#Go z(u#i-!*X7&66w2g!axzS@?}`Cd4HJ0T6#9(eeJcoKmp{Hj#R1z;JyQ{mH@qW4Gb_f zkW;Q4c@*~v@Xom4H#h_;v6oEoRYMSs%n-o}BGF(Le&2*wJF0E_7k4!`I46bwcP#Ha9^piJThVrfZ2GMCZ@mZli!B-OhZ_p)MB_b zE%I9C&i=vkPArE;0C?Guvi8o0;ovhiO@dXeZGve)-f<;!2SOOIRC3_tKsK4p@AWur zWnDae4D`6vv@3cnpmAix#N*+^%Sbe&@Yv&Vb-okteGAW(ZcE-CCoe&eN_EwbHz^)^ zzqG;hIi80#7NGC@;ca%ljh6ESR@nY0;$xrlwIXefjw0CbQbUoB_n%s*JK~k9r6w9y zpOuSl@kk#V8_RJAIe9a>!eW*~J#SW;*?jz|)HV0jh*ug)Lmn9FMIP$;+1v;SP1m)g=~^vipQF_>2Sxnxqf}8f zAM|3CWo&x7((`6Ern zejyY2?xQGd)`EwuWF`_7?0pv!wLBErJ3oP2i}{Yx4{dI?;OtQ37?MogfO1QxB9|& zXpJKb%#TGZY?S&f8Ps{_nB()1Gny!3!^Kb@e?^wTQezFpw z5*=N2Oj6|{hzvlO29fS!7TK-1l5l*Fm^}$kH55z7tNC8%YpbH4^(0;FwKXZZrW&>1 zZ$cU*KHeYd=nqhS%?9BgUjl@%ez(@5%!0}E$;-*LOaSlMbDjVN0ZQ#ObnsTNlzeOs zH||TCj4N)}{7SWU_@A*=Cr_KHo_S+|s9^oO=jY9euBpNRYgDLDE&B=xp>XE=cUNa; z)x7-tQXp@7kwAKP^8I{B*6{|OP$1U*K>cdM=3mA|A@1;iQxofO(EH80Z@Y$hspg>M z+iHAln{rsCDiep;<>vMaB?8AK9^03im`06N)z?oB3|0$pg7{7V615m_F3Pwba+`}| zMp?j7jvVV8n>vQrFjIrjlWxo-sAIyj)Rto|S`Rk_`n+WrNAJ zO;I}*r`hHC!Z^<6^Wqg!6%je#k6jzvfju9njZ4({=GnJv80l(7`mSmQ=&5Sbh@B> z$WH}KoC5PU^_0mMD)Ka04=eMANuf3#t6tH4em*qvPr|$X@08CA{yX};j&o8x!LuBx zFlDghi(%+Aix1o3K4ILoDM}N*|C{zW?yYW)?FA5s{yWg@jW`jlGN=4pV+2IchI00& z3#{~no$~N93!7eBTsJpdI({GJ454K_&vZOu8P5;@nVeB~YHtamzJNaD70`s~_4+Gn zfWIU-*u4omKKCfr5kZXCfv;4FE9!dKq%YHx7Qti(C;(>p*Oz z_440q;kki(la)A@;jT9Kdo*9%&ket)oCF{M@T6F-q2k1g0*dND&QYHBmwz&*1B>VC z=}5{IVT;iaG(sg-oJUvpz{FuVK>ob}Aee?_BR*ZJJd21}vr?l5F(J-rfsPY_kyc?<|Tu$M=#SpLK>I#Q`jvV{Se)VcrUY?FE 
z@^00muh^aVo@S+ppvcLB3DZxJu&!iY_3mO1*)2Pp_Pz2#R*}fm-fNCLexV5w6oGCa zmkUkl`;wFMeEA_^aik7Fq<>ikYT~f2z%#mHM)YORdLStvVi`X+IVr|tsgNl3JEC0c-+mfT{!%TPE-~>q9Y@{{x_8f>sNP!`H5ab| z7MGUh3=A9xXlUbm@{FsQP7U)DqQ4>{q8aEiUZC3c6oVs2Q_}$MfUV0zmiRqJVJM{5P+0RSA2mGG~uTwGP%5*gK*QUJNs{;$WEzsu+zp|ua?ik-o79L zGLOn>xyzZT{XTx(*06o}>5_Nq>zGn|8QOk_9hZ~>q&2Og!0vQWdAw`o3RvDJjmEH| z0LbXXQvK1LopRtA=a;mEndj87?_7cK{`$tfjikq)!63(t}Y>aand3)Esn;Q)a76&*Rpt4!WcM?ROVZ(5E6UH$<`XO@9 zOto+~!f)d4a?+KM1>zb_Xx}d81T~&tOR_Pq0Rr5|pA8ir&Php06300~kB%#G9zUr; zI2QUNH79%%ae8G{rYxosq5R!pDgA=OChXNIVrGhEZ&!x^Ca*TKvi65^AaE~khIp;8 zu#WRjW@ctSW81Lw-k^q=-4Z&+m!DFA!~l7l$B|@UhJ|=O>a*vleQ8^NX4(9sL&q11 z>Dx0)jgWW=AQIW=mM1Bfk;0Wce0(NGzpN{8p9M`EFS=-SgLtqM6HfYW+?AI@A3jEZ zzk^bvCom8q0f6V*SC~#!YK-t27LJhx=y0JM-@|A@6I61@U-{5n)kjfMM0wv;6^lGn zNZFrjJ=Udq^~ptGuvq|W23}=M=?M(H2y&7G*b6tnPSYs6_w|$_Cv)uJ!B$G-B%ldc zLOWt}c66@k!0;ec8wxCS&_F6}b8|E6@Gw$QU+<)uqR3yI3r8&djur%y$X zeEs@5niE(Na3?7uKxNKSMsvcqhsCHc0GDroSDVn4hGRt0)>UDrDlrb#&zLPNCXBCrplA^;HF2!KEP0HUJe)__b<%I?45oJ>`0ahaER~4RGEH$ zNmJ8^0zJq|Kq>V&^R)z*g{=CdzU*YR1$V=1_!-Bs)igl=ddtxqUn8hrY>;fU(vy>y z2hQ(SJsmT_OQxNAoC&Z&Le!Lr7b#vxrsa26MAZsDCGJD-8l1tE0SCdIDuL6=XV0G1 z3Jz}DWHB@Q%yk{)#001j7eJx#N8wAwJg`#0*;9&TWw@Vh?j{@1OaNpg<0_u!Vu7`+ z+Lq?#z|ri-v4GH+Osb#H`9`TJ5ypK_pS#)>eYo0h0$=`8!vz4y2D>V`3cyG`Jv~b* zE8~HX-arq0$5n36MfGsA(3ey#n%D#wOr~8eow%Y=yp0OcOOF{C2>bAqS!m8fz3HnF#_rQuA8W3oNwR10h`l+#F$=m7P)5-$a*5(-FX!Lg+t#8aCNVR9^#Fo}d4Is|BcmMwRox z#8z?6{>Eev<{nuSLjz+=#VTH!ToMU4jt^h$a`oHN4JigB-C*LGOG-{Y&qjYTTE)Z5 zi!nPv`Q5R{_7~|th$Sm9XDd?%pYLO?bT1TQH8H!`v=Moi?XxCIdqfk`Kya2$Wwy8* zFb~1Rl#~QCc5+;0l@;V98lyV(hh2_2`yu37Y6nIBjpZfNF~&wgAS`u=68eO(g6UKi zV}L?UuRxD8P0UpyA|gUozNROwV|hF=~YHeg|4No{Hp$Mb>S57Wgq zaGp&p!%RWaak)|XFH`1a1z>V_M2^^%`7|!*26M_jKG|Ux3YvZay}o;qzUT+rzK>ZQWwzqTO*f;j4&+zeSO#2)U5JJaF@o;~N9U~QF=f{Q z;06{t<)c7yS{g>%+S0PT;;hF+^Z26*U(8ct0-6_d2xDbZNgDssgo*W`>JjFLK2y^U zYG487ko&SDGp|D zwh~(a`XdSZV+b&S_n!SyX#n_q2{67}r0_)+(A^>~M%5312`cTvjY;{UNqwR){Z%kP z8sFrAhE;+?z+ zG9mK|Zc9*?8u6z;dpZM)t9Ap3c^)1fz$s~4(p4&{0>`AExCczlP&q~nL=%%tz8Ojx zw8#$MxSWYY0U&8#Vuk7i8(7V5u4sJ@p_l_zv+P{=_NyQ-g&5U_(!xw@u#w7r4!33K#M@#4J$GVSVAW1ZN?M5!hz2?R{>8%n7N1_u;5Yh2j5J}GczYRo zPK1P)*~Zv;#FRj(O9Q<90Ls~^Jx2yZ-^QEoda8CYXL$ugRV=-pye%WNUWty+99BarSPmGK!X}s$8b2$ zDl17=rtAS%_LHDHmtdVsBlw2vcdlpo1N|K2U`XbV#^|P<4-Ek5y*&M!)V3ze6}EKx z(p5CdPkLYwz*{w^%-I;v$Cb9VSpp#`Advc4kvrFl zzFt>N;Yfcesbzb+WAvT1$b&)lu=UT000kJ@J-ismJqS#0Mjd88^&dyl{8qgzg)gsx2kA>YDt=U~Z z!w!UiVY;w?oKG;A?Gy6LwFxZmmz?waeQ z>$%V{&b|_{KmkXA(!9fSs~i3OIzUh1c94>M2C3WG7bsrn1O~=JT_TYyT7BgR&l(}( zi{@Fs@oI(8*v9daXF-nWyTl>GB%jw0ETf}un%JyfuAfBwoZ8)uL5K(nj!y=)jETY7 z|4X`eRE+>?o_av9Q%g%rj^;+Jm`8ysOZt~D>Lw2Cw^OJ=qDWRqj!~S3bKG>f-QS@1 z2^%eDH|uYq@!TSy!UCyUW3&0Ub#-AAw$vA(dLbw>u60Lw}S(KI_|(7jM*!GF5**g3bFpsJNUxP$>Ef=>6~R z<|(PGn^6$&h(N!3$x(P)^+xN*Day{gIMm_OjG0_z5gR>0=e8AO?Mfd4p>lA11rHs% zNtBS2E34nw=R!=0!3+PdT@y^#<7wWhH58R_v|zrN((@_Dj+07~9(6SFFrx0;)ZYl4 zlNcA&I;kvgrv9?szBx}INR>TIbxtHKLMUu}N)`1q2X+u-Cy$Ib8Nm#b*~=n z?d_j8f^;7~tUeFulNDXj*Z-P6zO@cJc~Z2)xb8cbXB4Alo54^ne0H%-7@9gdEO-=Mb%n3$xS+c~A5-&>mB{+~uk4(d|OdzRV ziZIeT)n<&wsi4M8!b3{G1`f9d%xAC}KrC!U7lrWSSk{5HNejjSYITGSNC@&guTsN; zg;Ayo{kRGcAO6I%2{I?SFW@hzm2^!$@O!t0>USZ-_5w?R^b3?_C@g_SqwCuulwbf^ z0D}60@7>X!65N!To3_tK)$$;l|_M;IPVxD;wD@t^my8 z%LZuhaWZv?JOF2tPo2APVJ!lQ7X9+2Q=Jx6bI!rH;X!JM20O*u1mYO^#5TyGF!hBA z5Ilg*Hm^$!aCa|1|8y(>fXWlBMHiVj-bbms0F4}xkW{rXj6;Uz(Uu6h2rG^-3j0L zSur0u2W2NM%j*yT+8o5pobV+8{d#_WzK^NfP9z&~3gpyviT=L9w-5~_GDo*#tHalY zk4L z?n~z-_~C_xh1*jlfjs_wa!BCEvyEyRhs3qtxUY@x<}DRthP_G$fur(HVq>e$0(2&V 
[... GIT binary patch data (base85) omitted ...]
literal 0
HcmV?d00001

diff --git a/samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline.png b/samples/contrib/intel-oneapi-samples/assets/intel-xgb-d4p-pipeline.png
new file mode 100644
index 0000000000000000000000000000000000000000..d826cb2a76ae725fcea9d289a427c0bc490515da
GIT binary patch
literal 209443

[... GIT binary patch data (base85) omitted ...]
z^r(e9bs?1cA;`EHR~0ntec@L8wj@FP%_4bY!RA5bcMJ?4J8cNIJq&dhs5QZ6ndf)m zH_P=SCnu+(k`fu8rR5f1{(}WgQxM!)v`StjP0Z*bvJInS3*g)%VXKZ16jPMNes+RG zy9V6E%se5mAK;}?Nf1liFw?$P`9Fd3b~qldY5|F;_ir{yNH~?o&o81x?~qQ~QXG7u z4ZaTCMTSh^DUW|B6|wICf?J>Hx1#qX?uLKSlt=NDjnpnZVkY9p*=vYM!&42ueNz^Z zH}u)9yV^LviWawCx}8**EF5MRoA3xZx>A5HKFM6y1fcUe2s;W6sjqr$GpvsM*c)Q& zyy8xD$miZ(mOdJKoC>JcmBoC^e8*SywaeKnB)a7|zY6be^M)silBV4t)j&_=u`_bb z4xZ5N@U=(rgb=|{2({UKmazG~*%0oqMtg=SZQyY`(O*YeB#k`3UVlO4(R)hTP>4F9 z*sz6>V+&9_@6W$inwomAy56dXptCN~5iN@u5mX9iBR`aAB)lIE0a|{VkHEY;feQp{ri7D?12|!r# z=66_TYdi`ut(P{HxCng2w)_}YH5RF?M}0H26^1FM5Ob*d?I0{${ML&B_3ceqv8&2}7 z519QvwXg6$_#5V*wG_0m7)x$s<#JG>%w$P_+q!q&D0aa0hLzUxT95p?OhuAEqPMI( zf_fWZbajKrA`=(T<9H;>JdXZ-~wb#&&fFApikmlsFJ z#+1Ng29Jn{6ku~#AiZM3<{fyWz{JR}n_)c7%9`pKI9s_cuRjp2-%halnoFoo#bW zuce%(Gt5_4cX`~r>@ducz@)hDq_No(k;y`34G2=KI7?t`A*rD>Qj}gnwlx=)C{lcQo;`a zUVTkR_qU=4X%MH0J0dc&$VmZVDlBBiKhSgC=QjH1sor(BDhtbtucCqgw55i zu%MjA@8ATAzg;<8XYCptzBm2?FWWjyiQ6K!HpOqx&2N;IAN(@!Hd&!Z%@y;ei7S#T zk8^61AH8-o9+g|<4aIUH;EY#TP%usVikoEMwKC*s0FJca|piaSE$jJ zcjsH%l<5;PjWyk^GbEl3)1u7n{;0UkgeiWvoZYkdiv1R-78O(!6q>(!|L7ZW00rwQ zMc({TLpg;w#p4AvMZ@iLZ#B#~uJw=!p3WFP(l0KEDl+?WxuSSYa=%7>^gJ&}ZWfI{ zd0?z27s6jv*hG-s2K*UaaS+Q>`S|gNkW+U>X~71O4Cd9)7FmL{>K{$7`>u5ItzrG# z`S~Af4pnI4;a=nXa&7g3Z$@5y3uyOZw%T8;){h|HA+ZKB5{>V`I04SX{l6~V-32QK zL_EA^+f=I!j>5q4Q&eGI#=znYusEe#cEX48A>*l~l9n3Qv*U(I|C1^Q9?Kh8ppIv% z-AsaWzS&}`UlsV&766`I#vgu$R;(iN6hGIGriRFk#l^F;nriX&0R264l?=eVc*yXd z_d>IO{#3gl=Yx?RDVy6lposm^6!dF1z;bQwEp_HtBpM%F8oE3a&DINblCTc7UF-cZgF!>D8K8n-cZm)_?{T5`RauyyzFS6wnDb&p_?oY+ zOh8i%3k+TD-Qf~Vl!32zQJN==A>diQnzJ@P%+lNm!OYiOsrY3S!lGBfZ~qYX9hJ@P z+Z_h{D86q~fPXq~cN=d#QGefYJ&$Bcw^nA7Cx`Z~^ZZ~D!LX2l5d zZ3e73xCex*C}S>`fp<_?E|?7_86vgVBx`X&e^-o|E^*vo8dh@|xDQsOTf{78s+yy9 z+POTeXzt$uiS1MDofja_lg^g4l`I47_lx#UKu@o2Ea!0D(EQGN3&w*^FPEJw1wwt# zPMyHB5#av*^M%4uxceZPYmO-eb(FVEo4FGJ(rt( zho<;t_oHU|xDVAovzE`p#feZ(Lnx1Mp~%0}zq<;~xZ;NTVytX+w-x7@zJ8^UQ{WGV`)s;TLhLOCPP=A0r! 
zUMe-;uYs$&-^Qp3bp3adJ7-bK(96u|nIWece6xCaOSFmXk1==91>xqO@Zsc-Hh3|u zBjn;@?7+?;%;;&7eG=eWcBC?F1?+|4Xdd#CSDj`Z5E&Fixm^xKr&n@YA@%O6C(Wdv~Pbc$##a}roP5k7xCTq8Qyc>FMVuL{WV3M zj!kx&f*Jo!U+zh8pTYW$Z}pD8n91}=Y2e@ZHdW_WoZCH?mM5K4ODm!VT>wEi@# zWS!!JFr6ia8lYe_#xXL{^2qV{N~!e0Rb^Bscy1kvZf>6cnrq!t+$R!n)5@z1t0-xh zoiFx$Ur}A%9-F+@qA~j+kW7*{|50f&r8;WaU4g5>b9Wm{?6?*xDx|n_J%$IBXv62t z_?4&gJ>PX%|E;^v$>Qa_448h>tjQvuJYL~65IS-l8dlPTJ5gTK*!RB5)wzcr z4g#60kTpOvha96TJ!FlrCJBETILs|s`SX5$nyj2qicl64y6o3#e7@EiazlA>u}qdD zg;etjkMJdW$Fso$KOA^a`zqodfIHjHF6rW&nFgxzr}3$i=-S=_ca4VEOL;aK5v{yYFkc#pb=cYZ>Ez*mL;7`a{Bk-uqPe(cdTdpzBc=Gm^yC zE%4#)XzG)x#m>I?9la>D;Aphvy4LrN??>~*qPj23x0HIU-(Gr^obrqrK6JTmU$#IE zcXBxH*zY%wzl&pJvB zA$9V-XUdP6_#$`l6Hvy-LlyUC*HL*_@f*g|n_H^7C`#WWQ62#iIm4UMmG~Lc(A|S_ zv& ztnD2Ap2Tdv*I1c^7Uxe)|A|!)M)5Ix}D#pMPr=gPqUo zsT^Hj?>_b}Ip>dEEbJ~hDMoRz1{NkVQ86>~SC(NK-yU9(Kwz0BG}_>WBF=$a&n7=V zss?e}Zz?7Snlgd&3%_k$9i4rf09A(ax*Q#Pma@mS%N9A`lS}cuz(yig7v{d#RpmT} z20djdF}^zvPZ($aRc85zugy-F3fpGmzl2@JD!ylYtTfhujJj<*G#{ILY>)Uzt_Uwl z!}V=AuftIR6a2x^*wJ_4r(iz!bVF|&*bs-=YMH~ZmSpR?CGC%my~a3KauU>L;}1t{ z>kO*SH-bQF_N&WHd-xhQ61}S|W2*a66Tfs_=dMZ3G5ImI(}E6Lxzt;``8m3?%mi%7 zXF&$*`q+$|5pV;Z_cyz_!`5pVA5%v`eE+Mecnpmh=ZV{?{UGm?FaeCQCx#l z)}0J0mCOHF&Yj4}AHW^p7-K>5%h!QF zr`f?|<5*M5g1)WfV6T`Tc>$~POLi#I=2lVBEYsFu@4!F-=n4s{gSXG(r~eo%1;vt6 zaB^<0)TfK?w$sLK3|KH^kw(bq9UP233pYZhsT9-JI5=Atb{PlK;(Ms8?*;RB&yA7o zDThbHT-rweVLeFRDvr4KgL2D{Lp?b1N6h;kO+=_=%cpn2N4q^BnuoQYds+{3(>L>&>GN3C5NeDYG#bLv zECzBLoB;MIzXvxskiCUvoJ>Yf&GHCR2PaaNmRMk>G(c&?in3b(+$YbZ5tTx^pM~`$ zqo-}-yE%cJ9;=)ny)m)yFXp$X@d(!|`KIo27nU&8w)KlAjb+LuQEcUowL z7{$~P9R>uz)^iO;`BhC$PuCdndsG-3)lz*W1NiIHS(Mo%1H>5=XW5>lNv+u#D9=8c z!m$v)#o~PZia5=HQ2erXveiXu5&a&=&{%5{q@>k!e#%#MF)c9@f$3 zkGAAY>8yeoXV?O4mgUmZZaS1Q5HRL2$i93Iv8OrM6N*G3eu#}rVIH3xj}cmxNH?X& z;hD~mNT(A`X{KuzVrO6Kr7Gj-^ejBCMJcc3P^=JP-ydKr!PyEU%6Ob@`>uXQJIKj_ zJW4fHMw);Z79SqoYaBK8GuAc7QV%?d;7f7hd{@(N15Z02*^H-td`6ePb$YMq=UqUG z!i~I_lH&IvovtKqC3)SvC@^~>pic_vxTosawZ_;W#0$$^s!jk4;ku{Vb{f@;p{`Fl zmXZ3AvzUrjJR>7R+aM;^&n;XnNoU6)>z=@SGv-lT2)bH2>VNqlFUa*><~3_CHMk86 z#@46JPi=1(>5&cw&CO+7#bUC|sP2=b4eJ2{j2T=a82smtm6vmA`*Sf%_|FK_q6yu(3j*bqO`>T2z0mV%8#$eZ@A|-7&tVREu`0(KKsfyTbh6AendBD?JRVdE^za3tyh7ormwZS?$J7Q~h zxlSxf*hkF^`csC9Q#5zV{XZdzBT?{AYO#bZ3-+uZmmbmx<2OPCUk#DLh@ik@=pz+p zVRji62?-8?Vb+}xj6VjD(k0pZc~^BW>CjL5ROv2Y?swmO7oxLA5|vhL9_2)r5R#I` zH6b0x-Dg-FucxLw2sKXbgj@SNsqXjvM{Q@xFD=8B=b9Z-F@8W~Bk1tOnuDw4*L$sG zjxBcMX`+8*l(NJi?0uS$Y_3p@p6%k?QsY2jXE=|M=Y$PTrYokE{KL?Y5HddJC>d$Y z%N@wQ$3NDIElIw2l-M+%KL%ee0*+VxXm0M33zTpeGV7Cw)Q(|@VhPKQpWYE0o$W}6 zB|SS50Zn44!B3%_7%x*!Ah3x)9ORFvApu|7eD_8S$^Fq9*c0@$8qjJ2pSsBG+GiTI z?fJvRsW@owLJXSbrJmGThd(fes+8_T<7w=lHA#-{^)KrNYg2W~Ah)r+tUdgh`%8@D z(`B0Lo(VC-OIt}~m zJ9zgGFvRYKfspfW_2;)|5>&Rjt%V1Op7z4Ce^bS<$i`dX?8G;>KeP}>n}t_Ri(w;2 zIEPjt!VKeLk9Q`fBC5Q&FbO5wzX-&D*p-ph8FS2sRO!=4+&{5vB;eZ|(Zi@2&T`p^ z&`aB5aoGqkf9ch;Bw@j{dz-5iIPOq-+OuA;t2}++uMqL_Otu+iVyAk2fu?kQmpM2XK^5B? 
z0{I3W*o%Ky7^$ia8fs{3=L~c1!Tj5AX`xAlEt^h62{vKf=QbI%nAvhJ;}5h$7BmWm zOE{c27@-3-gp$sGm5zX**fa{@cgnuGdfU}v>?<9}H{y}}rt@$$LPoQA=n+2N9HbbWQ3@&;0xiL zP##iJo+&BkxrXu@vw1j!gH0#q;29S$N%ro6?j-o3uXYw7SQmfa;mBmwQZ1l-;zsZ2 z3>Y4!tT!BbEwAEZ(7Y%CMj&zSw3}!o$gYtK%ag(AK{+4Bq^=R|ktJn2T zvw@?Y05VdI*@=NnvAyBX;Y_?7U>Zh|TrxR7UU2N+gm@HQK97w(Hs#)g!+c=sP)lMj zKTiPx;3aULDSswPN)tfMvGUvflc%~7<(JIN=3q){u@$DMs3^7f?+ct8Va|D5d;@AA zpB{A?#Qw`2q%jBijUuLcQ_Y7>K(a@EYTn$2uGgt~(P%42;odgRlQ~k4$zo%t9v)oa zHpE!o`ag1}BZvx@N#6O9eLyd#qodQ;i00eVQw zm(5EdBijdAyXN0JkvU$h(9?o;g3Z3QAYkwpRh~pa(glr66@&P;X%7y9go!wU<|Upt zj(OHzRL;B5g_dNyh88)$eSHF#+f#)v45rM4L0HHn$ph^eZV(3tadS({h<@jKp|rY^ zko%wpz!ulQg=TK=Z|78;;sQ0Ms--5;AkvqVl-FHd!Q)Hd4m^-GK5nw(g4XF)LawHj zTsq7dj-K=ds0Y(UKdJyb86<+3(Hcl~@%4-C-~E!MeP2?lXC13gvs&Yz)%7gllbt|! z$ZISW&fsO+6=;*y%yzFOzVOE4_1j`kJtXWZA z1M=m>U#h0Kn0?=Sl`jYh0G6CPQty$E9A~6z!E@jDQU`QT_QmPhS+-DNE7v-=$+Vuo z1PSqC?pbfKW~Gkc1bhz{>D|dAntL$UyqujC>vQ2witFp2Bx>F_zlEH@TeC3RJo>5j z*Wqsa?JUpnf3oKqMS^0?$xecCiyVkAX;*kUm zV$dn*+{h1OPCH)B-0RC#T!V78yEF~daKpd>_ZC2hX;0NxdW*mq{EXvCatwyr4x#MH zqf=7~0D=r)U{J9R+~PuT)E@ZV2JcnZ%JQ6{wRspgS3oR7Ta%M!`Xbn?Gw9VtaKcF} z1Ak;gls>#a2FOj=G00SKTw$#>A_j;CVPBEpWvBX;NhJFL2WF2@$kT|M_Qn60UDhMe zK;-c%_cSA&`6qeeY(7fa2ViSi#wqq8#smS(C(~MJ-74i%us2&C`6YsSn9^tp9m^O) zwlO*6=cS`zp$hd5s>k-ZBV^GsRlCt$eo2c5#+NE>=N@vO)6s!~FC!gD$?}33KOxja z#Yp~ z{81&hxUbTpC1lI(t(DuK^r=QVYBGx=cuA<#5vC$EvebS$g57$TXr?l~tb2tNzsAqf zv{H9t02s_TN2z@R@1M-7Jn#Hcig(t z0Fn()x8aO(1`&2pCdLW{uvPJu2|cHYtCit?|=C=_#JpRFY0EZNOfxWDGWsm*r9-K=i!VPPo$ z|F6=oFv_RwTIorz`e7wyTsvd}%gV~A*EGZ2I+XI>#ur7^rCEz_ujY7qNI;(@N5V;K z9rI$P?aZWXz@Cf%jcG^1=mh*4m5c;~$Ur$oY!S$j3=Scz$%Bw{LBY4aoMu}XbOgC0 z+1t*djy zZU{&afVo9q^Q}QUjwDD-X1G|dh%qw9R zh;DKV>e-BB>uhbQT!RDrZ|>9SwDRYil^*{uCCI}T)=7zODe;$3pLeXRbHO)0qV z>vYi^>t3Sc*-|53-N&oHe~&Zao|gzGa|tB5^Np@aLZ(wmoGtQ7LoS~zuK#Fjm81X2 zs}rv!(;XA)HCfTXn0(9OZ#eN6D#YXAp^Z65g<9{v}<%krc*r zw~WE*LCAFI!2k93j~_oKQ^?>3&hzAvWxN@<{`-8oVM_u07t;Wo!iXe#+h|xHO1YAf z${VMb<`LaYoj8hb!b7atJfa-)*lLVSgaEdk#RZRtPv}=rj9vf0P!9Tn-Bo!F1)F|t zY;&iNiIeKEE*uOZEb>J<;bMnzH|L-Z{F;=j>q=@G*7L8xkvXSOP*?XZgXYvjPDa$q zql@8VR~hwJPh!H(=M4Wor?I_K$dyi71G#TG*kXj}qvQIB2shOd^+>01(Kjt_83csM59r{!V|NmMkiEttBvsI!HUy#-UY##F$ zBLP_pxrTjxKg1W!r;Pm-O(*bc2&@Rk(NXpF)EgU{sy>p--<`Xg`)&+dPUj?v%QKP`k$j*&zyrZJnh^CWfE)g{1+J~u| zQmJbYD5(}#S4%(GEAoW4Ai&s7biYh_!P!U7(q073u|)N;gxyD5`l1Aamc(aKTWQ$H z@G`SHh6qZ2-XM3sy3dLmCeH6}(^5xtU4{}y1fAxBIoj-Qj(q{-03TpIc@XfhQrFHK z%1cQ|%jS~stk_cKiS|2`u7fSp0!n9&8&Q%Yk(QuJAlTT^(W^GJkGApiVHW~Bo|$%v z22$`mK`K~W2sKtZK2ZZAQ;W?FOViqVplB+lHfqF=U=9;91sispj0Pud%~dN`gdA}C zBoL|w2ZTQ<^Udpv>c(P(gTwGdo3AUugiKi$WGLJH2}YzQ0@KP|)O={{>%LP(RBaXB zy{zV#jkx9W&+$ePPoC8Fn_}R^0?s1qv}f*_@|_q@mjrq0K>my)8fNB{FX%_0A;DH2 z{+=0~O6hgq>+T;~tyB;X`s~PNYggGTe!;oBxx+hSN{+gbp;1`%H;P$Nj(iv~P$$ITah!;n9D*;M76#?a z&5`b1*1|e>d6KO)1NACusj8}~fj1q6N*hkyx1hOu8X1phJyHgtyT?w=EQ#nYD(S7N zaRCb4K>Rw+IhJuXYiq9e8XCPVO`mKFoJ}#mj5=?=C}G#-?^5y!^i;BcHlXyauKZ)1 z80afBdX0ZIBXVy{bAy4{J+mH}q(a7WJpna^Smjhv-TBYr)%dMzEuw;DaE&HtIx#AJfuSq+MkmXXQE1Wy})k# zLDj${FRNAeX6BVL7qV@w^@tM50pIY_`phe;LSyz!NHEn?iSx=2aKHKCyzKqjDO-fO)uwV>hufCOaBQ0rs+T~%81?! 
zQB+1)530Ae-ucSBh7i9=3wg<1o*L_3@C2)Qtv5P>1&d7DxE-}Ex2S{0!&}pz8Oii_ ze{zN&44hC6Jk^V+!;F5Rv1wKoi{J1F?01sS;XD{F zpwI!za4iys<=C@8J^Q#Ot%K|m*`uC7q|5KztVp|p+wa2-zABphcUK3$`{Y?OPmX_d zCY=@dKn@K=M5cRzY>Zi;s1n~S9)@}%ry=^LLoY}%=eA1;uA2!O{X!FsWA^Z*7U3SB z*!V6^EEjkjblfKD__SeSd`Fl^gxpVv9wkG98}kPaZZjG5lp=*p{f)VZT^cNv?K)A> zZM#NS6ZXgY7QOzjzQ$_;FXtcT}~ z^`bb<7Hr?pkc#FSQHf({y^z!%Vv)Sjd(a!AL<4P`u>@8OCpX34L&fbf|3y{7D}YO( zK2QC^%KsVJ=0a}>=a-CnYQp5?LHZ^*1H9SQ&Xy_494s6UsS- z&ZoaPM4Ne3y1dj?0&_%$BUWZxC2W;rNXfp+)i6q^5?Sm`wky3xdnCcqlDDc~cu)QN z5opn^A%HW)wY2C*14YlS*G zh5wxlo<8MT(l*p#T1~8Xs%8?dE9vB6bb?O%j%*Sf*q@cU7a5B5q(;jE-#c^qxS=m> z*X?h_#iy|&;2oZ{fpuSJs*1`5I1=>6z=w~cb-vZqEWNw(f2|-5oI1Jf?RUw_7V|}k zhx!LGoQih;*EA;H^q>xsz$T%}iZ!%7mSy7L4?R8oxNcU$3L``0Wgk#voMnZXi{AV; z=QLdO*A^(d8i({?gXt6J511eisJaA}%vTkrP~5ni8wNoVn>s8?C8b0OoIQdaM8uHw zP^`iF_D*qrHUbTM9U)r>0mVb`@gq^JutE*Nr>^9K(P!o|&SOqKdv+$6><2qPN#(}S zT$cvxo6Thdd*?4#$qc#k-&(r{Qq<#G<}f1Ah+DPW#bPa4>b(A}$jH!2^~$%f?gXhD z*_413gMomZjV=58_n|7^M+gwUf3@gZf!OYSA)wQ@=7RZA6BJ;612!YYzkfSR!ncVq z05iJtM*mcBffE+_EM4PXI|pc5w39yYA*=#JBP`?qkg@AeLdkAX8>%>JU4Tmocz<+- zfqt*>;TgY);h3MV{+v7UZA4Jo>A+8KZ2-X*;77v(GWI`PyvByKB}(^zDZlsxpdZlJ zO1psrAZ%A-dlN&Zz+VRTIF}=1hfBhrWw1qNl(fM8_GxRupoSFZ;XNE7@K#CxH==ms zYtKS=5|0INSOS6rt%W3JQ1O3ov^6ZRIMZ5~sR)i;&wuW*{-C>aa=EOuz(S{tU+b+< z{Cf*v@#0#*$od*wP)RNMl6zXWxCcrsEnuCZoYg?K^D~+p8>`S0n;e*#f=JqwcJ}uT zn8-8Ax?L_?3~tN#^9?QKXE~1j7v2?CNTLkar{Y18Vl^ z>mFirwb)-uk~6d6D2f84lAvsH`0M*x4Us?C&_*y1TN-pn$UcE1u^BGsODiG*Odz;sCj>e(TU48+rZipE^R7V z6Z!B}m#W|pLi$S^gwDzi4-a`?w8Zl1B^c6z+p;BBH3v@(I8$|jo#S;g;F%vGv-9Ty zWXFaUrZqO>*gl#y(gw~rlNda;AtwSToSMUKZaA460-dNVf}&%@5IppE`e$sHuu%87 z;e9HaKbGql3|bfF8kTKkTB1nFC}GkQo^gD!a*C4k-0Ur}g-QtIvk|(tF9}+W{$5F# z!=7TU(-cxWyku{|X58LsEXm>0Cb5RLxh_c(Y#>aIj>|uZ^=g2wdOTNzx>yC!_5Z3rPzWVp$DYu0h_MlA6tC#u=hZy{lWZ)0#C_%i65FHhl*~wjO=Nzr7 zibYfo2#6s8oiXP>tp;!2G&En*1EMy$)X~t?GtvIhJ)6e|QRDlLPOQFqUChUDAwCm6 z0p+v-v>gDWO-)A~MOyJ@EGT?FvGHRGI=)A=bB=cooxZmC?yyr(HHrV7jU(5aH*E*H z!Dq8aSvWaq)-Z0ioG?!DAH;SAJ6~nxxsu|@=%gO*#-^E??0XdY>Q8E#Y|tcv{Hli3 znhl(&i<0bMuc5GK}T| zz)^7~Pca6i>8`GO*~>7UL{32^O*iSr%ge91>}c$u*1;cf860=4dE z3@q%d(L#?P`vPvCsObJ}zj3Y++?}Kj9h->pCjrH*IV4yr)so zeS+=DSLSmATRv-Jbn1Gs3XjKm{MMtD0?}*nRL#l}6joZcqk-$;mlV0tlvwu@tly^Q zw6a6GW0Ru2x9~GmEO#zCpx45vR=>VQH(#LMCCGYWQe@xSL@-e`^>aQqvkMz07glB@ z-0VcHm!}S$O)oAmWsjHn9}Rr+2?wbrn)CVQHhu{@MKy_9%X|0S(k;wOuuDBN?zzp} zbTyLr9i>n*ubjQ9c2xmWuH@9z)F?jxQZQJH?q^NjapTWntDoKC<#< zyg&VhQFWYj)Ev58-*X{mfOUQPrr4{EBc~{bt6w~|B2RwjuP}Gt#4)P(p4-*&Z-tu= zIyaYF?cVo|Veu0_HUh5E2ETgpd1={>nt~5kv?b<}N42jU+Jz+;4Yp_mAb)bW(DBI1 zV9~`6>;OpX{CtQ|N94|JkBUU}UbrF>WprM^s{bVy1^P*^!5bM*#=f7zY!& z)>(0l5d4GjjZqzDNRN@p#rdQ#;U6{@536=Y&xLD4_I``DA6{*IU1 zv>HP&@^C&`nwdvA6GkDuSjY)U)Hyq>VoT<4Q+-V84V(-DNPgvbZohCxQaNAv>3Q0x zoMFF~xy+oKPTR0|P&+mz@+qEXhrU2Pll1vWSe6{4edTT8hRUbcxCq^M>n9`csst0y zSl=+4&MPPlNw1?Ypj`3^InAnyojKHpQr62}@w$f~HXMt^%gC-dG+^$kHZ-E!?BQdi za4;DAyE5Y&FVa0cwBjRhJ6Vk!cTJQ$Lf|pBV4p z_(8_XDT26$<`MhyWS}A_HsYo2XARq=>)G(>7q-mb`=fqv&gs3OT)LR`hQ78_UX+(- zRb}E_eBCqhcMVghf-Y3`4i1{<>lJSx;1cHpoiK&w$%(7T;T#B>Eq$!hS>)c!!xQ%k z%=A@SQWKW3$H-7W;u{*d-Z`}`9ji5P3v7Ov5c-5W82{}=MD#3>e_MAXBX?$Ut_=CH z`RumC*^kc0$erdT*{@>avKVU)UYA?Pdt}gz$mXK)XVGhTV#50WB@d|LeKW#pWnUJr`9uXKNNFjjOHhA+vgZY9Q&*W z^Y2_6rWYRlHfiR;18d%r*3-BSY3a#{2|8AFt!BP?+@^mwF_0yLNp!S_K;orEMp?Nu zd8%fFwF(!Q$vH#c*zE2`%3dQ}cMo85+s2!t(Ck=G3k~J{wXi)8SjA~s+}irpe)TgS zzn^0;$HZ>#jK^_<)_W@yTbk?CIv^l?~$Ciyw!Jneo6IL3|c`>nB zSUIzR=&f`7_3l0D3g^AZ;w00VkIij$N-~eoWIZQ(h=&Dx939EYyEkjR;df#l9T)s^ zBN*>W`&hbY|HiI9ueqyYw(;D97d;y=N5-!-opgtAiNXhLUw@J5!*<-eE&3S2`Xb{Wcp5K)D-TT>MRL 
zQ*Myh$qOSDmf68dj$G+pUQnoGKQRUWa2lihiY6pnIgvhSaB#R!R3jM0+=Ag%C}cs> z)5T8JlI=@;i8ukePD>Osw}H?_)T4Bd2MtmFyAQHl!O^5>P=kU}pT7xvH4gVWOJxD@ zusDg|Q4sr}X{L^Ratb>p(XkHc1q03QailMI9!Hk5o@qHHxMpr^)*szAzdx)lR!4qa zAVpt-z!pT`ipw6$#aS@~wQ}2(l)fBHJ4Ey}T0V0g$+n;rN%g@XHr;2G)HN?NGc$&V zO;D##P=Eiu-@Cs4i2qXb%%bTkE3EhPZ-UEiq7--TqtpA&Rk?IB0{27hma~likEgc` zi=%70MuS@dg9o?Z?iL(EaCe8`I=Br^a7l2NV8Pwp-7UDgd(hLlpZ9$H;2H+#nd#nL zyK1dftIoX(ORYjO&Z9mcyOU0ohWK1HR7}ZeAPlls$s+ynaK0MnKU6^*n*S&Q>JC&X zlvgx2M*zmOtGIBMp2CWNV9A^r7#Nh@G%BtTph?uQvGa!y3$ts=$KonEsTS4MG{MGX z+u5B}k37Z(p|Y-dh$*Rah3AyP+|0*s5Bv7Tznq0G*5Ym+5ENAO>-9ZkBAnHFcW!)A z2$-iSc`)_Jr3V>*A&lc=K95_bYEeitF%;C`7?AvFSo;#Qt|O)vit;c@>v>XoIXa7P zw4m+icvSF{@@YQfVRw;nJ1^3f&GCRZ;w1Cs%$Uxk&@^5DQGz7uv36ZWUovLhSbT<| ztg~4BIKF1od$2es71}Ms+12ucz(W_`uvLfrguojeu)b|)3){GxDjjC1BS;auMSjM| zi__AEsp@zwhN1%}AgImU)>e-wB!Xha$+2d(k;3P<9sIx^pM8Xu+V;K3kCX%{c5S}k z!Ech`vHqJ<+B)}BzkelQ;WoTzifDSb-nk+`-l#ir`gXC4%W3k09(X(#s(OaCVWU;< zW1u|G=8RJRg#LQ|ZgP{YjO9E9H}ScQdT%l%W<@4O$RS+txy$Q7r2$tZA#tAk_Plgr z#l)i^-U@&CkcOcae!$XE>S?qhTgZKf%k!bDwtYxzptzhd;ite116i&u-8~e?smwlmv;IyzB(^J~X zk|UqGy3NoCot{zN9NNNiq;b02nx%Td6Cu!xK6ISHaTXAGJ-NKx63K6_t;1jIpKgxK z!^I^+LDBlNpZ8KuuB<~9Wc(x_O>sQ;$m`$oEKG{3cl;o@@HkVPgG z%S;$MVPfLnu$*(24Am{>(zR=5wS+Kr z39?U8>9(+aSo1Q@L@4AF0JtI9c3h)9K=jY!Pu*Uj{?4zhSw3ax`B8hu$E*TRNYWb{ zH2r%}VqK)*y8Z{Q2BNhM?SB{idDF2^;t7Dg70>tmZn_6haCZ>y_MARWUE!g==goee zRB*Jk@3EACI;Csyr^uS>aB6ZLJ4$2Vv0!GA*!Ob{H4$ja&TtaZ>;92Kn3$MY^y}Ah zqZ9lKF;y<%WL9nz*e^_QZ4B-f%W|KElk-vg`h3lxUIqUHOlO10Yq=Vf%(S7 z4w!Q{Z`Qc+2fMC%f3!D9BR?3%XSvRr5jcyhs{+!fBI}?mkb-2)yo!Ei+qrloF!&a? zV|1z=nAsNvm>fTMYj);K5Og%Mp)9Fa-UYYQ(95jj1YV+?-$~x1IbKI1SbkaJC1qZT zG?;H4A0Adbb!Y6n`xsBQ5EE);piQ(H(7ay&$+2@1)&VcKPg3z_N3~cQ9Ede$+lvxA zvh2fNrX-lZu7sM@GLA6!a&R*HCg z3b^^|kRv>bV$%YlQ7N7&*}R_$2^p?lK!4q+J~AKB12Q>a4zZL-+bH| z(oT@sg8U|X;}qoxl%mhna$atH`41dj=6sosGxL@wjwD7$HupNE9ox4JR2G!d0?OeDpy7nkZt=3dmtMV>E&PhlSa|ut| zuhCLcQjT8Ioi{SRB3wx9fvymD?Q!juEoizrmmKH*=L_57QRM6 zdt2&hJ?BAe85;JyH9$99A+qC;za=)~*RGy4MjvIbZQp=xFoB>RYrtK|dT4D?$5^M_ zlcUV(z#)U!oucJYrCw};_0rDW7yNb z&aQlwS(!R}H1>A-+vg^n85s-OULq;i{mg9&Q8D}RO@g#r1u&;@rbG1Cpn7K8=kD{J zMBXDhZlXQ+@xM_t_T{Ax2eCRSlu@hgH*5;vl!fub&R%OXGyOJP$u47A#6;qR1?{%I z0*czx2Qn)6(D-$)1((~lTqW#f2mIb>u^{#)EzmL{fRbG*?PWQYMEna%;YQcj~OMP1z@s)_*i z%Ie_!FLH7YS$eChZ2sP3+Mrcb zsD{7lODK;xI=KT&0;y)yUO6h#)PN6-sO-<$H?G9nOTBnNSBKbK;s?0tq)t%{^-)R; zZz*mB+M-kv($EaD8=n9!g?UR$bf$VPiU&G8O9#D-l8cF`qVAQy%2=rc%QkdcLM$vj zqS)A#i<(>gqGuC}7}*_u@^Nv>?5`hXYt)efGZjpl&C?UB9e?xi$04Jz{U+s4v11rh z-p_A1{L7R{f?h9b{!G3`1dgzdXot{n&Os3%GLSkY;^6qW**PnAZrn>PBGgdG&!L9eRgp{@G2lfgH?W+X zokjl^(9qE-hJ(5Uco^ZM1v2Epfq|Cktxh{%GtiwJI>T-OJ5V*%csy3oPnCst{FNZQ ztaOU=n^qbTAbGii@})!sQHEBocPga|xw@zT`P`BpNh}~gcaheY0_Qk>r<0D&VCQEa zxylcik;`^QM%dVKwu^Q%4g(1eexU;LADRHK_*BvpN*;+XN13FERj`EFs2`$=Ojf~; zU&O>1O-&=%XII(S`3vQXouDP_iSz71^ooNUwb*Ar7De4j!$H6?5c_=l^wc+k1G`JT zl&RrFLgD%cT4$%HbL!s_3(RjE%&Kg#a`RV|TJlI19N~~{ZetM-vs596F#t;jShxPL zsSd-vDh?w@k92y(-f2FbFXxB+6~1ZhkXM}}e6Y1(dynn!zyMNle!!7`c791pW@Ka- zIe723x_@;?Q143~HdDNwRTexHw^Hkdw8jkx|ZL z?RHtF>0?$V9yPaJx^f)q6x!>PGTku7I>is@1+AmHA8rQ~YY#H1TA8fI^>JybGQQ3W z<_DTntQ7s3y<%atf*s}MV^aQeTE#r_*sx?jQ?d*c1%+8waBy*pq$(;KgcB{TtbS2Q z9%N*Q{!~f7A}|-)4E2x zJ&s5CA%y*k7f8tIpaNjh8DgTZe7ARYILgW{)k`FKq>NYPI`t5h2({IFm|=rIU)igTI-v$Qaa)d9)r*nH&bk|v4%y5lE^ z{qhMnoyQN@oucuYN}>ndxFK{`_)llNhMetMKj!8zD- zLXs?&vvk0}**M04<~jxMl~(a=8Q1<oowUYbfVtrmr7+y#(sQca-87m*a~ZeZ z0GM`Myshg5%-=0PyWpZk?6+W2QDHxV5GqpHEiDv9ut^$>C7^90z_}!#Tm_~r#G;|g zn$_`TCMH^yz=}Lo?{OZz+k&PQi8ag1fVm=HeArdUZvB1*BT1=q%sLE$P$JBJm_=u@ zJIZXitIJt>k6SBo_OQF&9^l{JGuDa#!F;4awJ>0xsmVn|5Bh3sX&KC@YWQx0j(+kG 
z%x~pq-h|S6oEke!6j^H*qn;uPtvLhhdgd3gPs-w?H4m%6bl~0;_zvFgOdN)^A~2!p zFOlGni4|vI^s61O8ls-_HH@AT{kZ+ZL+Z?I>P%~H^7P~a%k~q8?Al1K83X<2%=xYK zr?{OUtViaGH}fm^mZ;=35l#+jmw7%s#ByQy3;i=GXHNrQzp`5*e^fc^#cs6SUs*4@ zKV^DkE5JZ%JRf=Z+m&Gh(>Ce>;Xg`AyF~emUTUY^mE1LEEkI;9?46*uHyCm2;-lXJ zUJP9y96OHskaf(qzgL&$8Ku$uTfvjhy#w8n<(LFV9FUaIP*SEu6Io}ktk`VR5 zusR&lbp`Y?*`{5oo~x|J^9oh0-kMr7EiLq;E+v7t{aRe3@A?!FuJ9pP2*Bo@IS2&m zP|!E)NJC?yhf!+{Lg=a)jag%`5|7_ss_m-(G%`^J$?AdV%kQqn&PU7GG}qYIwhhI}}U^$#u{@k_#*u ziW8E`>zDw2*w1Q5DJQIF(ANh;0|x>j=LHonTY#5EEB<3g$303v_j1>KazYAr9r<@d zyWk(EtUx%zubd2^#HQK>`&lztzK^YU;vttAI5+o} z_TJ;}1DN}@4aet;uaS4C6d(53=kJT|?Q-)EM<%8T*1e@)mdo6PczCv-JKYfVLZS}-UMC%B*stZU3tbb@c|uKaO!XC49Qp32Nd8vxGJWDi zgUEYPZ`JGga7_O)c;d-;U2!{@@;+Vt_Szz8aT4h2=@Z^Qk#8+B&-*u5S()ter}bbqukKY6};p5n4Is^=_9ix^(})?fPu3bD?u zQqolZh=Wnt{c(=y2juyaDz-SU1m_N3AWRiLT`xb?z|eISrt}Y};J&aJ4~tfvCvLY) z(tJ@1_xN^qo8Zs5?NSP~^bv&zX}GoK2MhwD2CNLs&Lh$NPiMIiS)8wTPNzDFBnA2K z7tvD$LR@dF8V7$+v>)r7Iq>Ew6(H9iw!1dcnffd0-r8Od5h+pVn}1dB1busKIDd1y zSu7(_yQeCk9PDdHx#^MdHSIl;9lg$|K?)!ZVy~n-I3~&lpmtk~Ou5@z&iEAC!mbnU zi_>=@oqzr?VUcP7>2j0d0X#42$B;@v?V;r2AmDTgC=wT;cpo8y;&P4u`{al*hjgeD z$>rh6#Mg>)Kl-Hf^vUL)8>QBla$kIBVyufQ)xEaU`=4^$$co zRm@HH;hY|}*4#+kzPKGYLFB~u?hSZK=+%UM&@*0>EztumXr`Rjsg-eS25APg&iRJ5yIpoH@u*;z zpo)OV33#jt@dPHB%k{|B{iP;wbAp3KG=T2Wj8oBrzenw6=y#xlf3ed6;V7 zIfqe*`D|PJd(ZBxr>i7)3UP}jIM?w7Ueey;mim_tdZ-0O4Vur_ZRkW^M}psMbh3~m zeF+f%AX6ZU@&r;y>S*&-hKOXqs;FYMDqxCE++bmmEi3FG!OI6pA@|s_Qe9w@hsO>I z%SWLHA&LOs{ocf9@GGSQCICCV?Xa|}O#E#;gI^sGNaE{D$n|eu$Eeq%xzqgv-@}~G z_?cSkfhr~&pPkds8nm@|K=Ra;;^bZOQFvJ-zkHu!-q5z?8$f?GI<6_L7(R{E%+njefMTfw;TBU%c(W z&en@=#REmJuIbizi3UwBg8nlx6}T9P!u9=#qCtLTtjRl$awE5YS>FCC8p)6TJw%mD zU2W%{oj8YOXO?rY{~(y9fP}NV>pvzjR>nFo5=XF`P8^pJBLV_Rdvi7(3+T3x1 z$cgM6(xMf2GjvvV>mJ`iM~)RY>)(sLe8zP%ZRhrg0y{W62T)NL7Bx1;{+mN;@7^dU z$+JV`khF00Cic^g8-OD%?R@-;OX#CM5?@>@8yLs|u7+Pm&n8C*O%i<0VW}%}LJ2;8 z%>Djdd%<+5PcJ1z!2o_Gk3Zgn8raW4BCUV-Ot*(q*4BJns00z57Z=LwKDvzqb5Hd$ zHph*=eUtqFPdL8-xw%_D2c8Fnoii)C7gFg+>O{@Wp8+Tkm$}_kmltN@8q~}~+BP<3 z3p@vSp{gOdz9t8TC3JNe;S&>co}Dz+v8Al(WppbC_>Io;G`El?SfSj1*#LEigr6Wk zUY)--EnQ)R&ZwCmtiPOT1kj1RJE9q#Xwq%$v^Q{PUSdY5Wxhetom^k6W9FZX$r0Lq zXYQj>q0fDm-sD=QbsXYR5^~`*xxj6>0*CO@n=hjQpum74X$}V()gAR|qp9`sd)j45 z3{~+CGYG$b|DxJ43`PB;5w9aPZTSx>cH5d1;I+zmmJXP(HUO0i zi;tY#<*>J_YbF3I1Bbo299bJ4C8=eewMDH&Z1k)fYFsuJP19_scec{tJ~){AE=M`Y zf6ih3W~Qab@6Q1U$MD^{M4r0scLG||QvRGp(1R=tGRptKrCy)gwDm8SlL%6FuXe4A z86EuUP#fO#baYhC`>!=Gz{&xnV=H&Tt-_fOn>(D|f!Ud^wQj#pg$Nb7SL&JhwI)$# zEq$J})os6{ERp-rBeX#-_h1kBf#2OI5l-%l)B{yLke2pp6&n&=09k1&R5z3;&`9$r zAF9WLL#F;^@B87Ye}g4N4A(FY^XEZ*_403;8P7?up&@!TM_pd22=G_HUF8KA51(w9 z|A|_~NXeh4DDqw9JHW=dLjDX4<>fl%t&AMYzaP_eW3JbXcPGqerH><8>^{N~UY&W1;1 zB9kxR)a4Zec6N91Sw$PkbWDKlBNs8{8#2&l{o5;m^aO2jDTq;~k_NylavvY@l>)>; znvGrHeh&{j-J;{ppP!WFU!VS}-fSaVe}u<)I=pzD!|gXvoIbv%BkmfB}sO_{Tn}Ea)!0ZufMY8b&K5 zbS{(>ksr!OcxkCQUgb-AuadHIUi~K04}?+?N2gVtK)Re3(>11WAFC%_f4@y7jYH$w;-20I&%VVI>$Kx*JIZ;M8d;r4)k>V4d(G~4EPxr7ik~- z6=oMHi%CoCtbxr)=Y1&yxdc>VY;3k*pES2Ox&-hhL}5)boV6kM_J*Y?!Ie@egDo*kN>JUg!PEKTtD=G>iBb~Q($drLvcL|8D^?l;sIYW`+ zU5&9Z+6RBL27t(b@6asN>7A|pg?N?I#y+Vfpv4etNdNoJXRO?ahaLBAD|TKVZc6kx61{G;@Lw` zle5%Mwt@}@45T(I20N*tVKre@paK*&Dquz4?NI}*V*#X^0FwNxY6>hZEe~uo0iob< z+6zBNYshND_AmREX3uzIhMx?9*|R!2I)&BM#uQ%=ssHUb=mEgu+^ERA(6D--=XbD& zw{HF0ddoiVahRWUaC0~lA6LU(vue?8;9;a@MJoK)U!mlx$I})vAO6-xiU{p36d5H) zv##zCkeZNS=}jSy37{#eDk^m*a0|D`WzHv>Jq1?+7|9tKzkUx{^6BbX_9AWuJ^Z^* za?8r%1T2I1i1g=qg&!{5AAvBFov_d6+wG%h(aURC9c^;i_^&5rYwIT%H@)eP2R=SN z>sh+x6ub?SJ2N~#>Q{S%bwBXFEHL$87#Fi8EDw*Hbu<6>@02kl8fdD*z2Y{`jSL7R 
zuzo$|tsfsykj_d+1N4)Tot;QR%c`3zRP@Ek)@e5|VsnYw=03yM&mn66w7mvp`&Zo3 zHD>+#UxW%b>8FhAe~)StUSG+U&rd`R3_7g#UDyAHq@-x6?Ew!ET5B2_*6;XX-}nGk zqqMcP^>Ta2H8(8#c&RzSF40xUf%aoK%bS|U8Xfx~Pxu^X=^{@7yI2UavkoT|2s=7% zZZY8f_mAs|&@FBCs{~Ol4(600>nivvP^@bnfN35@oQ$ zcJE1D=i@Tnh=RKI=g%Qcj~JLKsjE1p)`5ezCA`8!0DuFwk=VqL*1KRfkv|S#mA$j2 zg=%$;zicuZNdxr|qx1mi-eTBGD_azQm?PpPzJ4;)uh(of5HjY{?IcXQEBsqaAAjn? zT)XUO2e6fax?-UcJmIAJq*-G&W3HT|uuo)Ea51DQ8S~>QyNr+e`@SVa6ukjklD^I#^HYeuH0mv2E7t^j8|UBmm`4l zur2p8b+5TucAwWGLI-<^H-f~d?=6a926BV)OCBn zs?e$t`~#3M7Ywga|M*A`sLjDM{_@TbXn=o) z?SO8B>j;sh)yo{{bL=-9PxxKpdw#F=!$JM!Q zznJOO_&^`my%IE?%Gkpk%%ltZ#>K~1rl3+QMt=?AwDs>?AMm=g=h@Jzg+cX%OdtQo z^!)Rw_-fNIkpe-WVl_W-If?Sq!EI`wRs}DFZWz_$v|#j-U^Ri4x`H+O$Rz=0E$sOp zagvkdK?9I8qr4t%Nv5c#9tP*6lvd|_-i^w~UNe!cdx^XTW~|q#DtjL#e?(8#GW+mz zAM4W$6Z&T9>CLl=@im^J47+C82VPx!v~#KIvo9xURfzpq zaHqy9UYyBBlo0LW`}g1LCk(itI{P!x(> zoLkYjv%9jYYCaT<_9{R%@neT(4i?oIe1xtsMSemuPKWtV98rqMmBT@uJ8#%8CKJ8k zYugsHo4t!M^#RgHJ|C#p#&UjXIZyir3N>6u3IM$r9;3)k8MSAorODORg#ZaTg&{rd z!cPj2jEs!Uh@S%tG9iyFc6UrHjVRmn&qINRxfu$y)(B$v32-sP7<+q|b9GSxRaSKr zKTu6Lbf$mP7J3OpDu1D_$GGvc#RkYJn~sP%!+G=;&u?o_20B6j?6P5=UstDcODP#W ztE8XFlNXJh-RXB8p+_%@lSIqN$cAlR3A_2|hjEuAu#OLtKOnjGW+yGb)PBET*T8>x zJK`Op*HY;Jx-sLTn)r3c_U*&VM#JZQ@vg>u3bbh=I55Y|EfwT`;rsPKDZb}Ppo?mp z|9Xpb=7K@Ne(G%ntP%5%GS&bg|M~_8p|{U}c2|C~vMFUZCXd+MWZk_s<@O*A z`YzCb=<&8nZ3kq!E$ez66^Uej%#nXHfHS{SanW?-`4>mJ6E{r8`Cfq_hrb#=-t(c* z^E4R+CMMN?p3eN60iO@6WspjJ{;Y5HWOZ<py5gJ}4!9H*mN(kzRZSj$D_@K^PJ24BY>kUW`N3ebnFso8ng zp)j>QOx)a_w}}fiH=64GOo(JiFHG!!B=}sMlo)>ig#GQCLWeh5$?z!2i{YZ*=!O46 zS%cE?*#!iwpJYqF3*Yu?PmEgoo@@c>k3Lm@8_-doFW=7NSexnS$l;+IDpg5YoK32r zC3XVOpekNX&dACtRo@>(>($IzP}LkHc+>8N=f*v=hnKT&&~CgE9DAj5lM*5w_St^g z&44LS;Z=##^u2!JdFwY`geO3-T|8JL^fjuv@0 z?~4~_dpxD}KHq{Y-H^GjaJJWat$b1y&$k55v?bxhKE4!l=G2PNORxQWW0CJCnSIx! 
z)2K&rEhe6$ck$NwJOoZ}Vk&N?lB>TWUDyrhvY)pkoD~g|<_Au#oe~jdYv@V!%I zU@!==rHttnoNjK6oi!oJI7Z|g$VJ1<9D{?4^E)IQ;#ayHWt6s7(z^ z$JZW;MR-gFN=gaz<3;5~0-*W=oIpe*qvuZ_YvZF)(Grr96t#os#BRz2#9H_`B@sWg zs7(E3G zgY1DFDsm(btH_^ApTj@D`fodPa7U|C4b*k8%Ks_>#~b*j6RS*YtZwh<%`h$O4Viej z-KFRp*to#=F>j?P;cL&n>X~w`dZ`mCAc>Y@SYTL1ai5o6zJjapsoG}Zld^v44Ol}p zUw#IL{x03gmcnBey8mz82y$_8Sy)&EJw75Nh{@UnqEm)$WF)I@AK3xyZDrUFriq`A z)q5%z(Z(uziNPPK7%p`0q06ZY_7dhoh(_qGZ{)2zz_`-VhtbulxpN*zD(zahzg?`t zXgIU%n&bp3Gj^pscMB^f(K5Xfd~O^`rm{oyeM~bGFIR^@w_OaYd2asf`}r*nimw8) zuh(@;=W)4WO+39OjH~Y*_4-Jn|BIw`Ru;FMYsP7!?x&=TsHYNMmwv&~s+wwoD93ga zVEcKfv$XCW;d9eKG|R^!n^dlVf1a5K$1uO9nJ+lfBEw!KX&2ZkFm_X;!8G%B{s-k| zg}JuFWldfSz?e!w7F(9`sxKW4{R57jKccrW_3L7Ygp0WE*-h+gRy)G_3@H^`rLx_D z8Y$xN5U=h7e1EXRw=fehoRP`)cYF;uFXi`&uOiASb2wvP;a&$wbpWKKFWMR#i-d>x zzJ6PcZUY}I^mfb7O)ExKhL#}CzI=95=E#( zI_Z00mfg$}*B5;QLhN-%Zpf<&+GJeJtdiEwBN~P-8)u(}yU~TF{$3q#GRyclAQ>1R z9UZOacg}-Z@2U}JOc%krWpaNUN ziLV|*W5j;w`!jTO z&wkHyP8HKa&sWGV4fi5+L&=PLfm3#T933`NWp#170pY= z9v+L0OZ0RHRbItKJm4SNzEAj&7cb^%gSYC4@9rj%?2v^+go!q(3XZ7h$ zHuHm_n&r>oh)T$W3gBSZ_t43oz>A9)O{vMUo`t%IKS!2?J2Q$_Nh^}pH4H%;$k-p> zt-Rke`K2yl>M;4&@fshdJgsY`WS6sHly!rN=_pRA5Q%o!-qCTmv1Y&F|(uP7>^(vu8KGS)kCDM>wyeJ9%zYx>?y6 zDwtrfw}{#aXQ|&nV-V;+6xeddX{5``&Oc&rU~OH(!NyTsDr|U$mW6wD!85wp#Nrl)^Ldw}S|k@PEZ$88~%jSlov$H%EsE3V0# zMted@AYLs2>LCuNT`_J`)Rj6wzckk{0IPs@1fXLmE-}l%R` z${0;4v|jg0&{t;t{%hus(c_sy`Oo4nYJ~^v=k)m}E~}e|i;(;2ZI|C1+a3|^Pt~;V zai(F;OR^JQnfTVDQ48aVpL%JFrIHI!mh5>egv2GHJ~$WfrF>@AE3^(BoL+lrytC`c zDFL;G)~FhBJ0Of`LUj*ycCySZEELUr=$tDV1L!OfPEH*Y9fS33?L!7;W=>;RkgPvB zqUepk)Y9Z+(IW#Rqx!J1$OVElv@DwuZ7nX)$6mzxXQ=tp%HiQ*Auykj(gbm{Cz??d zUea!UI0bA;E9JbW=W_E}Lw#VeDu^7@fUW$4YNA{r+Fm8VPkkcN(N9}fUuK;Dqqzn8 zyck~ajTcEI*!ffr-Da1RKw*>QJIf} zr#BaE4Kvh2V1iY3J=uo2)I_45o!|s@W7O5VORs~W^$xjvW@WErd^-9Ae}xsfm{qTE z;uawTtL^NXK6z=1XmHhDv z4D?9~Q|fr6Cx)_ZGzvLC%uMJX|H^^OJ=rqm(~c9PmD!#(ucJ%5om6=JouXaHMZ;SW z8$FM!44#Jf;7>aoV74O)cp6@Ya5T%tr~eT7Yw4+^=Xpt|hp$a;;D|Ffo1`qKPgoXt zHzoAazsC(=MLdSwLs5J0u5bNc$?Axx=;@uK()>BUy1LG1W?Tf=TK@h$wTZ|oD45wMAhRgs+R91w zu@COg%}`A1KqJL2wHQE``~xzL?w#H4l{`Ixp-lF;2RWooK5Yp1TCosUSTpmqYz8?v_#r&IlBx)Z~vt?_vxFIc! zWTrzeQxF<|Jt9*7$}>iWZ%JTz{oM>oN4r%o0RJI&cTFwxXCqmJ)r(>W@n{CYYFGKY z1Re6{wX|rQ#^C~~`IilpZx$Bfv=9j-a)(ZF5^RyDh4We9?G0wZD}j% zuN1%vIy0mf1pulegsSSS6xwJaIP*#tzhCj-=_UODOav7by}#6mI+dK?y=qG;eS`iZ zfq*mxy1sT}C&^GpoAjY4MGM-5ggL0KxEOht9hSZj_+UR!qcP8qj`E+MKhJ5wT7DuM$TO;1%-*!yEB5;D!IFomy% zWlP__6{v`bMPb-f1~gatnY91oD^YHYDw6{hV*C5?het+O+tFdo9g6YgvFYfR(&CI| zvk}7zX2LcM-X|7{y8lOW!;;bYJD_d;YHjTeMT)dvEyN0*yy!nBu@&y8;s;pfajfo4} zAgskI#NDyq4u}~Ap!gb@9_tE#H@5$cdz#DRl@M08sp62ZH&3JN4q(a{8MU0r@o zw^niB{n6c#g?Qdi2=1BZ#{_`39tLxjd%e|Wg?DCpMc!$NbQJ0-i=STrfYZu z508$>D%}2IaMQ~xD`qRebh(W)`qOigye^T>T0&xYeGY~=|VYVjAR@#Vuh^KugRT1^Qx=U0n51A z^AOJA>FMf6)YIb(x70aKgoaaVL&HJfm9ai2G3ot-MXjuy0Oz@4A_tGtI_^k%b3Xo- zKc^^rFN}<*z#H~_+Df7SGV8kjFAfn5n9m8F=Pu<{E0N?~ZQ2zjR z?Oh*9M*v{+0wewgYaRReUAv5gpb#*5-d^4i9*Uq)2H2?nWq|)#D7Mq8t2hSUlKcEg zOoqd9`vpOhu-PWvFg5jRUB!Zd?@Xg3JTQCD^cfjGo@k+Bxq&&-KiEe)wqdP#7+(x( zDQWeguDLkI7l6@^Ftf+DfE?KDEKzMIIPG#oPXE{YsrId{EwyD7_#^luMBLdn!|~O! 
z#wu-z|I^fyet>Ad^*}*FfMl~hY`wi;wti2{)Xi6aRwV!grR)1T5lJrsg7!yZhaGHg zw$d1Xg-4K#_uvUaMg+GAuwh6dMMudi%G+@PDN`GB>GfE54bBe|N57e`}} zSCLOl>(8X3b?*t$$DW)|rNwmI#^fY}I13eZ(?dEjfJl^w^goJ8RP|jK(mZM9sDsxQ zRHrI?IO0+u)QZBT-0EetzH=$$lb@K;YgK?aOnMF7q@Y2&54STFvzi z#5#LP{8(B0QE%D!UH#F|JE0#i-6#O>1iUMBv{IhR>983iW#twA=bjFYfqg4~>-l5Y zHMkLsT_JHV>QSz$0IGmkQp6{u=)aj4@=6fMhuu9hf~tT@cEc+0fg+*l{x>{UuaRzJ zXI^Q}F6a+#^bwUp|C4Zpy2B%UPnelbCqgzmwpI-KR3tE@`$ni+xeG6^VKM;!Lg?(Xszg$$jqx@=R6@D2-dO}EZBNL7i)zSv%g8k~6TUFjIsv14{ zf%$(iP@F?EL;L=3kvnbx9s}3Lg>yxH{~}cIR}T+bB3@-l0&Y7R$#TUF2IlMZ2SAD9 z;4c(gK2BWMw{fwF%!z~L$3ig#!kpNWni5fvPKu#98k`1MJFLG=HfpAUp_XJ3#4 z%IFX;FE6b~n6b!nYUjU7ODlWb#E{sl7S6hVezBTUk6y(F;IqylAs))I#ykfN$S+dO zu0}-b*j&&&ZTbzMY_VIBzy)7cb5gB<{t;6Fpek8~3A(;MJ6P)E%`GCTAt|Y}f%{+# z8hsWiUe{88N5^WQcPCe!M|}=f?hh9QaB@BpWhm;LS7O7zgp;0NA2qIYdz-F)q{G3@ zoe@F=Mf|_0*vrN)Am-7;+rK*AnYq&&B)*M`LlE{<9_k0vv$WLX+}NyqdTb9kbgZpG zRbimR`i}wG9^hl4s0VSGwPOaM!rRoPre&xUq6Q<|$mnV5Md=%Gh1{4p2VeZ-ErGb( z%f`>GkfMbLyn-J&td6Se&!6`L5eW;1mX_8kV#uzO4y#mHcB99n zLLnNALP)TMMJQ`^bW}X2)XGEL*w&_lls7U$-ya8LYHSB3fX#0LMqPFFXef428;p)3 z}JwT*^Sqgf; z_e99z2Q+*D;oiST+6G~R?ek|KvbJJ!vvfwsC;&vivx_GXHY@F2eSErrz)Ds5l{*R0 z2qgULjdJ)o07V}Pn6d&#V`=&#u*t&DF9{4p1%51A(9MQrVWa?Jd_d~t))Sze-)3T| zeC{oc%IXJr28?#?~EE$sk1UTp=O>yviD5A=OD?!&{)2Ih46 zUR#3$5~XNeMWGcyE^toBq6<(jB5PpAmyP9 zP20F@hi1=?>+N>L9W3Q8*0xB}Gr-tXig~>!QM@eyLYB2=ybi^CE-rVrk)0hZFZ%4| zJ2*mq-*#Jr zWwN~#V#OkXxP%MzUqQNukt+bINR-=0#B6FA7mKW`pJ4U__jXbXFBG6;F3TQsm@>s# zOMho6th4V22z?)RUFH4S75>|_4gEG!=P9YFt>gIDr2poKftP}Qc4YBzGA3l8me!gC zXGEky#w@i{F)`}vIbdPbVFodoQnXS4;*6Gdh-uotb?@9)P9Y@4;^U5sc6c#0;fDKc z6gwwj-vHb>C!?^s`n>8I0`x?eaW4`LU94NdyYHUWJ1in1d@J!yX=+UIGTD2<_y5#? zRohWZy$G34XqEvpMatpAMj+V>u{4)7RLxumVf{v0C!43u+@&PbV`Y}DZA?v5yDciA zi>F2iBA~4ulhZF$(j{cB$f@1FH|5sNhw<1i&_}TLeQjj*7&zZ9fGK#idt}qtPD)%@ z5md7`V4BW}f_c7kxqjeeXD^JGq0H|CU~co?qi<$c*Z8J`b;V+Q03R5u6-j^)JF&tj zVE4opkK!g1vey1!dRCy{GGwi1wz*%o?5s%?ga8w#K1jB5e+TX&oQz6><)u~xqQv;EHLLK!K zOY^(od$EjYE<-q|lCa%fwS<`Dpz|+g=y$09$+AG?;kC8j>K@-YsRZTbdT*y?Kp9OK z4E9KieEPfD`3-T|4A4dcsg$v)&+-CyQP5&ej#52d?Z4EqUScy;_W=!qG880TDF?I6 zCRPRuwO9-)yTwDHt1Rl9mrFlVRRn1p&_k2_UmSxQy$3)FS^%)eq`6XCq-E&~#w+Uh z!+}M%(pInuA2bAbC-17jmRPf3_vtSd6mZoIX!Ew4JbndA8}H7$hOjW&d{}gAX0mu; zkM`QyTr9FDs8ak@o50XdqzbdMs=upv0JJPbDqJ-8`WW&m^i!PE7qk42FRM zn&=7Vq{RXL@HMj# zJ)wgO%MWN@66R1Ae>*^pBnSV6OEu6`9Y&|osEmS_{&91oy4$J_bU4Em_Q=Z00@G-X zYze<|7{ie{Z#*njRe`N6ia3qkLr2L^u)WA?8btsRA3&O;7YP4IOr(MiuC(4DoYS>r zKTg!yI27ckmLZxz2`K^nG$INR*lBAE{MXcb+QE*=^EPBvtB`#Y8!M{a^-n&Ka+aZ`PU`&|=!$*)j<)j6`#055lhI`*T z;m-G<+4;)R5P+mxMR?`m%cMZ&^|b$ftevkS{JcELvJtFNgK+j&sd+}ONwd}2W8qU2 zs>i0LKY*IDIQpQhgkKg9&(exO4nUpwe!z#LC*LAGdf6@GHZY^iwnJ|gbZF^G@XZ4~ zwWJlRtpqR38V7)GMWc`i4A$7AFMnin+Km<~y23A%YgFoNBIua6j7nK>FH8etS=Q5E z|C5S*{HPpFH>Sd|vgblGvI5a3;6?=^GY|?3dw_OG{KnG}TYgFXmctQIK*Zr8z{XaN zV_02X|8#^HzYyB6_gfPl*~|Z@R%CNfJB%Y7?yL$o1aIlre>uj%*_jE7!$r3MKfg7Q zWCU()Y=i__-v+m1S2&AXKv)eXE zc{^quq|Kukt&lKaiBlFRe7+&F>$kX(1LQ6c;%_LxXk$R8^ne(q|IF`U(R+P$70EBM zNCavH?k+n={RV>o`lI`;ZVX0-Bf&2YQE{Gz5&_s>AMm7Pv95i|>ix;zBlmQ7Lzlfx zumbngP4=3pxv8=bl)_nMS~7575@-Ze@>h293JA0@vqco017#K}$k!kG-c|HF=?P9t zb3KW$`_Q1QMj>c+p143qo;V*c$u^aOKK?#Fo@wjzB{7AbF90NWbN@jJn2Ze+W)QeU zQo}-N(25gd0-S%KR@R-E*?*F?ZYcDkINTi3MVZoDTC9Lx`A+m!xYP|jH8uHxXigd- z3P7c3RU&NgM#{-qXKPp3Rqy8@)j%2)0BDb-<>i;Ae&h#Ks?6T?bt%y?Zt{M`PLWu& z7ZtdR_$R6-d(Xn}Rep{w;sA4$lp&06MHYxsDkh6i8WrU-ynu*B39778Xg~B!yP}7F zBbk}0vGK(R;Hhk%UxG>2E(V~xIDpWnV;C+}jS#|T+FSHj03w+e($S315W^OQIEWnf z5*bPlR9vQB>L&yK)ORJGD6Q5 zwdA5gwqoYTdu51N6gp8U)FWbSvH69n#$`-62Sabhk)@ 
zv~&qbN`rLQw|L)kzVlP$+B|zbHP;+-+#^fxqg384+g5ngmewZbYWUr)1~{S)#epTc?4>NIX%UmgP)rZ?yEn5a&g~a; z2`)WYI6nsi+((r}+-pDE$UNfy(uL)xRdVls@7N;yOe4l^1WtHrAZ#@2aj;{Teo#Ml*ro#!S*-I88m=0f37U}Iz7Vm2Wm zrI*p?Y8mp-krQ_*_0|&$`*)&C$AemDgFOTbTbzfInD00sF)b}O$uqDzd$sxRwfg`8 zR5|1$B2Arvl9rlU;rhKMnMY=d-osp9CNCDlie!nK&rEY9e9r2tqPq_J6p>9Vf{3`2 zyY~0ycBei>w;gTjLP%@^0_`kp!&x5$V)>5W+_rhyeT}L&q1t-!OQR^Q#N$+}=RD!W z6ykuwe?yBt@mat#(xIxZIuOk)D5W6+?1j$_ap`2r9cGh1xAk(**y@nHAhJF`2bQf| zjrOEu!;-&)J1%pGPOAR@}d2b#|ojRXr@y)?33Ke*d6r=vXQG zG`-Paa3}WyYBdAzeEaw)?$gh!4X-njF#->hn{@f?er@KlR6m4{p4%wS%s#L3U1dM@ z{0E+mG%~$us!M|JxA>?+uR?2koVz{GE%-rYEPr$35!^s^)ce}S*;(1zdV33rQz0d| zv<*pwe086Offs-vUZxl z6Cc0VTKf64)90OgCwWE>sM?-F8UGv;gaM{Fb|%MAyo2SE`6T|V!tJ|rx+gc=2b;qO zykBr^c(z;cZl~Z=hAM?v99`=0w-ZF_;Bj4&Iq5ag4UF$PtQPwqd>g6m4hjPtACs$JA~m={4uoO!WN zsXM*wu}b-vHl|aQ!N&(ko?CAZ=~JDyHj7nXp55K`Hwtvf`^o6@rRSsG3#*A_xo1VR zM<0Q@=L4LN|2OB>- z_QUj9roI&=M%G&|WIuk?I9`-j+`^0#zrX`8{3T3^HA{lM@@hPjz(icYYKeYG`Ndq zAesHRs}r~hUq?{HvfxTo#|T#oDl4E^W4V zSo`^(-_CX^q^i#MIQ%!E?sAcH-Pr;TpGb?Pr#d)}{kG2h+|l{Hm)rxC1}sjp-^seY zdb=8f-mdR;N$QEg-IP%%7=i^m$Af$8^>P^F;oH^J2A*jAfpO7JqCD)a7vY`{`h>ZF zJFgGCAJN#uM9{*6A~PpEoXh!I?E3G%s|8-t@5(V&*szA#&*nC%F)W{-dPwVIyGA{R z#|%D=0L8L5@)O)F?$7?Z;jN<~ zmOQNoh5}fviaO+`8Zk#lHju_+5$HwF$uT`SnAktPN4LJ#isiMPp1xSpRv}HYg3>{S zaS3PGltbFZV-=!ybtw{t56k@$HCoBlwULoJth1mz!fN?r-fj$y=cO_2D>k`kF8Mxl zv`u$el;|oUk%du$0j4hhmvtAF-3$-x^Sn-{Ipq_k$yH)uSxCVmW=NVWI=YHN*}msY znI7tyn02wQO(wpRehV6o)48@ z4loz(8XwVry>AxZJA0{Y{lrvJ^k>}W$fxO&Z&^6Wr|d*#{ay6s{#f~Iv+;?mC1I|J zcV2~kCEuL1xFxl|6wb;W^0!?|{C+=G^!Lrzb)2UTw=bj}F5d%bzL&=-rO=k-J7u)) zJ1qEcX7UG_PwoVG5f)R>#+XS+mdvorvM=yj+46{>!`m5(ymGiX<+M>UUV@CT2#R52 z^O3P@VA@!&rIT~%+I^i-T|P-RUJ!$pd&y%U_%u`Iy-%0=ZEWl;>K53uW?(~ypS>=O z=ANFn(|0_py^)ufZ#G7=iaBSFAd+MFela`4eRu>khhv5z8w8$PY34g+WbBl`9fj3> zEjX3##uzk&?A=qojMy;FWqtjU*R!~h<@JTHFg> zOVE+FKc_t(Q4t^iwjnwMrmqS>qPYGZbcu0rvwUV_o0=KoQxo8kk&{!jW9jf|yO>o4 zy~JyVE+k4~u#2C0JF5eN>iV);h2;F#<9mjNO0C#+$X7sXJ%;QR5fR{FQ-HyAA|h^` z29`f0r*T1G=0j!6N;+y*ZD=T>MBZ9o4}5HHB|KI02-err5!s{(B%7tDN0hP2etAdQ zAj0Z%e-{RmU^~AUIid+~@*Z~!9bDQZ%7uQFYPW7HST&ls4;;U!qtFk9+dQ{3X#imX z7}rzyvf9V#MPx>omxZ$L2LJ)3CH9Vd1B;HAJLr^|V$9i$Z&B)LoK1TFguwYAU~xyo zmbfC;V~6aG5$jMmuD^={dD;^(SK6qYuLm2Vs$7#GeO`qx@nv;Xqs@rz1N zpPUY#=YJkC|5PH+=r66~2ePp}Z<0NfjK-d4;36)wq+#~T4qsWGK6$aAPmM@GtNwP3 ziXoI|*pl20$!--E^x9;;?HbPK$QQ@`EX1EG%QGHY{!_U>Wca*$7=P#cScmx7Lr>Xx z0!1Rq+8FSFBpVx{y*fDM2#ACk>D=GHv7VBw4~L#xGic}fM#rVzNz#e@>ttim%2@&U z0;gd=iMqPtsj4m5ao222mtcc#CM)~Tp>@s9F4zvB)H@Uz zNBYMHk>ZG~`!n%Wx9hOxj!N3<6&X>Ez?$^Z@UBz=04i=_fPF27Owi~%TkK;+LA}EM zcjTEnxeQ3&X=O2;zHB!T)^-*e@TlEU(O#A*TbE-q6Nex_z1ATl+)ENi4h7j!6@J@x zMZiR+q^moq$1IVpuBvLe!~-)-^u~0Pdg1zq({3oUEP#3COA;l0<3zYVrek7a1;Q=+ zUl62x_2V~e5`lG@wTe=n_WG>lAWrK3 zLk*Y7i9Oc(ap;Qb6}`bDwJ_vmt-{&)cQCFn8o>eCXpu(rN3g$V%m-iviSr42jAl@^ zXG>OSH$9l<{$_5ikmh5%dG0Vu(cB0`TmIA}n(Ria(<9+6jMXOwE!evu1)r6yJH#4e zXQLF~GYO&#)X-a4oXS{?kMZ9^{|?N(GpFN!e1y(aA3DxI9~3MtTz}znIix`3v%fuf z%YN2#{ySSoHR4lxjXXkQ>ymb(%v}2wLtBB-flu(x80LlUz4`5+3zaOF-_i1l-}^h% zv)j9B&aFy5t;^OAD?YzQwhxc0JdXI!*ZN*cjWbLQO;URFOYYvzptW!r^1T7Us8p{# zt6g`R8{rX|Fp6RRHj&(OGPgY*Me({3H^;`o^z4;UU8ZJdubg+rAhqzk?J9Cf=+3N0 ze1|bDLq=sU&|Kdtv%lXit4~$9g88>VJ50|&I9%05hE9c_t#5vw4vXTw3zGX6`X|dc=pR-DM;hE%YA) zl&waXIaNa$li&v8`&xdg+I%$ZN+E>(jpHL_!kH; z5rltw#g5lxK(zF=W|{_;Tu^pD`EGE8RETTxI9JV_R_#IfFIOGjm6TJ#2%fE7alRTG z*)x+sLNfXIb~dXD;1{1%b6vC~320su`mMwW`5obK8qD=fViC|%^TdhM9yLcVt9MAS zblk%~?ELa8JaT=5Y1z^7vEySMoUj3=bDs0S^!i_aQHnXAGfVgB<8(t*KHAbQ3-6Jq zOfB~VVNVW;3gO@fY6{E^-@*l@AZGh{C72E>K$x)Fe>pO>R`j*5C%=SHV?;a&tCiqN2ZaN7sx_x_~i@ 
zKCe`J=gN*H=j94V+euxMMR`F2!zo+IYnv~X=U$df6tQwi=8B3gxHGt>il(|GZAY_| z3`0>xKLP~yzGxy}%Y4woqaJluKAXp)PPSaxEKoFbfx_L!pUQMoQSd;{;4Pn=H$CKM zXbV^IYYldsnksH-Oj=gW6t;7VQPvb?RyD`^`Eu`QFKjq}k@5aX56ZN!N_i!D_&j}r zGcV8aoo>8T*|JIt1LF0@HMomD-%>5&))ZCk{wU-(OrrD0_^vFI>ns12aGb-(PqUAoKc^&bA`)vGr=yOH zD8lQI$%wcI3A8u43CW5jCb$bQz#3M)di6>zLNgpfcA_(%8UQHHMwe+7k`2-L{;hwG*Vvn4p;)W|5b`W^NU&#%8D(-UoT%U?{) zWm{jKVH)wd9pL!vs8HY*r5~N$d3}nFySc>QMd+Bxw>=eBa!DfflZR6^eFdFDLyYdy zFfQ26Mcg?Tjv)uelXg0-hRpRgre1D|71y#=C)P*b2)ESySsr%q?xL@li_O5rGqm%M zjhmQIY?#CIvXx!ye)+u44^Ts;AtBUXtGl=Nh8RE)o$W1Q08I05Wbf`rm@+b(!uOlF z_KhrvcL$`||E^)#Ejdu<3*)BqT1y&3>_qBlc}` z#irsSiWFmrzjyA$WXJ$Y=_(%#`#QjW% zQuFew*ai7LqS4Wu2Z`a(UGbVitU!)wHWb9A@S0?Ax{3j*?hcCl7A&UFK;h`d5@J=qz<}-wfbz4?7?~8-g0$kd&BMLHrXHJysE@h%Rmy48y@7J@oXZK>rtuj5P-MNHg>8X?k(o zyMsgt6FftfU-D_<|H#apOYz@_xcT`-QBbtee1mEl8|CuzH~tw{TEgE0wdz}lQ>Pu# zv%x>3Ylt@ErDLb>VM&SQ#%S7eojgSFhjV!hN)9957UTU!v^R#bn_-oIEXIp9N)6tt zs#wUNiA%EEyLcea+BVgE4MJI92BCrvUA?^p4%<;+2)XQjXAF|#M@+PFRpRdIlQ~yp z^u%#X>{o3=LrWV8_;$s)xf1k}$dRWrbvsTVe50hUPV%B=C-LyszRFFBJk@g6P@>ST zM`UkoyzkCQdz_KJ61M&A$hAzz^^AzSeq~@rum4AX%`!jFVD<>;X0cjIunO~Ue#zLg znGo0RSS$2%Jk#SIYw=oUzIP*2dVt2~-rb^s+KieqV`HBxX;iynS5i_crumQ>$I0!c zU|tj|@u`10m~8X#P1ViAk=$zgf(_yMw#P$5*-CL6)|k;`7_*Gr&JmX_v3)WcDt>H> z6auMpBAVOAH-|1E)TTF&0pDAl=Fmi_>n!BUo*dG$vc3YMwTn(A_8Z?ULUxGdrRNPY zn=m5mA6#WjqwZu)6@F7S+XqEi*Of?R;NnN*ceIi@H^9xXw^`o6W=z)j)=euHO84fC z89H>JfvK%+3_LtMKxciB^)-C!-(mu9P@%0fW`theX$qjvCcsb z%RXgje2w%@EyIo`8pIk@wQh5>tz_d(h0bJ6C1vH3f&#HVEJH~lDCo2we9Ykai}znm za;HllfM0*UP*zcH78)2FRD=nL@9%bZ)ZFolL=PU{lLOg-~uK74r1eRI@Ie-4V-uTdk zc8)ObJ3X&K>p^`$WKjpGmJ}6kg$Gd2Fb2@l4m^z5kzSoA2php#oF8N=rY*eK^BsO> zP5nbpn1FfbgAF=^X8u9}@`(5?7&R^Q7O7)G^WZNprE65dkw9X=>}GCxITMxHk$)8m zq?{w9Rl%4RRw&MY)!z#i5gkwr7|>?Pw@M>`V#l2`*7e%jR!%*tw?AFmhHbitejNGT z{BVpfK|e7{x3B*R>Ea?N&64u}C>YWjuTH(}GQo?Ae+4uWWpT4`;RBN>`vlE@G*Mud z&qLs)03Ck+Y6FIM$4I%*+1I1@96>jBFqnsBEOm8tG013;u=2`Eg`fkyAfI3MR_a<> zCSVY-i{{It2$TS(%vv`N-LWMyNuh$F&+@a2UPG8aJ6k*F*Ef$%3%Mq|59$I)2?I&$kLb^Q8}X^Y?{G3{-HJonRb;Nw+L57ErkG(6|`3IO*}ng_V&x7Q^$>x zIqyv|V0qKvijHJt4DOkKf!#*=AQNe9=o`J(_wsxv!I;=Y!F3~)fT9(KAqHd!jS#3Z ze*<_##Orf)UyTk?)*8KjWLEKhgI5Q^@O(kSxQpr{aKx|I?Qn4Z5WLOp*Jtf4`sNu} z|3bMLTsHplPkt!nbAhXPJ}9U$UovU4xsZm2MwN{ zhv;L_39l*}3RWj_%kaQ<$(ex=^ohP4cQ6k9D~cWbW$1xyFpyPR%AlTtUh}DADL`J? 
ztgFGJV}{5m$I#n2GMl~y$hbq&Bc_Zt9Wa4iQts{L*IZEm!go>P*)Rb)*AcCOL(5XI%+(b(y6|=T1*qj z!4x1#1T*>K<>Ch5I9NgNAb->_MLS!#4PRxc6B5?Y)GVcW+^QqTGsPQV zKtc+q_$5I^ADsWcRrS$;g8?_%thI8^i&^U@c}7=<3FIV<>f86z@RK64to00h)l&Ri zTymV8x!F1AmuSVREFV!3;R4`&%5n^RwL(yN1G;aKz`|C4F0ZIy3_--9LR1JdF*g@a zR)S?b^YHk#P~d!&`4%Nm@Rc>-Cl{A9)%yw`R1e{)5%hZ64AI|;{J@x*RkQImf2cRY z$7jxYA|fGBM_yZB7op<1MVjuo8z#Tzd;jmdnIeqx^=&oUb!Sc0r`kfiTm&eXFM#6--!;Y%ItzvLPDXfnl($9fH(}N#_slxL|1D zYd34)VuSfHZ~%g0O%&D4KHYNc+ZI3})RM}Rkq9nZ^dgGvyduiXxd{n@sS`kc?X)`O ziAn(H>$J*PP*6a(WrBBKSyDp4EW>CW8W^bK0_#zZ^?nNx;Xj8egc&IOgM-2e`i|qL z+z%w(AitpIQPR0NAFX%iZ9Z~B*W&v9vIvuhTioky#1w=u*uGTUzcmJXOHmO1Na|#y zw=OGqQ3N5uZE?orz+6YWb_Ww6V8m+M@=!jF@PCnO1|6BmJKL)ZLYU2;yPYc`B1YgP z0%|8qkSq_Ha>?kJa=#85eHt1JiR5^`7TT`V*)ThyEjf0rNM=gJYfKrXVE+9H@&2;g zIpIt*dwz#K#OAixu296YYo9(x=fpSs6n%fW*_bVsi6*XhqUM{`T_RoXtsCIL|WhCQg8z%p-dSgMlue*$x0S4d`^D`nuoOf4X}8M zw%y~rg*)T!u6-fEy_&lCH;IP(re^9gM4oZh>ue!OGl#tQGV}br-ezpghsMmic;Uvk zk>%d@`u&&e>>{|2NF3!Wq)`A)%@;Vi%mxz%5UAa(vfGZw+7q<*WleVON9oQyF)L(3_s8I&#Z9gfGX4l z#^Wd4Lo|IVPk`^p{ska1b)CdN!$dOKZEQ$N$+rBtMh}&t`cU7bwhF~|Lr<^pvJv##3F@JoYPWiK=9{hd1BBXo zO%RGNH;wK^o=_sO&~~ih;ja((PdESRRo7pJ)c)}Y6e>oX2R)f&^MnL>kNp+ zwID*6K8J)IAoVQgdgFz-A>gaDNGwcrsHt?W^!=2=SS^$n*A2u4JXpnPRf8C68gQ`y z^M^9Ch)`eW$-?6XR^?CsQwd+~{$?R4k7&0=A9H*@()JPP8&p%yl4to{x!IhmvI-TElf_;&6Q_CK>`Ce||N>nwCy zM=-iDPhN(RVAODvr5BW89+n|^c=*W*p#fNBP^sRA4e*R=?sza~w*IGi4GoDg#3ja6 zlLv5x;|4};SNKfj^FNTfg#*J;l29;GYP!R0Y*RIYjdg$2bmiw8-~D~98OGo8&MPT4 z+<>;=kXDWjCFzUB@MDZHm8zL=OYR>HEuB1Z*-xFcG-GiTY;2PpJe0{wN(mRCaFX|T zPWwJN)))~ELeD#%=cj5*4K~Jo2#O!2YVkaJb!}8WT*abLxxo)TBra)2D{)Qz=ni2} zQ%UgzQX$oxe)1aSg2FCG>tgpdTze$@YE7NW3ME3JfY_}UnCw(;U}UPJ9%jP$)wG!v z0x6ei>@js@5TgaGqw0;Kh#@vA8a_mt#Ln86+P`J2=@Z|_X&Nv$%5rIF(PXFC>b`le zHiXWUA;g8@Dghb%Ur{Ja4%KDTnv>2aC;L;$rGdBqt45cWX25Zs2x#!u7x=7`&XQpG zCnRDgCx~{))vDI6u81J=dDiQtdH=-^2o8S0p@XHsF^GjQiP5wvC*?GJyT8xy;pgJO zUgxt=jFs`v*;lW?5;KXY*VlMz5}m25 ztJhaoCXvoB2RM*ycu7fNQBzxGe}Wd9*X{;;AF!1cL(D@AM-4|Byy40 zFx0f=V&ixd?I_5nXa#aTT5!H`goGyAQ5b|<+j!F&YU=tVv)B%P-(Vw%u9u1{EBO@C z;Ivf@Z^GN5jhlTd{MLD+ehsMqK2>fyo{wa%4fH}&`riREH$w;0NjAdJ>f7hJ-CxJY zq2~;EDGvnlRNNrNoO^eSq`iG z4YWx50!uX)tbwp^+A7{9c(SumV!}B6OzYH?mFM3(UM^ z)e2o%cUn-AV6$BBuaXY85&>lL3=-)vKA^mz%5`+k2W*&AAq_A=l1p-PUHdoi$~n&+ z9Chh=N35M{pf-wYcN8|h)>OZsM(C5ZVfw+}r}FphHsJGT|F7QrNF{|y3fZaxuyX)I zLz3BcAl5fF&crlEzf>0OocihRLLrajuaJ^Dw?GREgXp7>x)Xt=s~Ko*o#7#t3FdSy zcp;@E$e47ZtN|}od4lh1Wm4f zuEQzft67IC95QVwG^g?)BPxms9dcG2(mvWgUJ6gh|0*Z3W630{#ffC}e6TUI?j>dd zvsP6%UTbB&3_@SZOau@U7aDA7KKgDeM_?kxpA-i6HZR{#2Y;ZGInJPy4Yq(zq}K@P zk{5Px$UUW~o$GYiKR6&J5jYqns3V80Wu_JkmRN2Qy9j359Kl|XDQ(1!6+!pq03cb5 za3~!!r#M-xxAqWvlwfGipK$@~m!=X}NmqTTCJbk$G7-MrB~jx4g^^J}AZ8d|0X>@# z>GBviQa%nWc(hAA@-Fn|e<62U%IP9T;H@S7@9Yg)C8|z7$T$v+QyQu4-oI*osH1S<4y?H-v|;4uK(WDn8BR}O0jK&v~nB_ zP)s`W(q(Z9FAUX?IxuWyh277z@ZBkTyLrCe%0<%3T7}7nE#d_JG+>ZFxAw+wB8UaG zyxo@Bwxzp?3eS5cdU~aZ$={Oz4b#X>+-ss5CTGAo@oQIc@!ge(rMarrPPV6hOm$7J z-?u?HW~X?K$+oTtEQ?f=fcA&R^@1ASph zz@`Llfw9T*o-%%T^_Xh-?S!_rhXoVnxiBO-FE4ykGqX~t38aI?z9C4zIgDDJw)VSu z#@kdAk?Ii5uhuw$fiSGoqH3^inSzq5HMFIY=bu9WAV^OnF0sBt55jA)<9` zJUn03zB)QKp1RoDW=Tl)&zmkUO-qD~ICnlFzL!^5j*bFb3wLdPMEC`KU!o#;5)u;ij>HXe?2fP6chP?r z)EVl0c0C4c^ueb6Kmd2+#pLQr`@#6)9OS!?ZpWzIw&z{9$;dbPkPAt`tpoPSYk#&* zdaP?3GoNmgL{NNJ{X9Xj@%0{&2HNJ{GV)%9-yJeMv5|;y4?)Lx%<9u?#z?ooFo5*z z@O$6DS?~j?UOfpF*fah*EaKlH4p3%XhA=A(2)3rfc(<6C71o*H{b6qua4^L?rw9pY z_58g!RW-D@5>K~BBNtj@yD37*T9_ZJt34GPpsS}hyWUzVAUep+&z|3CMe#H9@Ha>P z1JR%STU2(ZJy`a$*SNX?BvnHM5A4e*o8-6fbOz&GCsz|O<7RQB=YaJZnnbLwq*OkM z7J|5bf8tq0PtW5}=X-3D;-Fdt*}w<=!uapME53i195i_qlqqxC 
z*nDU`uX<;tZ2H4azii~Pp(W@tx$C2T;+sW$Ke-pQ7p{lHXmvi}|GZbL5hoB{Zhv?q z6dKwUL1khnQrps!r}hG3unrBCJauCv;{O5Fje7K_T`#hjQJ(MmC+w{~geyMZvg7^U zS}q4jrKT?XP=`HWJxy<$2COp$1VI^3 z{*g$)b-3Hu0@HX~y<)Qc^j!wMtYp5b!Y_@{Cfmow01&3`0UW zt&Krb?SOKP?~a#fF^%39<;@_7Zkf zWom9&=tB}x*&SR~+EY~ZoNi&6uyW7=ZnX)pS>oL+A_vSL^kUOfQ zy_9cWNTZ&+*t32qeBiJ1S;g)^83J9nmul$&j$(p(vp1TyX&bM!^pNOP7s4__C3J zbb(&kG^(oj|8y<^K%grtEsdwa2u}WD4H14D*c;z@)@M;yBQ8$RqH1p~ZNZA56G{2c z&_*nF^uLCK#=NZTx9n0$HBiM>d)dsj!Rq0UVczEe`;LMwEj3l8YI_YRKjX@1V zk*p@+O)sgZJ}h-eNjd8sMcYw8AeJ^t?s&uH2ZbDlA8@z$be!XJ7CvY+m(tRaDupxR zbnl!)5N@fw^W-CIUZI6PmRiuNZj&s(4zq}Y@YRvMrXl=B)S(<8{Uy+e;GrrYX=T$EA8VJQn1d5(8`^)bbXWauJul1xs9ZKFaz*&(BO0rR69wJ(=3| zN+Y_j=wrhIzr6r22S+ImaaRVl5hZG96|FoeyHRkPn7V+i9ns>^1*9IA4q_kssM55s zpv3S+d+pqd*osO2CS_G1A ziJ__yFx}o3S!#aOwjdXn^v9k2bh*e_HWZyY0AEzm=|LBhgctn&8K&^O!xO`33{urm zP*W=xpn`%}1sA!M3!nhZLP|VI!HpA0pciy?kP;S#tgP&Q#T`*qYf^_Q&qYb#RCzWfHCgKihsHNTYt~ z@`@Jyhlp+v3o5a-wmMl>Oq(0@{g#3B%X?=ekpfH=%u_IRI?#*-bmW`cD?N@%_kWJi z|3%E%CK1toQcutOJV4NhkVZBYY_KQSz@p4RbC5# z(D93Xt6{fq{b*h!X+wsz6-qOinwCagZDO+fzb2JR` zEw!JLg){gl`gf5h0xO9pXnt7>XsIS1aDFZCO+m_7$Ss$z4|fZETk}b&S0yGjd#m6s zC*T4>?YIJ)?_>Jl@Gugi^c!V@kQZ>FB}?-ocFac?t+IJy8aBmz%R}bMH<+lid}}Y0 z$dCE4KS*()#TOIfz%u@uN!W}BOA&Q*K>Jtu5pHtQT@G)p6}hky!!g7E1?Q5YI z+Zq^JJY}Mhk)_M9Q7CBkRy>hn zXXrqXBA&5;{S!<~s$iSNEL=c`m_@KV3P zae2){{XteLFu*g^wUd8uQuf`kp3|Z~c52-9XP#h5I#EX_CtEgpwGR2aEafd}X}U8` zwKU_gb^xKu_tk&DFqgV`DPCH^8_?Q$c7MtFb>RhVRgDew;pdfhvw1m(qC&HIkg7z0 ztt$tD<|N4yQu=K4ovb#qjkjVr$mJE(p(ZP7eru5kWf@t2FjM<<+7zFiUB>SaWMze} z?CP3Q8f11mA%~BTuSzOoVPmgSMs%Gqu@LpBeKE-Ii!!;TuQp%}Vw)C(l$2^ymOD*j zTEbgXv-MRx0_yYpguGBW3imeV>W42kb+)Q$I_utZpAxya$_(en$2+)G?0r=`sk+@~RNavHGS=!5#h-ZY}Mm0tA-~aAYeCY?EDkC*sfL&h9l30mjp?=G^T^? zWgw6oAK}h5w*9=!jkaQC;y69-U7_C_+YRZLUq4fUAS%RL;yC!fcD*LBfe`%j5;|#+ zL}h!raG*@_!WauG1d(QUbCX&{SGS}wf<(|kEE77pOm->veOEA1hnrOT3t@-(>FGjm zn66j^8bgc(ihHAvfAV5oMonXoNO4?Kg}yWsjsOSu9X)013B1x^fi97rM~;<@9Q69 z==Ch^<^HX%6m=FhnAm=3uZF*Ed|EDoe~g}oMvA0MLKRvD+lMS|y{+ww1EvmC;F3Hq zalIhCK^G5t3dS2dy;B(orAW}MH~&4r1gz;Gp@Gd=c?)PRHCY`{Jt5du8}Tx%s-rSq>aaH@5Rwa zTdSfzss<@bk6~s>5JZuR=N}pQemFr2pLHrwz97^(fkg~nS}V=X^@5I!eTf+-9)3Yo zT%6u(5%_bd{;Ow;ln3#j2-p}YRwC(7i1|ImvAvDyGv^gl!2fn`le?io|3?AOvas|*{W8Tjw2YQg%? zJQ)-w@#-p)Auxr+24llQMJ38Z)DUP6{L=z&VhC-RnsTl?1zfLJG-sF7c&2|%NhcOjSg+n;MZlufEn?jHgHaRnsA2Sn@*H~X4X3OKD z(uOT34dBdPot}sSCV9W@;FEmQ2nhNz$;IA)r3FXdP3W#fMwVTWN4cG7M2VZK&?EnX z#sGA2SroKDtSfyaJH$v$P0L1?gO8(Bt$nR?n$HE6cBbBPrArBxR&ebcuRKb!j0G_O z0mt|l6?(YcLgLD+u9ztsmSXZBzh)rD>?)!(cpz-ANU~`teqxPAN&&bZm?TOF^%jV} zoc_RJ*XkhNFkCW7?+K_&fa;>6Miz>$u3%PJ9)WT@v0trKUd$AEtL$rl8c6kJlY@$a z@IQ>0Mgx|o>2nzs?VwbliJ;_7pc6b_kU`Me#5ougEy4mT?{2ubl2qRt86|x5!sH6r zP`HjODW;43GBpwi;dCyh#^JznpbLzAr${_5+(g*?Kzapbu zAUQFUrlw|nd%KL^U6Wt|m84aahA=sBRu`T}acmNE<8?N^B8Fj>6hfJJ4iTUTY_?xx z1_My?ZY*A^1pQCC-yDB8J8c~{tteuvpoxl7P5&EJl6AG>zF`CArM3VX;a!)ZQ8$2} z{05p-$HO5U$YmKFwQUH^fT3ynH@@1g-9d8`sG?`9tJOfV%Luc-Ipqbv4U$mQ7gZpg zYeoH2z=yrv7fs~6b|KyO+x3`KBS-@zmF57`1Q7p}m8bSR-c)rw7J`BArrGx97}<&T zNhP|YJIDaAER;r&OcIC1&Y*+U5g{5 zrmA0QJXW^0?7*JkeqsaQFyrmx*$zN!pIDgV1YfF53&>~oLLGoMs@pn1#tkO4xEJvs zZK_#8zYDOd++5f0Zz4g8Hk1p};V_`izX+kfrF{<92BsDZ7nhk!-X~y+Dem^UdO-8J zTU@CH=@a&QQJ1Db%yQq(G^_CW`Rh3g7(a92`uS%@Hu6x;&Kyq0Y64{+wt+9 z2)GM$bxBQa2B8l37aGi5fWuun4K0P`IoyWl)s{gc3wYDHbK@7J?c1Sv1n{k`(^M&b ziW7(AJoEmAS4M3ji>G>eT5Xyv7;U#;Eg~%viO2iAygZzs>si#DpqY=%%*=qPzjK#! 
zXiMSO@9gc(9B@AkU2H#%G-CztAOymcQ~p!o)*yI?bc*`bXN!QVP)Aq`qgm`bm8?u2 z1{feQHe00eUq3rulRAl~5<~spW7a>MAf~9inC@#-)V%A^?5|&<-@a)kWs?yV&WY^2 z#U+Da;h}kMo`=q_f4W==v;&YID6NZ#smHU5+pzj4aqWk|5V6=IeR^RfqxTdq7@;(N zfN4TIu&Mt;_1{f>FESO?Mtx(WRC2Nr64V;@BJAFNDLp6i3}6#wLQx~CRf7m=lWe~z zXz_t-N;~ZpTS&pfXLqW2F17QDw-Y%ST`5iAiR~iuCMGVLx|)A@dlB@HKpF=ICE2sj z9lSSC=f4XIaOJihqZG)*4G&9O`tRO}V?Y~f1)7+dL|};_@+dT4ulqRDDXRO6-QqD7 zbadE-MMR(&%OODh)W3SINkOvd4e2N{&bBZ)EMa=Ll{94pEg7j8xwG4SSyrx}MHB8{SmjR9o4w7pE zVm2cpQKDaU6}0B5c$jWyWlU^DTP%c}2L115Q2u ziQ!_1N}Y#^#7TSGO!7X=_uoSyhHMz?xkQM|$)#XYT_+|ah$JU}od!lSYwLZ9a&m}q zWw+#lR4WGmJpER8=C5Bogd_yYlBxYaepF&9(u=T}0`<)2`TYnh3;|nWwPZUChqygp zkK)?g+#2ufj02(zHx)9vVO5$|#_QOo^psh==vM+}K2fht2To6sFMyaevxY47;RBbE zF*+R}Oe>peXJRA|oP2wZkU9A#QV(qiSU4XVe>CS*x%p#ZVQitd`q2% zUEe-p8sCZHwa!P+kSUrZPJ1nmtR-temc96PSy<5e*6UQpWol?40#iCE@r!6vQbs|) z?E{@D4X^eIi58@w(KJoFu3Wpm+u{3cUmv^+(Ap$tYIcZch5&Ga`gO0eqGAB}4iiBd zfJFie6a~YooK?ihe;YC6C?_p9&*WVU`phy`h14-#)QaP@U`*!~6vsiJC- z*jrlfgGXMM*NHFMacfKkst&+#B`Nel-SBa8+AP7D`}~#!R2cBUYj1~gsb@x*{$BGo zD~F$-UmEz=8nK?Knv71%5r4Y7^E|U6(_6R;8>&EL3V6UWGBWP zJiKDIIBd*r)$_gk*W)JqMNyw8=Mm)U4iVP_7NPoRv$ei-Ww~ifJ==B@OI74$L*+%{ zn}yG_PnDIG1kXWJBI#@XCxN_RG<7BC>AL&%!%SISWw)PKqB@s|$;sI){?60&%#&P& zG4o;)xq+wU7rl#nkUKZ_L~dXo5gS{wlAb3`LJjPE6S7@C2QpEc!yo1zBh9yRJiOKK1$&^>x^WwP1XX*_8t`7@620f0d=~CEClnZCJCr zh_Q0Mvu%kC@LC=*7j($Jd;$s@^RZPHE>rDm8k(9Bf|HLrIvI>-W7E?(mr`;{M$r2Z zl~(lK&hOtk?Vul{vEN&Hd$+z5NPx-O-oBtXYzyCekSlh!*p)Tg;#@wYsfr&9*Yb6O z;JCftB*ULZm(zpcC+Qj{##W%4K|VdBV~;$FCv6v7-|*NKO6yz*mad+0WHd`VTln0L zH3azJTs#C9@9fhy2&U!BXN3*6XH@w~Km+# zTRHU*W$g9qW($3WC%_R`_4K`aS1@TX*M1*jw3NOXX8lJx#}$d^(oQe?EBfyg;iru| z$DLg5DUu$$#m~istns7MXTwSmcRZ5DrhCyT`Q4+cZ1%s_R;>HKNkZHXRRNvLt4fx_4BDV7QSzQF+WEt8dkHG-{&Q-Vw*RLiPGQ> zT(Tn$g;V+PadArpf`jzl4DKb0=CqHX{UQ1;ulhhDFEFiCaZ2gD@fqc2JY*zqj^Fm( zc)L3r$|uYLvfOxdyJF{N0?FS$k*+uBPPP6?*dy0A4z@71$CxGLE^cjAiN(-?DNCYY zCfnb$)j@5p~$p z4XPSUz=cw~1baKc4Zr(nYt9@gmp0m)c^z&nr{(TkTcajP`Qh3%} zkd?5ug|**yJ)xGWZKNN59I-7PL<-~)zRad`q6PS3HK=NVva&KRBO{{nrJhoeJ_`!1 zwUXB$-2~VK0;-XmrYDt79BPPa>*|VgSoLjmv4VwChcJ|Rj6!cvX@=UPiO#DY&7$;L z3$@Yi%Q{GuugoAGndpS}o}7Ze?{m>cuHa}vwT%y@ z-Y#{%=`=0ik=6e6e^i}yP!?#swk4!fBqW3vkPr!x?k-7bke2Rlkdp516r{Tw>29Qv z?(X~^+;h&%_t)->JIun1PyO!ux;8b=iDG{=Zm*(`$XgBw7swrrcj$%2*j{4ZUMG%> zIaci*aDBFW^>k>veq)1`Dr@|%;SZzK!H9d8*vO}I#oM4SG57uVoarqC*H>|MhAo1^ z^w-pG0=|yc-fjd9*#l97UtC81HcnMzt9UOc+W(+CZ;^hwSoNGOQik~yX&ejJ6S14H zP=F*MGlT63O&BmtV~U|#gpu_oe-T`eK>k{0@z_*ONlCX8!BtpMk$q^0wv4+1*&AD4 zs^UmpuPB8&i_|_+a8g%A$n7%uo17MvH13|vjtW4R>5Lsyxnl!|m4Qs-(!wntlpA`*gF}tUo zh!WC~f{`W!(qo2)aJ-|gWk^!yNlWn&zKqA$r+G!c?qzCnF5&34b+4a@OIMxp=R?S9 zE|;0ArMETGaCg_{GNZ()k60YG*W4l@C9q!adiR=3$FAremC2M09oVk}=^y*+!40S_ z(D~-3J|>zh$eHF(%ll~(fvV>-F!<}F^?ei(KK~fGgZn~w#V58Au#oISXs?Wf_!-y} zia$bavxzQKF6yo&mbxzlpa`L*K6qQ`K+&My&u<0u=BWeO3Jx|_=$ky$@KB3<1une? zd_9*zLFNl)g|jt^-@93#LSO8HA9SRMywCV{dACw43uFO?^N0XP8t-)I= z?&9+3FW%A*3}yBkawdbEpFQ2SgcskhRpRcH$mk^x#Vv=Gj*{82p!Z{X5<}XJ-B8pm z2OZsF-%dSVK5Q=E{1raSFJ{BOG&Gg8P^0@vvD;q?O6<4j^iO_Ye%zxU-4O2#7fd7r z0yyf8d~}|fDkQoZB70Fp|5Y#)dj;ncM%jbHJt9Y8$FjL zKi%RcD=a$Fb8Gr%&u;F_U1|P|Ki{Sz2@PAa4;h}^lJ^M8NSM=`WW-i2V`=9r4c0~%bpySrEv|r@&W}a{WsovB-RG47!NAS zb3>-KFPFB?@<;af%_KfoUip;Ox%j+&ynXR7a96o=GLd-R_@wha&CQbK!SU+b7)V;S zULJ+QGe35vCZ2n`dZM}G^=!SB%J#e%Jq#N`eK(zLGoSA2og}@>Bc_W@_0gLs{ca}y zG00=$A;>yU-Q|7+{hoeu;~=Zm<%~Y`Px8%zE3Zs#TpHscs2ewz+`7XOMw`N|Jk=8e zx~y74?(MC~n{czXKY-5XBk{RdeH0dQ`^R~2gg(c$KXgy({dO4R-PsOVvVhBRx$E{{ zbtG!uKqGyRv?lqps+G?f`>RMU%VbxhKCByEbkFr_Fb@;M5(sgH$5CRbdRord_=!>U zqCxVRG9P%=MnZ0Q9L34M|B+(XTeB z4S$<;@7wkv(1ohdiMBwCpr`I8<9~b zHtyR*rs}c^^>*B(ZSr+oU4W9VD@A^|G1{! 
z%GF?F!H$p)RP)_qw4GyM_|o>|vcfY)=y8SGWqAkgfm;-bO3yfoOP&Rre24B9(ouR$$(}mLmDr8decSPaF_lzSS2ut4VO;tliy`N>M-n_ElnsIh!RB8R6MU&d;;;jY&D+PL@A z4f(1x5fwkw3WmSt&HBD_Il7Y>e{eK9LX0@dqjk=<^yCmEHWSO7QSbK7)i-QalsOVw zw07BtsU3qq93NOYk0gD6>^yaZyZ=1nBn!_AKRr7O&=c6bkj@`Q>51X+f^u|nQjk(& z2gXurO0|;{b?+}E4B8nm(lT4@Uw*4TUWXD!7+edjBs^yO2&nmF*XhqUXN5g3e(pb8>HSS06{vDwY(GkF5C8&&jN@GrGB-$)W`!y3Xg5iua{wud7p}%UG*X2QR(HjcuwDA;ybhv+?{37&*s#Ie62jkp1!R^?~ zgGBkcPe;>?x&|ppjaA~V*3?9$gBj~TtzC93m$OX87F~}r(OC^-J9fD)P=_PwKc&A% zjzW;q*8dgSa$!3%z08=VDk+xNU-3c0UnW}1Xy>AK-|6bj?Y%jB)wpA9%UQ4v86(Ec zb5oyY5A(sXNUma8ww*=A#|wq_mVpUzRH^M~hAov+VhBpAtQ6Qc;K@ zbM!Os$2xN2Z>vAhFrC_E9yMI5!?B!s4>rl+={K?QY0ZqQV>8gwdG6Jmqsa|ZT4W}z z+n=I^O|a>Ik*ZgUD~vwxUvA=$Kd!UF0_4g7CI(fXsO*-OA3w=o3BbcrP`v^TsZcz~ zTv-Xq>Ks0a?CPh4aE#HTsqT%JOS?b&PAbSHB&={c{f^uEL<7Ke;a=|y`P}7bkEzm( zE&+22dvc=L{whD)zC5-c@fxB18!Fa2U>R4Tw4UB;hSS5`^$-`Tcua^Fmh6>0C6 zkf_;2VFcPSP9howxg_aXYSwMtyVv^Z#yQK>(z$+^=tJLPWUc!PbTBLUG-%f*nMb8@ z)hUd%jbci!-pMbP6TX)YQdWXjP>{}}`9iJZ5#w>WR=cfx#Gr9vFdle|(jVT+Us@}T z5sLO@z74PWObNX@Ogb{JFgy;a@E09#=qu%heCMVx9w#ko{ky8Za~Z8XPYa@-aV+Se z8*vlm3qq$u&O4#QPA=C=F7E@AbCG%7Lp3MUcD8r(KJPi(x$3j%Q__z9wBW5QyftZ{ zuDaBI)L9Bz<++UI<2=p}0f7{M%r!HZdSiH%sjVB7sF0b*#5q@Ms#^X}~BIRv5!&7nX1Ec#@ zmr}zx)34WP4KESK1~aBe$-@j>mU5qtf>S5GxDHO_gax6nH*;jiV?B16Pg50^5@1r~ zx=kw!%nU?ea=JDw7t@v)8y><4`&IDsv>tYmnwJ(t&9gsE+Bf^CT~B}c2>%@LiQ3-T zQ7*JxP{e`QY5N&V?v2d2^IS^gff*+vw7077_x&oi_kGe<*efzZ0UuI62uQwK!`gA` zLv%Urh2Q>7$y4#>FyEP8aZ{lphUB{*@V?KuA{NcBRyhtJejIL#LY4l)UU=t?v9rG* z+PeX@t3@h7^&!`lI{h%;_sz(w3s%(aW%b^cYA>GqcK?{)QrJ9dnGeXQt&bK4O)|W^ zL?|VZbC`sdNle(|2Zk~e(VkToY48#uN5?d~wp8i_^U4(6J5l;~L0jWh8{d)z`u(`l z5(Ew3*5>q-iHM7peZ*B$QQi>~r=B|Uv8o~oeN~m`@51@P(>9XMep}tprbslnc<9UW zygJNS(3}y0=n3K8c&PK~eO&gCR@UFoaa#u5lOIbSlIob=H*^KeLrOgMq;?MEb8BNX zCDA?4`EMt}m!;6B`-Q|YvJer{^7Hd63SLua@AdrMmC!c{J&+JE(@wQQ>fpDLy|8i{ zEUalzP&&BChZ}>hLFSX|pH(A47;78GhA{EPi^Ns2ev{jPnaVjQS+aJS@-!8nmb)Eo zM5cDti=w+m&zc7#Iq?ePL`}5nToz{A9vluGw+&sC;;>foF1S16!SoYa zPc^|MsKev;nJ5JuF1~M{MuC9VNdUvQS0B*74=lT>KMGt&07W&(T=e_HT_Piw`y0O@ zMY#Ltowwst5^PwVDZFsag8iFu*SB)>_LZ-ZHf|6`RtwAGTvlCf#Hg8we=_Yf6r7@J z43JdljIl(H9-jYul|S^FDS-XBIh8 zQ4rNnEzSJH_#gWN50lUpGIn-$`%lfYwK!kOTX~6-tM+$m4K^l-z^`Ir5x>jMPvmnS zL(MGZA5~n^eOXBwY6S@1R%LA`ZwQliL^t{+?CPMjXu&|7Dwv2h?Z|KYyX!76zhGM@ zm=Jz{MCB=1A;gX&(f!*7BQ01jo$wNZ|0kBz7moASJkzR5a_(?&cXjMuCDXwK#r!+_ z^L3xR?Z5CxnKFwdmcrJ6X$E(sQ1Gf2bjKP>J$U4?e$CO)q! 
zf_B_ZXnMHM*Y&PO<vg%BPPYaM3ba49Q@rn}r%No=hVH>C>3~1k=HJ^bp}l9`$kBkia?X?z zp(rOuf#piUwu6*6C+>k*-g)>MQYA)zB~3$&k4y$*#~*&49sBtAK<>1JLe@i{nuvDB zcdgGF-@cw|>~_G^?#&Ywtv|liuH-n9^N*VJ>iwI8`E!XauHsHZSw$)8sc&~i+YOhs zp-1h5oR*{ETErAbYS7qZ+_9=LOQ3c05VOZ3v@WMBBjl>VHVpOxhG7?bRK@o=h397@+C=VD#1#PTMFL0_Jw z#(1{NlBh{~i_S#93}5s{e{Q+BFZ(>ydDrQVv3aGnS^~p2K`&uvBf|L^q140>{ zZ|TBEP%SGyn{JnMf7^BXX>UuK_Hg*=eBGR#Y2uLpC?TCSD_%kfBiC&GtPC6+q*Z;G zn5KhjZxEnex67pf;!;922q7|f>dD_ZROZtPf`(~tZ?7y`sCAS-_MWTnHcZbJ-4a4M z*FKq=x@LCu&*~vETe*=zdA;xX37=+Zgn{9)tl)QStiDJ{8W-C-Ncc{mGL8W`A)xj>AIA5mJdzil8-9xUQ2PG)upfE7T3>bET1pnpa02gI z+z)@z{FWBAlEUO)g$uA-VEX>ij^`2R2(@(Ab|J`biaK88x3TN&+(CI`x`*$?AVaeMIFj4Ip9Ur$?V7I+Ftsd$N#`=Sc%1cBj>j9LyAoS9Y+{$Y}RQaC2 z?ETFA{Ikd?y6AI!2~ZS)VQBsVA>r{?U=)1D1zo;e#=lM_jIadsr2ir@jx#NAP6x;r z2p4@}O-+P=n3wTZ3Qtq5!&x_HmwLuCV1_flx>OU)ZG&-d4hlb;YAvjJj1gyOt(?|O zFZ*V>>*$_@fuQ3>t3eCrM;4ZnTSqz&|Cp*9r_9-;gjU)gigksZG|hpTNsvrRpVsCH zb_q2{*ld;u9kDLZUI!4UaexRoaD0Rc`zUv{-%|M;O;JN5tat#WYOS7=!l9v|b6n38 z+?tUDC@klu`9sde(Zxm4LN)Gk(7PTS9-cTJ{dM5z&@QE^XSx8!#|MrE3-C75IdriI)8$E0W&Yg z-u9)mF!)I0I5gOjKzSJa8s+OvN{w2=*;s`&{1q{%x7lbgi6(@D_g7qbLBj$jx8r7% zm4CdN_|mMbqi3iDO9s6gGOuK~WUuOiOeT#j?gZ4!Yv?JDKg^NC=d_0op4RB!+jMB$ zB`H-1$Ebpd`#E$lM!KaS7L(KMKximI(n`C!KEG&?Hd3Fo_65cfYFFdfNn)XxGzrnd zRg%w3${~`kO9z*;va!L7jSwy!()Cv8yjFr}T1}6-{23=!)^xArEwXBNs6$J;0-1zs z{&tvj2$0|HGacD;6#1SmTHFz^ZoKMeL_fFqRQlZ+^=0KO4yJ#re#>wCL8oZG86}Jn z+W66Gdf(CW{LP~u$6+Tb7)3v^6?-uNHPN$a-j7R^%`pR6tq^73_-62Ubr2shX z#g8nTKa+MPBmnOZ(~o)~F+m{jVvZNBBiq?Llojw+-h-w-6F2wPFN8qsBCNM(b14Wq zq^rbRo|c*bK(h#KN}}?qcX}{&VscUh=ni5M5}vy_X8+g4@x@xjC2#mhE7)l$wyu*0 zmM=A*z>B3P`O%^I^4p-ktcz7OqeL-RYTa^(n0^5`D=L66C%t*$G@G2ia`j5vR9%Uo3YEw6S=1 zb?Hk;NK`!V;0R>6Xg9L}8N`0c)vIg!@AN)FNbD4jtZndOE)9gKbe$Bs{~E9$vhlYq zPAdZcV|FN%RT2teMF%5!#3ydZ{c-IjL%Eu(N@;QWBWzGOHs~%ITU*N=$n(0@bp(WJ zcvCa)#}Iq*NZlW?w?)#Wn04@CzgFlN-Z?p!x6rIP1z*1W2l(>fJNPg?JstBZ2aKj; zNIpVKNAo+c83`XEN(ngWq#CJFX19A|Hy-J|TZxGQ2FEG1{g&`9?XE-DPYKKO?n&3n zwtR`_S38uQ0k6JYt$ZsxKetc3%q_AC|qG%F?oqcSP6;*rkzNCL!LS(T#k78 zFIriA@-Yx!-=~Mil{GZv?s9zKx3Sfm!0#++DsDeqPG~RsOabURjck;+cm7mlWIm_J zry+PctZi}yDn1D1A^#<)GJuQyz1H=RsoRSePv_!7{0f+>F?2P3K!Vqgu|Jfal*FRz zXVv{2V0uCKkIcl3fD-%`u$t4UrP$)FEPpX zaop8I*d(xZasNwTLbdl(4gRK~8R&FVlHJ^{uJ(_G=?e@LCsRT&gq9L2KG}57z(8m; zBx44oox&CiP+l!Si!Y3h-cJjTeg1tT;NyBvV9tFR+qe!@n1p? 
z5SYlHqt{+>?mf6$@@z~R(}eaL0ru^GhW{2ChvGTnzn^*djdf3L2r{50g$<+vUPS`) zUt)@H8w=MsSy#F(b>-4@?D`L<3lJpYuDk+H6v4 zBqrOj3#xJx7ytzDeIGR$o7<(tSx$bjD8y6?W)7%Kd7C>QUS^Vt?WL>2et;mC2Ec5+ zC~6Urp7D_J5KO*AVG;3VcL6{k_H04izQBGj6;w3e&bfEpd;in>HBs^UHq$#~t?T^3 zUXEMTAoPn%IcjasCHxWV3u@b99_u%(7Ae6k!J;#7XcRzhvj4q-r$xkrT~>=QaqG#) z>*7m62~Wzu4DI>PD1o#`E~T_aL~h#}A2bdO#30}@pSlN>FlfM@>{aJa509^bI`3X; zL%sF!e$G)@d40n>Lm`(CT;YM`nJ;)-$29vLgTJj6jVh?@C4>UR{EB*>c{GxdozwFh z(yG_+>_S{Bj0yrqaVVuMLeMe!W^Jn0coHenQp*249*k&C4Lk9pQ%c|J+r}?{KC0TT z)9Vtnqrkx85+0>qRcYn_Lw=;Gp)5s!kvNfGO44oe0=6gE3vrAD1gihK@#rjgb6Y3H zDX)G9E95!y1`JWXwR?d1G5s~2@6i8dKl#x!pudZ)eBRxC{6i7&2|rrzKj}}U58nkTe1b?O;7(=cf;s{&&e z&NvGb>rR-Z>Er_%1}sn`y;<1D=KoI6Dcy!X4tz}bzAtB|Gz_79+xs6R8gwjD2MfBq$Z9;OSnY zn>#x9_0=4fyU=?&Qi>01kIk4QP1u!t`jNdOeRym2Moz&)6%RZ$jzanI<<6^QIQ8&j z^!O%{mv_WHZP$IRgmYm_m!q=`hogm04=3Ox~rtYc5qO2Y_-?a7p5}RQb2( zMr&ea)>bxhZbN3Jpw5WNg>`QJ==%4mZ^k7{L52k*?7R4ZAx~VqiZU@v;*6Ei9G;Bx zlOA|o`Zktevi&4&mb;1a&cF=6f-HiPq?y~U7ZHT z`IzWURqGF#>Npp}Vwz;ngVf;i-&>z|zTv{!j>dnvWALBsszq`H;)Uxej*Qv#B-D6LD3S(Bh#iN6P zuZn7j1pLd&9+j8F>L(3IFMeLRl~A|USe`A+$LW#G`GA`0-2MS$nP?s#vvk4_jt8*y5_`2ha*Hp%|YpyQh40qZ4&z(vM>EDQc!rXhx{C|txV=5(I|D>?; zjr>0%cXEAy`N@9VxDjHWx)-sR*tife?tnBG_v#(K>(PMR5-XT`3Mr_nRPWM)<*m5E z{WT9C2tdzviPMeWS*G#(#~Nh?zP9a2Qa;nQ73z$MFLWlX=0A@`8gzZVZZv)t9e>cL z&mLU`ANTKOplsrG-bbRFPjtCA(xNevRKt9S$wYDFU=oeGjMy)DHL0);01s}9Zi~J= zQO`F#NZgyZ&0X_-4{%bE!ay*2x&9j$=xOU(-7{@bUJx(+mHyQ`r4u;3v3ThAhDOZw z3m#5CMIK?b?DCzLE3BsijOJ&BH%oksX&c%+u@*_sE*?j&NLKnK^NV*?_QI!-{7LIu zJqNI2r43k2Tdi8n0()6&BiJJcAUeP$<~-k8uNM9sJ4ug!H}xPBYjUv^TUG5xL*3S% zMl(KTt}>2QF9?tn)Md zAk@;dUQku6COH?dqvLKHOCzjFi;{NG-ANF6mj)ShnQSTHlirZ}l3Sc5N{KZU61^B| z+I%+Es<@_$I|D<4Mci<5NdrxGcSxH_{&Ux~?nWTXVDm@NzY^4!oQ3&anRL+oX!;mV z-=|`|EN6yGY3#2by87g;z071{YU^H$P{YzALhVd`fLevuocERm^;N}-i`j z!*Tff4~FVNrLclD92h!qEC8NOEe{VL(?XoZE7qv~*^OBbIHFBTFy+!PvxozF!HbCE zsq0<`?BWkHnMn%FnFeR#a`*Vk7qH9h)@dXBJtZQ9gl`t2?1wEXmesLOJR3Q~Fv|a- zg>W-7_kLy6D0p&DP`i`r(&3Sxw;0oW!c-weDfsJ(NU3aNP_1{Ipmhn2!ASP38#qd7=M$~w1Y ztqjn_?lT{uKtC1J1&vybh5vPFkE4b)6~bS#rxbSByDziKGI~^0t9^M)sFT%979M&{ z$^qciAb{==ORaWQlzM!PAuNs;=q6{;+P)c{$*lImI)hoX0o4Q9$;zq*%1}>`{HKyF5`k+h=xTY_@VY zERqIPZP4D+6TThWU!cGvRl>p9pdjIq+Ll;exf`(9T}*=I`2jQ1%j{?kP_-ti0(3aO z#OVN4hs{5tyDg*4p9zq z=HfXuT6$Mvg>n;}|Ape@I{O#^XN%@1@T=8>hyNJ}F1?#loP08#zfQcR7LG!VIX|); zD9=9G9Fqv>UA<^jxwMd~XRQ~nz=^KxD&uP<00jYPrl%FwN|^ccVvt6t0-1-{uKlEE zTJfbsK@A@Mp`^n_gT3JTn|~4*%X3#B0deMvgv(kV8pz~i0!FuJEaY?xixUfx0UcLl zjEJ8&N7Sn^Hn(=Y&>X2Ao?K!R=~s-pI0_8TIYt0*SDk@UYv{;O@Zg@=s%H`N&Z*x#wweS$h(#7ye2^gHN-O(d0l&l8;kV$Gf##I%sG9 zSpo#scR(HEV7!c@A^#!@{f1X)J~GCH6w)x8Lo!+TAO$m7lxxe`jRy~*%;H+(Yj=Sj zY--&*Ri!BorT`uDHUAR-3+g%zv&@;P)RPBM=juR~{AaRW6jBS+sWJiG_XfE#Pk$*X zM~u*(=bTK$zT@gvKjj9G@JE0D`!3OP($-k{Ywnd36~()-U!^4_ZYs*(CR#mX$~Q3w zZh4*E#3q|V=VW1DUBz+7O?X%uLeVpA|n4YJF(+YLFG$t@rwsg<2C!WdGWn7 z`w%`fKWsC4c$ijMfo6R&*-?SV)K~Q3>&(LET$zk~MV{isBxk&Fld^hWnehkq*C=IY0|~g7Np;Wmj=u~57}|L~-tonXg>Jq|?+428F*TkO ze?suBOW~c}hC^!hw(GH=a4SUI0kH&{4|z?3z6NuR%~&F*L4|k-=Q8hdfUyKCEcucR zw(@}9V*bkJAO&GdL%==zyb!rUmH4%o<`TTYd0#I4o+wGnK{*pR9` z{wcWO#$`Mz;1PZW2KQZ2PHFp|x1xFe0Zh_ge!yvD_NvYFA*HyRfod#;_mUUkn~_!1 zbS0zV+){jBbP4$Omvzdoqv1aVee7* zYmh6k-~W18EmByJDg5SWfz-EoD3X`U?}sNnxxWOdah~KS?Bj_JgN5){nB|Mc+KT%I z@oP!blMIE+fU9g;?1ELLZu!Df;%=Z(hwTGNkED#L0NEFQxBD_UTED<6DKlZB4H7Q~ zSj6p>?_4REn5GvhUlJCw)l&>sojmPR;@_a9O(AG%9VKbp=z%=uZfIiKtBJ2!FNuxyDl9mCsK9ZP`bY!9B^4>`K{c$_%4-}bpr$A zh?}rP7M^`|A(i$BC4uv4b8lSpjIA7o3owv&+O>uY(n}4DniABG`^CIIZY+*~Uk;2O z9uE-Zd~*a=@D9^D57PJhn=UO|stSK@%)BylbWLT!4$JD+Ct(%586BPRcAa^=pSd9X zk~r2*xj=5A>y@LY=S^gjM!1Ey5Dz)7cm<2+b?t0OI||qK(v!7nMAfohDudU$O6NRc 
z12hsf8@1qOuMF@BGU|=Q3pvGUAwY!E&fer+`F4?2LgH?GS3gV?e1H-{F2HT7a*waB zDn1=hk`No+;^?g4+IN_9&qGdD`(|8r{d6wN{sj}*nNUYUY~`L}F=_a)fp3+K6QQ0T z3LCXZ z2eeTf5@TL*1kj27+|u;wIy4Dk}MgHHp3=ZaH;e#0zcJEF?pyVlZUc{~ss;U&AGKSti` zI31}#3+kWxQnHl_W=iswZy4S18N^fvNtgySS5|#VDUNfY@Dx|Yk1MpfdBl((sDqtH zB?GQ97mLFr0Y%a2`c4T{3O4;wQZ846&7iBt`~F28GcyS?kXIwBZm@UzX^*#^{ltTq zF~(ffZcY!VK92MjcQcQ;VH_Fwr zF&2qZL4(O0LyP$8x@Xc(PfM=|xzrogmk9nGKT)bqAPn2udKq~S{CZrYIr+9RJ^t;& zNluo_)QK!WL_5HttqD99o&Qm}2S&eg*#9;2-_5mO9Czdm zCA7ZTX1BYpDY%LzfdL+UsqU7)&*%vIGsDXze~uMK^l7IDoaZVY&OTy-UL_4*eg-$d zag=z19}xh^)c8$Tr0ITyi~QPcu5;OohQ-1?+X=3k?)wGdWrdAHqZ+37#%}{a<5>ku zvQ@Uqk{~ z#@9c_7YTY4F7cBVQ2P+rugu9Dzm3N#t1`FvhY4Q=ULzCS@e?&L(z>R1L*6z5i{v{$ zbC;keWgCk>@`%glu%grgX^)S{N~{nW`?cbB&5&A^I2Xe=T8EyN?~AC;|8t8N=2d-W z$K99CSr)rh%5}V2@6EnoY!B4F zX$+^RN#pEF?0&~rDEt^Jq4*iEzV)+S6adaR+0wUBlF2(%j8#(~4!=AF*5l42Z;^(L zfSj-P($4t-c`B4jN~n3oNyma0iildQO4@D}xBZCJ*VN|iq>%eHFXg|Ip3QDZlpnN7 zxl=^g_|J8ndAEck|79`f`m-G90ytb4)rh@sAHK~q^zM0F_r2X>Vcg}iU*|s=f{n{2 zW*2!k@>3lfv!`(u6nWx1R#(qN9~^)7dSZUStF2l1JX0-rl;QR;y9ci1%Jt=a7jKE7 zS6CTF37y>TlWZhWpxd;N@TbWyyPI3EVCF@c>)>RT=(fui>g48Q)XzBS{eIy6Mo@;0 z%_if&7Z^+!-C~x~ZB^zuic?RM{Pnu9>O6&zJR*oQ@z%FE(21htR2w{{I#OQP zlXm73^Q?*=!KOE-2{3@02ClAMLR-4?UJqIJF%bm+bT7MHdn7lB=7Cg}+e~}}iLb5q z#w1MdJL@0TaeG_O9;q?*g@JMJZmU~#KihqmSLdJE&vkR>7bbA!%^XD;jZH8rhAsyuTrFZqZpP+xkIC^4;3`Kbq*AgU0wTH^TYnNdw-Q96{-$HsRbUi%8s zs4%`-Mb`e$aKh$)^fFd*E&tmk<$3JQnO**ig8>$b_nd&f9Da{or#*g=;5MF;DO9>+ ze0I`GWLnlgs;r+kFIuWhEp`nkg{~_5o+Yg7Skq9WBY3q;c|Lm55{erYIyK}H zRKl-3G`KX*k#(a_$xAcKMd{#n77k=Wq?={|@Rp@lSo%(yYrwa!jV-VCyDv@Rm64kK$76c%nIg+1=nce?CL zp|iR;8@Qc48PX?m-DtIs<#wA2lpJ^j$sJ@+8pJ&OTjX zU;YF)u;{oz;f7?1OaThCKe}?}X(7CjRc`00wg}28>YcO7{F08M$F!5eF%|&-_pyXy0OdLZJ6dJjZM17&?$p>RLUPSv zU6|;Xb@0U>Qd0E(N%B_aR;_*ZlgWip&IV02r6ryb+bIWEm#t69d&mA$#Pq=uxV&zh z!3W=JOvDhYv3FSUNr*qe5^J+$#)^8f3k8z8TBJCyrM=?gNP)oj^?u#gSH3mskHeZc zojpUa)EUuut^)-k7eC9maDiyi)*Quh<#yL^`aHnoYYI^y#g- z*WNu(=WiN(Jj<%_EB(*HoQg`$`FGz)39Iavn0{tug*s2HvVlGrCmJ4Bff9zKzS;|Z zABePSF6^f`PA1;((L6$6ebA@|b{rHN!nwP~%Tk$2bWg02^9KT`bCX#oz)oP~> zY}XI7tNaQ1woW%7w{}0T?VhkJ!<8q9RF1>`W22qd%$Uz)d3q6a;nfU1slIBhQFu-7 zqR(xsC(`XD>PZamjIy2o`Q=}~+mTb%y-i!SwC5}0CiSZ3l?63KOxd1_>7_I9>Or=9 zk9XDHOS2JdwrJsN67$vydPR-ZBJ*w|?ytBh=4lw&0sCg2{p2|rd78lLXd2bz?Pcghxah<7h z9}Z#g7(Tl+$x#|b&fe=bp|&62ZR&GOj9CugD`NG?f3u@4X%Z~Q%L@8aW9y}JjGO!d z4+s>iyD~-0*)ypF`}9f8|NPrr=GMOy-_~7jU*xJqh2P1!WM;hEmm2d$7Tz!J3a#{V zg8&egi%QmgBoI9y?Th@2GNICwxAoM)@-m}u&EI(|$KLIczY z5U7-J>E9`e@9}e1Rd8ObceHvUM_N%4vY{(bK#5???5mqjm8BKWc6iC}=2I_VRb)8t za%Xc`Ay26*$;MDzey|aiIUzHESC0*siUA1NRxh;UW49ZBT quH5`-t9SVi0RCs6z!;953z8utuKWi}nt7L3sl3!wD3*H@^nU=-W@6(2 literal 0 HcmV?d00001 diff --git a/samples/contrib/intel-oneapi-samples/intel_xgboost_daal4py_pipeline.py b/samples/contrib/intel-oneapi-samples/intel_xgboost_daal4py_pipeline.py new file mode 100644 index 0000000000..377722136c --- /dev/null +++ b/samples/contrib/intel-oneapi-samples/intel_xgboost_daal4py_pipeline.py @@ -0,0 +1,502 @@ +from kfp import dsl +from kfp import compiler +from kfp.dsl import (Input, Output, Dataset, Model, Metrics, ClassificationMetrics) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["numpy", "pandas", "loguru"]) +def load_data( + data_url: str, + data_size: int, + credit_risk_dataset: Output[Dataset]): + + ''' + Downloads credit_risk_dataset.csv file and generates + additional synthetic data for benchmarking and testing purposes. 
+ + Input Parameters + ---------------- + data_url : str + url where the dataset is hosted + data_size : int + size of final dataset desired, default 1M rows + + Output Artifacts + ---------------- + credit_risk_dataset : Dataset + data that has been synthetically augmented or loaded from URL provided + ''' + + import numpy as np + import pandas as pd + from loguru import logger + + logger.info("Loading csv from {}", data_url) + data = pd.read_csv(data_url) + logger.info("Done!") + + # number of rows to generate + if data_size < data.shape[0]: + pass + else: + logger.info("Generating {:,} rows of data...", data_size) + repeats = data_size // len(data) + data = data.loc[np.repeat(data.index.values, repeats + 1)] + data = data.iloc[:data_size] + + # perturbing all int/float columns + person_age = data["person_age"].values + np.random.randint( + -1, 1, size=len(data) + ) + person_income = data["person_income"].values + np.random.normal( + 0, 10, size=len(data) + ) + person_emp_length = data[ + "person_emp_length" + ].values + np.random.randint(-1, 1, size=len(data)) + loan_amnt = data["loan_amnt"].values + np.random.normal( + 0, 5, size=len(data) + ) + loan_int_rate = data["loan_int_rate"].values + np.random.normal( + 0, 0.2, size=len(data) + ) + loan_percent_income = data["loan_percent_income"].values + ( + np.random.randint(0, 100, size=len(data)) / 1000 + ) + cb_person_cred_hist_length = data[ + "cb_person_cred_hist_length" + ].values + np.random.randint(0, 2, size=len(data)) + + # perturbing all binary columns + perturb_idx = np.random.rand(len(data)) > 0.1 + random_values = np.random.choice( + data["person_home_ownership"].unique(), len(data) + ) + person_home_ownership = np.where( + perturb_idx, data["person_home_ownership"], random_values + ) + perturb_idx = np.random.rand(len(data)) > 0.1 + random_values = np.random.choice( + data["loan_intent"].unique(), len(data) + ) + loan_intent = np.where(perturb_idx, data["loan_intent"], random_values) + perturb_idx = np.random.rand(len(data)) > 0.1 + random_values = np.random.choice( + data["loan_grade"].unique(), len(data) + ) + loan_grade = np.where(perturb_idx, data["loan_grade"], random_values) + perturb_idx = np.random.rand(len(data)) > 0.1 + random_values = np.random.choice( + data["cb_person_default_on_file"].unique(), len(data) + ) + cb_person_default_on_file = np.where( + perturb_idx, data["cb_person_default_on_file"], random_values + ) + data = pd.DataFrame( + list( + zip( + person_age, + person_income, + person_home_ownership, + person_emp_length, + loan_intent, + loan_grade, + loan_amnt, + loan_int_rate, + data["loan_status"].values, + loan_percent_income, + cb_person_default_on_file, + cb_person_cred_hist_length, + ) + ), + columns = data.columns, + ) + + data = data.drop_duplicates() + assert len(data) == data_size + data.reset_index(drop = True) + + data.to_csv(credit_risk_dataset.path, index = None) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["pandas", "scikit-learn", "loguru"]) +def create_train_test_set( + data: Input[Dataset], + x_train_data: Output[Dataset], + y_train_data: Output[Dataset], + x_test_data: Output[Dataset], + y_test_data: Output[Dataset]): + + ''' + Creates 75:25 split of input dataset for model evaluation. 
+ + Input Artifacts + --------------- + data : Dataset + dataset that has been synthetically augmented by the load_data() function + + Output Artifacts + ---------------- + x_train_data : Dataset + training features, 75% of original dataset + y_train_data : Dataset + training labels of target variable, loan_status + x_test_data : Dataset + test features, 25% of original dataset + y_test_data : Dataset + test labels of target variable, loan_status + ''' + + import pandas as pd + from loguru import logger + from sklearn.model_selection import train_test_split + + data = pd.read_csv(data.path) + + logger.info("Creating training and test sets...") + train, test = train_test_split(data, test_size = 0.25, random_state = 0) + + X_train = train.drop(["loan_status"], axis = 1) + y_train = train["loan_status"] + + X_test = test.drop(["loan_status"], axis = 1) + y_test = test["loan_status"] + + logger.info("Training and test sets created.\n" \ + "X_train size: {}, y_train size: {}\n" \ + "X_test size: {}, y_test size: {}", + X_train.shape, y_train.shape, X_test.shape, y_test.shape) + + X_train.to_csv(x_train_data.path, index = False) + y_train.to_csv(y_train_data.path, index = False, header = None) + X_test.to_csv(x_test_data.path, index = False) + y_test.to_csv(y_test_data.path, index = False, header = None) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["pandas", "scikit-learn"]) +def preprocess_features( + x_train: Input[Dataset], + x_test: Input[Dataset], + x_train_processed: Output[Dataset], + x_test_processed: Output[Dataset]): + + ''' + Performs data preprocessing of training and test features. + + Input Artifacts + --------------- + x_train : Dataset + original unprocessed training features + x_test : Dataset + original unprocessed test features + + Output Artifacts + ---------------- + x_train_processed : Dataset + processed and scaled training features + x_test_processed : Dataset + processed and scaled test features + ''' + + import pandas as pd + from sklearn.compose import ColumnTransformer + from sklearn.impute import SimpleImputer + from sklearn.pipeline import Pipeline + from sklearn.preprocessing import OneHotEncoder, PowerTransformer + + X_train = pd.read_csv(x_train.path) + X_test = pd.read_csv(x_test.path) + + # data processing pipeline + num_imputer = Pipeline(steps=[("imputer", SimpleImputer(strategy = "median"))]) + pow_transformer = PowerTransformer() + cat_transformer = OneHotEncoder(handle_unknown = "ignore") + preprocessor = ColumnTransformer( + transformers = [ + ( + "num", + num_imputer, + [ + "loan_int_rate", + "person_emp_length", + "cb_person_cred_hist_length", + ], + ), + ( + "pow", + pow_transformer, + ["person_age", "person_income", "loan_amnt", "loan_percent_income"], + ), + ( + "cat", + cat_transformer, + [ + "person_home_ownership", + "loan_intent", + "loan_grade", + "cb_person_default_on_file", + ], + ), + ], + remainder="passthrough", + ) + + preprocess = Pipeline(steps = [("preprocessor", preprocessor)]) + + X_train = pd.DataFrame(preprocess.fit_transform(X_train)) + X_test = pd.DataFrame(preprocess.transform(X_test)) + + X_train.to_csv(x_train_processed.path, index = False, header = None) + X_test.to_csv(x_test_processed.path, index = False, header = None) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["pandas", "xgboost", "joblib", "loguru"]) +def train_xgboost_model( + x_train: Input[Dataset], + y_train: Input[Dataset], + xgb_model: Output[Model]): + + ''' + Trains an XGBoost classification model. 
+ + Input Artifacts + --------------- + x_train : Dataset + processed and scaled training features + y_train : Dataset + training labels of target variable, loan_status + + Output Artifacts + ---------------- + xgb_model : Model + trained XGBoost model + ''' + + import joblib + import pandas as pd + import xgboost as xgb + from loguru import logger + + X_train = pd.read_csv(x_train.path, header = None) + y_train = pd.read_csv(y_train.path, header = None) + + dtrain = xgb.DMatrix(X_train.values, y_train.values) + + # define model parameters + params = { + "objective": "binary:logistic", + "eval_metric": "logloss", + "nthread": 4, # num_cpu + "tree_method": "hist", + "learning_rate": 0.02, + "max_depth": 10, + "min_child_weight": 6, + "n_jobs": 4, # num_cpu, + "verbosity": 1 + } + + # train XGBoost model + logger.info("Training XGBoost model...") + clf = xgb.train(params = params, + dtrain = dtrain, + num_boost_round = 500) + + with open(xgb_model.path, "wb") as file_writer: + joblib.dump(clf, file_writer) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["daal4py", "joblib", "loguru"]) +def convert_xgboost_to_daal4py( + xgb_model: Input[Model], + daal4py_model: Output[Model]): + + ''' + Converts XGBoost model to inference-optimized daal4py classifier. + + Input Artifacts + --------------- + xgb_model : Model + trained XGBoost classifier + + Output Artifacts + ---------------- + daal4py_model : Model + inference-optimized daal4py classifier + ''' + + import daal4py as d4p + import joblib + from loguru import logger + + with open(xgb_model.path, "rb") as file_reader: + clf = joblib.load(file_reader) + + logger.info("Converting XGBoost model to Daal4py...") + daal_model = d4p.get_gbt_model_from_xgboost(clf) + logger.info("Done!") + + with open(daal4py_model.path, "wb") as file_writer: + joblib.dump(daal_model, file_writer) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["daal4py", "pandas", "scikit-learn", + "scikit-learn-intelex", "joblib"]) +def daal4py_inference( + x_test: Input[Dataset], + y_test: Input[Dataset], + daal4py_model: Input[Model], + prediction_data: Output[Dataset], + report: Output[Dataset], + metrics: Output[Metrics] +): + + ''' + Computes predictions using the inference-optimized daal4py classifier + and evaluates model performance. 
+ + Input Artifacts + --------------- + x_test : Dataset + processed and scaled test features + y_test : Dataset + test labels of target variable, loan_status + daal4py_model : Model + inference-optimized daal4py classifier + + Output Artifacts + ---------------- + prediction_data : Dataset + dataset containing true test labels and predicted probabilities + report : Dataset + summary of the precision, recall, F1 score for each class + metrics : Metrics + scalar classification metrics containing the model's AUC and accuracy + ''' + + import daal4py as d4p + import joblib + import pandas as pd + + from sklearnex import patch_sklearn + patch_sklearn() + from sklearn.metrics import roc_auc_score, accuracy_score, classification_report + + X_test = pd.read_csv(x_test.path, header = None) + y_test = pd.read_csv(y_test.path, header = None) + + with open(daal4py_model.path, "rb") as file_reader: + daal_model = joblib.load(file_reader) + + daal_prediction = d4p.gbt_classification_prediction( + nClasses = 2, + resultsToEvaluate = "computeClassLabels|computeClassProbabilities" + ).compute(X_test, daal_model) + + y_pred = daal_prediction.prediction + y_prob = daal_prediction.probabilities[:,1] + + results = classification_report( + y_test, y_pred, + target_names = ["Non-Default", "Default"], + output_dict = True + ) + results = pd.DataFrame(results).transpose() + results.to_csv(report.path) + + auc = roc_auc_score(y_test, y_prob) + metrics.log_metric('AUC', auc) + + accuracy = (accuracy_score(y_test, y_pred)*100) + metrics.log_metric('Accuracy', accuracy) + + predictions = pd.DataFrame({'y_test': y_test.values.flatten(), + 'y_prob': y_prob}) + predictions.to_csv(prediction_data.path, index = False) + +@dsl.component( + base_image="python:3.10", + packages_to_install=["numpy", "pandas", "scikit-learn", + "scikit-learn-intelex"]) +def plot_roc_curve( + predictions: Input[Dataset], + class_metrics: Output[ClassificationMetrics] +): + + ''' + Function to plot Receiver Operating Characteristic (ROC) curve. 
+ + Input Artifacts + --------------- + predictions : Dataset + dataset containing true test labels and predicted probabilities + + Output Artifacts + ---------------- + class_metrics : ClassificationMetrics + classification metrics containing fpr, tpr, and thresholds + ''' + + import pandas as pd + from numpy import inf + + from sklearnex import patch_sklearn + patch_sklearn() + from sklearn.metrics import roc_curve + + prediction_data = pd.read_csv(predictions.path) + + fpr, tpr, thresholds = roc_curve( + y_true = prediction_data['y_test'], + y_score = prediction_data['y_prob'], + pos_label = 1) + thresholds[thresholds == inf] = 0 + + class_metrics.log_roc_curve(fpr, tpr, thresholds) + +@dsl.pipeline +def intel_xgboost_daal4py_pipeline( + data_url: str, + data_size: int): + + load_data_op = load_data( + data_url = data_url, data_size = data_size + ) + + create_train_test_set_op = create_train_test_set( + data = load_data_op.outputs['credit_risk_dataset'] + ) + + preprocess_features_op = preprocess_features( + x_train = create_train_test_set_op.outputs['x_train_data'], + x_test = create_train_test_set_op.outputs['x_test_data'] + ) + + train_xgboost_model_op = train_xgboost_model( + x_train = preprocess_features_op.outputs['x_train_processed'], + y_train = create_train_test_set_op.outputs['y_train_data'] + ) + + convert_xgboost_to_daal4py_op = convert_xgboost_to_daal4py( + xgb_model = train_xgboost_model_op.outputs['xgb_model'] + ) + + daal4py_inference_op = daal4py_inference( + x_test = preprocess_features_op.outputs['x_test_processed'], + y_test = create_train_test_set_op.outputs['y_test_data'], + daal4py_model = convert_xgboost_to_daal4py_op.outputs['daal4py_model'] + ) + + plot_roc_curve_op = plot_roc_curve( + predictions = daal4py_inference_op.outputs['prediction_data'] + ) + +if __name__ == '__main__': + # Compiling the pipeline + compiler.Compiler().compile( + pipeline_func = intel_xgboost_daal4py_pipeline, + package_path = 'intel-xgboost-daal4py-pipeline.yaml') \ No newline at end of file From d3a15d9230681a62563c152166923b6a9cec32da Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Oct 2023 21:23:50 -0400 Subject: [PATCH 190/253] chore: Update OWNERS (#10064) * Update OWNERS Remove Linchin from approvers and reviewers * remove Linchin in OWNERS files --------- Co-authored-by: Chen Sun --- backend/OWNERS | 2 -- kubernetes_platform/OWNERS | 2 -- test/OWNERS | 2 -- 3 files changed, 6 deletions(-) diff --git a/backend/OWNERS b/backend/OWNERS index 5954ef1c39..9729f8b179 100644 --- a/backend/OWNERS +++ b/backend/OWNERS @@ -1,8 +1,6 @@ approvers: - chensun - gkcalat - - Linchin reviewers: - chensun - gkcalat - - Linchin diff --git a/kubernetes_platform/OWNERS b/kubernetes_platform/OWNERS index ba9cb373cb..59bb717307 100644 --- a/kubernetes_platform/OWNERS +++ b/kubernetes_platform/OWNERS @@ -2,9 +2,7 @@ approvers: - chensun - connor-mccarthy - gkcalat - - Linchin reviewers: - chensun - connor-mccarthy - gkcalat - - Linchin diff --git a/test/OWNERS b/test/OWNERS index ce9eb0876f..9c160e6419 100644 --- a/test/OWNERS +++ b/test/OWNERS @@ -2,9 +2,7 @@ approvers: - connor-mccarthy - gkcalat - jlyaoyuli - - Linchin reviewers: - chensun - gkcalat - jlyaoyuli - - Linchin From b273aabb894338c85093b0fb564bb5b3094e36b7 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 6 Oct 2023 09:36:16 -0700 Subject: [PATCH 191/253] feat(components): Add LLM implementation component that uploads tensorboard metrics after training PiperOrigin-RevId: 571359958 --- .../llm/upload_tensorboard_metrics.py | 122 
++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_tensorboard_metrics.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_tensorboard_metrics.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_tensorboard_metrics.py new file mode 100644 index 0000000000..caa90e8bc3 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/upload_tensorboard_metrics.py @@ -0,0 +1,122 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Component that uploads tensorboard metrics.""" + +from google_cloud_pipeline_components import _image +from kfp import dsl + +_TB_UPLOADER_SHELL_SCRIPT = """ +set -e -x +TENSORBOARD_RESOURCE_ID="$0" +METRICS_DIRECTORY_URI="$1" +EXPERIMENT_NAME="$2" +TENSORBOARD_URI="$3" + +mkdir -p "$(dirname ${TENSORBOARD_URI})" +if [ -z "${TENSORBOARD_RESOURCE_ID}" ]; +then + echo "TensorBoard ID is not set. Skip uploading the TensorBoard." + echo -n "" > "${TENSORBOARD_URI}" + exit 0 +fi + +if [ -z "${METRICS_DIRECTORY_URI}" ]; then + echo "Metrics directory uri is not set." + exit 1 +elif [ -z "${EXPERIMENT_NAME}" ]; then + echo "Experiment name is not set." + exit 1 +elif [ -z "${TENSORBOARD_URI}" ]; then + echo "TensorBoard URI is not set." + exit 1 +fi + +case "${METRICS_DIRECTORY_URI}" in + "gs://"*) ;; + "/gcs/"*) + METRICS_DIRECTORY_URI=${METRICS_DIRECTORY_URI/"/gcs/"/"gs://"} + echo "Replaced /gcs/ path with ${METRICS_DIRECTORY_URI}" + ;; + *) + echo "Invalid metrics directory uri. Metrics directory uri must start with gs:// or /gcs/." + exit 1 + ;; +esac + +if [[ "${TENSORBOARD_RESOURCE_ID}" =~ ^projects/[^/]+/locations/[^/]+/tensorboards/[0-9]+$ ]]; then + echo "Split tensorboard resource id" + TENSORBOARD_RESOURCE_ARR=(${TENSORBOARD_RESOURCE_ID//\\// }) + PROJECT=${TENSORBOARD_RESOURCE_ARR[1]} + LOCATION=${TENSORBOARD_RESOURCE_ARR[3]} + TENSORBOARD_ID=${TENSORBOARD_RESOURCE_ARR[5]} +else + echo '[ERROR]: Invalid format of tensorboard_resource_id. It must be a string with format projects/${PROJECT_NUMBER}/locations/${LOCATION}/tensorboards/${TENSORBOARD_ID}' + exit 1 +fi + +set +e + +/opt/conda/bin/tb-gcp-uploader --tensorboard_resource_name \\ + "${TENSORBOARD_RESOURCE_ID}" \\ + --logdir="${METRICS_DIRECTORY_URI}" \\ + --experiment_name="${EXPERIMENT_NAME}" \\ + --one_shot=True + +if [ $? 
-ne 0 ]; then + exit 13 +fi + +set -e + +web_server_uri="tensorboard.googleusercontent.com" +tensorboard_resource_name_uri="projects+${PROJECT}+locations+${LOCATION}+tensorboards+${TENSORBOARD_ID}+experiments+${EXPERIMENT_NAME}" +echo -n "https://${LOCATION}.${web_server_uri}/experiment/${tensorboard_resource_name_uri}" > "${TENSORBOARD_URI}" +""" + + +@dsl.container_component +def upload_tensorboard_metrics( + tensorboard_resource_id: str, + experiment_name: str, + metrics_directory: dsl.Input[dsl.Artifact], # pytype: disable=unsupported-operands + tensorboard_uri: dsl.OutputPath(str), # pytype: disable=invalid-annotation +) -> dsl.ContainerSpec: + # fmt: off + # pylint: disable=g-doc-args + """Uploads tensorboard metrics. + + Args: + tensorboard_resource_id: TensorBoard resource ID in the form `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. + experiment_name: Name of this tensorboard experiment. Must be unique to a given `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. + metrics_directory_uri: Cloud storage location of the TensorBoard logs. + + Returns: + tensorboard_uri: URI of the uploaded tensorboard experiment. + """ + # pylint: enable=g-doc-args + # fmt: on + return dsl.ContainerSpec( + image='us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-11:latest', + command=[ + 'bash', + '-c', + _TB_UPLOADER_SHELL_SCRIPT, + ], + args=[ + tensorboard_resource_id, + metrics_directory.path, + experiment_name, + tensorboard_uri, + ], + ) From 067033762db315f83b84cfe1d6dc039c96a0e9f2 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 6 Oct 2023 15:55:36 -0700 Subject: [PATCH 192/253] feat(components): internal change PiperOrigin-RevId: 571455446 --- .../_implementation/llm/utils.py | 8 +++ .../_implementation/llm/utils_test.py | 62 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils_test.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py index ab6474d26d..2c5a6369bc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils.py @@ -27,6 +27,7 @@ def build_payload( args: List[str], accelerator_type: str = '', accelerator_count: int = 0, + encryption_spec_key_name: str = '', ) -> Dict[str, Any]: """Generates payload for a custom training job. @@ -41,6 +42,10 @@ def build_payload( requested. accelerator_count: Number of accelerators. By default no accelerators are requested. + encryption_spec_key_name: Customer-managed encryption key. If this is set, + then all resources created by the CustomJob will be encrypted with the + provided encryption key. Note that this is not supported for TPU at the + moment. Returns: Custom job payload. @@ -78,6 +83,9 @@ def build_payload( f'Received accelerator_type == {accelerator_type}.' 
) + if encryption_spec_key_name: + payload['encryption_spec'] = {'kms_key_name': encryption_spec_key_name} + return payload diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils_test.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils_test.py new file mode 100644 index 0000000000..a16c878fb3 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/utils_test.py @@ -0,0 +1,62 @@ +"""Unit test for utils.""" + +from google_cloud_pipeline_components._implementation.llm import utils +import unittest + + +class UtilsTest(unittest.TestCase): + + def test_build_payload_basic_success(self): + machine_type = "n1-standard-1" + image_uri = "fake_image_uri" + args = ["--foo=bar"] + + expected_payload = { + "display_name": "test_with_encryption_spec_key_name", + "job_spec": { + "worker_pool_specs": [{ + "replica_count": "1", + "machine_spec": {"machine_type": machine_type}, + "container_spec": {"image_uri": image_uri, "args": args}, + }] + }, + } + + actual_payload = utils.build_payload( + display_name="test_with_encryption_spec_key_name", + machine_type=machine_type, + image_uri=image_uri, + args=args, + ) + self.assertDictEqual(expected_payload, actual_payload) + + def test_build_payload_with_encryption_spec_key_name(self): + machine_type = "n1-standard-1" + image_uri = "fake_image_uri" + args = ["--foo=bar"] + encryption_spec_key_name = "fake_cmek_key" + + expected_payload = { + "display_name": "test_with_encryption_spec_key_name", + "job_spec": { + "worker_pool_specs": [{ + "replica_count": "1", + "machine_spec": {"machine_type": machine_type}, + "container_spec": {"image_uri": image_uri, "args": args}, + }] + }, + "encryption_spec": {"kms_key_name": encryption_spec_key_name}, + } + + actual_payload = utils.build_payload( + display_name="test_with_encryption_spec_key_name", + machine_type=machine_type, + image_uri=image_uri, + args=args, + encryption_spec_key_name=encryption_spec_key_name, + ) + self.assertDictEqual(expected_payload, actual_payload) + + +if __name__ == "__main__": + unittest.main() From fcdff294a6323f6cb1c0e574fc7aa5ccc25e420b Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 9 Oct 2023 10:04:18 -0700 Subject: [PATCH 193/253] fix(sdk): fix incorrect sub-DAG output type when using `dsl.Collected` (#10069) --- sdk/RELEASE.md | 1 + sdk/python/kfp/compiler/compiler_test.py | 25 ++++++++++++++++++ .../kfp/compiler/pipeline_spec_builder.py | 2 +- .../pipelines/if_elif_else_complex.yaml | 2 +- .../conditional_producer_and_consumers.yaml | 12 ++++----- .../nested_with_parameters.yaml | 14 +++++----- .../parameters_complex.yaml | 26 +++++++++---------- .../parallelfor_fan_in/parameters_simple.yaml | 8 +++--- .../pipeline_producer_consumer.yaml | 16 ++++++------ 9 files changed, 66 insertions(+), 40 deletions(-) diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 3f171205cd..09b20e1b54 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -7,6 +7,7 @@ ## Deprecations ## Bug fixes and other changes +* Fix type on `dsl.ParallelFor` sub-DAG output when a `dsl.Collected` is used. Non-functional fix. 
[\#10069](https://github.com/kubeflow/pipelines/pull/10069) ## Documentation updates diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index a8b0f37215..9d2a7cb2de 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -2861,6 +2861,31 @@ def my_pipeline(x: Input[Artifact] = Artifact( class TestCrossTasksGroupFanInCollection(unittest.TestCase): + def test_correct_subdag_return_type(self): + from typing import List + + from kfp import dsl + + @dsl.component + def double(num: int) -> int: + return 2 * num + + @dsl.component + def add(nums: List[int]) -> int: + return sum(nums) + + @dsl.pipeline + def math_pipeline() -> int: + with dsl.ParallelFor([1, 2, 3]) as v: + t = double(num=v) + + return add(nums=dsl.Collected(t.output)).output + + self.assertEqual( + math_pipeline.pipeline_spec.components['comp-for-loop-2'] + .output_definitions.parameters['pipelinechannel--double-Output'] + .parameter_type, type_utils.LIST) + def test_missing_collected_with_correct_annotation(self): from typing import List diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index e6083d8ba9..1c0b7aa463 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -626,7 +626,7 @@ def build_component_spec_for_group( else: component_spec.output_definitions.parameters[ output_name].parameter_type = type_utils.get_parameter_type( - channel.channel_type) + output.channel_type) return component_spec diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml index 0726ea48e0..9f14ee8b69 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml @@ -439,7 +439,7 @@ components: outputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-for-loop-16: dag: tasks: diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml index bf110a3192..6cf59971e9 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/conditional_producer_and_consumers.yaml @@ -44,7 +44,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-condition-4: dag: outputs: @@ -74,7 +74,7 @@ components: outputDefinitions: parameters: pipelinechannel--add-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-double: executorLabel: exec-double inputDefinitions: @@ -117,7 +117,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST deploymentSpec: executors: exec-add: @@ -132,7 +132,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -160,7 +160,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -229,4 +229,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml index e8a4ff9021..ffefb1fac6 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/nested_with_parameters.yaml @@ -74,7 +74,7 @@ components: outputDefinitions: parameters: pipelinechannel--add-two-nums-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-for-loop-4: dag: outputs: @@ -135,7 +135,7 @@ components: outputDefinitions: parameters: pipelinechannel--add-two-nums-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST deploymentSpec: executors: exec-add: @@ -150,7 +150,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -179,7 +179,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -207,7 +207,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -235,7 +235,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -291,4 +291,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml index efade68e71..9e2c0288af 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_complex.yaml @@ -90,9 +90,9 @@ components: outputDefinitions: parameters: pipelinechannel--double-2-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST pipelinechannel--double-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-for-loop-4: dag: outputs: @@ -120,7 +120,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-2-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-for-loop-6: dag: outputs: @@ -167,9 +167,9 @@ components: outputDefinitions: parameters: pipelinechannel--nested-add-2-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST pipelinechannel--simple-add-2-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-nested-add: executorLabel: exec-nested-add inputDefinitions: @@ -224,7 +224,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -253,7 +253,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -281,7 +281,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -309,7 +309,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -338,7 +338,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -367,7 +367,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -395,7 +395,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -491,4 +491,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml index e537b147b8..c315ffee16 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/parameters_simple.yaml @@ -60,7 +60,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST deploymentSpec: executors: exec-add: @@ -75,7 +75,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -113,7 +113,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -184,4 +184,4 @@ root: Output: parameterType: LIST schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml index dc9c8b9ada..2c6e4e4612 100644 --- a/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml +++ b/sdk/python/test_data/pipelines/parallelfor_fan_in/pipeline_producer_consumer.yaml @@ -129,7 +129,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-pipeline-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-for-loop-2-2: dag: outputs: @@ -157,7 +157,7 @@ components: outputDefinitions: parameters: pipelinechannel--echo-and-return-Output: - parameterType: STRING + parameterType: LIST comp-for-loop-4: dag: outputs: @@ -185,7 +185,7 @@ components: outputDefinitions: parameters: pipelinechannel--double-pipeline-Output: - parameterType: NUMBER_INTEGER + parameterType: LIST comp-join-and-print: executorLabel: exec-join-and-print inputDefinitions: @@ -206,7 +206,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -235,7 +235,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -263,7 +263,7 @@ deploymentSpec: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -292,7 +292,7 @@ deploymentSpec: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.3'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ $0\" \"$@\"\n" - sh @@ -364,4 +364,4 @@ root: Output: parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.1.3 +sdkVersion: kfp-2.3.0 From 55f174e5bb9d2c8e68ce11aa1746b2e68b17ff65 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 9 Oct 2023 13:50:18 -0700 Subject: [PATCH 194/253] chore(sdk): clean up compiler_test.py (#10070) --- sdk/python/kfp/compiler/compiler_test.py | 485 +++++++---------------- 1 file changed, 136 insertions(+), 349 deletions(-) diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 9d2a7cb2de..6cf0761461 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -62,18 +62,106 @@ - {outputPath: output_value} """) +### components used throughout tests ### + + +@dsl.component +def flip_coin() -> str: + import random + return 'heads' if random.randint(0, 1) == 0 else 'tails' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + +@dsl.component +def roll_three_sided_die() -> str: + import random + val = random.randint(0, 2) + + if val == 0: + return 'heads' + elif val == 1: + return 'tails' + else: + return 'draw' + + +@dsl.component +def int_zero_through_three() -> int: + import random + return random.randint(0, 3) + + +@dsl.component +def print_op(message: str): + print(message) + + +@dsl.component +def producer_op() -> str: + return 'a' + + +@dsl.component +def dummy_op(msg: str = ''): + pass + + +@dsl.component +def hello_world(text: str) -> str: + """Hello world component.""" + return text + + +@dsl.component +def add(nums: List[int]) -> int: + return sum(nums) + + +@dsl.component +def comp(): + pass + + +@dsl.component +def return_1() -> int: + return 1 + + +@dsl.component +def args_generator_op() -> List[Dict[str, str]]: + return [{'A_a': '1', 'B_b': '2'}, {'A_a': '10', 'B_b': '20'}] + + +@dsl.component +def my_comp(string: str, model: bool) -> str: + return string + + +@dsl.component +def print_hello(): + print('hello') + + +@dsl.component +def double(num: int) -> int: + return 2 * num + + +########### + class TestCompilePipeline(parameterized.TestCase): def test_can_use_dsl_attribute_on_kfp(self): - @kfp.dsl.component - def identity(string: str) -> str: - return string - @kfp.dsl.pipeline def my_pipeline(string: str = 'string'): - op1 = identity(string=string) + op1 = print_and_return(text=string) with tempfile.TemporaryDirectory() as tmpdir: compiler.Compiler().compile( @@ -492,14 +580,6 @@ def my_pipeline(): def test_invalid_data_dependency_loop(self): - @dsl.component - def producer_op() -> str: - return 'a' - - @dsl.component - def dummy_op(msg: str = ''): - pass - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.ParallelFor context unless the downstream is within that context too or the outputs are begin fanned-in to a list using dsl\.Collected\. Found task dummy-op which depends on upstream task producer-op within an uncommon dsl\.ParallelFor context\.' 
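For reference, the dependency rule asserted by the regex above — a task outside a `dsl.ParallelFor` loop may only consume a loop task's output once it is fanned in to a list with `dsl.Collected` — is the same pattern exercised by `test_correct_subdag_return_type` in the sub-DAG output-type fix earlier in this series. A minimal sketch of the legal form, reusing the `double`/`add` components from that test (no new API or names are assumed):

    from typing import List
    from kfp import dsl

    @dsl.component
    def double(num: int) -> int:
        return 2 * num

    @dsl.component
    def add(nums: List[int]) -> int:
        return sum(nums)

    @dsl.pipeline
    def math_pipeline() -> int:
        with dsl.ParallelFor([1, 2, 3]) as v:
            t = double(num=v)
        # The consumer runs outside the loop, so the per-iteration outputs are
        # fanned in to a single list parameter via dsl.Collected; consuming
        # t.output directly here is the illegal topology the test above rejects.
        return add(nums=dsl.Collected(t.output)).output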
@@ -514,14 +594,6 @@ def my_pipeline(val: bool): def test_invalid_data_dependency_condition(self): - @dsl.component - def producer_op() -> str: - return 'a' - - @dsl.component - def dummy_op(msg: str = ''): - pass - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.Condition context unless the downstream is within that context too\. Found task dummy-op which depends on upstream task producer-op within an uncommon dsl\.Condition context\.' @@ -536,14 +608,6 @@ def my_pipeline(val: bool): def test_valid_data_dependency_condition(self): - @dsl.component - def producer_op() -> str: - return 'a' - - @dsl.component - def dummy_op(msg: str = ''): - pass - @dsl.pipeline(name='test-pipeline') def my_pipeline(val: bool): with dsl.Condition(val == False): @@ -557,14 +621,6 @@ def my_pipeline(val: bool): def test_invalid_data_dependency_exit_handler(self): - @dsl.component - def producer_op() -> str: - return 'a' - - @dsl.component - def dummy_op(msg: str = ''): - pass - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.ExitHandler context unless the downstream is within that context too\. Found task dummy-op which depends on upstream task producer-op-2 within an uncommon dsl\.ExitHandler context\.' @@ -580,14 +636,6 @@ def my_pipeline(val: bool): def test_valid_data_dependency_exit_handler(self): - @dsl.component - def producer_op() -> str: - return 'a' - - @dsl.component - def dummy_op(msg: str = ''): - pass - @dsl.pipeline(name='test-pipeline') def my_pipeline(val: bool): first_producer = producer_op() @@ -639,10 +687,6 @@ def my_pipeline(text: bool): def test_task_final_status_parameter_type_is_used(self): # previously compiled to STRUCT type, so checking that this is updated - @dsl.component - def identity(string: str) -> str: - return string - @dsl.component def exit_comp(status: dsl.PipelineTaskFinalStatus): print(status) @@ -651,7 +695,7 @@ def exit_comp(status: dsl.PipelineTaskFinalStatus): def my_pipeline(): exit_task = exit_comp() with dsl.ExitHandler(exit_task=exit_task): - identity(string='hi') + print_and_return(text='hi') self.assertEqual( my_pipeline.pipeline_spec.components['comp-exit-comp'] @@ -722,13 +766,9 @@ def my_pipeline(text: bool): def test_pipeline_in_pipeline(self): - @dsl.component - def print_op(msg: str): - print(msg) - @dsl.pipeline(name='graph-component') def graph_component(msg: str): - print_op(msg=msg) + print_op(message=msg) @dsl.pipeline(name='test-pipeline') def my_pipeline(): @@ -755,43 +795,31 @@ def test_pipeline_with_invalid_output(self): with self.assertRaisesRegex( ValueError, r'Pipeline or component output not defined: msg1'): - @dsl.component - def print_op(msg: str) -> str: - print(msg) - @dsl.pipeline def my_pipeline() -> NamedTuple('Outputs', [ ('msg', str), ]): - task = print_op(msg='Hello') + task = print_and_return(text='Hello') output = collections.namedtuple('Outputs', ['msg1']) return output(task.output) def test_pipeline_with_missing_output(self): with self.assertRaisesRegex(ValueError, 'Missing pipeline output: msg'): - @dsl.component - def print_op(msg: str) -> str: - print(msg) - @dsl.pipeline def my_pipeline() -> NamedTuple('Outputs', [ ('msg', str), ]): - task = print_op(msg='Hello') + task = print_and_return(text='Hello') with self.assertRaisesRegex(ValueError, 
'Missing pipeline output: model'): - @dsl.component - def print_op(msg: str) -> str: - print(msg) - @dsl.pipeline def my_pipeline() -> NamedTuple('Outputs', [ ('model', dsl.Model), ]): - task = print_op(msg='Hello') + task = print_and_return(text='Hello') class V2NamespaceAliasTest(unittest.TestCase): @@ -830,11 +858,6 @@ def test_import_modules(self): from kfp.v2 import compiler from kfp.v2 import dsl - @dsl.component - def hello_world(text: str) -> str: - """Hello world component.""" - return text - @dsl.pipeline(name='hello-world', description='A simple intro pipeline') def pipeline_hello_world(text: str = 'hi there'): """Hello world pipeline.""" @@ -881,10 +904,6 @@ class TestWriteToFileTypes(parameterized.TestCase): def make_pipeline_spec(self): - @dsl.component - def dummy_op(): - pass - @dsl.pipeline(name=self.pipeline_name) def my_pipeline(): task = dummy_op() @@ -979,11 +998,6 @@ class TestCompileComponent(parameterized.TestCase): @parameterized.parameters(['.json', '.yaml', '.yml']) def test_compile_component_simple(self, extension: str): - @dsl.component - def hello_world(text: str) -> str: - """Hello world component.""" - return text - with tempfile.TemporaryDirectory() as tempdir: output_json = os.path.join(tempdir, f'component{extension}') compiler.Compiler().compile( @@ -1051,11 +1065,6 @@ def hello_world(text: str = 'default_string') -> str: def test_compile_component_with_pipeline_parameters_override(self): - @dsl.component - def hello_world(text: str) -> str: - """Hello world component.""" - return text - with tempfile.TemporaryDirectory() as tempdir: output_json = os.path.join(tempdir, 'component.yaml') compiler.Compiler().compile( @@ -1322,11 +1331,6 @@ class TestSetRetryCompilation(unittest.TestCase): def test_set_retry(self): - @dsl.component - def hello_world(text: str) -> str: - """Hello world component.""" - return text - @dsl.pipeline(name='hello-world', description='A simple intro pipeline') def pipeline_hello_world(text: str = 'hi there'): """Hello world pipeline.""" @@ -1358,10 +1362,6 @@ class TestMultipleExitHandlerCompilation(unittest.TestCase): def test_basic(self): - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): first_exit_task = print_op(message='First exit task.') @@ -1404,10 +1404,6 @@ def my_pipeline(): def test_nested_unsupported(self): - @dsl.component - def print_op(message: str): - print(message) - with self.assertRaisesRegex( ValueError, r'ExitHandler can only be used within the outermost scope of a pipeline function definition\.' 
@@ -1580,22 +1576,6 @@ def my_pipeline(): .runtime_value.constant.bool_value) -# helper component defintions for the ValidLegalTopologies tests -@dsl.component -def print_op(message: str): - print(message) - - -@dsl.component -def return_1() -> int: - return 1 - - -@dsl.component -def args_generator_op() -> List[Dict[str, str]]: - return [{'A_a': '1', 'B_b': '2'}, {'A_a': '10', 'B_b': '20'}] - - class TestValidLegalTopologies(unittest.TestCase): def test_inside_of_root_group_permitted(self): @@ -1817,16 +1797,12 @@ def test_inner_parallelfor_can_iter_over_upstream_output(self): def str_to_list(string: str) -> List: return [string] - @dsl.component - def identity(string: str) -> str: - return string - @dsl.pipeline def my_pipeline(): with dsl.ParallelFor(['a', 'b', 'c']) as itema: t1 = str_to_list(string=itema) with dsl.ParallelFor(t1.output) as itemb: - identity(string=itemb) + print_and_return(text=itemb) with tempfile.TemporaryDirectory() as tempdir: package_path = os.path.join(tempdir, 'pipeline.yaml') @@ -1839,10 +1815,6 @@ def test_permitted_nested_parallelfor_complex(self): def str_to_list(string: str) -> List: return [string] - @dsl.component - def identity(string: str) -> str: - return string - @dsl.pipeline def my_pipeline(): @@ -1851,18 +1823,18 @@ def my_pipeline(): t1 = str_to_list(string=itema) t2 = str_to_list(string=itema) - sequential_task1 = identity(string=itema) - identity(string=sequential_task1.output) + sequential_task1 = print_and_return(text=itema) + print_and_return(text=sequential_task1.output) # for-loop-3 with dsl.ParallelFor(t1.output) as itemb: t3 = str_to_list(string=itema) with dsl.ParallelFor(t3.output) as itemc: - identity(string=itemc) + print_and_return(text=itemc) with dsl.ParallelFor(t2.output) as itemd: - identity(string=itemd) + print_and_return(text=itemd) with dsl.ParallelFor(t2.output) as iteme: - identity(string=iteme) + print_and_return(text=iteme) with tempfile.TemporaryDirectory() as tempdir: package_path = os.path.join(tempdir, 'pipeline.yaml') @@ -1926,10 +1898,6 @@ def test_inner_task_prevented(self): r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.ExitHandler context unless the downstream is within that context too\. Found task print-op-4 which depends on upstream task print-op-2 within an uncommon dsl\.ExitHandler context\.' ): - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): first_exit_task = print_op(message='First exit task.') @@ -1954,10 +1922,6 @@ def test_exit_handler_task_prevented(self): r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.ExitHandler context unless the downstream is within that context too\. Found task print-op-4 which depends on upstream task print-op-2 within an uncommon dsl\.ExitHandler context\.' 
): - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): first_exit_task = print_op(message='First exit task.') @@ -1978,10 +1942,6 @@ def my_pipeline(): def test_within_same_exit_handler_permitted(self): - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): first_exit_task = print_op(message='First exit task.') @@ -2008,14 +1968,6 @@ def test_outside_of_condition_blocked(self): r'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.Condition context unless the downstream is within that context too\. Found task print-op-3 which depends on upstream task print-op within an uncommon dsl\.Condition context\.' ): - @dsl.component - def print_op(message: str): - print(message) - - @dsl.component - def return_1() -> int: - return 1 - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): return_1_task = return_1() @@ -2032,14 +1984,6 @@ def my_pipeline(): def test_inside_of_condition_permitted(self): - @dsl.component - def print_op(message: str): - print(message) - - @dsl.component - def return_1() -> int: - return 1 - @dsl.pipeline(name='pipeline-with-multiple-exit-handlers') def my_pipeline(): return_1_task = return_1() @@ -2055,11 +1999,6 @@ def my_pipeline(): pipeline_func=my_pipeline, package_path=package_path) -@dsl.component -def identity(string: str, model: bool) -> str: - return string - - class TestYamlComments(unittest.TestCase): def test_comments_include_inputs_and_outputs_and_pipeline_name(self): @@ -2067,7 +2006,8 @@ def test_comments_include_inputs_and_outputs_and_pipeline_name(self): @dsl.pipeline() def my_pipeline(sample_input1: bool = True, sample_input2: str = 'string') -> str: - op1 = identity(string=sample_input2, model=sample_input1) + + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2102,7 +2042,7 @@ def test_no_description(self): @dsl.pipeline() def pipeline_with_no_description(sample_input1: bool = True, sample_input2: str = 'string') -> str: - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2141,7 +2081,7 @@ def test_description_from_docstring(self): def pipeline_with_description(sample_input1: bool = True, sample_input2: str = 'string') -> str: """This is a description of this pipeline.""" - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2180,7 +2120,7 @@ def test_description_from_decorator(self): def pipeline_with_description(sample_input1: bool = True, sample_input2: str = 'string') -> str: """Don't prefer this description.""" - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2218,7 +2158,7 @@ def test_comments_on_pipeline_with_no_inputs_or_outputs(self): @dsl.pipeline() def pipeline_with_no_inputs() -> str: - op1 = identity(string='string', model=True) + op1 = my_comp(string='string', model=True) result = op1.output return result @@ -2238,7 +2178,7 @@ def pipeline_with_no_inputs() -> str: @dsl.pipeline() def pipeline_with_no_outputs(sample_input1: bool = True, sample_input2: str = 'string'): - identity(string=sample_input2, 
model=sample_input1) + my_comp(string=sample_input2, model=sample_input1) with tempfile.TemporaryDirectory() as tmpdir: pipeline_spec_path = os.path.join(tmpdir, 'output.yaml') @@ -2259,7 +2199,7 @@ def test_comments_follow_pattern(self): def my_pipeline(sample_input1: bool = True, sample_input2: str = 'string') -> str: """This is a definition of this pipeline.""" - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2391,7 +2331,7 @@ def test_comments_idempotency(self): def my_pipeline(sample_input1: bool = True, sample_input2: str = 'string') -> str: """My description.""" - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2433,7 +2373,7 @@ def pipeline_with_multiline_definition( """docstring short description. docstring long description. docstring long description. """ - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2462,7 +2402,7 @@ def pipeline_with_multiline_definition( docstring long description. docstring long description. """ - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2492,7 +2432,7 @@ def my_pipeline(sample_input1: bool = True, docstring long description. docstring long description. """ - op1 = identity(string=sample_input2, model=sample_input1) + op1 = my_comp(string=sample_input2, model=sample_input1) result = op1.output return result @@ -2773,13 +2713,9 @@ def comp(x: Optional[Input[Artifact]] = None): def test_pipeline(self): - @dsl.component - def comp(): - print('hello') - @dsl.pipeline def my_pipeline(x: Optional[Input[Artifact]] = None): - comp() + print_hello() artifact_spec_from_root = my_pipeline.pipeline_spec.root.input_definitions.artifacts[ 'x'] @@ -2787,13 +2723,9 @@ def my_pipeline(x: Optional[Input[Artifact]] = None): def test_pipeline_without_optional_type_modifier(self): - @dsl.component - def comp(): - print('hello') - @dsl.pipeline def my_pipeline(x: Input[Artifact] = None): - comp() + print_hello() artifact_spec_from_root = my_pipeline.pipeline_spec.root.input_definitions.artifacts[ 'x'] @@ -2862,13 +2794,6 @@ def my_pipeline(x: Input[Artifact] = Artifact( class TestCrossTasksGroupFanInCollection(unittest.TestCase): def test_correct_subdag_return_type(self): - from typing import List - - from kfp import dsl - - @dsl.component - def double(num: int) -> int: - return 2 * num @dsl.component def add(nums: List[int]) -> int: @@ -2887,17 +2812,6 @@ def math_pipeline() -> int: .parameter_type, type_utils.LIST) def test_missing_collected_with_correct_annotation(self): - from typing import List - - from kfp import dsl - - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) with self.assertRaisesRegex( type_utils.InconsistentTypeException, @@ -2913,10 +2827,6 @@ def math_pipeline() -> int: def test_missing_collected_with_incorrect_annotation(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - @dsl.component def add(nums: int) -> int: return nums @@ -2934,35 +2844,8 @@ def math_pipeline() -> int: return add(nums=t.output).output - def test_producer_condition_legal1(self): - from kfp import dsl - - @dsl.component - def double(num: int) -> int: 
- return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - - @dsl.pipeline - def math_pipeline(text: str) -> int: - with dsl.Condition(text == 'text'): - with dsl.ParallelFor([1, 2, 3]) as v: - t = double(num=v) - - return add(nums=dsl.Collected(t.output)).output - def test_producer_condition_legal2(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - @dsl.pipeline def my_pipeline(a: str): with dsl.ParallelFor([1, 2, 3]) as v: @@ -2974,14 +2857,6 @@ def my_pipeline(a: str): def test_producer_condition_illegal1(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. When using dsl\.Collected to fan-in outputs from a task within a dsl\.ParallelFor context, the dsl\.ParallelFor context manager cannot be nested within a dsl.Condition context manager unless the consumer task is too\. Task add consumes from double within a dsl\.Condition context\.' @@ -2998,14 +2873,6 @@ def my_pipeline(a: str = '', b: str = ''): def test_producer_condition_illegal2(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. When using dsl\.Collected to fan-in outputs from a task within a dsl\.ParallelFor context, the dsl\.ParallelFor context manager cannot be nested within a dsl\.Condition context manager unless the consumer task is too\. Task add consumes from double within a dsl\.Condition context\.' @@ -3020,18 +2887,10 @@ def my_pipeline(a: str = ''): def test_producer_exit_handler_illegal1(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - @dsl.component def exit_comp(): print('Running exit task!') - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'Illegal task dependency across DSL context managers\. When using dsl\.Collected to fan-in outputs from a task within a dsl\.ParallelFor context, the dsl\.ParallelFor context manager cannot be nested within a dsl\.ExitHandler context manager unless the consumer task is too\. Task add consumes from double within a dsl\.ExitHandler context\.' @@ -3084,14 +2943,6 @@ def my_pipeline(): def test_producer_and_consumer_in_same_context(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'dsl\.Collected can only be used to fan-in outputs produced by a task within a dsl\.ParallelFor context to a task outside of the dsl\.ParallelFor context\. Producer task double is either not in a dsl\.ParallelFor context or is only in a dsl\.ParallelFor that also contains consumer task add\.' 
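For readers following these topology tests: the legal pattern they exercise is a fan-in in which the dsl.Collected consumer sits outside the dsl.ParallelFor that produced the values. A minimal sketch of that pattern, restating the double/add helpers locally so it is self-contained (this sketch is illustrative and not part of the patch itself):

from typing import List

from kfp import dsl


@dsl.component
def double(num: int) -> int:
    return 2 * num


@dsl.component
def add(nums: List[int]) -> int:
    return sum(nums)


@dsl.pipeline
def math_pipeline() -> int:
    # Producer runs once per loop iteration, inside the ParallelFor context.
    with dsl.ParallelFor([1, 2, 3]) as v:
        t = double(num=v)
    # Consumer is outside the loop, so dsl.Collected may fan-in the per-iteration outputs.
    return add(nums=dsl.Collected(t.output)).output
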
@@ -3105,14 +2956,6 @@ def math_pipeline(): def test_no_parallelfor_context(self): - @dsl.component - def double(num: int) -> int: - return 2 * num - - @dsl.component - def add(nums: List[int]) -> int: - return sum(nums) - with self.assertRaisesRegex( compiler_utils.InvalidTopologyException, r'dsl\.Collected can only be used to fan-in outputs produced by a task within a dsl\.ParallelFor context to a task outside of the dsl\.ParallelFor context\. Producer task double is either not in a dsl\.ParallelFor context or is only in a dsl\.ParallelFor that also contains consumer task add\.' @@ -3352,13 +3195,9 @@ def fail_op(message: str = 'message') -> str: sys.exit(1) return message - @dsl.component - def identity(message: str = 'message') -> str: - return message - @dsl.pipeline def wrapped_pipeline(message: str = 'message') -> str: - task = identity(message=message) + task = print_and_return(text=message) return task.output @dsl.pipeline @@ -3384,10 +3223,6 @@ def fail_op(message: str) -> str: sys.exit(1) return message - @dsl.component - def print_op(message: str): - print(message) - with self.assertRaisesRegex( ValueError, r'Tasks can only use .ignore_upstream_failure()'): @@ -3407,10 +3242,6 @@ def fail_op(message: str) -> str: sys.exit(1) return message - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline() def my_pipeline(sample_input1: str = 'message'): task = fail_op(message=sample_input1) @@ -3433,10 +3264,6 @@ def fail_op(message: str) -> str: sys.exit(1) return message - @dsl.component - def print_op(message: str): - print(message) - @dsl.pipeline() def my_pipeline(sample_input1: str = 'message'): task = fail_op(message=sample_input1) @@ -3585,11 +3412,6 @@ def baz_platform_set_bat_feature(task: pipeline_task.PipelineTask, return task -@dsl.component -def comp(): - pass - - def compile_and_reload( pipeline: graph_component.GraphComponent ) -> yaml_component.YamlComponent: @@ -3604,49 +3426,45 @@ class TestResourceConfig(unittest.TestCase): def test_cpu_memory_optional(self): - @dsl.component - def predict_op() -> str: - return 'a' - @dsl.pipeline def simple_pipeline(): - predict_op() - predict_op().set_cpu_limit('5') - predict_op().set_memory_limit('50G') - predict_op().set_cpu_request('2').set_cpu_limit( + return_1() + return_1().set_cpu_limit('5') + return_1().set_memory_limit('50G') + return_1().set_cpu_request('2').set_cpu_limit( '5').set_memory_request('4G').set_memory_limit('50G') dict_format = json_format.MessageToDict(simple_pipeline.pipeline_spec) self.assertNotIn( 'resources', dict_format['deploymentSpec']['executors'] - ['exec-predict-op']['container']) + ['exec-return-1']['container']) self.assertEqual( - 5, dict_format['deploymentSpec']['executors']['exec-predict-op-2'] + 5, dict_format['deploymentSpec']['executors']['exec-return-1-2'] ['container']['resources']['cpuLimit']) self.assertNotIn( 'memoryLimit', dict_format['deploymentSpec']['executors'] - ['exec-predict-op-2']['container']['resources']) + ['exec-return-1-2']['container']['resources']) self.assertEqual( - 50, dict_format['deploymentSpec']['executors']['exec-predict-op-3'] + 50, dict_format['deploymentSpec']['executors']['exec-return-1-3'] ['container']['resources']['memoryLimit']) self.assertNotIn( 'cpuLimit', dict_format['deploymentSpec']['executors'] - ['exec-predict-op-3']['container']['resources']) + ['exec-return-1-3']['container']['resources']) self.assertEqual( - 2, dict_format['deploymentSpec']['executors']['exec-predict-op-4'] + 2, 
dict_format['deploymentSpec']['executors']['exec-return-1-4'] ['container']['resources']['cpuRequest']) self.assertEqual( - 5, dict_format['deploymentSpec']['executors']['exec-predict-op-4'] + 5, dict_format['deploymentSpec']['executors']['exec-return-1-4'] ['container']['resources']['cpuLimit']) self.assertEqual( - 4, dict_format['deploymentSpec']['executors']['exec-predict-op-4'] + 4, dict_format['deploymentSpec']['executors']['exec-return-1-4'] ['container']['resources']['memoryRequest']) self.assertEqual( - 50, dict_format['deploymentSpec']['executors']['exec-predict-op-4'] + 50, dict_format['deploymentSpec']['executors']['exec-return-1-4'] ['container']['resources']['memoryLimit']) @@ -4322,37 +4140,6 @@ def my_pipeline( 'Component output artifact.') -@dsl.component -def flip_coin() -> str: - import random - return 'heads' if random.randint(0, 1) == 0 else 'tails' - - -@dsl.component -def print_and_return(text: str) -> str: - print(text) - return text - - -@dsl.component -def flip_three_sided_coin() -> str: - import random - val = random.randint(0, 2) - - if val == 0: - return 'heads' - elif val == 1: - return 'tails' - else: - return 'draw' - - -@dsl.component -def int_zero_through_three() -> int: - import random - return random.randint(0, 3) - - class TestConditionLogic(unittest.TestCase): def test_if(self): @@ -4397,7 +4184,7 @@ def test_if_elif_else(self): @dsl.pipeline def flip_coin_pipeline(): - flip_coin_task = flip_three_sided_coin() + flip_coin_task = roll_three_sided_die() with dsl.If(flip_coin_task.output == 'heads'): print_and_return(text='Got heads!') with dsl.Elif(flip_coin_task.output == 'tails'): @@ -4409,20 +4196,20 @@ def flip_coin_pipeline(): flip_coin_pipeline.pipeline_spec .components['comp-condition-branches-1'].dag.tasks['condition-2'] .trigger_policy.condition, - "inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads'" + "inputs.parameter_values['pipelinechannel--roll-three-sided-die-Output'] == 'heads'" ) self.assertEqual( flip_coin_pipeline.pipeline_spec .components['comp-condition-branches-1'].dag.tasks['condition-3'] .trigger_policy.condition, - "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails'" + "!(inputs.parameter_values['pipelinechannel--roll-three-sided-die-Output'] == 'heads') && inputs.parameter_values['pipelinechannel--roll-three-sided-die-Output'] == 'tails'" ) self.assertEqual( flip_coin_pipeline.pipeline_spec .components['comp-condition-branches-1'].dag.tasks['condition-4'] .trigger_policy.condition, - "!(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--flip-three-sided-coin-Output'] == 'tails')" + "!(inputs.parameter_values['pipelinechannel--roll-three-sided-die-Output'] == 'heads') && !(inputs.parameter_values['pipelinechannel--roll-three-sided-die-Output'] == 'tails')" ) def test_if_multiple_elif_else(self): From 2bb57ece351757ab5aefa57c74fda397425abd00 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 9 Oct 2023 14:25:45 -0700 Subject: [PATCH 195/253] feat(components): Upload tensorboard metrics from RLHF pipeline if a tensorboard resource id is provided PiperOrigin-RevId: 572040791 --- components/google-cloud/RELEASE.md | 1 + .../preview/llm/rlhf/component.py | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 
07250f9356..c5d3d1e68c 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,4 +1,5 @@ ## Upcoming release +* Upload tensorboard metrics from RLHF pipelines if a tensorboard resource id is provided at runtime. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index 33f8b48504..b421cc2c8a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -25,6 +25,7 @@ from google_cloud_pipeline_components._implementation.llm import reinforcer from google_cloud_pipeline_components._implementation.llm import reward_model_trainer from google_cloud_pipeline_components._implementation.llm import upload_llm_model +from google_cloud_pipeline_components._implementation.llm import upload_tensorboard_metrics from google_cloud_pipeline_components.preview.llm.infer import component import kfp @@ -54,6 +55,7 @@ def rlhf_pipeline( eval_dataset: Optional[str] = None, project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, + tensorboard_resource_id: Optional[str] = None, ) -> PipelineOutput: # fmt: off """Performs reinforcement learning from human feedback. @@ -75,6 +77,7 @@ def rlhf_pipeline( eval_dataset: Optional Cloud storage path to an evaluation dataset. If provided, inference will be performed on this dataset after training. The dataset format is jsonl. Each example in the dataset must contain a field `input_text` that contains the prompt. project: Project used to run custom jobs. If not specified the project used to run the pipeline will be used. location: Location used to run custom jobs. If not specified the location used to run the pipeline will be used. + tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location. Returns: model_resource_name: Path to the model uploaded to the Model Registry. This will be an empty string if the model was not deployed. 
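As context for the new parameter documented above, a minimal sketch (not part of this patch) of compiling the pipeline and supplying tensorboard_resource_id at runtime; the resource id below is a placeholder and the other required rlhf_pipeline arguments are elided:

from kfp import compiler
from google_cloud_pipeline_components.preview.llm.rlhf import component as rlhf

# Compile once; the tensorboard upload steps are part of the compiled graph but are
# gated by the value_exists() condition added in the hunk below.
compiler.Compiler().compile(
    pipeline_func=rlhf.rlhf_pipeline,
    package_path='rlhf_pipeline.yaml',
)

# Supplied at submission time like any other pipeline parameter. Per the docstring
# above, metrics are uploaded only when this id is provided.
runtime_parameters = {
    # ... other required rlhf_pipeline arguments ...
    'tensorboard_resource_id': 'projects/1234567890/locations/us-central1/tensorboards/987654321',
}
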
@@ -176,6 +179,23 @@ def rlhf_pipeline( .set_caching_options(False) ) + has_tensorboard_id = function_based.value_exists( + value=tensorboard_resource_id + ) + with kfp.dsl.Condition( # pytype: disable=wrong-arg-types + has_tensorboard_id.output == True, # pylint: disable=singleton-comparison, g-explicit-bool-comparison + name='Upload Reward Model Tensorboard Metrics', + ): + _ = upload_tensorboard_metrics.upload_tensorboard_metrics( + tensorboard_resource_id=tensorboard_resource_id, + metrics_directory=reward_model.outputs['tensorboard_metrics'], + experiment_name=( + 'reward-model-tuner-' + f'{kfp.dsl.PIPELINE_JOB_ID_PLACEHOLDER}-' + f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + ) + rl_image_uri = function_based.resolve_private_image_uri( image_name='reinforcer', accelerator_type=machine_spec.outputs['accelerator_type'], @@ -214,6 +234,20 @@ def rlhf_pipeline( .set_caching_options(False) ) + with kfp.dsl.Condition( # pytype: disable=wrong-arg-types + has_tensorboard_id.output == True, # pylint: disable=singleton-comparison, g-explicit-bool-comparison + name='Upload Reinforcement Learning Tensorboard Metrics', + ): + _ = upload_tensorboard_metrics.upload_tensorboard_metrics( + tensorboard_resource_id=tensorboard_resource_id, + metrics_directory=rl_model.outputs['tensorboard_metrics'], + experiment_name=( + 'rl-model-tuner-' + f'{kfp.dsl.PIPELINE_JOB_ID_PLACEHOLDER}-' + f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' + ), + ) + should_perform_inference = function_based.value_exists(value=eval_dataset) with kfp.dsl.Condition( should_perform_inference.output == True, name='Perform Inference' # pylint: disable=singleton-comparison From 15c24e344f4c624d151116e548c470be935b8e70 Mon Sep 17 00:00:00 2001 From: Changyu Zhu Date: Mon, 9 Oct 2023 16:38:43 -0700 Subject: [PATCH 196/253] feat(components): Switch v1 AutoMLImageTrainingJob to use the pipeline remote runner PiperOrigin-RevId: 572073436 --- components/google-cloud/RELEASE.md | 1 + .../v1/automl_training_job/image/launcher.py | 189 +++++++++++++++++- .../image/remote_runner.py | 138 ++++++++++++- .../v1/gcp_launcher/utils/parser_util.py | 4 + .../automl_image_training_job/component.py | 120 +++++++---- 5 files changed, 408 insertions(+), 44 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index c5d3d1e68c..8a60889f94 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,6 @@ ## Upcoming release * Upload tensorboard metrics from RLHF pipelines if a tensorboard resource id is provided at runtime. +* Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py index 25ad5d2484..1662994efa 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/launcher.py @@ -13,20 +13,197 @@ # limitations under the License. 
"""GCP launcher for AutoML image training jobs based on the AI Platform SDK.""" +import argparse +import json import logging import sys +from typing import List from google_cloud_pipeline_components.container.v1.automl_training_job.image import remote_runner from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import parser_util -def _parse_args(args): +def _parse_args(args: List[str]): """Parse command line arguments.""" - _, parsed_args = parser_util.parse_default_args(args) - return vars(parsed_args) + args.append('--payload') + args.append('"{}"') # Unused but required by parser_util. + parser, _ = parser_util.parse_default_args(args) + # Parse the conditionally required arguments + parser.add_argument( + '--display_name', + dest='display_name', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--prediction_type', + dest='prediction_type', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--multi_label', + dest='multi_label', + type=parser_util.parse_bool, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_type', + dest='model_type', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--labels', + dest='labels', + type=json.loads, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--dataset', + dest='dataset', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--disable_early_stopping', + dest='disable_early_stopping', + type=parser_util.parse_bool, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--training_encryption_spec_key_name', + dest='training_encryption_spec_key_name', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_encryption_spec_key_name', + dest='model_encryption_spec_key_name', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_display_name', + dest='model_display_name', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--training_fraction_split', + dest='training_fraction_split', + type=float, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--validation_fraction_split', + dest='validation_fraction_split', + type=float, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--test_fraction_split', + dest='test_fraction_split', + type=float, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--budget_milli_node_hours', + dest='budget_milli_node_hours', + type=int, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--training_filter_split', + dest='training_filter_split', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--validation_filter_split', + dest='validation_filter_split', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--test_filter_split', + dest='test_filter_split', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--base_model', + dest='base_model', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--incremental_train_base_model', + dest='incremental_train_base_model', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--parent_model', + dest='parent_model', + type=str, + 
required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--is_default_version', + dest='is_default_version', + type=parser_util.parse_bool, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_version_aliases', + dest='model_version_aliases', + type=json.loads, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_version_description', + dest='model_version_description', + type=str, + required=False, + default=argparse.SUPPRESS, + ) + parser.add_argument( + '--model_labels', + dest='model_labels', + type=json.loads, + required=False, + default=argparse.SUPPRESS, + ) + parsed_args, _ = parser.parse_known_args(args) + args_dict = vars(parsed_args) + del args_dict['payload'] + return args_dict -def main(argv): +def main(argv: List[str]): """Main entry. Expected input args are as follows: @@ -34,9 +211,9 @@ def main(argv): Region - Required. The region of which the resource will be launched. Type - Required. GCP launcher is a single container. This Enum will specify which resource to be launched. - Request payload - Required. The full serialized json of the resource spec. - Note this can contain the Pipeline Placeholders. gcp_resources - placeholder output for returning job_id. + Extra arguments - For constructing request payload. See remote_runner.py for + more information. Args: argv: A list of system arguments. diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py index e0d46c825b..a48616cc23 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/automl_training_job/image/remote_runner.py @@ -14,15 +14,143 @@ """GCP remote runner for AutoML image training pipelines based on the AI Platform SDK.""" import logging -from typing import Any +from typing import Any, Dict, Optional, Sequence from google.api_core import retry +from google.cloud.aiplatform import datasets from google.cloud.aiplatform import gapic +from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import models +from google.cloud.aiplatform import schema +from google.cloud.aiplatform import training_jobs +from google.cloud.aiplatform_v1.types import model +from google.cloud.aiplatform_v1.types import training_pipeline from google_cloud_pipeline_components.container.v1.gcp_launcher import pipeline_remote_runner from google_cloud_pipeline_components.container.v1.gcp_launcher.utils import error_util +from google.protobuf import struct_pb2 +from google.protobuf import json_format + _GET_PIPELINE_RETRY_DEADLINE_SECONDS = 10.0 * 60.0 +_CLASSIFICATION = 'classification' +_OBJECT_DETECTION = 'object_detection' + + +# pylint: disable=protected-access +def create_payload( + project: str, + location: str, + display_name: Optional[str] = None, + prediction_type: str = _CLASSIFICATION, + multi_label: bool = False, + model_type: str = 'CLOUD', + labels: Optional[Dict[str, str]] = None, + dataset: Optional[str] = None, + disable_early_stopping: bool = False, + training_encryption_spec_key_name: Optional[str] = None, + model_encryption_spec_key_name: Optional[str] = None, + model_display_name: Optional[str] = None, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = 
None, + test_fraction_split: Optional[float] = None, + budget_milli_node_hours: Optional[int] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, + base_model: Optional[str] = None, + incremental_train_base_model: Optional[str] = None, + parent_model: Optional[str] = None, + is_default_version: Optional[bool] = True, + model_version_aliases: Optional[Sequence[str]] = None, + model_version_description: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, +) -> str: + """Creates a AutoML Image Training Job payload.""" + # Override default model_type for object_detection + if model_type == 'CLOUD' and prediction_type == _OBJECT_DETECTION: + model_type = 'CLOUD_HIGH_ACCURACY_1' + + training_encryption_spec = initializer.global_config.get_encryption_spec( + encryption_spec_key_name=training_encryption_spec_key_name + ) + model_encryption_spec = initializer.global_config.get_encryption_spec( + encryption_spec_key_name=model_encryption_spec_key_name + ) + + # Training task inputs. + training_task_inputs = { + # required inputs + 'modelType': model_type, + 'budgetMilliNodeHours': budget_milli_node_hours, + # optional inputs + 'disableEarlyStopping': disable_early_stopping, + } + if prediction_type == _CLASSIFICATION: + training_task_inputs['multiLabel'] = multi_label + if incremental_train_base_model: + training_task_inputs['uptrainBaseModelId'] = incremental_train_base_model + + training_task_definition = getattr( + schema.training_job.definition, f'automl_image_{prediction_type}' + ) + + # Input data config. + input_data_config = training_jobs._TrainingJob._create_input_data_config( + dataset=dataset and datasets.ImageDataset(dataset_name=dataset), + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, + ) + + # Model to upload. + model_to_upload = model.Model( + display_name=model_display_name or display_name, + labels=model_labels or labels, + encryption_spec=model_encryption_spec, + version_aliases=models.ModelRegistry._get_true_alias_list( + model_version_aliases, is_default_version + ), + version_description=model_version_description, + ) + + # Sets base_model. + if base_model: + training_task_inputs['baseModelId'] = base_model + + # Create training task inputs. + training_task_inputs_struct = struct_pb2.Struct() + training_task_inputs_struct.update(training_task_inputs) + + # Gets parent_model. 
+ parent_model = models.ModelRegistry._get_true_version_parent( + parent_model=parent_model, + project=project, + location=location, + ) + + pipeline = training_pipeline.TrainingPipeline( + display_name=display_name, + training_task_definition=training_task_definition, + training_task_inputs=struct_pb2.Value( + struct_value=training_task_inputs_struct + ), + model_to_upload=model_to_upload, + parent_model=parent_model, + input_data_config=input_data_config, + labels=labels, + encryption_spec=training_encryption_spec, + ) + + return json_format.MessageToJson( + pipeline._pb, preserving_proto_field_name=True + ) + + +# pylint: enable=protected-access def create_pipeline_with_client( @@ -39,7 +167,8 @@ def create_pipeline_with_client( pipeline_spec, ) created_pipeline = pipeline_client.create_training_pipeline( - parent=parent, training_pipeline=pipeline_spec + parent=parent, + training_pipeline=training_pipeline.TrainingPipeline(**pipeline_spec), ) except (ConnectionError, RuntimeError) as err: error_util.exit_with_internal_error(err.args[0]) @@ -65,8 +194,8 @@ def create_pipeline( type: str, # pylint: disable=redefined-builtin project: str, location: str, - payload: str, gcp_resources: str, + **kwargs: Dict[str, Any], ): """Create and poll AutoML Vision training pipeline status till it reaches a final state. @@ -92,8 +221,8 @@ def create_pipeline( type: Job type. project: Project name. location: Location to start the training job. - payload: Serialized JSON payload. gcp_resources: URI for storing GCP resources. + **kwargs: Extra args for creating the payload. """ remote_runner = pipeline_remote_runner.PipelineRemoteRunner( type, project, location, gcp_resources @@ -103,6 +232,7 @@ def create_pipeline( # Create AutoML vision training pipeline if it does not exist pipeline_name = remote_runner.check_if_pipeline_exists() if pipeline_name is None: + payload = create_payload(project, location, **kwargs) logging.info( 'AutoML Vision training payload formatted: %s', payload, diff --git a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/parser_util.py b/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/parser_util.py index a1f2e0f48a..a2f4bfe434 100644 --- a/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/parser_util.py +++ b/components/google-cloud/google_cloud_pipeline_components/container/v1/gcp_launcher/utils/parser_util.py @@ -61,3 +61,7 @@ def parse_default_args(args): parsed_args, _ = parser.parse_known_args(args) return (parser, parsed_args) + + +def parse_bool(value: str) -> bool: + return value is not None and value.lower() != 'false' diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py index 68a5d62700..7f670e19ef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/automl_image_training_job/component.py @@ -13,7 +13,7 @@ # limitations under the License. 
-from typing import Optional +from typing import Dict, List, Optional from google_cloud_pipeline_components import _image from google_cloud_pipeline_components.types.artifact_types import VertexDataset @@ -21,6 +21,7 @@ from kfp import dsl from kfp.dsl import Input from kfp.dsl import Output +from kfp.dsl import OutputPath @dsl.container_component @@ -29,12 +30,18 @@ def automl_image_training_job( display_name: str, dataset: Input[VertexDataset], model: Output[VertexModel], + gcp_resources: OutputPath(str), location: Optional[str] = 'us-central1', prediction_type: Optional[str] = 'classification', multi_label: Optional[bool] = False, model_type: Optional[str] = 'CLOUD', base_model: Optional[Input[VertexModel]] = None, - labels: Optional[dict] = {}, + incremental_train_base_model: Optional[Input[VertexModel]] = None, + parent_model: Optional[Input[VertexModel]] = None, + is_default_version: Optional[bool] = True, + model_version_aliases: Optional[List[str]] = None, + model_version_description: Optional[str] = None, + labels: Optional[Dict[str, str]] = {}, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, training_fraction_split: Optional[float] = None, @@ -45,7 +52,7 @@ def automl_image_training_job( test_filter_split: Optional[str] = None, budget_milli_node_hours: Optional[int] = None, model_display_name: Optional[str] = None, - model_labels: Optional[dict] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: Optional[bool] = False, ): # fmt: off @@ -64,12 +71,17 @@ def automl_image_training_job( budget_milli_node_hours: The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. Defaults by `prediction_type`: `classification` - For Cloud models the budget must be: 8,000 - 800,000 milli node hours (inclusive). The default value is 192,000 which represents one day in wall time, assuming 8 nodes are used. `object_detection` - For Cloud models the budget must be: 20,000 - 900,000 milli node hours (inclusive). The default value is 216,000 which represents one day in wall time, assuming 9 nodes are used. The training cost of the model will not exceed this budget. The final cost will be attempted to be close to the budget, though may end up being (even) noticeably smaller - at the backend's discretion. This especially may happen when further model training ceases to provide any improvements. If the budget is set to a value known to be insufficient to train a Model for the given training set, the training won't be attempted and will error. model_display_name: The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. model_labels: The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. - disable_early_stopping: bool = False If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. 
+ disable_early_stopping: If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. display_name: The user-defined name of this TrainingPipeline. prediction_type: The type of prediction the Model is to produce, one of: "classification" - Predict one out of multiple target values is picked for each row. "object_detection" - Predict a value based on its relation to other values. This type is available only to columns that contain semantically numeric values, i.e. integers or floating point number, even if stored as e.g. strings. - multi_label: bool = False Default is False. If false, a single-label (multi-class) Model will be trained (i.e. assuming that for each image just up to one annotation may be applicable). If true, a multi-label Model will be trained (i.e. assuming that for each image multiple annotations may be applicable). This is only applicable for the "classification" prediction_type and will be ignored otherwise. - model_type: str = "CLOUD" One of the following: "CLOUD" - Default for Image Classification. A Model best tailored to be used within Google Cloud, and which cannot be exported. "CLOUD_HIGH_ACCURACY_1" - Default for Image Object Detection. A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a higher latency, but should also have a higher prediction quality than other cloud models. "CLOUD_LOW_LATENCY_1" - A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a low latency, but may have lower prediction quality than other cloud models. "MOBILE_TF_LOW_LATENCY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have low latency, but may have lower prediction quality than other mobile models. "MOBILE_TF_VERSATILE_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device with afterwards. "MOBILE_TF_HIGH_ACCURACY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have a higher latency, but should also have a higher prediction quality than other mobile models. - base_model: Optional[models.Model] = None Only permitted for Image Classification models. If it is specified, the new model will be trained based on the `base` model. Otherwise, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same model_type. + multi_label: Default is False. If false, a single-label (multi-class) Model will be trained (i.e. assuming that for each image just up to one annotation may be applicable). If true, a multi-label Model will be trained (i.e. assuming that for each image multiple annotations may be applicable). This is only applicable for the "classification" prediction_type and will be ignored otherwise. + model_type: One of the following: "CLOUD" - Default for Image Classification. A Model best tailored to be used within Google Cloud, and which cannot be exported. 
"CLOUD_HIGH_ACCURACY_1" - Default for Image Object Detection. A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a higher latency, but should also have a higher prediction quality than other cloud models. "CLOUD_LOW_LATENCY_1" - A model best tailored to be used within Google Cloud, and which cannot be exported. Expected to have a low latency, but may have lower prediction quality than other cloud models. "MOBILE_TF_LOW_LATENCY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have low latency, but may have lower prediction quality than other mobile models. "MOBILE_TF_VERSATILE_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device with afterwards. "MOBILE_TF_HIGH_ACCURACY_1" - A model that, in addition to being available within Google Cloud, can also be exported as TensorFlow or Core ML model and used on a mobile or edge device afterwards. Expected to have a higher latency, but should also have a higher prediction quality than other mobile models. + base_model: Only permitted for Image Classification models. If it is specified, the new model will be trained based on the `base` model. Otherwise, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same model_type. + incremental_train_base_model: Optional for both Image Classification and Object detection models, to incrementally train a new model using an existing model as the starting point, with a reduced training time. If not specified, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same prediction_type and model_type. + parent_model: The resource name or model ID of an existing model. The new model uploaded by this job will be a version of `parent_model`. Only set this field when training a new version of an existing model. + is_default_version: When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of the model produced by this job without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the model version produced by this job will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. + model_version_aliases: User provided version aliases so that the model version uploaded by this job can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] + model_version_description: The description of the model version being uploaded by this job. project: Project to retrieve dataset from. location: Optional location to retrieve dataset from. labels: The labels with user-defined metadata to organize TrainingPipelines. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. 
@@ -78,6 +90,7 @@ def automl_image_training_job( Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. + gcp_resources: Serialized gcp_resources proto tracking the batch prediction job. For more details, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md. """ # fmt: on @@ -86,100 +99,139 @@ def automl_image_training_job( command=[ 'python3', '-m', - 'google_cloud_pipeline_components.container.v1.aiplatform.remote_runner', - '--cls_name', - 'AutoMLImageTrainingJob', - '--method_name', - 'run', + 'google_cloud_pipeline_components.container.v1.automl_training_job.image.launcher', ], args=[ - '--init.project', + '--type', + 'AutoMLImageTrainingJob', + '--project', project, - '--init.location', + '--location', location, - '--init.display_name', + '--display_name', display_name, - '--init.prediction_type', + '--prediction_type', prediction_type, - '--init.multi_label', + '--multi_label', multi_label, - '--init.model_type', + '--model_type', model_type, - '--init.labels', + '--labels', labels, - '--method.dataset', + '--dataset', dataset.metadata['resourceName'], - '--method.disable_early_stopping', + '--disable_early_stopping', disable_early_stopping, dsl.IfPresentPlaceholder( input_name='training_encryption_spec_key_name', then=[ - '--init.training_encryption_spec_key_name', + '--training_encryption_spec_key_name', training_encryption_spec_key_name, ], ), dsl.IfPresentPlaceholder( input_name='model_encryption_spec_key_name', then=[ - '--init.model_encryption_spec_key_name', + '--model_encryption_spec_key_name', model_encryption_spec_key_name, ], ), dsl.IfPresentPlaceholder( input_name='model_display_name', - then=['--method.model_display_name', model_display_name], + then=['--model_display_name', model_display_name], ), dsl.IfPresentPlaceholder( input_name='training_fraction_split', then=[ - '--method.training_fraction_split', + '--training_fraction_split', training_fraction_split, ], ), dsl.IfPresentPlaceholder( input_name='validation_fraction_split', then=[ - '--method.validation_fraction_split', + '--validation_fraction_split', validation_fraction_split, ], ), dsl.IfPresentPlaceholder( input_name='test_fraction_split', - then=['--method.test_fraction_split', test_fraction_split], + then=['--test_fraction_split', test_fraction_split], ), dsl.IfPresentPlaceholder( input_name='budget_milli_node_hours', then=[ - '--method.budget_milli_node_hours', + '--budget_milli_node_hours', budget_milli_node_hours, ], ), dsl.IfPresentPlaceholder( input_name='training_filter_split', - then=['--method.training_filter_split', training_filter_split], + then=['--training_filter_split', training_filter_split], ), dsl.IfPresentPlaceholder( input_name='validation_filter_split', then=[ - '--method.validation_filter_split', + '--validation_filter_split', validation_filter_split, ], ), dsl.IfPresentPlaceholder( input_name='test_filter_split', - then=['--method.test_filter_split', test_filter_split], + then=['--test_filter_split', test_filter_split], ), dsl.IfPresentPlaceholder( input_name='base_model', then=[ - '--init.base_model', + '--base_model', base_model.metadata['resourceName'], + '--model_labels', + base_model.metadata['labels'], + ], + else_=[ + dsl.IfPresentPlaceholder( + input_name='model_labels', + then=['--model_labels', model_labels], + ) + ], + ), + dsl.IfPresentPlaceholder( + input_name='incremental_train_base_model', + then=[ + '--incremental_train_base_model', + 
incremental_train_base_model.metadata['resourceName'], + ], + ), + dsl.IfPresentPlaceholder( + input_name='parent_model', + then=[ + '--parent_model', + parent_model.metadata['resourceName'], ], ), dsl.IfPresentPlaceholder( - input_name='model_labels', - then=['--method.model_labels', model_labels], + input_name='is_default_version', + then=[ + '--is_default_version', + is_default_version, + ], + ), + dsl.IfPresentPlaceholder( + input_name='model_version_aliases', + then=[ + '--model_version_aliases', + model_version_aliases, + ], + ), + dsl.IfPresentPlaceholder( + input_name='model_version_description', + then=[ + '--model_version_description', + model_version_description, + ], ), + '--gcp_resources', + gcp_resources, '--executor_input', '{{$}}', '--resource_name_output_artifact_uri', From 8b0e6a39a958330b98a41721d593489c7fad6966 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Mon, 9 Oct 2023 18:02:19 -0700 Subject: [PATCH 197/253] chore(backend): Update driver and launcher license files (#10072) --- backend/Makefile | 12 +++++++++++- backend/third_party_licenses/driver.csv | 12 ++++++------ backend/third_party_licenses/launcher.csv | 12 ++++++------ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/backend/Makefile b/backend/Makefile index f5aab0d57e..91cda22c8b 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -5,7 +5,7 @@ CSV_PATH=backend/third_party_licenses # Whenever build command for any of the binaries change, we should update them both here and in backend/Dockerfiles. .PHONY: all -all: license_apiserver license_persistence_agent license_cache_server license_swf license_viewer +all: license_apiserver license_persistence_agent license_cache_server license_swf license_viewer license_driver license_launcher .PHONY: clean clean: @@ -21,6 +21,10 @@ $(BUILD)/swf: GO111MODULE=on go build -o $(BUILD)/swf github.com/kubeflow/pipelines/backend/src/crd/controller/scheduledworkflow $(BUILD)/viewer: GO111MODULE=on go build -o $(BUILD)/viewer github.com/kubeflow/pipelines/backend/src/crd/controller/viewer +$(BUILD)/driver: + GO111MODULE=on go build -o $(BUILD)/driver github.com/kubeflow/pipelines/backend/src/v2/cmd/driver +$(BUILD)/launcher: + GO111MODULE=on go build -o $(BUILD)/launcher github.com/kubeflow/pipelines/backend/src/v2/cmd/launcher-v2 # Update licenses info after dependencies changed. # See README.md#updating-licenses-info section for more details. 
@@ -39,6 +43,12 @@ license_swf: $(BUILD)/swf .PHONY: license_viewer license_viewer: $(BUILD)/viewer cd $(MOD_ROOT) && go-licenses csv ./backend/src/crd/controller/viewer > $(CSV_PATH)/viewer.csv +.PHONY: license_driver +license_driver: $(BUILD)/driver + cd $(MOD_ROOT) && go-licenses csv ./backend/src/v2/cmd/driver > $(CSV_PATH)/driver.csv +.PHONY: license_launcher +license_launcher: $(BUILD)/launcher + cd $(MOD_ROOT) && go-licenses csv ./backend/src/v2/cmd/launcher-v2 > $(CSV_PATH)/launcher.csv .PHONY: image_all image_all: image_apiserver image_persistence_agent image_cache image_swf image_viewer image_visualization diff --git a/backend/third_party_licenses/driver.csv b/backend/third_party_licenses/driver.csv index ac9080347e..2e0d171ebe 100644 --- a/backend/third_party_licenses/driver.csv +++ b/backend/third_party_licenses/driver.csv @@ -32,7 +32,7 @@ github.com/json-iterator/go,https://github.com/json-iterator/go/blob/v1.1.12/LIC github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/758c91f76784/api/LICENSE,Apache-2.0 github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0 github.com/kubeflow/pipelines/kubernetes_platform/go/kubernetesplatform,https://github.com/kubeflow/pipelines/blob/bd9f74e34de6/kubernetes_platform/LICENSE,Apache-2.0 -github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e78ed557ddcb/third_party/ml-metadata/LICENSE,Apache-2.0 +github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e1f0c010f800/third_party/ml-metadata/LICENSE,Apache-2.0 github.com/mailru/easyjson,https://github.com/mailru/easyjson/blob/v0.7.7/LICENSE,MIT github.com/modern-go/concurrent,https://github.com/modern-go/concurrent/blob/bacd9c7ef1dd/LICENSE,Apache-2.0 github.com/modern-go/reflect2,https://github.com/modern-go/reflect2/blob/v1.0.2/LICENSE,Apache-2.0 @@ -40,11 +40,11 @@ github.com/munnerz/goautoneg,https://github.com/munnerz/goautoneg/blob/a7dc8b61c github.com/stoewer/go-strcase,https://github.com/stoewer/go-strcase/blob/v1.2.0/LICENSE,MIT go.opencensus.io,https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/LICENSE,Apache-2.0 gocloud.dev,https://github.com/google/go-cloud/blob/v0.22.0/LICENSE,Apache-2.0 -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause golang.org/x/xerrors,https://cs.opensource.google/go/x/xerrors/+/5ec99f83:LICENSE,BSD-3-Clause google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/LICENSE,BSD-3-Clause @@ -54,7 +54,7 @@ google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apac 
google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 -gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/496545a6307b/LICENSE,MIT +gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT k8s.io/api,https://github.com/kubernetes/api/blob/v0.24.3/LICENSE,Apache-2.0 k8s.io/apimachinery/pkg,https://github.com/kubernetes/apimachinery/blob/v0.24.3/LICENSE,Apache-2.0 k8s.io/apimachinery/third_party/forked/golang/reflect,https://github.com/kubernetes/apimachinery/blob/v0.24.3/third_party/forked/golang/LICENSE,BSD-3-Clause diff --git a/backend/third_party_licenses/launcher.csv b/backend/third_party_licenses/launcher.csv index 1b86cf9b45..974640725a 100644 --- a/backend/third_party_licenses/launcher.csv +++ b/backend/third_party_licenses/launcher.csv @@ -28,18 +28,18 @@ github.com/josharian/intern,https://github.com/josharian/intern/blob/v1.0.0/lice github.com/json-iterator/go,https://github.com/json-iterator/go/blob/v1.1.12/LICENSE,MIT github.com/kubeflow/pipelines/api/v2alpha1/go,https://github.com/kubeflow/pipelines/blob/758c91f76784/api/LICENSE,Apache-2.0 github.com/kubeflow/pipelines/backend,https://github.com/kubeflow/pipelines/blob/HEAD/LICENSE,Apache-2.0 -github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e78ed557ddcb/third_party/ml-metadata/LICENSE,Apache-2.0 +github.com/kubeflow/pipelines/third_party/ml-metadata/go/ml_metadata,https://github.com/kubeflow/pipelines/blob/e1f0c010f800/third_party/ml-metadata/LICENSE,Apache-2.0 github.com/mailru/easyjson,https://github.com/mailru/easyjson/blob/v0.7.7/LICENSE,MIT github.com/modern-go/concurrent,https://github.com/modern-go/concurrent/blob/bacd9c7ef1dd/LICENSE,Apache-2.0 github.com/modern-go/reflect2,https://github.com/modern-go/reflect2/blob/v1.0.2/LICENSE,Apache-2.0 github.com/munnerz/goautoneg,https://github.com/munnerz/goautoneg/blob/a7dc8b61c822/LICENSE,BSD-3-Clause go.opencensus.io,https://github.com/census-instrumentation/opencensus-go/blob/v0.23.0/LICENSE,Apache-2.0 gocloud.dev,https://github.com/google/go-cloud/blob/v0.22.0/LICENSE,Apache-2.0 -golang.org/x/net,https://cs.opensource.google/go/x/net/+/27dd8689:LICENSE,BSD-3-Clause +golang.org/x/net,https://cs.opensource.google/go/x/net/+/v0.10.0:LICENSE,BSD-3-Clause golang.org/x/oauth2,https://cs.opensource.google/go/x/oauth2/+/d3ed0bb2:LICENSE,BSD-3-Clause -golang.org/x/sys,https://cs.opensource.google/go/x/sys/+/a9b59b02:LICENSE,BSD-3-Clause -golang.org/x/term,https://cs.opensource.google/go/x/term/+/03fcf44c:LICENSE,BSD-3-Clause -golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.3.7:LICENSE,BSD-3-Clause +golang.org/x/sys/unix,https://cs.opensource.google/go/x/sys/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3-Clause +golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause golang.org/x/xerrors,https://cs.opensource.google/go/x/xerrors/+/5ec99f83:LICENSE,BSD-3-Clause google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/LICENSE,BSD-3-Clause @@ -49,7 +49,7 @@ google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apac 
google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 -gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/496545a6307b/LICENSE,MIT +gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT k8s.io/api,https://github.com/kubernetes/api/blob/v0.24.3/LICENSE,Apache-2.0 k8s.io/apimachinery/pkg,https://github.com/kubernetes/apimachinery/blob/v0.24.3/LICENSE,Apache-2.0 k8s.io/apimachinery/third_party/forked/golang/reflect,https://github.com/kubernetes/apimachinery/blob/v0.24.3/third_party/forked/golang/LICENSE,BSD-3-Clause From 2131dfaf6da3eff1c577a8cd2777c4606b121545 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 10 Oct 2023 07:09:27 -0700 Subject: [PATCH 198/253] chore(components): Fix formatting in upcoming release notes PiperOrigin-RevId: 572237593 --- components/google-cloud/RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 8a60889f94..c7d7a82f5f 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,5 +1,5 @@ ## Upcoming release -* Upload tensorboard metrics from RLHF pipelines if a tensorboard resource id is provided at runtime. +* Upload tensorboard metrics from `preview.llm.rlhf_pipeline` if a `tensorboard_resource_id` is provided at runtime. * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. ## Release 2.4.1 From 5835824e9cca76af70b733e7d494bb4bbdd8e2b7 Mon Sep 17 00:00:00 2001 From: MGSousa <31368750+MGSousa@users.noreply.github.com> Date: Tue, 10 Oct 2023 21:25:21 +0100 Subject: [PATCH 199/253] feat(backend): Added metrics to be collected from failed/successful workflows (#9576) * feat(backend): Allow more metrics to be collected from Workflows * Fixed remaining tests * Updated licenses dependencies * FIX comment in resource_manager.go --- backend/src/apiserver/main.go | 1 + .../apiserver/resource/resource_manager.go | 65 +++++++++++++-- .../resource/resource_manager_test.go | 80 +++++++++---------- backend/src/apiserver/server/api_util_test.go | 4 +- .../server/experiment_server_test.go | 58 +++++++------- backend/src/apiserver/server/fakes_test.go | 14 ++-- .../src/apiserver/server/job_server_test.go | 8 +- .../apiserver/server/pipeline_server_test.go | 44 +++++----- .../server/pipeline_upload_server_test.go | 4 +- backend/src/apiserver/server/run_server.go | 6 +- .../src/apiserver/server/run_server_test.go | 6 +- .../server/visualization_server_test.go | 4 +- backend/src/common/util/metrics.go | 60 ++++++++++++++ backend/third_party_licenses/apiserver.csv | 4 +- backend/third_party_licenses/cache_server.csv | 4 +- backend/third_party_licenses/driver.csv | 2 +- backend/third_party_licenses/launcher.csv | 2 +- .../persistence_agent.csv | 4 +- backend/third_party_licenses/swf.csv | 4 +- backend/third_party_licenses/viewer.csv | 4 +- go.mod | 3 +- go.sum | 6 +- 22 files changed, 253 insertions(+), 134 deletions(-) create mode 100644 backend/src/common/util/metrics.go diff --git a/backend/src/apiserver/main.go b/backend/src/apiserver/main.go index 276a39ee1b..23ae020006 100644 --- a/backend/src/apiserver/main.go +++ b/backend/src/apiserver/main.go @@ -63,6 +63,7 @@ func main() { clientManager := 
cm.NewClientManager() resourceManager := resource.NewResourceManager( &clientManager, + &resource.ResourceManagerOptions{CollectMetrics: *collectMetricsFlag}, ) err := loadSamples(resourceManager) if err != nil { diff --git a/backend/src/apiserver/resource/resource_manager.go b/backend/src/apiserver/resource/resource_manager.go index f84c62c097..94442f2bd9 100644 --- a/backend/src/apiserver/resource/resource_manager.go +++ b/backend/src/apiserver/resource/resource_manager.go @@ -49,11 +49,31 @@ import ( // Metric variables. Please prefix the metric names with resource_manager_. var ( + extraLabels = []string{ + // display in which Kubeflow namespace the runs were triggered + "profile", + + // display workflow name + "workflow", + } + // Count the removed workflows due to garbage collection. workflowGCCounter = promauto.NewCounter(prometheus.CounterOpts{ Name: "resource_manager_workflow_gc", Help: "The number of gabarage-collected workflows", }) + + // Count the successfull workflow runs + workflowSuccessCounter = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "resource_manager_workflow_runs_success", + Help: "The current number of successfully workflows runs", + }, extraLabels) + + // Count the failed workflow runs + workflowFailedCounter = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "resource_manager_workflow_runs_failed", + Help: "The current number of failed workflows runs", + }, extraLabels) ) type ClientManagerInterface interface { @@ -77,6 +97,10 @@ type ClientManagerInterface interface { Authenticators() []kfpauth.Authenticator } +type ResourceManagerOptions struct { + CollectMetrics bool `json:"collect_metrics,omitempty"` +} + type ResourceManager struct { experimentStore storage.ExperimentStoreInterface pipelineStore storage.PipelineStoreInterface @@ -96,9 +120,10 @@ type ResourceManager struct { time util.TimeInterface uuid util.UUIDGeneratorInterface authenticators []kfpauth.Authenticator + options *ResourceManagerOptions } -func NewResourceManager(clientManager ClientManagerInterface) *ResourceManager { +func NewResourceManager(clientManager ClientManagerInterface, options *ResourceManagerOptions) *ResourceManager { return &ResourceManager{ experimentStore: clientManager.ExperimentStore(), pipelineStore: clientManager.PipelineStore(), @@ -118,6 +143,7 @@ func NewResourceManager(clientManager ClientManagerInterface) *ResourceManager { time: clientManager.Time(), uuid: clientManager.UUID(), authenticators: clientManager.Authenticators(), + options: options, } } @@ -613,6 +639,18 @@ func (r *ResourceManager) DeleteRun(ctx context.Context, runId string) error { if err != nil { return util.Wrapf(err, "Failed to delete a run %v", runId) } + + if r.options.CollectMetrics { + if run.Conditions == string(exec.ExecutionSucceeded) { + if util.GetMetricValue(workflowSuccessCounter) > 0 { + workflowSuccessCounter.WithLabelValues(run.Namespace, run.DisplayName).Dec() + } + } else { + if util.GetMetricValue(workflowFailedCounter) > 0 { + workflowFailedCounter.WithLabelValues(run.Namespace, run.DisplayName).Dec() + } + } + } return nil } @@ -1079,8 +1117,9 @@ func (r *ResourceManager) ReportWorkflowResource(ctx context.Context, execSpec u return nil, util.NewInternalServerError(err, "Failed to delete the completed workflow for run %s", runId) } } - // TODO(jingzhang36): find a proper way to pass collectMetricsFlag here. 
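As a rough, self-contained sketch of how the two new gauges behave, the snippet below registers GaugeVecs with the same profile/workflow labels, increments them when a run reaches a terminal state, and decrements them on deletion with a floor at zero. The helper names recordCompletion and recordDeletion are invented for illustration, and the zero guard uses client_golang's testutil.ToFloat64 because the patch's util.GetMetricValue helper is defined in a file whose body is not shown here.

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/promauto"
        "github.com/prometheus/client_golang/prometheus/testutil"
    )

    // Label set mirroring the patch: the Kubeflow profile (namespace) that
    // triggered the run and the workflow's display name.
    var runLabels = []string{"profile", "workflow"}

    var (
        runsSucceeded = promauto.NewGaugeVec(prometheus.GaugeOpts{
            Name: "resource_manager_workflow_runs_success",
            Help: "The current number of successful workflow runs",
        }, runLabels)
        runsFailed = promauto.NewGaugeVec(prometheus.GaugeOpts{
            Name: "resource_manager_workflow_runs_failed",
            Help: "The current number of failed workflow runs",
        }, runLabels)
    )

    // recordCompletion is a stand-in for the ReportWorkflowResource path:
    // bump the gauge that matches the workflow's terminal condition.
    func recordCompletion(namespace, workflow string, succeeded bool) {
        if succeeded {
            runsSucceeded.WithLabelValues(namespace, workflow).Inc()
            return
        }
        runsFailed.WithLabelValues(namespace, workflow).Inc()
    }

    // recordDeletion is a stand-in for the DeleteRun path: undo the count,
    // but never let the gauge go negative.
    func recordDeletion(namespace, workflow string, succeeded bool) {
        gauge := runsFailed
        if succeeded {
            gauge = runsSucceeded
        }
        child := gauge.WithLabelValues(namespace, workflow)
        if testutil.ToFloat64(child) > 0 {
            child.Dec()
        }
    }

    func main() {
        recordCompletion("user-ns", "train-pipeline", true)
        recordCompletion("user-ns", "train-pipeline", false)
        recordDeletion("user-ns", "train-pipeline", true)

        // After one success, one failure, and deletion of the successful run:
        fmt.Println(testutil.ToFloat64(runsSucceeded.WithLabelValues("user-ns", "train-pipeline"))) // 0
        fmt.Println(testutil.ToFloat64(runsFailed.WithLabelValues("user-ns", "train-pipeline")))    // 1
    }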
- workflowGCCounter.Inc() + if r.options.CollectMetrics { + workflowGCCounter.Inc() + } } // If the run was Running and got terminated (activeDeadlineSeconds set to 0), // ignore its condition and mark it as such @@ -1119,8 +1158,10 @@ func (r *ResourceManager) ReportWorkflowResource(ctx context.Context, execSpec u } return nil, util.NewInternalServerError(err, "Failed to delete the obsolete workflow for run %s", runId) } - // TODO(jingzhang36): find a proper way to pass collectMetricsFlag here. - workflowGCCounter.Inc() + + if r.options.CollectMetrics { + workflowGCCounter.Inc() + } // Note, persistence agent will not retry reporting this workflow again, because updateError is a not found error. return nil, util.Wrapf(updateError, "Failed to report workflow name=%q namespace=%q runId=%q", execSpec.ExecutionName(), execSpec.ExecutionNamespace(), runId) } @@ -1208,6 +1249,20 @@ func (r *ResourceManager) ReportWorkflowResource(ctx context.Context, execSpec u return nil, util.Wrapf(err, message) } } + + if r.options.CollectMetrics { + execNamespace := execSpec.ExecutionNamespace() + execName := execSpec.ExecutionName() + + if execStatus.Condition() == exec.ExecutionSucceeded { + workflowSuccessCounter.WithLabelValues(execNamespace, execName).Inc() + } else { + glog.Errorf("pipeline '%s' finished with an error", execName) + + // also collects counts regarding retries + workflowFailedCounter.WithLabelValues(execNamespace, execName).Inc() + } + } } execSpec.SetLabels("pipeline/runid", runId) return execSpec, nil diff --git a/backend/src/apiserver/resource/resource_manager_test.go b/backend/src/apiserver/resource/resource_manager_test.go index f63fe7599e..c1221ec6ca 100644 --- a/backend/src/apiserver/resource/resource_manager_test.go +++ b/backend/src/apiserver/resource/resource_manager_test.go @@ -135,7 +135,7 @@ var testWorkflow = util.NewWorkflow(&v1alpha1.Workflow{ func initWithPipeline(t *testing.T) (*FakeClientManager, *ResourceManager, *model.Pipeline, *model.PipelineVersion) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) p1 := createPipeline("p1", "", "ns1") p, _ := manager.CreatePipeline(p1) pv1 := createPipelineVersion( @@ -155,7 +155,7 @@ func initWithPipeline(t *testing.T) (*FakeClientManager, *ResourceManager, *mode func initWithExperiment(t *testing.T) (*FakeClientManager, *ResourceManager, *model.Experiment) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) apiExperiment := &model.Experiment{Name: "e1", Namespace: "ns1"} experiment, err := manager.CreateExperiment(apiExperiment) assert.Nil(t, err) @@ -165,7 +165,7 @@ func initWithExperiment(t *testing.T) (*FakeClientManager, *ResourceManager, *mo func initWithExperimentAndPipeline(t *testing.T) (*FakeClientManager, *ResourceManager, *model.Experiment, *model.Pipeline, *model.PipelineVersion) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) apiExperiment := &model.Experiment{Name: "e1"} experiment, err := manager.CreateExperiment(apiExperiment) assert.Nil(t, err) @@ -434,7 +434,7 @@ func TestCreatePipeline(t *testing.T) { // setup store := 
NewFakeClientManagerOrFatalV2() defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) if test.badObjectStore { manager.objectStore = &FakeBadObjectStore{} } @@ -584,7 +584,7 @@ func TestCreatePipelineVersion(t *testing.T) { t.Run(test.msg, func(t *testing.T) { store := NewFakeClientManagerOrFatalV2() defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline before versions. p0 := createPipelineV1("my_pipeline") @@ -674,7 +674,7 @@ func TestCreatePipelineOrVersion_V2PipelineName(t *testing.T) { t.Run(fmt.Sprintf("%+v", testClone), func(t *testing.T) { store := NewFakeClientManagerOrFatalV2() defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) if test.template == "" { test.template = strings.TrimSpace(v2compatPipeline) @@ -807,7 +807,7 @@ func TestResourceManager_CreatePipelineAndPipelineVersion(t *testing.T) { t.Run(tt.name, func(t *testing.T) { store := NewFakeClientManagerOrFatalV2() defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) pipelineStore, ok := manager.pipelineStore.(*storage.PipelineStore) assert.True(t, ok) pipelineStore.SetUUIDGenerator(util.NewFakeUUIDGeneratorOrFatal(FakeUUIDOne, nil)) @@ -1001,7 +1001,7 @@ func TestGetPipelineTemplate_FromPipelineURI(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) p, _ := manager.CreatePipeline(createPipelineV1("new_pipeline")) manager.objectStore.AddFile([]byte(testWorkflow.ToStringForStore()), p.UUID) @@ -1023,7 +1023,7 @@ func TestGetPipelineTemplate_FromPipelineVersionId(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) p, _ := manager.CreatePipeline(createPipelineV1("new_pipeline")) pv := &model.PipelineVersion{ @@ -1051,7 +1051,7 @@ func TestGetPipelineTemplate_FromPipelineId(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) p, _ := manager.CreatePipeline(createPipelineV1("new_pipeline")) pv := &model.PipelineVersion{ @@ -1079,7 +1079,7 @@ func TestGetPipelineTemplate_PipelineMetadataNotFound(t *testing.T) { defer store.Close() template := []byte("workflow: foo") store.objectStore.AddFile(template, store.objectStore.GetPipelineKey(fmt.Sprint(1))) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) _, err := manager.GetPipelineLatestTemplate("1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "Pipeline 1 not found") @@ -1090,7 +1090,7 @@ func TestGetPipelineTemplate_PipelineFileNotFound(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() pipeline, _ := 
store.PipelineStore().CreatePipeline(createPipelineV1("pipeline1")) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) _, err := manager.GetPipelineLatestTemplate(pipeline.UUID) assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "not found") @@ -1100,7 +1100,7 @@ func TestGetPipelineTemplate_PipelineFileNotFound(t *testing.T) { func TestListPipelines(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. p1 := createPipelineV1( @@ -1159,7 +1159,7 @@ func TestListPipelines(t *testing.T) { func TestListPipelinesV1(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. p1 := createPipelineV1( "pipeline1", @@ -1218,7 +1218,7 @@ func TestListPipelineVersions(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. p1 := createPipelineV1( @@ -1301,7 +1301,7 @@ func TestUpdatePipelineStatus(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) pipelineStore, ok := store.pipelineStore.(*storage.PipelineStore) assert.True(t, ok) @@ -1371,7 +1371,7 @@ func TestUpdatePipelineVersionStatus(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) pipelineStore, ok := store.pipelineStore.(*storage.PipelineStore) assert.True(t, ok) @@ -1440,7 +1440,7 @@ func TestDeletePipelineVersion(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. p := createPipelineV1( @@ -1501,7 +1501,7 @@ func TestDeletePipelineVersion_FileError(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. p := createPipelineV1( @@ -1543,7 +1543,7 @@ func TestDeletePipeline(t *testing.T) { initEnvVars() store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) // Create a pipeline. 
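The zero-floor guard in DeleteRun relies on util.GetMetricValue from the newly added backend/src/common/util/metrics.go, whose body does not appear in this excerpt. One plausible way such a helper can read the current value of a collector (summing all label combinations of a GaugeVec) is sketched below; treat it as an assumption about the helper, not its actual implementation.

    package util

    import (
        "github.com/prometheus/client_golang/prometheus"
        dto "github.com/prometheus/client_model/go"
    )

    // GetMetricValue collects every series currently exposed by c and
    // returns the sum of their gauge or counter values. Sketch only: the
    // real helper added by this patch may differ.
    func GetMetricValue(c prometheus.Collector) float64 {
        ch := make(chan prometheus.Metric)
        go func() {
            c.Collect(ch)
            close(ch)
        }()

        var total float64
        for m := range ch {
            var pb dto.Metric
            if err := m.Write(&pb); err != nil {
                continue // skip series that cannot be serialized
            }
            switch {
            case pb.Gauge != nil:
                total += pb.Gauge.GetValue()
            case pb.Counter != nil:
                total += pb.Counter.GetValue()
            }
        }
        return total
    }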
p1 := createPipelineV1( @@ -1999,7 +1999,7 @@ func TestCreateRun_ThroughPipelineIdAndPipelineVersion(t *testing.T) { func TestCreateRun_EmptyPipelineSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") apiRun := &model.Run{ DisplayName: "run1", @@ -2016,7 +2016,7 @@ func TestCreateRun_EmptyPipelineSpec(t *testing.T) { func TestCreateRun_InvalidWorkflowSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") apiRun := &model.Run{ DisplayName: "run1", @@ -2034,7 +2034,7 @@ func TestCreateRun_InvalidWorkflowSpec(t *testing.T) { func TestCreateRun_NullWorkflowSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") apiRun := &model.Run{ DisplayName: "run1", @@ -2052,7 +2052,7 @@ func TestCreateRun_NullWorkflowSpec(t *testing.T) { func TestCreateRun_OverrideParametersError(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") apiRun := &model.Run{ DisplayName: "run1", @@ -2070,7 +2070,7 @@ func TestCreateRun_OverrideParametersError(t *testing.T) { func TestCreateRun_CreateWorkflowError(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") manager.execClient = client.NewFakeExecClientWithBadWorkflow() apiRun := &model.Run{ @@ -2089,7 +2089,7 @@ func TestCreateRun_CreateWorkflowError(t *testing.T) { func TestCreateRun_StoreRunMetadataError(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") store.DB().Close() apiRun := &model.Run{ @@ -2119,7 +2119,7 @@ func TestDeleteRun(t *testing.T) { func TestDeleteRun_RunNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) err := manager.DeleteRun(context.Background(), "1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "not found") @@ -2177,7 +2177,7 @@ func TestDeleteExperiment_ClearsDefaultExperiment(t *testing.T) { func TestDeleteExperiment_ExperimentNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, 
&ResourceManagerOptions{CollectMetrics: false}) err := manager.DeleteExperiment("1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "not found") @@ -2221,7 +2221,7 @@ func TestTerminateRun(t *testing.T) { func TestTerminateRun_RunNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) err := manager.TerminateRun(context.Background(), "1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "not found") @@ -2257,7 +2257,7 @@ func TestRetryRun(t *testing.T) { func TestRetryRun_RunNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) err := manager.RetryRun(context.Background(), "1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "not found") @@ -2549,7 +2549,7 @@ func TestCreateJob_ThroughPipelineIdAndPipelineVersion(t *testing.T) { func TestCreateJob_EmptyPipelineSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") job := &model.Job{ DisplayName: "pp 1", @@ -2567,7 +2567,7 @@ func TestCreateJob_EmptyPipelineSpec(t *testing.T) { func TestCreateJob_InvalidWorkflowSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") job := &model.Job{ K8SName: "pp 1", @@ -2586,7 +2586,7 @@ func TestCreateJob_InvalidWorkflowSpec(t *testing.T) { func TestCreateJob_NullWorkflowSpec(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, _ := manager.CreateDefaultExperiment("") job := &model.Job{ K8SName: "pp 1", @@ -2667,7 +2667,7 @@ func TestEnableJob(t *testing.T) { func TestEnableJob_JobNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) err := manager.ChangeJobMode(context.Background(), "1", false) assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "Job 1 not found") @@ -2733,7 +2733,7 @@ func TestDeleteJob(t *testing.T) { func TestDeleteJob_JobNotExist(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) err := manager.DeleteJob(context.Background(), "1") assert.Equal(t, codes.NotFound, err.(*util.UserError).ExternalStatusCode()) assert.Contains(t, err.Error(), "Job 1 not found") @@ -2900,7 +2900,7 @@ func 
TestReportWorkflowResource_WorkflowMissingRunID(t *testing.T) { func TestReportWorkflowResource_RunNotFound(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) ctx := context.Background() defer store.Close() workflow := util.NewWorkflow(&v1alpha1.Workflow{ @@ -3184,7 +3184,7 @@ func TestReportScheduledWorkflowResource_Success_withRuntimeParamsV2(t *testing. func TestReportScheduledWorkflowResource_Error(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) manager.CreateDefaultExperiment("") // Create pipeline workflow := util.NewWorkflow(&v1alpha1.Workflow{ @@ -3324,7 +3324,7 @@ func TestReadArtifact_WorkflowNoStatus_NotFound(t *testing.T) { func TestReadArtifact_NoRun_NotFound(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) _, err := manager.ReadArtifact("run-1", "node-1", "artifact-1") assert.True(t, util.IsUserErrorCodeMatch(err, codes.NotFound)) @@ -3938,7 +3938,7 @@ spec: func TestCreateDefaultExperiment(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, err := manager.CreateDefaultExperiment("") assert.Nil(t, err) @@ -3962,7 +3962,7 @@ func TestCreateDefaultExperiment_MultiUser(t *testing.T) { store := NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) defer store.Close() - manager := NewResourceManager(store) + manager := NewResourceManager(store, &ResourceManagerOptions{CollectMetrics: false}) experimentID, err := manager.CreateDefaultExperiment("multi-user") assert.Nil(t, err) diff --git a/backend/src/apiserver/server/api_util_test.go b/backend/src/apiserver/server/api_util_test.go index e69a81ed71..e004e4639e 100644 --- a/backend/src/apiserver/server/api_util_test.go +++ b/backend/src/apiserver/server/api_util_test.go @@ -108,7 +108,7 @@ func TestValidateExperimentResourceReference_UnexpectedRelationship(t *testing.T func TestValidateExperimentResourceReference_ExperimentNotExist(t *testing.T) { clients := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := resource.NewResourceManager(clients) + manager := resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) defer clients.Close() err := ValidateExperimentResourceReference(manager, validReference) assert.NotNil(t, err) @@ -183,7 +183,7 @@ func TestValidatePipelineSpecAndResourceReferences_InvalidPipelineVersionId(t *t func TestValidatePipelineSpecAndResourceReferences_PipelineIdNotParentOfPipelineVersionId(t *testing.T) { clients := initWithExperimentsAndTwoPipelineVersions(t) - manager := resource.NewResourceManager(clients) + manager := resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) defer clients.Close() spec := &apiv1beta1.PipelineSpec{ PipelineId: NonDefaultFakeUUID, diff --git a/backend/src/apiserver/server/experiment_server_test.go b/backend/src/apiserver/server/experiment_server_test.go index 1f6449c73e..44a1dc2d04 100644 
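The signature change repeated through these test files is mechanical: NewResourceManager now takes a *ResourceManagerOptions, production code threads the existing collect_metrics flag through it, and tests pass CollectMetrics: false so they never touch the process-wide Prometheus registry. A condensed sketch of that threading, with stand-in types in place of the real store and client manager (the actual constructor also takes a ClientManagerInterface):

    package main

    import "fmt"

    // Trimmed-down stand-ins; only the option threading is shown.
    type ResourceManagerOptions struct {
        CollectMetrics bool `json:"collect_metrics,omitempty"`
    }

    type ResourceManager struct {
        options *ResourceManagerOptions
    }

    func NewResourceManager(opts *ResourceManagerOptions) *ResourceManager {
        return &ResourceManager{options: opts}
    }

    // reportCompletion is a hypothetical helper showing where the guard sits.
    func (r *ResourceManager) reportCompletion(succeeded bool) {
        if !r.options.CollectMetrics {
            return // tests take this branch and leave the gauges untouched
        }
        fmt.Println("would update the success/failed gauges, succeeded =", succeeded)
    }

    func main() {
        // main.go passes &ResourceManagerOptions{CollectMetrics: *collectMetricsFlag}
        prod := NewResourceManager(&ResourceManagerOptions{CollectMetrics: true})
        prod.reportCompletion(true)

        // tests pass CollectMetrics: false
        test := NewResourceManager(&ResourceManagerOptions{CollectMetrics: false})
        test.reportCompletion(true)
    }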
--- a/backend/src/apiserver/server/experiment_server_test.go +++ b/backend/src/apiserver/server/experiment_server_test.go @@ -35,7 +35,7 @@ import ( func TestCreateExperimentV1(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} @@ -59,7 +59,7 @@ func TestCreateExperimentV1(t *testing.T) { func TestCreateExperiment(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} @@ -78,7 +78,7 @@ func TestCreateExperiment(t *testing.T) { func TestCreateExperimentV1_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} clientManager.DB().Close() @@ -89,7 +89,7 @@ func TestCreateExperimentV1_Failed(t *testing.T) { func TestCreateExperiment_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} clientManager.DB().Close() @@ -100,7 +100,7 @@ func TestCreateExperiment_Failed(t *testing.T) { func TestCreateExperiment_EmptyName(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "", Description: "first experiment"} clientManager.DB().Close() @@ -111,7 +111,7 @@ func TestCreateExperiment_EmptyName(t *testing.T) { func TestCreateExperimentV1_EmptyName(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: 
&ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "", Description: "first experiment"} clientManager.DB().Close() @@ -186,7 +186,7 @@ func TestCreateExperimentV1_Multiuser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} tests := []struct { @@ -360,7 +360,7 @@ func TestCreateExperimentV1_Multiuser(t *testing.T) { } for _, tt := range tests { clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(tt.fakeId, nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server = ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} got, err := server.CreateExperimentV1(ctx, &apiv1beta1.CreateExperimentRequest{Experiment: tt.experiment}) if tt.wantError { @@ -382,7 +382,7 @@ func TestCreateExperiment_Multiuser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} tests := []struct { @@ -446,7 +446,7 @@ func TestCreateExperiment_Multiuser(t *testing.T) { func TestGetExperimentV1(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} @@ -472,7 +472,7 @@ func TestGetExperimentV1(t *testing.T) { func TestGetExperiment(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} @@ -493,7 +493,7 @@ func TestGetExperiment(t *testing.T) { func TestGetExperimentV1_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: 
"first experiment"} @@ -507,7 +507,7 @@ func TestGetExperimentV1_Failed(t *testing.T) { func TestGetExperiment_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} @@ -570,7 +570,7 @@ func TestGetExperimentV1_Multiuser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} resourceReferences := []*apiv1beta1.ResourceReference{ { @@ -606,7 +606,7 @@ func TestGetExperiment_Multiuser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{ DisplayName: "exp1", @@ -631,7 +631,7 @@ func TestGetExperiment_Multiuser(t *testing.T) { func TestListExperimentsV1(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} @@ -659,7 +659,7 @@ func TestListExperimentsV1(t *testing.T) { func TestListExperiments(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} @@ -680,7 +680,7 @@ func TestListExperiments(t *testing.T) { func TestListExperimentsV1_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} @@ -694,7 +694,7 @@ func 
TestListExperimentsV1_Failed(t *testing.T) { func TestListExperiments_Failed(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} @@ -762,7 +762,7 @@ func TestListExperimentsV1_Multiuser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} resourceReferences := []*apiv1beta1.ResourceReference{ @@ -889,7 +889,7 @@ func TestListExperiments_Multiuser_NoDefault(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{ DisplayName: "exp1", @@ -967,7 +967,7 @@ func TestArchiveAndUnarchiveExperimentV1(t *testing.T) { _, err := runServer.CreateRunV1(nil, &apiv1beta1.CreateRunRequest{Run: run1}) assert.Nil(t, err) clients.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(FakeUUIDOne, nil)) - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) runServer = NewRunServer(manager, &RunServerOptions{CollectMetrics: false}) run2 := &apiv1beta1.Run{ Name: "run2", @@ -976,7 +976,7 @@ func TestArchiveAndUnarchiveExperimentV1(t *testing.T) { _, err = runServer.CreateRunV1(nil, &apiv1beta1.CreateRunRequest{Run: run2}) assert.Nil(t, err) clients.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(DefaultFakeUUID, nil)) - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) jobServer := NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) job1 := &apiv1beta1.Job{ Name: "name1", @@ -1043,7 +1043,7 @@ func TestArchiveAndUnarchiveExperiment(t *testing.T) { _, err := runServer.CreateRunV1(nil, &apiv1beta1.CreateRunRequest{Run: run1}) assert.Nil(t, err) clients.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(FakeUUIDOne, nil)) - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) runServer = NewRunServer(manager, &RunServerOptions{CollectMetrics: false}) run2 := &apiv1beta1.Run{ Name: "run2", @@ -1052,7 +1052,7 @@ func TestArchiveAndUnarchiveExperiment(t *testing.T) { _, err = runServer.CreateRunV1(nil, &apiv1beta1.CreateRunRequest{Run: run2}) assert.Nil(t, err) clients.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(DefaultFakeUUID, nil)) - manager = 
resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) jobServer := NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) job1 := &apiv1beta1.Job{ Name: "name1", @@ -1107,7 +1107,7 @@ func TestArchiveAndUnarchiveExperiment(t *testing.T) { // deleting an experiment that does not exist in single user mode, for V2 api. func TestDeleteExperiments_SingleUser(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment"} resultExperiment, err := server.CreateExperiment(nil, &apiV2beta1.CreateExperimentRequest{Experiment: experiment}) @@ -1125,7 +1125,7 @@ func TestDeleteExperiments_SingleUser(t *testing.T) { // deleting an experiment that does not exist in single user mode, for V1 api. func TestDeleteExperimentsV1_SingleUser(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiv1beta1.Experiment{Name: "ex1", Description: "first experiment"} resultExperiment, err := server.CreateExperimentV1(nil, &apiv1beta1.CreateExperimentRequest{Experiment: experiment}) @@ -1148,7 +1148,7 @@ func TestDeleteExperiments_MultiUser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} experiment := &apiV2beta1.Experiment{DisplayName: "ex1", Description: "first experiment", Namespace: "ns1"} resultExperiment, err := server.CreateExperiment(ctx, &apiV2beta1.CreateExperimentRequest{Experiment: experiment}) @@ -1171,7 +1171,7 @@ func TestDeleteExperimentsV1_MultiUser(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := ExperimentServer{resourceManager: resourceManager, options: &ExperimentServerOptions{CollectMetrics: false}} resourceReferences := []*apiv1beta1.ResourceReference{ { diff --git a/backend/src/apiserver/server/fakes_test.go b/backend/src/apiserver/server/fakes_test.go index 0027fd6a40..70e50000bc 100644 --- a/backend/src/apiserver/server/fakes_test.go +++ b/backend/src/apiserver/server/fakes_test.go @@ -144,7 +144,7 @@ func initEnvVars() { func initWithExperiment(t *testing.T) (*resource.FakeClientManager, *resource.ResourceManager, 
*model.Experiment) { initEnvVars() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) var apiExperiment *apiv1beta1.Experiment if common.IsMultiUserMode() { apiExperiment = &apiv1beta1.Experiment{ @@ -178,7 +178,7 @@ func initWithExperiment_SubjectAccessReview_Unauthorized(t *testing.T) (*resourc initEnvVars() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) clientManager.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) apiExperiment := &apiv1beta1.Experiment{Name: "exp1"} if common.IsMultiUserMode() { apiExperiment = &apiv1beta1.Experiment{ @@ -202,7 +202,7 @@ func initWithExperiment_SubjectAccessReview_Unauthorized(t *testing.T) (*resourc func initWithExperimentAndPipelineVersion(t *testing.T) (*resource.FakeClientManager, *resource.ResourceManager, *model.Experiment, *model.PipelineVersion) { initEnvVars() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) // Create an experiment. apiExperiment := &apiv1beta1.Experiment{Name: "exp1"} @@ -241,7 +241,7 @@ func initWithExperimentAndPipelineVersion(t *testing.T) (*resource.FakeClientMan func initWithExperimentsAndTwoPipelineVersions(t *testing.T) *resource.FakeClientManager { initEnvVars() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) // Create an experiment. apiExperiment := &apiv1beta1.Experiment{Name: "exp1"} @@ -268,7 +268,7 @@ func initWithExperimentsAndTwoPipelineVersions(t *testing.T) *resource.FakeClien ) assert.Nil(t, err) clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal("123e4567-e89b-12d3-a456-426655441001", nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) _, err = resourceManager.CreatePipelineVersion( &model.PipelineVersion{ Name: "pipeline_version", @@ -277,7 +277,7 @@ func initWithExperimentsAndTwoPipelineVersions(t *testing.T) *resource.FakeClien ) assert.Nil(t, err) clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(NonDefaultFakeUUID, nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) // Create another pipeline and then pipeline version. 
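With metrics collection enabled, the new gauges can also be asserted directly through client_golang's testutil package. The test below is a hypothetical illustration of that style, not one of the tests added by this patch; it assumes it sits next to the gauge definitions in the resource package.

    package resource

    import (
        "testing"

        "github.com/prometheus/client_golang/prometheus/testutil"
        "github.com/stretchr/testify/assert"
    )

    func TestWorkflowFailedGaugeIsLabelled(t *testing.T) {
        // Simulate one failed run reported for namespace ns1, workflow wf1.
        workflowFailedCounter.WithLabelValues("ns1", "wf1").Inc()
        defer workflowFailedCounter.Reset()

        assert.Equal(t, 1.0,
            testutil.ToFloat64(workflowFailedCounter.WithLabelValues("ns1", "wf1")))
    }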
p1, err := resourceManager.CreatePipeline( &model.Pipeline{ @@ -299,7 +299,7 @@ func initWithExperimentsAndTwoPipelineVersions(t *testing.T) *resource.FakeClien assert.Nil(t, err) clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal("123e4567-e89b-12d3-a456-426655441002", nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) _, err = resourceManager.CreatePipelineVersion( &model.PipelineVersion{ Name: "another_pipeline_version", diff --git a/backend/src/apiserver/server/job_server_test.go b/backend/src/apiserver/server/job_server_test.go index 2b29e1c7ac..6e53a2edb5 100644 --- a/backend/src/apiserver/server/job_server_test.go +++ b/backend/src/apiserver/server/job_server_test.go @@ -317,7 +317,7 @@ func TestCreateJob_NoResRefs(t *testing.T) { clients, manager, _, _ := initWithExperimentAndPipelineVersion(t) defer clients.Close() clients.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(DefaultFakeIdTwo, nil)) - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) server := NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) apiJob := &apiv1beta1.Job{ Name: "job1", @@ -618,7 +618,7 @@ func TestGetJob_Unauthorized(t *testing.T) { assert.Nil(t, err) clients.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) server = NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) _, err = server.GetJob(ctx, &apiv1beta1.GetJobRequest{Id: job.Id}) @@ -825,7 +825,7 @@ func TestEnableJob_Unauthorized(t *testing.T) { assert.Nil(t, err) clients.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) server = NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) _, err = server.EnableJob(ctx, &apiv1beta1.EnableJobRequest{Id: job.Id}) @@ -870,7 +870,7 @@ func TestDisableJob_Unauthorized(t *testing.T) { assert.Nil(t, err) clients.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - manager = resource.NewResourceManager(clients) + manager = resource.NewResourceManager(clients, &resource.ResourceManagerOptions{CollectMetrics: false}) server = NewJobServer(manager, &JobServerOptions{CollectMetrics: false}) _, err = server.DisableJob(ctx, &apiv1beta1.DisableJobRequest{Id: job.Id}) diff --git a/backend/src/apiserver/server/pipeline_server_test.go b/backend/src/apiserver/server/pipeline_server_test.go index 784385e46b..8ff7831fe5 100644 --- a/backend/src/apiserver/server/pipeline_server_test.go +++ b/backend/src/apiserver/server/pipeline_server_test.go @@ -56,7 +56,7 @@ func TestCreatePipelineV1_YAML(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} 
pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ @@ -85,7 +85,7 @@ func TestCreatePipelineV1_LargeFile(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ @@ -114,7 +114,7 @@ func TestCreatePipelineV1_Tarball(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ @@ -144,7 +144,7 @@ func TestCreatePipelineV1_InvalidYAML(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} createdPipeline, err := pipelineServer.CreatePipelineV1( @@ -166,7 +166,7 @@ func TestCreatePipelineV1_InvalidURL(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} createdPipeline, err := pipelineServer.CreatePipelineV1( @@ -188,7 +188,7 @@ func TestCreatePipelineV1_MissingUrl(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} createdPipeline, err := pipelineServer.CreatePipelineV1( @@ -218,7 +218,7 @@ func TestCreatePipelineV1_ExistingPipeline(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: 
&PipelineServerOptions{CollectMetrics: false}} pipelineServer.CreatePipelineV1( @@ -267,7 +267,7 @@ func TestCreatePipelineVersionV1_YAML(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal( util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{ resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}, @@ -312,7 +312,7 @@ func TestCreatePipelineVersion_InvalidYAML(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} _, err := pipelineServer.CreatePipelineVersionV1( @@ -344,7 +344,7 @@ func TestCreatePipelineVersion_Tarball(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipelineVersion, err := pipelineServer.CreatePipelineVersionV1( @@ -386,7 +386,7 @@ func TestCreatePipelineVersion_InvalidURL(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} _, err := pipelineServer.CreatePipelineVersionV1(context.Background(), &api.CreatePipelineVersionRequest{ @@ -417,7 +417,7 @@ func TestListPipelineVersion_NoResourceKey(t *testing.T) { defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} @@ -433,7 +433,7 @@ func TestListPipelinesPublic(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} _, err := pipelineServer.ListPipelinesV1(context.Background(), @@ -452,7 +452,7 @@ func 
TestGetPipelineByName_OK(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ Pipeline: &api.Pipeline{ @@ -485,7 +485,7 @@ func TestGetPipelineByName_Shared_OK(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ Pipeline: &api.Pipeline{ @@ -513,7 +513,7 @@ func TestGetPipelineByName_NotFound(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} _, err := pipelineServer.GetPipelineByNameV1(context.Background(), &api.GetPipelineByNameRequest{ @@ -527,7 +527,7 @@ func TestGetPipelineByName_WrongNameSpace(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ Pipeline: &api.Pipeline{ @@ -563,7 +563,7 @@ func TestCreatePipelineVersionAndCheckLatestVersion(t *testing.T) { // Close the server when test finishes defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipeline, err := pipelineServer.CreatePipelineV1(context.Background(), &api.CreatePipelineRequest{ @@ -579,7 +579,7 @@ func TestCreatePipelineVersionAndCheckLatestVersion(t *testing.T) { assert.NotNil(t, pipeline.DefaultVersion.Id) 
clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal("123e4567-e89b-12d3-a456-526655440001", nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer = PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} pipelineVersion, err := pipelineServer.CreatePipelineVersionV1( @@ -634,7 +634,7 @@ func TestPipelineServer_CreatePipeline(t *testing.T) { httpServer := getMockServer(t) defer httpServer.Close() clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} type args struct { @@ -699,7 +699,7 @@ func TestPipelineServer_CreatePipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { clientManager.UpdateUUID(util.NewFakeUUIDGeneratorOrFatal(tt.id, nil)) - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer = PipelineServer{resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}} got, err := pipelineServer.CreatePipeline(context.Background(), &apiv2.CreatePipelineRequest{Pipeline: tt.arg}) if tt.wantErr { @@ -850,7 +850,7 @@ func TestPipelineServer_CreatePipelineAndVersion_v2(t *testing.T) { for _, tt := range tests { clientManager := resource.NewFakeClientManagerOrFatal( util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) pipelineServer := PipelineServer{ resourceManager: resourceManager, httpClient: httpServer.Client(), options: &PipelineServerOptions{CollectMetrics: false}, } diff --git a/backend/src/apiserver/server/pipeline_upload_server_test.go b/backend/src/apiserver/server/pipeline_upload_server_test.go index 931f7b5f0f..e26eed60b9 100644 --- a/backend/src/apiserver/server/pipeline_upload_server_test.go +++ b/backend/src/apiserver/server/pipeline_upload_server_test.go @@ -614,14 +614,14 @@ func setupWriter(text string) (*bytes.Buffer, *multipart.Writer) { func setupClientManagerAndServer() (*resource.FakeClientManager, PipelineUploadServer) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := PipelineUploadServer{resourceManager: resourceManager, options: &PipelineUploadServerOptions{CollectMetrics: false}} return clientManager, server } func updateClientManager(clientManager *resource.FakeClientManager, uuid util.UUIDGeneratorInterface) PipelineUploadServer { clientManager.UpdateUUID(uuid) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) server := 
PipelineUploadServer{resourceManager: resourceManager, options: &PipelineUploadServerOptions{CollectMetrics: false}} return server } diff --git a/backend/src/apiserver/server/run_server.go b/backend/src/apiserver/server/run_server.go index 1a6cef80bf..a850fa9bdc 100644 --- a/backend/src/apiserver/server/run_server.go +++ b/backend/src/apiserver/server/run_server.go @@ -85,8 +85,6 @@ var ( Help: "The total number of RetryRun requests", }) - // TODO(jingzhang36): error count and success count. - runCount = promauto.NewGauge(prometheus.GaugeOpts{ Name: "run_server_run_count", Help: "The current number of runs in Kubeflow Pipelines instance", @@ -332,7 +330,9 @@ func (s *RunServer) DeleteRunV1(ctx context.Context, request *apiv1beta1.DeleteR return nil, util.Wrap(err, "Failed to delete a v1beta1 run") } if s.options.CollectMetrics { - runCount.Dec() + if util.GetMetricValue(runCount) > 0 { + runCount.Dec() + } } return &empty.Empty{}, nil } diff --git a/backend/src/apiserver/server/run_server_test.go b/backend/src/apiserver/server/run_server_test.go index 92a544763b..89aa763f01 100644 --- a/backend/src/apiserver/server/run_server_test.go +++ b/backend/src/apiserver/server/run_server_test.go @@ -1262,7 +1262,7 @@ func TestReportRunMetricsV1_Unauthorized(t *testing.T) { clientManager, resourceManager, runDetails := initWithOneTimeRun(t) defer clientManager.Close() clientManager.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - resourceManager = resource.NewResourceManager(clientManager) + resourceManager = resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) runServer := RunServer{resourceManager: resourceManager, options: &RunServerOptions{CollectMetrics: false}} _, err := runServer.ReportRunMetricsV1(ctx, &apiv1beta1.ReportRunMetricsRequest{ @@ -1502,7 +1502,7 @@ func TestReadArtifactsV1_Unauthorized(t *testing.T) { // make the following request unauthorized clientManager.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) runServer := RunServer{resourceManager: resourceManager, options: &RunServerOptions{CollectMetrics: false}} artifact := &apiv1beta1.ReadArtifactRequest{ @@ -1521,7 +1521,7 @@ func TestReadArtifactsV1_Unauthorized(t *testing.T) { func TestReadArtifactsV1_Run_NotFound(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - manager := resource.NewResourceManager(clientManager) + manager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) runServer := RunServer{resourceManager: manager, options: &RunServerOptions{CollectMetrics: false}} artifact := &apiv1beta1.ReadArtifactRequest{ RunId: "Wrong_RUN_UUID", diff --git a/backend/src/apiserver/server/visualization_server_test.go b/backend/src/apiserver/server/visualization_server_test.go index 0f32ec7e54..da6ecfee1f 100644 --- a/backend/src/apiserver/server/visualization_server_test.go +++ b/backend/src/apiserver/server/visualization_server_test.go @@ -276,7 +276,7 @@ func TestCreateVisualization_Unauthorized(t *testing.T) { clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) clientManager.SubjectAccessReviewClientFake = client.NewFakeSubjectAccessReviewClientUnauthorized() - resourceManager := 
resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) defer clientManager.Close() server := &VisualizationServer{ @@ -316,7 +316,7 @@ func TestCreateVisualization_Unauthenticated(t *testing.T) { ctx := metadata.NewIncomingContext(context.Background(), md) clientManager := resource.NewFakeClientManagerOrFatal(util.NewFakeTimeForEpoch()) - resourceManager := resource.NewResourceManager(clientManager) + resourceManager := resource.NewResourceManager(clientManager, &resource.ResourceManagerOptions{CollectMetrics: false}) defer clientManager.Close() server := &VisualizationServer{ diff --git a/backend/src/common/util/metrics.go b/backend/src/common/util/metrics.go new file mode 100644 index 0000000000..dc3ae8898c --- /dev/null +++ b/backend/src/common/util/metrics.go @@ -0,0 +1,60 @@ +// Copyright 2018 The Kubeflow Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package util + +import ( + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +type MetricsChan chan prometheus.Metric + +// GetMetricValue get metric value from registered Collector +func GetMetricValue(collector prometheus.Collector) float64 { + var total float64 + collectValue(collector, func(m dto.Metric) { + // retrieves data if current collector is a gauge + // if not then retrieves from a counter + if gauge := m.GetGauge(); gauge != nil { + total += m.GetGauge().GetValue() + } else { + if counter := m.GetCounter(); counter != nil { + total += m.GetCounter().GetValue() + } else { + glog.Errorln("invalid type, only valid collectors are: gauge, counter") + total = 0 + } + } + }) + return total +} + +func collectValue(collector prometheus.Collector, do func(dto.Metric)) { + c := make(MetricsChan) + + // collect calls the function for each metric associated with the Collector + go func(c MetricsChan) { + collector.Collect(c) + close(c) + }(c) + + // range across distinct label vector values + for x := range c { + m := dto.Metric{} + _ = x.Write(&m) + do(m) + } +} diff --git a/backend/third_party_licenses/apiserver.csv b/backend/third_party_licenses/apiserver.csv index 643f932b04..defbefb63e 100644 --- a/backend/third_party_licenses/apiserver.csv +++ b/backend/third_party_licenses/apiserver.csv @@ -85,7 +85,7 @@ github.com/oliveagle/jsonpath,https://github.com/oliveagle/jsonpath/blob/2e52cf6 github.com/pelletier/go-toml,https://github.com/pelletier/go-toml/blob/v1.9.4/LICENSE,Apache-2.0 github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 -github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.4.0/LICENSE,Apache-2.0 
github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/LICENSE,Apache-2.0 github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 @@ -116,7 +116,7 @@ google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0 google.golang.org/api/internal/third_party/uritemplates,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/internal/third_party/uritemplates/LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/ini.v1,https://github.com/go-ini/ini/blob/v1.66.3/LICENSE,Apache-2.0 gopkg.in/jcmturner/aescts.v1,https://github.com/jcmturner/aescts/blob/v1.0.1/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/cache_server.csv b/backend/third_party_licenses/cache_server.csv index 20d1fe62be..85c20629e5 100644 --- a/backend/third_party_licenses/cache_server.csv +++ b/backend/third_party_licenses/cache_server.csv @@ -59,7 +59,7 @@ github.com/oliveagle/jsonpath,https://github.com/oliveagle/jsonpath/blob/2e52cf6 github.com/peterhellberg/duration,https://github.com/peterhellberg/duration/blob/ec6baeebcd10/LICENSE,MIT github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 -github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.4.0/LICENSE,Apache-2.0 github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/LICENSE,Apache-2.0 github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 @@ -80,7 +80,7 @@ golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3- golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/jcmturner/aescts.v1,https://github.com/jcmturner/aescts/blob/v1.0.1/LICENSE,Apache-2.0 gopkg.in/jcmturner/dnsutils.v1,https://github.com/jcmturner/dnsutils/blob/v1.0.1/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/driver.csv b/backend/third_party_licenses/driver.csv index 
2e0d171ebe..b05a884c4c 100644 --- a/backend/third_party_licenses/driver.csv +++ b/backend/third_party_licenses/driver.csv @@ -51,7 +51,7 @@ google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0 google.golang.org/api/internal/third_party/uritemplates,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/internal/third_party/uritemplates/LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT diff --git a/backend/third_party_licenses/launcher.csv b/backend/third_party_licenses/launcher.csv index 974640725a..4aba0f16d3 100644 --- a/backend/third_party_licenses/launcher.csv +++ b/backend/third_party_licenses/launcher.csv @@ -46,7 +46,7 @@ google.golang.org/api,https://github.com/googleapis/google-api-go-client/blob/v0 google.golang.org/api/internal/third_party/uritemplates,https://github.com/googleapis/google-api-go-client/blob/v0.70.0/internal/third_party/uritemplates/LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT diff --git a/backend/third_party_licenses/persistence_agent.csv b/backend/third_party_licenses/persistence_agent.csv index 31defe0c67..5b9630a8af 100644 --- a/backend/third_party_licenses/persistence_agent.csv +++ b/backend/third_party_licenses/persistence_agent.csv @@ -55,7 +55,7 @@ github.com/oklog/ulid,https://github.com/oklog/ulid/blob/v1.3.1/LICENSE,Apache-2 github.com/oliveagle/jsonpath,https://github.com/oliveagle/jsonpath/blob/2e52cf6e6852/LICENSE,MIT github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 -github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.4.0/LICENSE,Apache-2.0 github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/LICENSE,Apache-2.0 github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 @@ -76,7 +76,7 @@ golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3- 
golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/jcmturner/aescts.v1,https://github.com/jcmturner/aescts/blob/v1.0.1/LICENSE,Apache-2.0 gopkg.in/jcmturner/dnsutils.v1,https://github.com/jcmturner/dnsutils/blob/v1.0.1/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/swf.csv b/backend/third_party_licenses/swf.csv index 54d644960f..c83fb5cc8e 100644 --- a/backend/third_party_licenses/swf.csv +++ b/backend/third_party_licenses/swf.csv @@ -60,7 +60,7 @@ github.com/oliveagle/jsonpath,https://github.com/oliveagle/jsonpath/blob/2e52cf6 github.com/pelletier/go-toml,https://github.com/pelletier/go-toml/blob/v1.9.4/LICENSE,Apache-2.0 github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 -github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.4.0/LICENSE,Apache-2.0 github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/LICENSE,Apache-2.0 github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 @@ -86,7 +86,7 @@ golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3- golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause google.golang.org/genproto,https://github.com/googleapis/go-genproto/blob/1973136f34c6/LICENSE,Apache-2.0 google.golang.org/grpc,https://github.com/grpc/grpc-go/blob/v1.44.0/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/ini.v1,https://github.com/go-ini/ini/blob/v1.66.3/LICENSE,Apache-2.0 gopkg.in/jcmturner/aescts.v1,https://github.com/jcmturner/aescts/blob/v1.0.1/LICENSE,Apache-2.0 diff --git a/backend/third_party_licenses/viewer.csv b/backend/third_party_licenses/viewer.csv index f6589f7589..3087d8f58e 100644 --- a/backend/third_party_licenses/viewer.csv +++ b/backend/third_party_licenses/viewer.csv @@ -30,7 +30,7 @@ github.com/modern-go/reflect2,https://github.com/modern-go/reflect2/blob/v1.0.2/ github.com/munnerz/goautoneg,https://github.com/munnerz/goautoneg/blob/a7dc8b61c822/LICENSE,BSD-3-Clause github.com/pkg/errors,https://github.com/pkg/errors/blob/v0.9.1/LICENSE,BSD-2-Clause github.com/prometheus/client_golang/prometheus,https://github.com/prometheus/client_golang/blob/v1.12.1/LICENSE,Apache-2.0 
-github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.2.0/LICENSE,Apache-2.0 +github.com/prometheus/client_model/go,https://github.com/prometheus/client_model/blob/v0.4.0/LICENSE,Apache-2.0 github.com/prometheus/common,https://github.com/prometheus/common/blob/v0.32.1/LICENSE,Apache-2.0 github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,https://github.com/prometheus/common/blob/v0.32.1/internal/bitbucket.org/ww/goautoneg/README.txt,BSD-3-Clause github.com/prometheus/procfs,https://github.com/prometheus/procfs/blob/v0.7.3/LICENSE,Apache-2.0 @@ -42,7 +42,7 @@ golang.org/x/term,https://cs.opensource.google/go/x/term/+/v0.8.0:LICENSE,BSD-3- golang.org/x/text,https://cs.opensource.google/go/x/text/+/v0.9.0:LICENSE,BSD-3-Clause golang.org/x/time/rate,https://cs.opensource.google/go/x/time/+/90d013bb:LICENSE,BSD-3-Clause gomodules.xyz/jsonpatch/v2,https://github.com/gomodules/jsonpatch/blob/v2.2.0/v2/LICENSE,Apache-2.0 -google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.27.1/LICENSE,BSD-3-Clause +google.golang.org/protobuf,https://github.com/protocolbuffers/protobuf-go/blob/v1.30.0/LICENSE,BSD-3-Clause gopkg.in/inf.v0,https://github.com/go-inf/inf/blob/v0.9.1/LICENSE,BSD-3-Clause gopkg.in/yaml.v2,https://github.com/go-yaml/yaml/blob/v2.4.0/LICENSE,Apache-2.0 gopkg.in/yaml.v3,https://github.com/go-yaml/yaml/blob/v3.0.1/LICENSE,MIT diff --git a/go.mod b/go.mod index 30743ae074..2140e27775 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/peterhellberg/duration v0.0.0-20191119133758-ec6baeebcd10 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.12.1 + github.com/prometheus/client_model v0.4.0 github.com/robfig/cron v1.2.0 github.com/sirupsen/logrus v1.8.1 github.com/spf13/viper v1.10.1 @@ -48,7 +49,7 @@ require ( google.golang.org/genproto v0.0.0-20220310185008-1973136f34c6 google.golang.org/grpc v1.44.0 google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0 - google.golang.org/protobuf v1.27.1 + google.golang.org/protobuf v1.30.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.24.3 k8s.io/apimachinery v0.24.3 diff --git a/go.sum b/go.sum index 24aa560db5..bef3f379d6 100644 --- a/go.sum +++ b/go.sum @@ -1151,8 +1151,9 @@ github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrb github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= +github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= @@ -2071,8 +2072,9 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj google.golang.org/protobuf 
v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From 72e7f964c542ffd0bc485da9fce9ca5b047c3ab7 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 10 Oct 2023 14:53:17 -0700 Subject: [PATCH 200/253] feat(components): [text2sql] Initialize text2sql pipeline PiperOrigin-RevId: 572374590 --- .../model_evaluation/__init__.py | 2 + .../model_evaluation/text2sql/__init__.py | 14 +++++++ .../evaluation_llm_text2sql_pipeline.py | 42 +++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index 04cbe28d46..e41a453603 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -34,11 +34,13 @@ from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp +from google_cloud_pipeline_components._implementation.model_evaluation.text2sql.evaluation_llm_text2sql_pipeline import evaluation_llm_text2sql_pipeline __all__ = [ 'evaluation_llm_safety_bias_pipeline', 'evaluation_llm_embedding_pipeline', + 'evaluation_llm_text2sql_pipeline', 'ChunkingOp', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/__init__.py new file mode 100644 index 0000000000..d0127e1ee4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation Text2SQL Pipeline.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py new file mode 100644 index 0000000000..9ce8270e75 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -0,0 +1,42 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Text2SQL evaluation pipeline.""" + +from google_cloud_pipeline_components.types import artifact_types +import kfp + + +_PIPELINE_NAME = 'evaluation_llm_text2sql_pipeline' + + +@kfp.dsl.pipeline(name=_PIPELINE_NAME) +def evaluation_llm_text2sql_pipeline( + location: str, + model_name: str, +): + """The LLM Evaluation Text2SQL Pipeline. + + Args: + location: Required. The GCP region that runs the pipeline components. + model_name: The path for model to generate embeddings. + """ + + get_vertex_model_task = kfp.dsl.importer( + artifact_uri=( + f'https://{location}-aiplatform.googleapis.com/v1/{model_name}' + ), + artifact_class=artifact_types.VertexModel, + metadata={'resourceName': model_name}, + ) + get_vertex_model_task.set_display_name('get-vertex-model') From 9cf92c31619111ba218a876c292ca4f2a45096c6 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Tue, 10 Oct 2023 14:54:49 -0700 Subject: [PATCH 201/253] feat(components): Release new model evaluation image versions PiperOrigin-RevId: 572375018 --- .../_implementation/model_evaluation/version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py index 3bd9886e11..f5bc1ab908 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/version.py @@ -13,8 +13,8 @@ # limitations under the License. 
"""Version constants for model evaluation components.""" -_EVAL_VERSION = 'v0.9.2' -_LLM_EVAL_VERSION = 'v0.2' +_EVAL_VERSION = 'v0.9.3' +_LLM_EVAL_VERSION = 'v0.3' _EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/model-evaluation' _LLM_EVAL_IMAGE_NAME = 'gcr.io/ml-pipeline/llm-model-evaluation' From 3c5f62a6a32669736f73f54c79ab6b0d04349c6f Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Tue, 10 Oct 2023 16:02:19 -0700 Subject: [PATCH 202/253] feat(backend): Update driver and launcher images (#10076) --- backend/Dockerfile.driver | 2 +- backend/Dockerfile.launcher | 2 +- backend/src/v2/compiler/argocompiler/argo.go | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/Dockerfile.driver b/backend/Dockerfile.driver index 78b2eb3133..5ffc60a4aa 100644 --- a/backend/Dockerfile.driver +++ b/backend/Dockerfile.driver @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM golang:1.20.4-alpine3.17 as builder +FROM golang:1.20.9-alpine3.17 as builder WORKDIR /go/src/github.com/kubeflow/pipelines COPY . . diff --git a/backend/Dockerfile.launcher b/backend/Dockerfile.launcher index b936267ac9..4269ec52ef 100644 --- a/backend/Dockerfile.launcher +++ b/backend/Dockerfile.launcher @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM golang:1.20.4-alpine3.17 as builder +FROM golang:1.20.9-alpine3.17 as builder WORKDIR /go/src/github.com/kubeflow/pipelines COPY . . diff --git a/backend/src/v2/compiler/argocompiler/argo.go b/backend/src/v2/compiler/argocompiler/argo.go index 3e4f33842d..dc9dcd6457 100644 --- a/backend/src/v2/compiler/argocompiler/argo.go +++ b/backend/src/v2/compiler/argocompiler/argo.go @@ -116,8 +116,8 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S wf: wf, templates: make(map[string]*wfapi.Template), // TODO(chensun): release process and update the images. 
- driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:0ce9bf20ac9cbb21e84ff0762d5ae508d21e9c85fde2b14b51363bd1b8cd7528", - launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:80cf120abd125db84fa547640fd6386c4b2a26936e0c2b04a7d3634991a850a4", + driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:fa68f52639b4f4683c9f8f468502867c9663823af0fbcff1cbe7847d5374bf5c", + launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:6641bf94acaeec03ee7e231241800fce2f0ad92eee25371bd5248ca800a086d7", job: job, spec: spec, executors: deploy.GetExecutors(), From 1f37243ec1d1a49af25f7de38b22b068edc18705 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 10 Oct 2023 17:17:08 -0700 Subject: [PATCH 203/253] feat(components): Update embedding pipeline containers to use llm-pipeline image PiperOrigin-RevId: 572411103 --- .../model_evaluation/llm_embedding_retrieval/component.py | 8 +++----- .../llm_information_retrieval_preprocessor/component.py | 6 ++---- .../model_evaluation/llm_retrieval_metrics/component.py | 5 ++--- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py index da49f11904..4f481e5b4b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_embedding_retrieval/component.py @@ -15,16 +15,14 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils -from kfp.dsl import container_component +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import Artifact +from kfp.dsl import container_component from kfp.dsl import Input from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' - - @container_component def llm_embedding_retrieval( gcp_resources: OutputPath(str), @@ -114,7 +112,7 @@ def llm_embedding_retrieval( custom_job_payload=utils.build_custom_job_payload( display_name=display_name, machine_type=machine_type, - image_uri=_IMAGE_URI, + image_uri=version.LLM_EVAL_IMAGE_TAG, args=[ f'--embedding_retrieval={True}', f'--project={project}', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py index a17faa0ff2..ee9d3fecdc 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_information_retrieval_preprocessor/component.py @@ -15,14 +15,12 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 
'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' - - @container_component def llm_information_retrieval_preprocessor( gcp_resources: OutputPath(str), @@ -129,7 +127,7 @@ def llm_information_retrieval_preprocessor( custom_job_payload=utils.build_custom_job_payload( display_name=display_name, machine_type=machine_type, - image_uri=_IMAGE_URI, + image_uri=version.LLM_EVAL_IMAGE_TAG, args=[ f'--information_retrieval_preprocessor={True}', f'--project={project}', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py index 3dfe2d4ff0..e6ffed536d 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/llm_retrieval_metrics/component.py @@ -2,14 +2,13 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version from kfp.dsl import container_component from kfp.dsl import Metrics from kfp.dsl import Output from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER -_IMAGE_URI = 'us-docker.pkg.dev/vertex-evaluation/public/llm:v0.3' - @container_component def llm_retrieval_metrics( @@ -99,7 +98,7 @@ def llm_retrieval_metrics( custom_job_payload=utils.build_custom_job_payload( display_name=display_name, machine_type=machine_type, - image_uri=_IMAGE_URI, + image_uri=version.LLM_EVAL_IMAGE_TAG, args=[ f'--retrieval_metrics={True}', f'--project={project}', From 5c44143742213c11821d4775d7fda50724747032 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 10 Oct 2023 20:30:54 -0700 Subject: [PATCH 204/253] feat(components): Implement timestamp filtering for chunking PiperOrigin-RevId: 572444659 --- .../_implementation/model_evaluation/chunking/component.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py index 291f480b9c..9e3cda2a4b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/chunking/component.py @@ -30,6 +30,8 @@ def chunking( location: str, input_text_gcs_dir: str, output_bq_destination: str, + output_text_gcs_dir: str, + generation_threshold_microseconds: str, display_name: str = 'chunking', machine_type: str = 'n1-standard-8', service_account: str = '', @@ -45,6 +47,9 @@ def chunking( include '/' at the end of the path. output_bq_destination: The BigQuery table URI where the component will write chunks to. + output_text_gcs_dir: The GCS folder to hold intermediate data. + generation_threshold_microseconds: only files created on/after this + generation threshold will be processed, in microseconds. display_name: The name of the chunking job/component. machine_type: The machine type of this custom job. 
service_account: Sets the default service account for workload run-as @@ -79,6 +84,8 @@ def chunking( f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--input_text_gcs_dir={input_text_gcs_dir}', f'--output_bq_destination={output_bq_destination}', + f'--output_text_gcs_dir={output_text_gcs_dir}', + f'--generation_threshold_microseconds={generation_threshold_microseconds}', f'--gcp_resources={gcp_resources}', '--executor_input={{$.json_escape[1]}}', ], From b348911974ec489dda349f4dd7e82ffef4f4487e Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 11 Oct 2023 10:47:44 -0700 Subject: [PATCH 205/253] chore(components): pin GCPC's `google-cloud-aiplatform` dependency PiperOrigin-RevId: 572621401 --- components/google-cloud/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index 6e45b90e6e..36e5e5e913 100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -28,8 +28,7 @@ RUN pip3 install -U google-cloud-storage RUN pip3 install -U google-api-python-client # Required by dataflow_launcher -# b/238481913: Pinning the version of apache_beam to below 2.34 for now -RUN pip3 install -U "apache_beam[gcp]<2.34.0" +RUN pip3 install -U "apache_beam[gcp]" # Required for sklearn/train_test_split_jsonl RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn<=1.0.2" From 8f07661ae96921a9245c43f2385a92ebcf69978c Mon Sep 17 00:00:00 2001 From: Changyu Zhu Date: Wed, 11 Oct 2023 12:54:49 -0700 Subject: [PATCH 206/253] feat(components): Add vision data converter component to preview PiperOrigin-RevId: 572660064 --- components/google-cloud/RELEASE.md | 1 + .../preview/automl/vision/__init__.py | 20 ++ .../preview/automl/vision/data_converter.py | 187 ++++++++++++++++++ 3 files changed, 208 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index c7d7a82f5f..5e3e393e9b 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,6 +1,7 @@ ## Upcoming release * Upload tensorboard metrics from `preview.llm.rlhf_pipeline` if a `tensorboard_resource_id` is provided at runtime. * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. +* Add `preview.automl.vision` and `DataConverterJobOp`. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/__init__.py new file mode 100644 index 0000000000..d854e1cd11 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""AutoML Vision components.""" + +from google_cloud_pipeline_components.preview.automl.vision.data_converter import data_converter as DataConverterJobOp + +__all__ = [ + 'DataConverterJobOp', +] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py new file mode 100644 index 0000000000..6e6b108aa9 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py @@ -0,0 +1,187 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""AutoML Vision training data converter.""" + +from typing import Optional + +from google_cloud_pipeline_components import _image +from google_cloud_pipeline_components import _placeholders +from kfp import dsl + + +# pylint: disable=g-doc-args +@dsl.container_component +def data_converter( + display_name: str, + input_file_path: str, + input_file_type: str, + objective: str, + output_dir: str, + gcp_resources: dsl.OutputPath(str), + location: str = 'us-central1', + timeout: str = '604800s', + service_account: Optional[str] = None, + machine_type: str = 'n1-highmem-4', + output_shape: Optional[str] = None, + split_ratio: Optional[str] = None, + num_shard: Optional[str] = None, + output_fps: Optional[int] = None, + num_frames: Optional[int] = None, + min_duration_sec: Optional[float] = None, + pos_neg_ratio: Optional[float] = None, + encryption_spec_key_name: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, +): + # fmt: off + """Runs AutoML Vision data conversion. It will be launched as a Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. + + Args: + display_name: The name of the CustomJob. + input_file_path: Input file path. Please refer to different input formats in Vertex AI Documentation. For example, [image classification prepare data](https://cloud.google.com/vertex-ai/docs/image-data/classification/prepare-data) page. + input_file_type: 'csv', 'jsonl', or 'coco_json'. Must be one of the input file types supported by the objective. + objective: One of 'icn', 'iod', 'isg', 'vcn', or 'var'. + output_dir: Cloud Storage directory for storing converted data and pipeline information. + location: Location for creating the custom training job. If not set, default to us-central1. + timeout: The maximum job running time. 
The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". + service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + machine_type: [Machine type](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types) for the CustomJob. If conversion failed, consider using a machine type with more RAM or splitting dataset into smaller pieces. + output_shape: Video only. Output shape (height,width) for video frames. + split_ratio: Proportion of data to split into train/validation/test, separated by comma. + num_shard: Number of train/validation/test shards, separated by comma. + output_fps: Video only. Output frames per second. + num_frames: VAR only. Number of frames inside a single video clip window. + min_duration_sec: VAR only. Minimum duration of a video clip annotation in seconds. + pos_neg_ratio: VAR only. Sampling ratio between positive and negative segments. + encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. + project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. + Returns: + gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. 
+ """ + # fmt: on + return dsl.ContainerSpec( + image=_image.GCPC_IMAGE_TAG, + command=[ + 'python3', + '-u', + '-m', + 'google_cloud_pipeline_components.container.v1.custom_job.launcher', + ], + args=[ + '--type', + 'CustomJob', + '--payload', + dsl.ConcatPlaceholder([ + '{', + '"display_name": "', + display_name, + '",', + '"job_spec": {', + '"worker_pool_specs": [{', + '"machine_spec": {', + '"machine_type": "', + machine_type, + '"},', + '"replica_count": 1,', + '"container_spec": {', + ( + '"image_uri":' + ' "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter",' + ), + '"args": [', + '"--input_file_path", "', + input_file_path, + '",', + '"--input_file_type", "', + input_file_type, + '",', + '"--objective", "', + objective, + '",', + '"--output_dir", "', + output_dir, + '"', + dsl.IfPresentPlaceholder( + input_name='output_shape', + then=dsl.ConcatPlaceholder( + [',"--output_shape","', output_shape, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='split_ratio', + then=dsl.ConcatPlaceholder( + [',"--split_ratio","', split_ratio, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='num_shard', + then=dsl.ConcatPlaceholder( + [',"--num_shard","', num_shard, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='output_fps', + then=dsl.ConcatPlaceholder( + [',"--output_fps","', output_fps, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='num_frames', + then=dsl.ConcatPlaceholder( + [',"--num_frames","', num_frames, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='min_duration_sec', + then=dsl.ConcatPlaceholder( + [',"--min_duration_sec","', min_duration_sec, '"'] + ), + ), + dsl.IfPresentPlaceholder( + input_name='pos_neg_ratio', + then=dsl.ConcatPlaceholder( + [',"--pos_neg_ratio","', pos_neg_ratio, '"'] + ), + ), + ']}}],', + '"scheduling": {', + '"timeout": "', + timeout, + '"', + '},', + dsl.IfPresentPlaceholder( + input_name='service_account', + then=dsl.ConcatPlaceholder( + ['"service_account": "', service_account, '",'] + ), + ), + '"enable_web_access": false,', + '"base_output_directory": {', + '"output_uri_prefix": "', + output_dir, + '"', + '}},', + '"encryption_spec": {', + '"kms_key_name": "', + encryption_spec_key_name, + '"', + '}}', + ]), + '--project', + project, + '--location', + location, + '--gcp_resources', + gcp_resources, + ], + ) From efe630cf80e79b815a3a37a1c7fca7c3a56fae21 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Wed, 11 Oct 2023 20:27:27 +0000 Subject: [PATCH 207/253] chore(release): bumped version to 2.0.2 --- CHANGELOG.md | 113 ++++++++++++++++++ VERSION | 2 +- .../api/v1beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v1beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../api/v2beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v2beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../templates/application.yaml | 2 +- manifests/gcp_marketplace/schema.yaml | 4 +- .../base/cache-deployer/kustomization.yaml | 2 +- .../kustomize/base/cache/kustomization.yaml | 2 +- .../generic/pipeline-install-config.yaml | 4 +- .../base/metadata/base/kustomization.yaml | 2 +- .../base/pipeline/kustomization.yaml | 12 +- .../metadata-writer/kustomization.yaml | 2 +- 
.../env/gcp/inverse-proxy/kustomization.yaml | 2 +- 23 files changed, 146 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c4a74290a..db4d260733 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,118 @@ # Changelog +### [2.0.2](https://github.com/kubeflow/pipelines/compare/2.0.0...2.0.2) (2023-10-11) + + +### Features + +* **backend:** add postgres initialization ([\#9798](https://github.com/kubeflow/pipelines/issues/9798)) ([e1f0c01](https://github.com/kubeflow/pipelines/commit/e1f0c010f80031ea09af69f9bbedf2e24509605f)) +* **backend:** Added metrics to be collected from failed/successful workflows ([\#9576](https://github.com/kubeflow/pipelines/issues/9576)) ([5835824](https://github.com/kubeflow/pipelines/commit/5835824e9cca76af70b733e7d494bb4bbdd8e2b7)) +* **backend:** enforce SA Token based auth b/w Persistence Agent and Pipeline API Server ([\#9957](https://github.com/kubeflow/pipelines/issues/9957)) ([760c158](https://github.com/kubeflow/pipelines/commit/760c1589edbe58bbd77611222a66a17b371a0d08)) +* **backend:** Update driver and launcher images ([\#10076](https://github.com/kubeflow/pipelines/issues/10076)) ([3c5f62a](https://github.com/kubeflow/pipelines/commit/3c5f62a6a32669736f73f54c79ab6b0d04349c6f)) +* **chore:** Change AutoML Vision Error Analysis pipeline names ([450e910](https://github.com/kubeflow/pipelines/commit/450e9108172b5a4eb76abb6647bb65661581747a)) +* **components:** [text2sql] Initialize text2sql pipeline ([72e7f96](https://github.com/kubeflow/pipelines/commit/72e7f964c542ffd0bc485da9fce9ca5b047c3ab7)) +* **components:** add `persistent_resource_id` to preview GCPC custom job components/utils ([fc1f12b](https://github.com/kubeflow/pipelines/commit/fc1f12b7bd2f28390c838abcf3dd020723ad573a)) +* **components:** Add AutoML image training job v1 remote runner ([df4bc46](https://github.com/kubeflow/pipelines/commit/df4bc46725798d27a32c3935e48dae2384e7d4b9)) +* **components:** Add Feature Attribution components to _implementation/model_evaluation. Add LLM Eval text generation and text classification pipelines to preview namespace init file ([f454a86](https://github.com/kubeflow/pipelines/commit/f454a86177b85b5cc11a7c57f63fa7f03f45604c)) +* **components:** Add helper functions to create slice_specs and bias_configs ([95901c8](https://github.com/kubeflow/pipelines/commit/95901c88302c61e6cdc33ddd2cd96ab65663e881)) +* **components:** Add LLM implementation component that uploads tensorboard metrics after training ([b273aab](https://github.com/kubeflow/pipelines/commit/b273aabb894338c85093b0fb564bb5b3094e36b7)) +* **components:** Add main entry and command-line flags for Templated Custom Job Launcher ([f8f01bc](https://github.com/kubeflow/pipelines/commit/f8f01bcd08ba30bb8ac902843468984fdc662033)) +* **components:** Add rlhf and infer pipelines to preview/llm. Add llm related components to _implementation/llm ([611298a](https://github.com/kubeflow/pipelines/commit/611298a8ee68f406e09009debb909a44de0ae99e)) +* **components:** Add sampling_strategy parameter to bulk inferrer to support different strategy. 
By default, we use greedy ([e21174f](https://github.com/kubeflow/pipelines/commit/e21174f94aa75f48b6ae99f4c4b64f82d91bffd9)) +* **components:** Add support for customizing evaluation_display_name in model evaluation pipelines ([e8b8450](https://github.com/kubeflow/pipelines/commit/e8b8450e0a9501eca130b02d2cf2995b994d02c3)) +* **components:** add Vertex RAI safety bias evaluation pipeline ([b630d5c](https://github.com/kubeflow/pipelines/commit/b630d5c8ae7559be0011e67f01e3aec1946ef765)) +* **components:** Add vision data converter component to preview ([8f07661](https://github.com/kubeflow/pipelines/commit/8f07661ae96921a9245c43f2385a92ebcf69978c)) +* **components:** Allow ImportModelEvaluationOp to take LLM metrics from --metrics and --problem_type parameters ([d331ca0](https://github.com/kubeflow/pipelines/commit/d331ca0204359d67e03fcd9b903ed2eff2b299a6)) +* **components:** Components for Embedding Eval pipeline ([562cd29](https://github.com/kubeflow/pipelines/commit/562cd299cc25244b9b3d900cd8c6cb86142f8326)) +* **components:** define new GCPC Model Eval component for LLM Text Generation ([a634eef](https://github.com/kubeflow/pipelines/commit/a634eef3ec541ee64eb0220d5db12b82f682479e)) +* **components:** Embedding eval pipeline for experimental launch ([cac1856](https://github.com/kubeflow/pipelines/commit/cac185653317326459ff1f4a107b86c29aedaf59)) +* **components:** fork a subset of `v1` `custom_job` and `gcp_launcher` container code to `preview` ([abf05f4](https://github.com/kubeflow/pipelines/commit/abf05f48191b214bf5e993cd4cc725ff793d544c)) +* **components:** Implement `create_templated_custom_job` for Templated Custom Job Launcher ([e307545](https://github.com/kubeflow/pipelines/commit/e307545e689516c1249d1211e4131db49a346ffd)) +* **components:** Implement chunking for embedding evaluation pipeline ([0ced6ec](https://github.com/kubeflow/pipelines/commit/0ced6ec7d2846faefc655bad5ea549f81cfcd373)) +* **components:** Implement helper functions for Jinja2-based Templated Custom Job Launcher ([8518e95](https://github.com/kubeflow/pipelines/commit/8518e95efcdb2c78a4ae719af66b72caac082267)) +* **components:** Implement LLM Safety Bias Component & E2E tests ([ae804f4](https://github.com/kubeflow/pipelines/commit/ae804f471cb5ad7e4ba70ee44bf958a4a909d2a9)) +* **components:** Implement the chunking component ([a76e385](https://github.com/kubeflow/pipelines/commit/a76e385b20b30f9a974139bc4d6d04f8517441f2)) +* **components:** Implement timestamp filtering for chunking ([5c44143](https://github.com/kubeflow/pipelines/commit/5c44143742213c11821d4775d7fda50724747032)) +* **components:** internal change ([0670337](https://github.com/kubeflow/pipelines/commit/067033762db315f83b84cfe1d6dc039c96a0e9f2)) +* **components:** Internal change ([ff90cea](https://github.com/kubeflow/pipelines/commit/ff90ceae9a4f403a14dd01c5468068c6079d511f)) +* **components:** Internal change ([1dc8453](https://github.com/kubeflow/pipelines/commit/1dc84534d406e1b3fd683fbc1504587e22d5f5d8)) +* **components:** Internal change ([f80d2b3](https://github.com/kubeflow/pipelines/commit/f80d2b30e4c7d05c0f511f3bf3d6dd102a9a578a)) +* **components:** Internal change ([f8c1f9c](https://github.com/kubeflow/pipelines/commit/f8c1f9cf21d2472017c1a07319d3fab4c22fa7b5)) +* **components:** Metric importing for embedding evaluation ([47f1147](https://github.com/kubeflow/pipelines/commit/47f11475c5b19cc99a49962527723fff5f85aa5a)) +* **components:** Move model & data bias components to preview 
([314daa4](https://github.com/kubeflow/pipelines/commit/314daa4a852916c098cd4c126fce8745f4778deb)) +* **components:** Output imported evaluation resource name in ImportModelEvaluationOp ([c27d23a](https://github.com/kubeflow/pipelines/commit/c27d23a79455e18dc6ad362d2ee7353028f32ca6)) +* **components:** Release new model evaluation image versions ([9cf92c3](https://github.com/kubeflow/pipelines/commit/9cf92c31619111ba218a876c292ca4f2a45096c6)) +* **components:** Review and update batch_predict_job GCPC docstrings ([ea7a5ef](https://github.com/kubeflow/pipelines/commit/ea7a5efb97e5aa14dbf58ab55aa2f68b1ddc5941)) +* **components:** Support multiple chunking functions ([4945e2c](https://github.com/kubeflow/pipelines/commit/4945e2cfc5848898727e608ab2f9c607bb99dc92)) +* **components:** Switch v1 AutoMLImageTrainingJob to use the pipeline remote runner ([15c24e3](https://github.com/kubeflow/pipelines/commit/15c24e344f4c624d151116e548c470be935b8e70)) +* **components:** Update container URIs for embedding eval components ([f43272d](https://github.com/kubeflow/pipelines/commit/f43272dee8c40563ee05c07d9e1de56c4ba7c08f)) +* **components:** Update default image tag used by LLM implementation components ([b31d8a5](https://github.com/kubeflow/pipelines/commit/b31d8a57ef5db67a8cd782d7ab60f7e5b131ae7a)) +* **components:** Update embedding pipeline containers to use llm-pipeline image ([1f37243](https://github.com/kubeflow/pipelines/commit/1f37243ec1d1a49af25f7de38b22b068edc18705)) +* **components:** Update policy to reward model name mapping in function based component in _implementation/llm ([110e082](https://github.com/kubeflow/pipelines/commit/110e0824812883b74c73b26603a78d8cc00548d5)) +* **components:** Update RAI safety component with latest image ([af753dc](https://github.com/kubeflow/pipelines/commit/af753dc645ea2630a07dce3c0a1287ee3d2d5c87)) +* **components:** Update supported large model reference names that can be resolved by function based component in _implementation/llm ([9ce2866](https://github.com/kubeflow/pipelines/commit/9ce28665276a74184339ee86c8ca84f8368fb8b4)) +* **components:** Upgrade LLM evaluation classification and text generation pipelines to preview ([b350ac4](https://github.com/kubeflow/pipelines/commit/b350ac4ddc32bd699c4cf92e3f6774088fb89f4f)) +* **components:** Upload tensorboard metrics from RLHF pipeline if a tensorboard resource id is provided ([2bb57ec](https://github.com/kubeflow/pipelines/commit/2bb57ece351757ab5aefa57c74fda397425abd00)) +* **components:** Use 64 v3 TPUs for llm pipelines ([45fe8e8](https://github.com/kubeflow/pipelines/commit/45fe8e86583646143d1685d9e04d887ff27440ee)) +* **components:** use GCPC project id placeholder as project parameter default ([88e1045](https://github.com/kubeflow/pipelines/commit/88e1045c116a6dc8adac83b5936821fe2ef9b263)) +* **components:** Use t5-xl reward model when tuning t5-xxl ([6468b4d](https://github.com/kubeflow/pipelines/commit/6468b4db11c2cd60a7b2dba7482ab170a129982d)) +* **deployment:** add option to deploy mysql in KFP standalone ([\#9855](https://github.com/kubeflow/pipelines/issues/9855)) ([b086020](https://github.com/kubeflow/pipelines/commit/b086020a249bd7c99ceaf54b6c8d4535f9f73df0)) +* **eval:** Implement embedding metrics importing ([b71d43e](https://github.com/kubeflow/pipelines/commit/b71d43eff3d4cf95b9b67eb3a890524f9d115807)) +* **manifests:** Add a postgresql deployment manifest in third-party folder ([\#9581](https://github.com/kubeflow/pipelines/issues/9581)) 
([49bfda9](https://github.com/kubeflow/pipelines/commit/49bfda90cadc6437173909dea5b02cffc7cd7e66)) +* **mlmd:** Introduce PostgreSQL kustomization for MLMD. ([\#9927](https://github.com/kubeflow/pipelines/issues/9927)) ([b6be4ea](https://github.com/kubeflow/pipelines/commit/b6be4ea79bd4828e48f78eba5d69ef332524f309)) +* **sdk:** add logging at end of executor execution ([\#9895](https://github.com/kubeflow/pipelines/issues/9895)) ([ef0788d](https://github.com/kubeflow/pipelines/commit/ef0788d98690d0c70f747d8900ed719ce1328b35)) +* **sdk:** enable dependency-free runtime install of kfp ([\#9886](https://github.com/kubeflow/pipelines/issues/9886)) ([cf0e0cf](https://github.com/kubeflow/pipelines/commit/cf0e0cf87c7a33f906e2bc31da8c5356ed75a831)) +* **sdk:** support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` ([\#10010](https://github.com/kubeflow/pipelines/issues/10010)) ([e99f270](https://github.com/kubeflow/pipelines/commit/e99f2704fc164039d9106a76223ee4abf9402bfb)) +* **sdk:** support dsl.If, dsl.Elif, and dsl.Else ([\#9894](https://github.com/kubeflow/pipelines/issues/9894)) ([c6b236d](https://github.com/kubeflow/pipelines/commit/c6b236d1a0a2385421e823512bd4c37041f1af26)) +* Adding new test infrastructure for e2e pipeline tests ([d98fa90](https://github.com/kubeflow/pipelines/commit/d98fa90bff79f3c13853d6ed9044c308253deba4)) + + +### Bug Fixes + +* **backend:** Fix performance issue within a mysql request ([\#9680](https://github.com/kubeflow/pipelines/issues/9680)) ([213dd5a](https://github.com/kubeflow/pipelines/commit/213dd5a1afc436ea207a466f69af3f6bd528b058)) +* **backend:** fix timeouts with list run api. Fixes [\#9780](https://github.com/kubeflow/pipelines/issues/9780) ([\#9806](https://github.com/kubeflow/pipelines/issues/9806)) ([a6af41c](https://github.com/kubeflow/pipelines/commit/a6af41c23be0fdc2a038c8b46725faa49e8909c1)) +* **backend:** Move ConMaxLifeTime back to DbConfig.ConMaxLifeTime. ([\#9873](https://github.com/kubeflow/pipelines/issues/9873)) ([fe60742](https://github.com/kubeflow/pipelines/commit/fe60742b000763b0d589d3124b544091a0aa29fb)) +* **backend:** OutPutPath dir creation mode Fixes [\#7629](https://github.com/kubeflow/pipelines/issues/7629) ([\#9946](https://github.com/kubeflow/pipelines/issues/9946)) ([4003e56](https://github.com/kubeflow/pipelines/commit/4003e562713bd04fa94387d8b53dfbe3cf31cb12)) +* **backend:** Sync scheduled workflows v1 if APIVersion and Kind are missing. 
Fixes [\#9809](https://github.com/kubeflow/pipelines/issues/9809) ([\#9968](https://github.com/kubeflow/pipelines/issues/9968)) ([dcaafee](https://github.com/kubeflow/pipelines/commit/dcaafeee8b98e2733444455e7117b628f017422d)) +* **backend:** update requirements scripts ([\#10009](https://github.com/kubeflow/pipelines/issues/10009)) ([434b41a](https://github.com/kubeflow/pipelines/commit/434b41a19c983432e5f1ba218ac29e5075604db9)) +* **components:** Disable caching for LLM pipeline components that store temporary artifacts ([4fd1c02](https://github.com/kubeflow/pipelines/commit/4fd1c02fc0a17d4d1272dde69d81c0bbb1fe18b4)) +* **components:** fix parent_model parameter of ModelUploadOp ignored ([5a0e2bd](https://github.com/kubeflow/pipelines/commit/5a0e2bdef086cdcb96c7a33ff6d883cd063cb375)) +* **components:** Fix proto reference from range to _range in model evaluation preview utils function ([f323acf](https://github.com/kubeflow/pipelines/commit/f323acf4eba80d9909fa23dfafff0ef8adcf05a9)) +* **components:** Fix the feature transform engine arguments ([42df5e1](https://github.com/kubeflow/pipelines/commit/42df5e1301de4300c2b84205ef22c786f791d4c3)) +* **components:** Have RLHF importer use default image if override is falsy ([3b8cea0](https://github.com/kubeflow/pipelines/commit/3b8cea060fc3088520666fea26e6452bda2fdb15)) +* **components:** include model version in upload model output artifact (fix) ([b0cccfe](https://github.com/kubeflow/pipelines/commit/b0cccfee9432d2e787ba1f74eb8beb906222bea8)) +* **components:** Minor update for chunking parameter name ([07156ae](https://github.com/kubeflow/pipelines/commit/07156ae8a6c59b378de3a8e960cd7c703130037a)) +* **components:** Move model eval version.py to _implementation folder ([7f23bfc](https://github.com/kubeflow/pipelines/commit/7f23bfc88813e54927e46f1b019ea86c0f03ce70)) +* **components:** Update package import for google protobuf ([2cfe463](https://github.com/kubeflow/pipelines/commit/2cfe4636ae362efff177329143feb6f7f6f6a8a5)) +* **frontend:** content is not available ([\#9720](https://github.com/kubeflow/pipelines/issues/9720)) ([e137ae7](https://github.com/kubeflow/pipelines/commit/e137ae7faccad207fedbeeff80f8502e49a1fbc5)) +* **frontend:** Introduce ALLOWED_ARTIFACT_DOMAIN_REGEX flag to prevent accessing undesired domains. Remove user input string from server response. ([\#9844](https://github.com/kubeflow/pipelines/issues/9844)) ([83d7e71](https://github.com/kubeflow/pipelines/commit/83d7e719d08c73c2c535722b66b77cdf0cb4cd08)) +* **frontend:** Missing pipeline version name in new run page. ([\#9799](https://github.com/kubeflow/pipelines/issues/9799)) ([0153430](https://github.com/kubeflow/pipelines/commit/0153430206567e5c50c878bc7b2fcdf0a79817c0)) +* **frontend:** Recurring run card in experiment details page ([\#9697](https://github.com/kubeflow/pipelines/issues/9697)) ([d1be1d9](https://github.com/kubeflow/pipelines/commit/d1be1d9ffdbf4a8bcf9aa8df36ec22ac182ceb7e)) +* **frontend:** Splitting logsDetails into lines based on CR and LF. 
Fixes [\#9593](https://github.com/kubeflow/pipelines/issues/9593) ([\#9594](https://github.com/kubeflow/pipelines/issues/9594)) ([536d93a](https://github.com/kubeflow/pipelines/commit/536d93a1bffe035bf5222e7a48faf59d3b053800)) +* **manifests:** Update persistence agent manifests in marketplace helm chart ([\#9908](https://github.com/kubeflow/pipelines/issues/9908)) ([0fed207](https://github.com/kubeflow/pipelines/commit/0fed207e40535825a74c8b228ad2e9fce87b0a71)) +* **samples:** Update execution_order, loop_output samples to v2 pipelines ([\#9867](https://github.com/kubeflow/pipelines/issues/9867)) ([3e3a747](https://github.com/kubeflow/pipelines/commit/3e3a747b582abf21d95149502343d8efdfa5fc62)) +* **samples:** Update loop_parameter, loop_static samples to v2 pipelines ([\#9870](https://github.com/kubeflow/pipelines/issues/9870)) ([ff2e002](https://github.com/kubeflow/pipelines/commit/ff2e002157472cd69eef74c2010756797e4ed460)) +* **samples:** update samples to v2 pipelines ([\#9851](https://github.com/kubeflow/pipelines/issues/9851)) ([1002e0c](https://github.com/kubeflow/pipelines/commit/1002e0cf8cbb452c8839d4631ce90851e293581c)) +* **samples:** Update volume_ops sample to v2 pipelines ([\#9877](https://github.com/kubeflow/pipelines/issues/9877)) ([90cec16](https://github.com/kubeflow/pipelines/commit/90cec167c0e49e115910928b00b5c5e50eaeed7c)) +* **sdk:** fix --no-deps flag usage ([\#9982](https://github.com/kubeflow/pipelines/issues/9982)) ([cc2cd58](https://github.com/kubeflow/pipelines/commit/cc2cd5891822ff841d4447dfd097764d26a2dda5)) +* **sdk:** fix click dependency bug ([\#9634](https://github.com/kubeflow/pipelines/issues/9634)) ([92c02a6](https://github.com/kubeflow/pipelines/commit/92c02a61723d4f19882ffcf6fd9f82a152a8a576)) +* **sdk:** fix GCPC break in KFP SDK ([\#9791](https://github.com/kubeflow/pipelines/issues/9791)) ([540294a](https://github.com/kubeflow/pipelines/commit/540294aedb9622b13063fdbee287411e68ba656a)) +* **sdk:** fix GitHub release script ([\#9663](https://github.com/kubeflow/pipelines/issues/9663)) ([e92d8bc](https://github.com/kubeflow/pipelines/commit/e92d8bc7228159cbec8c3ffbe51501a83ac99622)) +* **sdk:** fix incorrect sub-DAG output type when using `dsl.Collected` ([\#10069](https://github.com/kubeflow/pipelines/issues/10069)) ([fcdff29](https://github.com/kubeflow/pipelines/commit/fcdff294a6323f6cb1c0e574fc7aa5ccc25e420b)) +* Move stale GHA operation config to the right place ([\#9935](https://github.com/kubeflow/pipelines/issues/9935)) ([63a0803](https://github.com/kubeflow/pipelines/commit/63a0803e3a355b7cade2ddef69e7b57d96707436)) +* **sdk:** fix kfp sdk v2 readme ([\#9668](https://github.com/kubeflow/pipelines/issues/9668)) ([e5fe981](https://github.com/kubeflow/pipelines/commit/e5fe981c1af88b02122eeb1a46fead3a26993aeb)) +* **sdk:** various kfp-dsl fixes ([\#9785](https://github.com/kubeflow/pipelines/issues/9785)) ([8ad9716](https://github.com/kubeflow/pipelines/commit/8ad97167ff9ea589d396728fdec4413fd559ade1)) + + +### Other Pull Requests + +* Intel oneAPI XGBoost daal4py example pipeline ([\#10044](https://github.com/kubeflow/pipelines/issues/10044)) ([271d4eb](https://github.com/kubeflow/pipelines/commit/271d4ebfafa5a3fab7f100212fd14e1eb28421bd)) +* No public description ([adb8677](https://github.com/kubeflow/pipelines/commit/adb86777a0c8bf8c28bb0cee1d936daf70d9a59f)) +* fix(components):Update batch_prediction_*_gcs_source to predictions_*_gcs_source in information retrieval preprocessor 
([e3bf085](https://github.com/kubeflow/pipelines/commit/e3bf085997aabc9024eed1fd2e002f77cc4fc43e)) +* Fix Persistence Agent SA Token time interval ([\#9892](https://github.com/kubeflow/pipelines/issues/9892)) ([6dfcee7](https://github.com/kubeflow/pipelines/commit/6dfcee7fa9d0e54a4797189d1f437367d4d9f4a2)) +* feat(backend) Enable auth between pesistence agent and pipelineAPI (ReportServer) ([\#9699](https://github.com/kubeflow/pipelines/issues/9699)) ([cb18d00](https://github.com/kubeflow/pipelines/commit/cb18d00bbbaed9cd77fc50dce739ed62c72b2356)) +* fix(backend) Replace LEFT with INNER JOIN when Archive Experiment ([\#9730](https://github.com/kubeflow/pipelines/issues/9730)) ([de89b1c](https://github.com/kubeflow/pipelines/commit/de89b1c6580d6efb69a4234d7d490ac24db9b3c9)) + ### [2.0.1](https://github.com/kubeflow/pipelines/compare/2.0.0...2.0.1) (2023-08-17) diff --git a/VERSION b/VERSION index 10bf840ed5..f93ea0ca33 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.1 \ No newline at end of file +2.0.2 \ No newline at end of file diff --git a/backend/api/v1beta1/python_http_client/README.md b/backend/api/v1beta1/python_http_client/README.md index f0e94be6d2..12742f284f 100644 --- a/backend/api/v1beta1/python_http_client/README.md +++ b/backend/api/v1beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.1 -- Package version: 2.0.1 +- API version: 2.0.2 +- Package version: 2.0.2 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py index fc1497d659..8d3f7b1a35 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.1" +__version__ = "2.0.2" # import apis into sdk package from kfp_server_api.api.experiment_service_api import ExperimentServiceApi diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py index 5b4cb571de..e5afaf6b98 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'OpenAPI-Generator/2.0.1/python' + self.user_agent = 'OpenAPI-Generator/2.0.2/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py index fe73377512..578dcda2dc 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.1\n"\ - "SDK Package Version: 2.0.1".\ + "Version of the API: 2.0.2\n"\ + "SDK Package Version: 2.0.2".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v1beta1/python_http_client/setup.py b/backend/api/v1beta1/python_http_client/setup.py index d3fd643008..aa45f1e52d 100644 --- a/backend/api/v1beta1/python_http_client/setup.py +++ b/backend/api/v1beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.1" +VERSION = "2.0.2" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json index a4ebf3ca8a..233d7a0e88 100644 --- a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.1", + "version": "2.0.2", "description": "This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/backend/api/v2beta1/python_http_client/README.md b/backend/api/v2beta1/python_http_client/README.md index dd98e9d6bd..7b2ec51e9e 100644 --- a/backend/api/v2beta1/python_http_client/README.md +++ b/backend/api/v2beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. 
The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.1 -- Package version: 2.0.1 +- API version: 2.0.2 +- Package version: 2.0.2 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py index 87463f0a21..3f33d9f4fa 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.1" +__version__ = "2.0.2" # import apis into sdk package from kfp_server_api.api.auth_service_api import AuthServiceApi diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py index 5b4cb571de..e5afaf6b98 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/2.0.1/python' + self.user_agent = 'OpenAPI-Generator/2.0.2/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py index fe73377512..578dcda2dc 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.1\n"\ - "SDK Package Version: 2.0.1".\ + "Version of the API: 2.0.2\n"\ + "SDK Package Version: 2.0.2".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v2beta1/python_http_client/setup.py b/backend/api/v2beta1/python_http_client/setup.py index d3fd643008..aa45f1e52d 100644 --- a/backend/api/v2beta1/python_http_client/setup.py +++ b/backend/api/v2beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.1" +VERSION = "2.0.2" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json index bf218246d0..60d0004a14 100644 --- a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.1", + "version": "2.0.2", "description": "This file contains REST API specification for Kubeflow Pipelines. 
The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml index fd778769e4..77728a31db 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml @@ -12,7 +12,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.1 + version: 2.0.2 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index 21305e19de..bc2c3fda7f 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -1,9 +1,9 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: 2.0.1 + publishedVersion: 2.0.2 publishedVersionMetadata: - releaseNote: Based on 2.0.1 version. + releaseNote: Based on 2.0.2 version. releaseTypes: - Feature recommended: false diff --git a/manifests/kustomize/base/cache-deployer/kustomization.yaml b/manifests/kustomize/base/cache-deployer/kustomization.yaml index a9640aa6cb..1e82e5ef34 100644 --- a/manifests/kustomize/base/cache-deployer/kustomization.yaml +++ b/manifests/kustomize/base/cache-deployer/kustomization.yaml @@ -8,4 +8,4 @@ commonLabels: app: cache-deployer images: - name: gcr.io/ml-pipeline/cache-deployer - newTag: 2.0.1 + newTag: 2.0.2 diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml index 2c2001ed0b..2f2ca2f4b4 100644 --- a/manifests/kustomize/base/cache/kustomization.yaml +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -10,4 +10,4 @@ commonLabels: app: cache-server images: - name: gcr.io/ml-pipeline/cache-server - newTag: 2.0.1 + newTag: 2.0.2 diff --git a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml index cd50fe5dce..b8cfddd1ad 100644 --- a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml +++ b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml @@ -11,8 +11,8 @@ data: until the changes take effect. A quick way to restart all deployments in a namespace: `kubectl rollout restart deployment -n `. 
appName: pipeline - appVersion: 2.0.1 - dbHost: mysql # relic to be removed after release + appVersion: 2.0.2 + dbHost: mysql # relic to be removed after release dbPort: "3306" # relic to be removed after release dbType: mysql mysqlHost: mysql diff --git a/manifests/kustomize/base/metadata/base/kustomization.yaml b/manifests/kustomize/base/metadata/base/kustomization.yaml index 5ad3f07bac..b25f43b46a 100644 --- a/manifests/kustomize/base/metadata/base/kustomization.yaml +++ b/manifests/kustomize/base/metadata/base/kustomization.yaml @@ -9,4 +9,4 @@ resources: - metadata-grpc-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-envoy - newTag: 2.0.1 + newTag: 2.0.2 diff --git a/manifests/kustomize/base/pipeline/kustomization.yaml b/manifests/kustomize/base/pipeline/kustomization.yaml index f3ca5274c8..492b72b00d 100644 --- a/manifests/kustomize/base/pipeline/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/kustomization.yaml @@ -37,14 +37,14 @@ resources: - kfp-launcher-configmap.yaml images: - name: gcr.io/ml-pipeline/api-server - newTag: 2.0.1 + newTag: 2.0.2 - name: gcr.io/ml-pipeline/persistenceagent - newTag: 2.0.1 + newTag: 2.0.2 - name: gcr.io/ml-pipeline/scheduledworkflow - newTag: 2.0.1 + newTag: 2.0.2 - name: gcr.io/ml-pipeline/frontend - newTag: 2.0.1 + newTag: 2.0.2 - name: gcr.io/ml-pipeline/viewer-crd-controller - newTag: 2.0.1 + newTag: 2.0.2 - name: gcr.io/ml-pipeline/visualization-server - newTag: 2.0.1 + newTag: 2.0.2 diff --git a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml index 2d118e33cf..f27ba77689 100644 --- a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml @@ -7,4 +7,4 @@ resources: - metadata-writer-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-writer - newTag: 2.0.1 + newTag: 2.0.2 diff --git a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml index e8798ef205..064b195182 100644 --- a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml +++ b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: gcr.io/ml-pipeline/inverse-proxy-agent - newTag: 2.0.1 + newTag: 2.0.2 resources: - proxy-configmap.yaml - proxy-deployment.yaml From 412216f832a848bfc61ce289aed819d7f2860fdd Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 11 Oct 2023 13:55:31 -0700 Subject: [PATCH 208/253] feat(components): Add question_answer support for AutoSxS default instructions PiperOrigin-RevId: 572677918 --- .../_implementation/llm/function_based.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index 8f27dcb94b..122b67201c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -356,9 +356,14 @@ def generate_default_instruction( task = task.lower() if task == 'summarization': return f'Summarize in less than {target_sequence_length} words.' 
+ + elif task == 'question_answer': + return f'Answer the question in less than {target_sequence_length} words.' + else: raise ValueError( - f'Task not recognized: {task}. Supported tasks are: summarization.' + f'Task not recognized: {task}. Supported tasks are: "summarization",' + ' "question_answer".' ) From 1386a826ba2bcdbc19eb2007ca43f6acd1031e4d Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 12 Oct 2023 07:16:54 -0700 Subject: [PATCH 209/253] feat(components): Set display names for SFT, RLHF and LLM inference pipelines PiperOrigin-RevId: 572897105 --- components/google-cloud/RELEASE.md | 1 + .../preview/llm/infer/component.py | 10 +-- .../preview/llm/rlhf/component.py | 66 ++++++++++--------- 3 files changed, 42 insertions(+), 35 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 5e3e393e9b..9933f01511 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -2,6 +2,7 @@ * Upload tensorboard metrics from `preview.llm.rlhf_pipeline` if a `tensorboard_resource_id` is provided at runtime. * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. * Add `preview.automl.vision` and `DataConverterJobOp`. +* Set display names for `preview.llm` pipelines. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index 0096b89796..cfa0f71556 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -64,14 +64,14 @@ def infer_pipeline( machine_spec = function_based.resolve_machine_spec( location=location, use_test_spec=env.get_use_test_machine_spec(), - ) + ).set_display_name('Resolve Machine Spec') reference_model_metadata = function_based.resolve_reference_model_metadata( large_model_reference=large_model_reference - ).set_display_name('BaseModelMetadataResolver') + ).set_display_name('Resolve Model Metadata') prompt_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_importer', - ).set_display_name('PromptDatasetImageUriResolver') + ).set_display_name('Resolve Prompt Dataset Image URI') prompt_dataset_importer = ( private_text_importer.PrivateTextImporter( project=project, @@ -86,7 +86,7 @@ def infer_pipeline( image_uri=prompt_dataset_image_uri.output, instruction=instruction, ) - .set_display_name('PromptDatasetImporter') + .set_display_name('Import Prompt Dataset') .set_caching_options(False) ) @@ -94,7 +94,7 @@ def infer_pipeline( image_name='infer', accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], - ).set_display_name('BulkInferrerImageUriResolver') + ).set_display_name('Resolve Bulk Inferrer Image URI') bulk_inference = bulk_inferrer.BulkInferrer( project=project, location=location, diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py index b421cc2c8a..d48e5b6a77 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py +++ 
b/components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py @@ -93,15 +93,15 @@ def rlhf_pipeline( upload_location = 'us-central1' machine_spec = function_based.resolve_machine_spec( location=location, use_test_spec=env.get_use_test_machine_spec() - ) + ).set_display_name('Resolve Machine Spec') reference_model_metadata = function_based.resolve_reference_model_metadata( large_model_reference=large_model_reference, - ).set_display_name('BaseModelMetadataResolver') + ).set_display_name('Resolve Model Metadata') prompt_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_importer' - ).set_display_name('PromptDatasetImageUriResolver') + ).set_display_name('Resolve Prompt Dataset Image URI') prompt_dataset_importer = ( private_text_importer.PrivateTextImporter( project=project, @@ -117,13 +117,13 @@ def rlhf_pipeline( image_uri=prompt_dataset_image_uri.output, instruction=instruction, ) - .set_display_name('PromptDatasetImporter') + .set_display_name('Import Prompt Dataset') .set_caching_options(False) ) preference_dataset_image_uri = function_based.resolve_private_image_uri( image_name='text_comparison_importer' - ).set_display_name('PreferenceDatasetImageUriResolver') + ).set_display_name('Resolve Preference Dataset Image URI') comma_separated_candidates_field_names = ( function_based.convert_to_delimited_string(items=candidate_columns) ) @@ -142,7 +142,7 @@ def rlhf_pipeline( image_uri=preference_dataset_image_uri.output, instruction=instruction, ) - .set_display_name('PreferenceDatasetImporter') + .set_display_name('Import Preference Dataset') .set_caching_options(False) ) @@ -150,7 +150,7 @@ def rlhf_pipeline( image_name='reward_model', accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], - ).set_display_name('RewardModelImageUriResolver') + ).set_display_name('Resolve Reward Model Image URI') reward_model = ( reward_model_trainer.RewardModelTrainer( project=project, @@ -175,13 +175,13 @@ def rlhf_pipeline( learning_rate_multiplier=reward_model_learning_rate_multiplier, lora_dim=reward_model_lora_dim, ) - .set_display_name('RewardModelTrainer') + .set_display_name('Reward Model Trainer') .set_caching_options(False) ) has_tensorboard_id = function_based.value_exists( value=tensorboard_resource_id - ) + ).set_display_name('Resolve Tensorboard Resource ID') with kfp.dsl.Condition( # pytype: disable=wrong-arg-types has_tensorboard_id.output == True, # pylint: disable=singleton-comparison, g-explicit-bool-comparison name='Upload Reward Model Tensorboard Metrics', @@ -194,13 +194,13 @@ def rlhf_pipeline( f'{kfp.dsl.PIPELINE_JOB_ID_PLACEHOLDER}-' f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' ), - ) + ).set_display_name('Reward Model Tensorboard Metrics Uploader') rl_image_uri = function_based.resolve_private_image_uri( image_name='reinforcer', accelerator_type=machine_spec.outputs['accelerator_type'], accelerator_count=machine_spec.outputs['accelerator_count'], - ).set_display_name('ReinforcerImageUriResolver') + ).set_display_name('Resolve Reinforcer Image URI') rl_model = ( reinforcer.Reinforcer( project=project, @@ -246,9 +246,11 @@ def rlhf_pipeline( f'{kfp.dsl.PIPELINE_JOB_ID_PLACEHOLDER}-' f'{kfp.dsl.PIPELINE_TASK_ID_PLACEHOLDER}' ), - ) + ).set_display_name('Reinforcement Learning Tensorboard Metrics Uploader') - should_perform_inference = function_based.value_exists(value=eval_dataset) + should_perform_inference = function_based.value_exists( + value=eval_dataset + 
).set_display_name('Resolve Inference Dataset') with kfp.dsl.Condition( should_perform_inference.output == True, name='Perform Inference' # pylint: disable=singleton-comparison ): @@ -266,39 +268,43 @@ def rlhf_pipeline( adapter_artifact = kfp.dsl.importer( artifact_uri=rl_model.outputs['output_adapter_path'], artifact_class=kfp.dsl.Artifact, - ) + ).set_display_name('Import Tuned Adapter') regional_endpoint = function_based.resolve_regional_endpoint( upload_location=upload_location - ) + ).set_display_name('Resolve Regional Endpoint') display_name = function_based.resolve_model_display_name( large_model_reference=reference_model_metadata.outputs[ 'large_model_reference' ], model_display_name=model_display_name, - ) + ).set_display_name('Resolve Model Display Name') upload_model = function_based.resolve_upload_model( large_model_reference=reference_model_metadata.outputs[ 'large_model_reference' ] - ) - upload_task = upload_llm_model.upload_llm_model( - project=_placeholders.PROJECT_ID_PLACEHOLDER, - location=upload_location, - regional_endpoint=regional_endpoint.output, - artifact_uri=adapter_artifact.output, - model_display_name=display_name.output, - model_reference_name='text-bison@001', - upload_model=upload_model.output, - ).set_env_variable( - name='VERTEX_AI_PIPELINES_RUN_LABELS', - value=json.dumps({'tune-type': 'rlhf'}), + ).set_display_name('Resolve Upload Model') + upload_task = ( + upload_llm_model.upload_llm_model( + project=_placeholders.PROJECT_ID_PLACEHOLDER, + location=upload_location, + regional_endpoint=regional_endpoint.output, + artifact_uri=adapter_artifact.output, + model_display_name=display_name.output, + model_reference_name='text-bison@001', + upload_model=upload_model.output, + ) + .set_env_variable( + name='VERTEX_AI_PIPELINES_RUN_LABELS', + value=json.dumps({'tune-type': 'rlhf'}), + ) + .set_display_name('Upload Model') ) deploy_model = function_based.resolve_deploy_model( deploy_model=deploy_model, large_model_reference=reference_model_metadata.outputs[ 'large_model_reference' ], - ) + ).set_display_name('Resolve Deploy Model') deploy_task = deploy_llm_model.create_endpoint_and_deploy_model( project=_placeholders.PROJECT_ID_PLACEHOLDER, location=upload_location, @@ -306,7 +312,7 @@ def rlhf_pipeline( display_name=display_name.output, regional_endpoint=regional_endpoint.output, deploy_model=deploy_model.output, - ) + ).set_display_name('Deploy Model') return PipelineOutput( model_resource_name=upload_task.outputs['model_resource_name'], From 4d71fdac3fc92dd4d54c6be3a28725667b8f3c5e Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 12 Oct 2023 14:34:25 -0700 Subject: [PATCH 210/253] feat(components): Update image tag used by llm pipelines PiperOrigin-RevId: 573014609 --- .../google_cloud_pipeline_components/_implementation/llm/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py index 7b2bbfbe86..af0c853c0a 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/env.py @@ -16,7 +16,7 @@ def get_private_image_tag() -> str: - return os.getenv('PRIVATE_IMAGE_TAG', '20230918_1327_RC00') + return os.getenv('PRIVATE_IMAGE_TAG', '20231010_1107_RC00') def get_use_test_machine_spec() -> bool: From 9aa750e62f6e225d037ecdda9bf7cab95f05675d Mon Sep 17 00:00:00 
2001 From: Googler Date: Thu, 12 Oct 2023 15:17:03 -0700 Subject: [PATCH 211/253] feat(components): [text2sql] Initialize preprocess component and integrate with text2sql pipeline PiperOrigin-RevId: 573025616 --- .../evaluation_llm_text2sql_pipeline.py | 54 ++++++++- .../text2sql_preprocess/__init__.py | 14 +++ .../text2sql_preprocess/component.py | 103 ++++++++++++++++++ 3 files changed, 168 insertions(+), 3 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index 9ce8270e75..efead8599b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -13,6 +13,8 @@ # limitations under the License. """Text2SQL evaluation pipeline.""" +from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_preprocess.component import text2sql_evaluation_preprocess as Text2SQLEvaluationPreprocessOp from google_cloud_pipeline_components.types import artifact_types import kfp @@ -22,14 +24,48 @@ @kfp.dsl.pipeline(name=_PIPELINE_NAME) def evaluation_llm_text2sql_pipeline( - location: str, model_name: str, + evaluation_data_source_path: str, + tables_metadata_path: str, + prompt_template_path: str = '', + project: str = _placeholders.PROJECT_ID_PLACEHOLDER, + location: str = _placeholders.LOCATION_PLACEHOLDER, + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', ): """The LLM Evaluation Text2SQL Pipeline. Args: - location: Required. The GCP region that runs the pipeline components. - model_name: The path for model to generate embeddings. + model_name: The Model used to run text2sql evaluation. Must be a publisher + model or a managed Model sharing the same ancestor location. Starting this + job has no impact on any existing deployments of the Model and their + resources. Supported model is publishers/google/models/text-bison. + evaluation_data_source_path: Required. The path for json file containing + text2sql evaluation input dataset, including natural language question, + ground truth SQL / SQL results. + tables_metadata_path: Required. The path for json file containing database + metadata, including table names, schema fields. + prompt_template_path: Required. The path for json file containing prompt + template. Will provide default value if users do not sepecify. + project: Optional. The GCP project that runs the pipeline components. + Default value is the same project used to run the pipeline. + location: Optional. The GCP region that runs the pipeline components. + Default value is the same location used to run the pipeline. + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16`. 
More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Service account to run the dataflow job. If not set, + dataflow will use the default worker service account. For more details, + see + https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account + network: Dataflow's fully qualified subnetwork name, when empty the default + subnetwork will be used. More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. """ get_vertex_model_task = kfp.dsl.importer( @@ -40,3 +76,15 @@ def evaluation_llm_text2sql_pipeline( metadata={'resourceName': model_name}, ) get_vertex_model_task.set_display_name('get-vertex-model') + + _ = Text2SQLEvaluationPreprocessOp( + project=project, + location=location, + evaluation_data_source_path=evaluation_data_source_path, + tables_metadata_path=tables_metadata_path, + prompt_template_path=prompt_template_path, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/__init__.py new file mode 100644 index 0000000000..198fd983f0 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Text2SQL Evaluation Preprocess Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py new file mode 100644 index 0000000000..4f9aa155d3 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py @@ -0,0 +1,103 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Text2SQL evaluation preprocess component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +@container_component +def text2sql_evaluation_preprocess( + gcp_resources: OutputPath(str), + model_inference_input_path: OutputPath(str), + project: str, + location: str, + evaluation_data_source_path: str, + tables_metadata_path: str, + prompt_template_path: str = '', + display_name: str = 'text2sql-evaluation-preprocess', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Preprocess inputs for text2sql evaluation pipeline. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + evaluation_data_source_path: Required. The path for json file containing + text2sql evaluation input dataset, including natural language question, + ground truth SQL / SQL results. + tables_metadata_path: Required. The path for json file containing database + metadata, including table names, schema fields. + prompt_template_path: Required. The path for json file containing prompt + template. Will provide default value if users do not sepecify. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + model_inference_input_path (str): + The GCS path to save preprocessed data to run batch + prediction to get table names. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=version.LLM_EVAL_IMAGE_TAG, + args=[ + f'--text2sql_preprocess={True}', + f'--project={project}', + f'--location={location}', + f'--evaluation_data_source_path={evaluation_data_source_path}', + f'--tables_metadata_path={tables_metadata_path}', + f'--prompt_template_path={prompt_template_path}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--model_inference_input_path={model_inference_input_path}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) From 633ddeb07e9212d2e373dba8d20a0f6d67ab037d Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 13 Oct 2023 14:17:45 -0700 Subject: [PATCH 212/253] feat(components): [text2sql] Initialize validate and process component PiperOrigin-RevId: 573317490 --- .../evaluation_llm_text2sql_pipeline.py | 15 +++ .../text2sql_validate_and_process/__init__.py | 14 +++ .../component.py | 102 ++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index efead8599b..b7a9bc0569 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -15,6 +15,7 @@ from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_preprocess.component import text2sql_evaluation_preprocess as Text2SQLEvaluationPreprocessOp +from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_validate_and_process.component import text2sql_evaluation_validate_and_process as Text2SQLEvaluationValidateAndProcessOp from google_cloud_pipeline_components.types import artifact_types import kfp @@ -88,3 +89,17 @@ def evaluation_llm_text2sql_pipeline( network=network, encryption_spec_key_name=encryption_spec_key_name, ) + + _ = Text2SQLEvaluationValidateAndProcessOp( + project=project, + location=location, + # TODO(bozhengbz) Add value to model_inference_results_path + # when model batch prediction component is added. 
+ model_inference_results_path='gs://test/model_inference_results.json', + tables_metadata_path=tables_metadata_path, + prompt_template_path=prompt_template_path, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/__init__.py new file mode 100644 index 0000000000..7980146e7d --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Text2SQL Evaluation Validate and Process Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py new file mode 100644 index 0000000000..bc2deb06d4 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py @@ -0,0 +1,102 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Text2SQL evaluation preprocess component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +@container_component +def text2sql_evaluation_validate_and_process( + gcp_resources: OutputPath(str), + model_inference_input_path: OutputPath(str), + project: str, + location: str, + model_inference_results_path: str, + tables_metadata_path: str, + prompt_template_path: str = '', + display_name: str = 'text2sql-evaluation-validate-and-process', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Text2SQL evaluation component to validate model inference results in previous step and generate model inference input in the next step. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + model_inference_results_path: Required. The path for json file containing + text2sql model inference results from the last step. + tables_metadata_path: Required. The path for json file containing database + metadata, including table names, schema fields. + prompt_template_path: Required. The path for json file containing prompt + template. Will provide default value if users do not sepecify. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + model_inference_input_path (str): + The GCS path to save processed data to run batch prediction in the + next step. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=version.LLM_EVAL_IMAGE_TAG, + args=[ + f'--text2sql_validate_and_process={True}', + f'--project={project}', + f'--location={location}', + f'--model_inference_results_path={model_inference_results_path}', + f'--tables_metadata_path={tables_metadata_path}', + f'--prompt_template_path={prompt_template_path}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--model_inference_input_path={model_inference_input_path}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) From ea93979eed02e131bd20180da149b9465670dfe1 Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 13 Oct 2023 15:07:42 -0700 Subject: [PATCH 213/253] feat(components): [text2sql] Initialize evaluation component PiperOrigin-RevId: 573329341 --- .../evaluation_llm_text2sql_pipeline.py | 22 ++++ .../text2sql_evaluation/__init__.py | 14 +++ .../text2sql_evaluation/component.py | 116 ++++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index b7a9bc0569..f9e59493b4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -14,6 +14,7 @@ """Text2SQL evaluation pipeline.""" from google_cloud_pipeline_components import _placeholders +from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_evaluation.component import text2sql_evaluation as Text2SQLEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_preprocess.component import text2sql_evaluation_preprocess as Text2SQLEvaluationPreprocessOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_validate_and_process.component import text2sql_evaluation_validate_and_process as Text2SQLEvaluationValidateAndProcessOp from google_cloud_pipeline_components.types import artifact_types @@ -29,6 +30,8 @@ def evaluation_llm_text2sql_pipeline( evaluation_data_source_path: str, tables_metadata_path: str, prompt_template_path: str = '', + sql_dialect: str = 'bigquery', + evaluation_method: str = 'parser', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, machine_type: str = 'e2-highmem-16', @@ -50,6 +53,10 @@ def evaluation_llm_text2sql_pipeline( metadata, including table names, schema fields. prompt_template_path: Required. The path for json file containing prompt template. Will provide default value if users do not sepecify. + sql_dialect: Optional. 
SQL dialect type, e.g. bigquery, mysql, etc. Default + value is bigquery. + evaluation_method: Optional. Text2SQL evaluation method, value can be + 'parser', 'execution', 'all'. Default value is 'parser'. project: Optional. The GCP project that runs the pipeline components. Default value is the same project used to run the pipeline. location: Optional. The GCP region that runs the pipeline components. @@ -103,3 +110,18 @@ def evaluation_llm_text2sql_pipeline( network=network, encryption_spec_key_name=encryption_spec_key_name, ) + + _ = Text2SQLEvaluationOp( + project=project, + location=location, + sql_dialect=sql_dialect, + evaluation_method=evaluation_method, + # TODO(bozhengbz) Add value to model_inference_results_path + # when model batch prediction component is added. + model_inference_results_path='gs://test/model_inference_results.json', + tables_metadata_path=tables_metadata_path, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/__init__.py new file mode 100644 index 0000000000..5e23a86fc7 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation LLM Text2SQL Evaluation Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py new file mode 100644 index 0000000000..063172067a --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py @@ -0,0 +1,116 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Text2SQL evaluation component used in KFP pipelines.""" + +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import container_component +from kfp.dsl import Metrics +from kfp.dsl import Output +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + + +@container_component +def text2sql_evaluation( + gcp_resources: OutputPath(str), + error_analysis_path: OutputPath(str), + generated_sql_results_path: OutputPath(str), + text2sql_evaluation_metrics: Output[Metrics], + project: str, + location: str, + sql_dialect: str, + evaluation_method: str, + model_inference_results_path: str, + tables_metadata_path: str, + display_name: str = 'text2sql-evaluation', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Get evaluation metrics, generated SQL results and error analysis in text2sql evaluation pipeline. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + sql_dialect: Required. SQL dialect type, e.g. bigquery, mysql, etc. + evaluation_method: Required. Text2SQL evaluation method, value can be + 'parser', 'execution', 'all'. + model_inference_results_path: Required. The path for json file containing + text2sql model inference results from the last step. + tables_metadata_path: Required. The path for json file containing database + metadata, including table names, schema fields. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + error_analysis_path: + Path for aggregrated error analysis for genereated SQL queris. + generated_sql_results_path: + Path for generated sql queries and execution results (optional) + in json format. + text2sql_evaluation_metrics: + A Metrics artifact representing the text2sql evaluation metrics. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=version.LLM_EVAL_IMAGE_TAG, + args=[ + f'--text2sql_evaluation={True}', + f'--project={project}', + f'--location={location}', + f'--sql_dialect={sql_dialect}', + f'--evaluation_method={evaluation_method}', + f'--model_inference_results_path={model_inference_results_path}', + f'--tables_metadata_path={tables_metadata_path}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + f'--error_analysis_path={error_analysis_path}', + f'--generated_sql_results_path={generated_sql_results_path}', + f'--text2sql_evaluation_metrics_output_path={text2sql_evaluation_metrics.path}', + f'--error_analysis_path={error_analysis_path}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) From 0487f9a8b1d8ab0d96d757bd4b598ffd353ecc81 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 13 Oct 2023 15:15:56 -0700 Subject: [PATCH 214/253] feat(components): Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline PiperOrigin-RevId: 573331226 --- components/google-cloud/RELEASE.md | 1 + .../evaluation_automl_unstructure_data_pipeline.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 9933f01511..91788d695a 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -3,6 +3,7 @@ * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. * Add `preview.automl.vision` and `DataConverterJobOp`. * Set display names for `preview.llm` pipelines. +* Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py index 34fc9ad764..4148df50de 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List, NamedTuple +from typing import Any, List, NamedTuple from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model import GetVertexModelOp @@ -43,6 +43,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab batch_predict_max_replica_count: int = 10, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic @@ -81,6 +82,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. @@ -173,6 +175,7 @@ def evaluation_automl_unstructure_data_classification_pipeline( # pylint: disab encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, model=get_model_task.outputs['model'], + slicing_specs=slicing_specs, ) # Import the evaluation result to Vertex AI. @@ -373,6 +376,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de batch_predict_max_replica_count: int = 10, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic @@ -409,6 +413,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. 
batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code in https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice. evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. @@ -442,6 +447,7 @@ def evaluation_automl_unstructure_data_pipeline( # pylint: disable=dangerous-de batch_predict_max_replica_count=batch_predict_max_replica_count, batch_predict_accelerator_type=batch_predict_accelerator_type, batch_predict_accelerator_count=batch_predict_accelerator_count, + slicing_specs=slicing_specs, evaluation_prediction_label_column=evaluation_prediction_label_column, evaluation_prediction_score_column=evaluation_prediction_score_column, evaluation_class_labels=evaluation_class_labels, From c9032716ab2013df56cb1078a703d48ed8e36fb4 Mon Sep 17 00:00:00 2001 From: gkcalat <35157096+gkcalat@users.noreply.github.com> Date: Fri, 13 Oct 2023 15:24:52 -0700 Subject: [PATCH 215/253] fix(sdk): Fix OOB for IPython and refactor. Closes #10075. (#10094) * Fix OOB for IPython and refactor. Closes #10075. * Address comments --- sdk/python/kfp/client/auth.py | 15 +++- sdk/python/kfp/client/auth_test.py | 100 +++++++++++++++++++++++++++ sdk/python/kfp/client/client.py | 20 ++---- sdk/python/kfp/client/client_test.py | 26 ++----- 4 files changed, 122 insertions(+), 39 deletions(-) create mode 100644 sdk/python/kfp/client/auth_test.py diff --git a/sdk/python/kfp/client/auth.py b/sdk/python/kfp/client/auth.py index 4d71b40433..6aea635bfc 100644 --- a/sdk/python/kfp/client/auth.py +++ b/sdk/python/kfp/client/auth.py @@ -261,7 +261,8 @@ def get_auth_code(client_id: str) -> Tuple[str, str]: 'scope=openid%20email&access_type=offline&' f'redirect_uri={redirect_uri}') authorization_response = None - if ('SSH_CONNECTION' in os.environ) or ('SSH_CLIENT' in os.environ): + if ('SSH_CONNECTION' in os.environ) or ('SSH_CLIENT' + in os.environ) or is_ipython(): try: print(( 'SSH connection detected. Please follow the instructions below.' 
@@ -509,3 +510,15 @@ def fetch_auth_token_from_response(url: str) -> str: if isinstance(access_code, list): access_code = str(access_code.pop(0)) return access_code + + +def is_ipython() -> bool: + """Returns whether we are running in notebook.""" + try: + import IPython + ipy = IPython.get_ipython() + if ipy is None: + return False + except ImportError: + return False + return True diff --git a/sdk/python/kfp/client/auth_test.py b/sdk/python/kfp/client/auth_test.py new file mode 100644 index 0000000000..db58b51adb --- /dev/null +++ b/sdk/python/kfp/client/auth_test.py @@ -0,0 +1,100 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from unittest.mock import MagicMock +from unittest.mock import patch + +from absl.testing import parameterized +from kfp.client import auth + + +class TestAuth(parameterized.TestCase): + + def test_is_ipython_return_false(self): + mock = MagicMock() + with patch.dict('sys.modules', IPython=mock): + mock.get_ipython.return_value = None + self.assertFalse(auth.is_ipython()) + + def test_is_ipython_return_true(self): + mock = MagicMock() + with patch.dict('sys.modules', IPython=mock): + mock.get_ipython.return_value = 'Something' + self.assertTrue(auth.is_ipython()) + + def test_is_ipython_should_raise_error(self): + mock = MagicMock() + with patch.dict('sys.modules', mock): + mock.side_effect = ImportError + self.assertFalse(auth.is_ipython()) + + @patch('builtins.input', lambda *args: + 'https://oauth2.example.com/auth?code=4/P7q7W91a-oMsCeLvIaQm6bTrgtp7' + ) + @patch('kfp.client.auth.is_ipython', lambda *args: True) + @patch.dict(os.environ, dict(), clear=True) + def test_get_auth_code_from_ipython(self): + token, redirect_uri = auth.get_auth_code('sample-client-id') + self.assertEqual(token, '4/P7q7W91a-oMsCeLvIaQm6bTrgtp7') + self.assertEqual(redirect_uri, 'http://localhost:9901') + + @patch('builtins.input', lambda *args: + 'https://oauth2.example.com/auth?code=4/P7q7W91a-oMsCeLvIaQm6bTrgtp7' + ) + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, {'SSH_CONNECTION': 'ENABLED'}, clear=True) + def test_get_auth_code_from_remote_connection(self): + token, redirect_uri = auth.get_auth_code('sample-client-id') + self.assertEqual(token, '4/P7q7W91a-oMsCeLvIaQm6bTrgtp7') + self.assertEqual(redirect_uri, 'http://localhost:9901') + + @patch('builtins.input', lambda *args: + 'https://oauth2.example.com/auth?code=4/P7q7W91a-oMsCeLvIaQm6bTrgtp7' + ) + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, {'SSH_CLIENT': 'ENABLED'}, clear=True) + def test_get_auth_code_from_remote_client(self): + token, redirect_uri = auth.get_auth_code('sample-client-id') + self.assertEqual(token, '4/P7q7W91a-oMsCeLvIaQm6bTrgtp7') + self.assertEqual(redirect_uri, 'http://localhost:9901') + + @patch('builtins.input', lambda *args: 'https://oauth2.example.com/auth') + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, {'SSH_CLIENT': 'ENABLED'}, 
clear=True) + def test_get_auth_code_from_remote_client_missing_code(self): + self.assertRaises(KeyError, auth.get_auth_code, 'sample-client-id') + + @patch('kfp.client.auth.get_auth_response_local', lambda *args: + 'https://oauth2.example.com/auth?code=4/P7q7W91a-oMsCeLvIaQm6bTrgtp7' + ) + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, dict(), clear=True) + def test_get_auth_code_from_local(self): + token, redirect_uri = auth.get_auth_code('sample-client-id') + self.assertEqual(token, '4/P7q7W91a-oMsCeLvIaQm6bTrgtp7') + self.assertEqual(redirect_uri, 'http://localhost:9901') + + @patch('kfp.client.auth.get_auth_response_local', lambda *args: None) + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, dict(), clear=True) + def test_get_auth_code_from_local_empty_response(self): + self.assertRaises(ValueError, auth.get_auth_code, 'sample-client-id') + + @patch('kfp.client.auth.get_auth_response_local', + lambda *args: 'this-is-an-invalid-response') + @patch('kfp.client.auth.is_ipython', lambda *args: False) + @patch.dict(os.environ, dict(), clear=True) + def test_get_auth_code_from_local_invalid_response(self): + self.assertRaises(KeyError, auth.get_auth_code, 'sample-client-id') diff --git a/sdk/python/kfp/client/client.py b/sdk/python/kfp/client/client.py index 57b4c6d1f9..448433ed9d 100644 --- a/sdk/python/kfp/client/client.py +++ b/sdk/python/kfp/client/client.py @@ -324,18 +324,6 @@ def _load_config( def _is_inverse_proxy_host(self, host: str) -> bool: return bool(re.match(r'\S+.googleusercontent.com/{0,1}$', host)) - def _is_ipython(self) -> bool: - """Returns whether we are running in notebook.""" - try: - import IPython - ipy = IPython.get_ipython() - if ipy is None: - return False - except ImportError: - return False - - return True - def _get_url_prefix(self) -> str: if self._uihost: # User's own connection. @@ -488,7 +476,7 @@ def create_experiment( experiment = self._experiment_api.create_experiment(body=experiment) link = f'{self._get_url_prefix()}/#/experiments/details/{experiment.experiment_id}' - if self._is_ipython(): + if auth.is_ipython(): import IPython html = f'Experiment details.' IPython.display.display(IPython.display.HTML(html)) @@ -744,7 +732,7 @@ def run_pipeline( response = self._run_api.create_run(body=run_body) link = f'{self._get_url_prefix()}/#/runs/details/{response.run_id}' - if self._is_ipython(): + if auth.is_ipython(): import IPython html = (f'Run details.') IPython.display.display(IPython.display.HTML(html)) @@ -1424,7 +1412,7 @@ def upload_pipeline( description=description, namespace=namespace) link = f'{self._get_url_prefix()}/#/pipelines/details/{response.pipeline_id}' - if self._is_ipython(): + if auth.is_ipython(): import IPython html = f'Pipeline details.' IPython.display.display(IPython.display.HTML(html)) @@ -1473,7 +1461,7 @@ def upload_pipeline_version( pipeline_package_path, **kwargs) link = f'{self._get_url_prefix()}/#/pipelines/details/{response.pipeline_id}/version/{response.pipeline_version_id}' - if self._is_ipython(): + if auth.is_ipython(): import IPython html = f'Pipeline details.' 
IPython.display.display(IPython.display.HTML(html)) diff --git a/sdk/python/kfp/client/client_test.py b/sdk/python/kfp/client/client_test.py index 03a6632c0c..da6b0710b9 100644 --- a/sdk/python/kfp/client/client_test.py +++ b/sdk/python/kfp/client/client_test.py @@ -17,12 +17,12 @@ import tempfile import textwrap import unittest -from unittest.mock import MagicMock from unittest.mock import Mock from unittest.mock import patch from absl.testing import parameterized from google.protobuf import json_format +from kfp.client import auth from kfp.client import client from kfp.compiler import Compiler from kfp.dsl import component @@ -194,24 +194,6 @@ class TestClient(parameterized.TestCase): def setUp(self): self.client = client.Client(namespace='ns1') - def test__is_ipython_return_false(self): - mock = MagicMock() - with patch.dict('sys.modules', IPython=mock): - mock.get_ipython.return_value = None - self.assertFalse(self.client._is_ipython()) - - def test__is_ipython_return_true(self): - mock = MagicMock() - with patch.dict('sys.modules', IPython=mock): - mock.get_ipython.return_value = 'Something' - self.assertTrue(self.client._is_ipython()) - - def test__is_ipython_should_raise_error(self): - mock = MagicMock() - with patch.dict('sys.modules', mock): - mock.side_effect = ImportError - self.assertFalse(self.client._is_ipython()) - def test_wait_for_run_completion_invalid_token_should_raise_error(self): with self.assertRaises(kfp_server_api.ApiException): with patch.object( @@ -371,7 +353,7 @@ def pipeline_test_upload_without_name(boolean: bool = True): with patch.object(self.client._upload_api, 'upload_pipeline') as mock_upload_pipeline: - with patch.object(self.client, '_is_ipython', return_value=False): + with patch.object(auth, 'is_ipython', return_value=False): with tempfile.TemporaryDirectory() as tmp_path: pipeline_test_path = os.path.join(tmp_path, 'test.yaml') Compiler().compile( @@ -401,7 +383,7 @@ def pipeline_test_upload_without_name(boolean: bool = True): def test_upload_pipeline_with_name(self, pipeline_name): with patch.object(self.client._upload_api, 'upload_pipeline') as mock_upload_pipeline: - with patch.object(self.client, '_is_ipython', return_value=False): + with patch.object(auth, 'is_ipython', return_value=False): self.client.upload_pipeline( pipeline_package_path='fake.yaml', pipeline_name=pipeline_name, @@ -421,7 +403,7 @@ def test_upload_pipeline_with_name(self, pipeline_name): def test_upload_pipeline_with_name_invalid(self, pipeline_name): with patch.object(self.client._upload_api, 'upload_pipeline') as mock_upload_pipeline: - with patch.object(self.client, '_is_ipython', return_value=False): + with patch.object(auth, 'is_ipython', return_value=False): with self.assertRaisesRegex( ValueError, 'Invalid pipeline name. Pipeline name cannot be empty or contain only whitespace.' From ab33e2af4e02ca91548bd17e13f4143aa1a7e7e6 Mon Sep 17 00:00:00 2001 From: gkcalat <35157096+gkcalat@users.noreply.github.com> Date: Sun, 15 Oct 2023 15:58:53 -0700 Subject: [PATCH 216/253] chore(sdk): Improve a message printed for users. 
Closes #10100 (#10101) --- sdk/python/kfp/client/auth.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/python/kfp/client/auth.py b/sdk/python/kfp/client/auth.py index 6aea635bfc..8a68183345 100644 --- a/sdk/python/kfp/client/auth.py +++ b/sdk/python/kfp/client/auth.py @@ -264,9 +264,10 @@ def get_auth_code(client_id: str) -> Tuple[str, str]: if ('SSH_CONNECTION' in os.environ) or ('SSH_CLIENT' in os.environ) or is_ipython(): try: - print(( - 'SSH connection detected. Please follow the instructions below.' - ' Otherwise, press CTRL+C if you are not connected via SSH.')) + print(('SSH connection or IPython shell detected. Please follow the' + ' instructions below. Otherwise, press CTRL+C if you are not' + ' connected via SSH and not using IPython (e.g. Jupyter' + ' Notebook).')) authorization_response = get_auth_response_ssh(host, port, auth_url) except KeyboardInterrupt: authorization_response = None From da6a3601468282c0592eae8e89a3d97b982e2d43 Mon Sep 17 00:00:00 2001 From: James Liu <37026441+zijianjoy@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:12:54 -0700 Subject: [PATCH 217/253] fix(frontend): Replace twitter artifactory endpoint with npm endpoint. (#10099) * fix(frontend): Replace twitter artifactory endpoint with npm endpoint. - Replace twitter artifactory endpoint with npm endpoint. - Update .nvmrc to have same node version as Dockerfile. * remove caniuse-browserlint --- frontend/.nvmrc | 2 +- frontend/package-lock.json | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/.nvmrc b/frontend/.nvmrc index cae54a258e..4a9c19cb52 100644 --- a/frontend/.nvmrc +++ b/frontend/.nvmrc @@ -1 +1 @@ -v12.14.1 +v14.21.3 diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 681d067591..0593ec2cb3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -14865,7 +14865,7 @@ }, "camelcase-css": { "version": "2.0.1", - "resolved": "https://artifactory.twitter.biz:443/api/npm/js-virtual/camelcase-css/-/camelcase-css-2.0.1.tgz", + "resolved": "https://registry.npmjs.org/camelcase-css/-/camelcase-css-2.0.1.tgz", "integrity": "sha1-7pePaUeRTMMMa0R0G27R338EP9U=", "dev": true }, @@ -21688,7 +21688,7 @@ }, "html-tags": { "version": "3.1.0", - "resolved": "https://artifactory.twitter.biz:443/api/npm/js-virtual/html-tags/-/html-tags-3.1.0.tgz", + "resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.1.0.tgz", "integrity": "sha1-e15vfmZen7QfMAB+2eDUHpf7IUA=", "dev": true }, @@ -40083,4 +40083,4 @@ "dev": true } } -} +} \ No newline at end of file From 1682ce8adeb2c55a155588eae7492b2f0a8b783a Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 17 Oct 2023 11:15:03 -0700 Subject: [PATCH 218/253] feat(components): Support service account in kubeflow model_batch_predict component PiperOrigin-RevId: 574209244 --- components/google-cloud/RELEASE.md | 1 + .../v1/batch_predict_job/component.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 91788d695a..d1a672a587 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -4,6 +4,7 @@ * Add `preview.automl.vision` and `DataConverterJobOp`. * Set display names for `preview.llm` pipelines. * Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline. +* Support `service_account` in `ModelBatchPredictOp`. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. 
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py index b179913af9..235632ac21 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/batch_predict_job/component.py @@ -56,6 +56,7 @@ def model_batch_predict( accelerator_count: int = 0, starting_replica_count: int = 0, max_replica_count: int = 0, + service_account: str = '', manual_batch_tuning_parameters_batch_size: int = 0, generate_explanation: bool = False, explanation_metadata: Dict[str, str] = {}, @@ -88,6 +89,7 @@ def model_batch_predict( accelerator_count: The number of accelerators to attach to the `machine_type`. Only used if `machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec starting_replica_count: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. + service_account: The service account that the DeployedModel's container runs as. If not specified, a system generated one will be used, which has minimal permissions and the custom container, if used, may not have enough permission to access other Google Cloud resources. Users deploying the Model must have the iam.serviceAccounts.actAs permission on this service account. manual_batch_tuning_parameters_batch_size: The number of the records (e.g. instances) of the operation given in each batch to a machine replica. Machine type, and size of a single record should be considered when setting this parameter, higher value speeds up the batch operation's execution, but too high value will result in a whole batch not fitting in a machine's memory, and the whole operation will fail. generate_explanation: Generate explanation along with the batch prediction results. This will cause the batch prediction output to include explanations based on the `prediction_format`: - `bigquery`: output includes a column named `explanation`. The value is a struct that conforms to the [aiplatform.gapic.Explanation] object. - `jsonl`: The JSON objects on each line include an additional entry keyed `explanation`. The value of the entry is a JSON object that conforms to the [aiplatform.gapic.Explanation] object. - `csv`: Generating explanations for CSV format is not supported. If this field is set to true, either the Model.explanation_spec or explanation_metadata and explanation_parameters must be populated. explanation_metadata: Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata. 
@@ -197,6 +199,9 @@ def model_batch_predict( ', "max_replica_count": ', max_replica_count, '}', + ', "service_account": "', + service_account, + '"', ', "manual_batch_tuning_parameters": {', '"batch_size": ', manual_batch_tuning_parameters_batch_size, From 2d3171cbfec626055e59b8a58ce83fb54ecad113 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 17 Oct 2023 19:34:55 -0700 Subject: [PATCH 219/253] feat(sdk): support collecting outputs from conditional branches using `dsl.OneOf` (#10067) * support dsl.OneOf * address review feedback * address review feedback --- sdk/RELEASE.md | 1 + sdk/python/kfp/compiler/compiler_test.py | 799 ++++++++++++++++++ sdk/python/kfp/compiler/compiler_utils.py | 247 ++++-- .../kfp/compiler/pipeline_spec_builder.py | 122 ++- sdk/python/kfp/dsl/__init__.py | 3 + sdk/python/kfp/dsl/for_loop.py | 11 + sdk/python/kfp/dsl/pipeline_channel.py | 253 +++++- sdk/python/kfp/dsl/pipeline_channel_test.py | 224 ++++- sdk/python/kfp/dsl/pipeline_context.py | 4 + sdk/python/kfp/dsl/tasks_group.py | 13 + .../pipelines/if_elif_else_complex.py | 14 +- .../pipelines/if_elif_else_complex.yaml | 198 ++++- ... => if_elif_else_with_oneof_parameters.py} | 24 +- .../if_elif_else_with_oneof_parameters.yaml | 420 +++++++++ sdk/python/test_data/pipelines/if_else.yaml | 214 ----- .../pipelines/if_else_with_oneof_artifacts.py | 60 ++ .../if_else_with_oneof_artifacts.yaml | 380 +++++++++ ...se.py => if_else_with_oneof_parameters.py} | 11 +- ...aml => if_else_with_oneof_parameters.yaml} | 139 +-- sdk/python/test_data/test_data_config.yaml | 13 +- 20 files changed, 2742 insertions(+), 408 deletions(-) rename sdk/python/test_data/pipelines/{if_elif_else.py => if_elif_else_with_oneof_parameters.py} (66%) create mode 100644 sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.yaml delete mode 100644 sdk/python/test_data/pipelines/if_else.yaml create mode 100644 sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.py create mode 100644 sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.yaml rename sdk/python/test_data/pipelines/{if_else.py => if_else_with_oneof_parameters.py} (79%) rename sdk/python/test_data/pipelines/{if_elif_else.yaml => if_else_with_oneof_parameters.yaml} (72%) diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 09b20e1b54..502f530072 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -3,6 +3,7 @@ ## Features ## Breaking changes +* Support collecting outputs from conditional branches using `dsl.OneOf` [\#10067](https://github.com/kubeflow/pipelines/pull/10067) ## Deprecations diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index 6cf0761461..b5d7a5267d 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ b/sdk/python/kfp/compiler/compiler_test.py @@ -147,11 +147,36 @@ def print_hello(): print('hello') +@dsl.component +def cleanup(): + print('cleanup') + + @dsl.component def double(num: int) -> int: return 2 * num +@dsl.component +def print_and_return_as_artifact(text: str, a: Output[Artifact]): + print(text) + with open(a.path, 'w') as f: + f.write(text) + + +@dsl.component +def print_and_return_with_output_key(text: str, output_key: OutputPath(str)): + print(text) + with open(output_key, 'w') as f: + f.write(text) + + +@dsl.component +def print_artifact(a: Input[Artifact]): + with open(a.path) as f: + print(f.read()) + + ########### @@ -4140,6 +4165,44 @@ def my_pipeline( 'Component output artifact.') +class TestCannotReturnFromWithinControlFlowGroup(unittest.TestCase): + + def 
test_condition_raises(self): + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r'Pipeline outputs may only be returned from the top level of the pipeline function scope\. Got pipeline output from within the control flow group dsl\.Condition\.' + ): + + @dsl.pipeline + def my_pipeline(string: str = 'string') -> str: + with dsl.Condition(string == 'foo'): + return print_and_return(text=string).output + + def test_loop_raises(self): + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r'Pipeline outputs may only be returned from the top level of the pipeline function scope\. Got pipeline output from within the control flow group dsl\.ParallelFor\.' + ): + + @dsl.pipeline + def my_pipeline(string: str = 'string') -> str: + with dsl.ParallelFor([1, 2, 3]): + return print_and_return(text=string).output + + def test_exit_handler_raises(self): + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + r'Pipeline outputs may only be returned from the top level of the pipeline function scope\. Got pipeline output from within the control flow group dsl\.ExitHandler\.' + ): + + @dsl.pipeline + def my_pipeline(string: str = 'string') -> str: + with dsl.ExitHandler(print_and_return(text='exit task')): + return print_and_return(text=string).output + + class TestConditionLogic(unittest.TestCase): def test_if(self): @@ -4480,5 +4543,741 @@ def flip_coin_pipeline(): print_and_return(text='Got tails!') +class TestDslOneOf(unittest.TestCase): + # The space of possible tests is very large, so we test a representative set of cases covering the following styles of usage: + # - upstream conditions: if/else v if/elif/else + # - data consumed: parameters v artifacts + # - where dsl.OneOf goes: consumed by task v returned v both + # - when outputs have different keys: e.g., .output v .outputs[] + # - how the if/elif/else are nested and at what level they are consumed + + # Data type validation (e.g., dsl.OneOf(artifact, param) fails) and similar is covered in pipeline_channel_test.py. + + # To help narrow the tests further (we already test lots of aspects in the following cases), we choose focus on the dsl.OneOf behavior, not the conditional logic if If/Elif/Else. This is more verbose, but more maintainable and the behavior under test is clearer. 
+ + def test_if_else_returned(self): + """Uses If and Else branches, parameters passed to dsl.OneOf, dsl.OneOf returned from a pipeline, and different output keys on dsl.OneOf channels.""" + + @dsl.pipeline + def roll_die_pipeline() -> str: + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Else(): + t2 = print_and_return_with_output_key(text='Got tails!') + return dsl.OneOf(t1.output, t2.outputs['output_key']) + + # hole punched through if + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-2'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through else + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-with-output-key-output_key'] + .parameter_type, + type_utils.STRING, + ) + # condition-branches surfaces + self.assertEqual( + roll_die_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = roll_die_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-with-output-key-output_key', + producer_subtask='condition-3', + )) + # surfaced as output + self.assertEqual( + roll_die_pipeline.pipeline_spec.root.dag.outputs + .parameters['Output'].value_from_parameter, + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + producer_subtask='condition-branches-1', + output_parameter_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + def test_if_elif_else_returned(self): + """Uses If, Elif, and Else branches, parameters passed to dsl.OneOf, dsl.OneOf returned from a pipeline, and different output keys on dsl.OneOf channels.""" + + @dsl.pipeline + def roll_die_pipeline() -> str: + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + return dsl.OneOf(t1.output, t2.output, t3.outputs['output_key']) + + # hole punched through if + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-2'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through elif + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-2-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through else + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-4'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-with-output-key-output_key'] + .parameter_type, + 
type_utils.STRING, + ) + # condition-branches surfaces + self.assertEqual( + roll_die_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = roll_die_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-2-Output', + producer_subtask='condition-3', + )) + self.assertEqual( + parameter_selectors[2], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-with-output-key-output_key', + producer_subtask='condition-4', + )) + # surfaced as output + self.assertEqual( + roll_die_pipeline.pipeline_spec.root.dag.outputs + .parameters['Output'].value_from_parameter, + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + producer_subtask='condition-branches-1', + output_parameter_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + def test_if_elif_else_consumed(self): + """Uses If, Elif, and Else branches, parameters passed to dsl.OneOf, dsl.OneOf passed to a consumer task, and different output keys on dsl.OneOf channels.""" + + @dsl.pipeline + def roll_die_pipeline(): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + print_and_return( + text=dsl.OneOf(t1.output, t2.output, t3.outputs['output_key'])) + + # hole punched through if + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-2'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through elif + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-2-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through else + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-4'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-with-output-key-output_key'] + .parameter_type, + type_utils.STRING, + ) + # condition-branches surfaces + self.assertEqual( + roll_die_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = roll_die_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + 
pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-2-Output', + producer_subtask='condition-3', + )) + self.assertEqual( + parameter_selectors[2], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-with-output-key-output_key', + producer_subtask='condition-4', + )) + # consumed from condition-branches + self.assertEqual( + roll_die_pipeline.pipeline_spec.root.dag.tasks['print-and-return-3'] + .inputs.parameters['text'].task_output_parameter, + pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec + .TaskOutputParameterSpec( + producer_task='condition-branches-1', + output_parameter_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + def test_if_else_consumed_and_returned(self): + """Uses If, Elif, and Else branches, parameters passed to dsl.OneOf, and dsl.OneOf passed to a consumer task and returned from the pipeline.""" + + @dsl.pipeline + def flip_coin_pipeline() -> str: + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return(text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return(text='Got tails!') + x = dsl.OneOf(print_task_1.output, print_task_2.output) + print_and_return(text=x) + return x + + # hole punched through if + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-2'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-Output'].parameter_type, + type_utils.STRING, + ) + # hole punched through else + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions.parameters[ + 'pipelinechannel--print-and-return-2-Output'].parameter_type, + type_utils.STRING, + ) + # condition-branches surfaces + self.assertEqual( + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = flip_coin_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-2-Output', + producer_subtask='condition-3', + )) + # consumed from condition-branches + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag + .tasks['print-and-return-3'].inputs.parameters['text'] + .task_output_parameter, + pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec + .TaskOutputParameterSpec( + producer_task='condition-branches-1', + output_parameter_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + # surfaced as output + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.outputs + .parameters['Output'].value_from_parameter, + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + producer_subtask='condition-branches-1', + output_parameter_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + def test_if_else_consumed_and_returned_artifacts(self): + """Uses If, Elif, and Else branches, artifacts passed to dsl.OneOf, and dsl.OneOf 
passed to a consumer task and returned from the pipeline.""" + + @dsl.pipeline + def flip_coin_pipeline() -> Artifact: + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact(text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact(text='Got tails!') + x = dsl.OneOf(print_task_1.outputs['a'], print_task_2.outputs['a']) + print_artifact(a=x) + return x + + # hole punched through if + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-2'] + .output_definitions + .artifacts['pipelinechannel--print-and-return-as-artifact-a'] + .artifact_type.schema_title, + 'system.Artifact', + ) + # hole punched through else + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions + .artifacts['pipelinechannel--print-and-return-as-artifact-2-a'] + .artifact_type.schema_title, + 'system.Artifact', + ) + # condition-branches surfaces + self.assertEqual( + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .artifacts['pipelinechannel--condition-branches-1-oneof-1'] + .artifact_type.schema_title, + 'system.Artifact', + ) + artifact_selectors = flip_coin_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.artifacts[ + 'pipelinechannel--condition-branches-1-oneof-1'].artifact_selectors + self.assertEqual( + artifact_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + output_artifact_key='pipelinechannel--print-and-return-as-artifact-a', + producer_subtask='condition-2', + )) + self.assertEqual( + artifact_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + output_artifact_key='pipelinechannel--print-and-return-as-artifact-2-a', + producer_subtask='condition-3', + )) + + # consumed from condition-branches + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.tasks['print-artifact'] + .inputs.artifacts['a'].task_output_artifact, + pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec + .TaskOutputArtifactSpec( + producer_task='condition-branches-1', + output_artifact_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + # surfaced as output + self.assertEqual( + flip_coin_pipeline.pipeline_spec.root.dag.outputs + .artifacts['Output'].artifact_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + producer_subtask='condition-branches-1', + output_artifact_key='pipelinechannel--condition-branches-1-oneof-1', + ), + ) + + def test_nested_under_condition_consumed(self): + """Uses If, Else, and OneOf nested under a parent If.""" + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool): + with dsl.If(execute_pipeline == True): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + x = dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + print_artifact(a=x) + + # hole punched through if + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-3'] + .output_definitions + .artifacts['pipelinechannel--print-and-return-as-artifact-a'] + .artifact_type.schema_title, + 'system.Artifact', + ) + # hole punched through else + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-4'] + .output_definitions + 
.artifacts['pipelinechannel--print-and-return-as-artifact-2-a'] + .artifact_type.schema_title, + 'system.Artifact', + ) + # condition-branches surfaces + self.assertEqual( + flip_coin_pipeline.pipeline_spec + .components['comp-condition-branches-2'].output_definitions + .artifacts['pipelinechannel--condition-branches-2-oneof-1'] + .artifact_type.schema_title, + 'system.Artifact', + ) + artifact_selectors = flip_coin_pipeline.pipeline_spec.components[ + 'comp-condition-branches-2'].dag.outputs.artifacts[ + 'pipelinechannel--condition-branches-2-oneof-1'].artifact_selectors + self.assertEqual( + artifact_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + output_artifact_key='pipelinechannel--print-and-return-as-artifact-a', + producer_subtask='condition-3', + )) + self.assertEqual( + artifact_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + output_artifact_key='pipelinechannel--print-and-return-as-artifact-2-a', + producer_subtask='condition-4', + )) + # consumed from condition-branches + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-1'].dag + .tasks['print-artifact'].inputs.artifacts['a'].task_output_artifact, + pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec + .TaskOutputArtifactSpec( + producer_task='condition-branches-2', + output_artifact_key='pipelinechannel--condition-branches-2-oneof-1', + ), + ) + + def test_nested_under_condition_returned_raises(self): + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + f'Pipeline outputs may only be returned from the top level of the pipeline function scope\. Got pipeline output dsl\.OneOf from within the control flow group dsl\.If\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool): + with dsl.If(execute_pipeline == True): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + return dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + + def test_deeply_nested_consumed(self): + """Uses If, Elif, Else, and OneOf deeply nested within multiple sub-DAGs.""" + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool): + with dsl.ExitHandler(cleanup()): + with dsl.ParallelFor([1, 2, 3]): + with dsl.If(execute_pipeline == True): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + x = dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + print_artifact(a=x) + + self.assertIn( + 'condition-branches-5', flip_coin_pipeline.pipeline_spec + .components['comp-condition-4'].dag.tasks) + # consumed from condition-branches + self.assertEqual( + flip_coin_pipeline.pipeline_spec.components['comp-condition-4'].dag + .tasks['print-artifact'].inputs.artifacts['a'].task_output_artifact, + pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec + .TaskOutputArtifactSpec( + producer_task='condition-branches-5', + output_artifact_key='pipelinechannel--condition-branches-5-oneof-1', + ), + ) + + def test_deeply_nested_returned_raises(self): + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + f'Pipeline outputs may only be returned from the top level of the pipeline function scope\. 
Got pipeline output dsl\.OneOf from within the control flow group dsl\.ParallelFor\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool) -> str: + with dsl.ExitHandler(cleanup()): + with dsl.If(execute_pipeline == True): + with dsl.ParallelFor([1, 2, 3]): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + return dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + + def test_consume_at_wrong_level(self): + + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + f'Illegal task dependency across DSL context managers\. A downstream task cannot depend on an upstream task within a dsl\.If context unless the downstream is within that context too\. Found task print-artifact which depends on upstream task condition-branches-5 within an uncommon dsl\.If context\.' + ): + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool): + with dsl.ExitHandler(cleanup()): + with dsl.ParallelFor([1, 2, 3]): + with dsl.If(execute_pipeline == True): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + x = dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + # this is one level dedented from the permitted case + print_artifact(a=x) + + def test_return_at_wrong_level(self): + with self.assertRaisesRegex( + compiler_utils.InvalidTopologyException, + f'Pipeline outputs may only be returned from the top level of the pipeline function scope\. Got pipeline output dsl\.OneOf from within the control flow group dsl\.If\.' 
+ ): + + @dsl.pipeline + def flip_coin_pipeline(execute_pipeline: bool): + with dsl.If(execute_pipeline == True): + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return_as_artifact( + text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return_as_artifact( + text='Got tails!') + # this is returned at the right level, but not permitted since it's still effectively returning from within the dsl.If group + return dsl.OneOf(print_task_1.outputs['a'], + print_task_2.outputs['a']) + + def test_oneof_in_condition(self): + """Tests that dsl.OneOf's channel can be consumed in a downstream group nested one level""" + + @dsl.pipeline + def roll_die_pipeline(repeat_on: str = 'Got heads!'): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + x = dsl.OneOf(t1.output, t2.output, t3.outputs['output_key']) + + with dsl.If(x == repeat_on): + print_and_return(text=x) + + # condition-branches surfaces + self.assertEqual( + roll_die_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + .parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = roll_die_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-2-Output', + producer_subtask='condition-3', + )) + self.assertEqual( + parameter_selectors[2], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-with-output-key-output_key', + producer_subtask='condition-4', + )) + # condition points to correct upstream output + self.assertEqual( + roll_die_pipeline.pipeline_spec.root.dag.tasks['condition-5'] + .trigger_policy.condition, + "inputs.parameter_values['pipelinechannel--condition-branches-1-pipelinechannel--condition-branches-1-oneof-1'] == inputs.parameter_values['pipelinechannel--repeat_on']" + ) + + def test_consumed_in_nested_groups(self): + """Tests that dsl.OneOf's channel can be consumed in a downstream group nested multiple levels""" + + @dsl.pipeline + def roll_die_pipeline( + repeat: bool = True, + rounds: List[str] = ['a', 'b', 'c'], + ): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + x = dsl.OneOf(t1.output, t2.output, t3.outputs['output_key']) + + with dsl.ParallelFor(rounds): + with dsl.If(repeat == True): + print_and_return(text=x) + + # condition-branches surfaces + self.assertEqual( + roll_die_pipeline.pipeline_spec + .components['comp-condition-branches-1'].output_definitions + 
.parameters['pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, + type_utils.STRING, + ) + parameter_selectors = roll_die_pipeline.pipeline_spec.components[ + 'comp-condition-branches-1'].dag.outputs.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].value_from_oneof.parameter_selectors + self.assertEqual( + parameter_selectors[0], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-Output', + producer_subtask='condition-2', + )) + self.assertEqual( + parameter_selectors[1], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-2-Output', + producer_subtask='condition-3', + )) + self.assertEqual( + parameter_selectors[2], + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + output_parameter_key='pipelinechannel--print-and-return-with-output-key-output_key', + producer_subtask='condition-4', + )) + # condition points to correct upstream output + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-6'] + .input_definitions.parameters[ + 'pipelinechannel--condition-branches-1-pipelinechannel--condition-branches-1-oneof-1'] + .parameter_type, type_utils.STRING) + # inner task consumes from condition input parameter + self.assertEqual( + roll_die_pipeline.pipeline_spec.components['comp-condition-6'].dag + .tasks['print-and-return-3'].inputs.parameters['text'] + .component_input_parameter, + 'pipelinechannel--condition-branches-1-pipelinechannel--condition-branches-1-oneof-1' + ) + + def test_oneof_in_fstring(self): + with self.assertRaisesRegex( + NotImplementedError, + f'dsl\.OneOf does not support string interpolation\.'): + + @dsl.pipeline + def roll_die_pipeline(): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + print_and_return( + text=f"Final result: {dsl.OneOf(t1.output, t2.output, t3.outputs['output_key'])}" + ) + + def test_type_checking_parameters(self): + with self.assertRaisesRegex( + type_utils.InconsistentTypeException, + "Incompatible argument passed to the input 'val' of component 'print-int': Argument type 'STRING' is incompatible with the input type 'NUMBER_INTEGER'", + ): + + @dsl.component + def print_int(val: int): + print(val) + + @dsl.pipeline + def roll_die_pipeline(): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + with dsl.Else(): + t3 = print_and_return_with_output_key(text='Draw!') + print_int( + val=dsl.OneOf(t1.output, t2.output, + t3.outputs['output_key'])) + + def test_oneof_of_oneof(self): + with self.assertRaisesRegex( + ValueError, + r'dsl.OneOf cannot be used inside of another dsl\.OneOf\.'): + + @dsl.pipeline + def roll_die_pipeline() -> str: + outer_flip_coin_task = flip_coin() + with dsl.If(outer_flip_coin_task.output == 'heads'): + inner_flip_coin_task = flip_coin() + with dsl.If(inner_flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Else(): + t2 = print_and_return(text='Got tails!') + t3 = dsl.OneOf(t1.output, t2.output) + with dsl.Else(): + t4 = print_and_return(text='First flip was not heads!') + 
return dsl.OneOf(t3, t4.output) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/compiler/compiler_utils.py b/sdk/python/kfp/compiler/compiler_utils.py index ccc6730b1e..0bc525e250 100644 --- a/sdk/python/kfp/compiler/compiler_utils.py +++ b/sdk/python/kfp/compiler/compiler_utils.py @@ -14,9 +14,10 @@ """Utility methods for compiler implementation that is IR-agnostic.""" import collections -from copy import deepcopy +import copy from typing import DefaultDict, Dict, List, Mapping, Set, Tuple, Union +from kfp import dsl from kfp.dsl import for_loop from kfp.dsl import pipeline_channel from kfp.dsl import pipeline_context @@ -258,10 +259,9 @@ def get_inputs_for_all_groups( if isinstance(channel_to_add, pipeline_channel.PipelineChannel): channels_to_add.append(channel_to_add) - if channel.task_name: + if channel.task: # The PipelineChannel is produced by a task. - - upstream_task = pipeline.tasks[channel.task_name] + upstream_task = channel.task upstream_groups, downstream_groups = ( _get_uncommon_ancestors( task_name_to_parent_groups=task_name_to_parent_groups, @@ -462,46 +462,116 @@ def get_outputs_for_all_groups( } outputs = collections.defaultdict(dict) - + processed_oneofs: Set[pipeline_channel.OneOfMixin] = set() # handle dsl.Collected consumed by tasks for task in pipeline.tasks.values(): for channel in task.channel_inputs: - if not isinstance(channel, for_loop.Collected): - continue - producer_task = pipeline.tasks[channel.task_name] - consumer_task = task - - upstream_groups, downstream_groups = ( - _get_uncommon_ancestors( - task_name_to_parent_groups=task_name_to_parent_groups, - group_name_to_parent_groups=group_name_to_parent_groups, - task1=producer_task, - task2=consumer_task, - )) - validate_parallel_for_fan_in_consumption_legal( - consumer_task_name=consumer_task.name, - upstream_groups=upstream_groups, - group_name_to_group=group_name_to_group, - ) + # TODO: migrate Collected to OneOfMixin style implementation, + # then simplify this logic to align with OneOfMixin logic + if isinstance(channel, dsl.Collected): + producer_task = pipeline.tasks[channel.task_name] + consumer_task = task + + upstream_groups, downstream_groups = ( + _get_uncommon_ancestors( + task_name_to_parent_groups=task_name_to_parent_groups, + group_name_to_parent_groups=group_name_to_parent_groups, + task1=producer_task, + task2=consumer_task, + )) + validate_parallel_for_fan_in_consumption_legal( + consumer_task_name=consumer_task.name, + upstream_groups=upstream_groups, + group_name_to_group=group_name_to_group, + ) + + # producer_task's immediate parent group and the name by which + # to surface the channel + surfaced_output_name = additional_input_name_for_pipeline_channel( + channel) + + # the highest-level task group that "consumes" the + # collected output + parent_consumer = downstream_groups[0] + producer_task_name = upstream_groups.pop() + + # process from the upstream groups from the inside out + for upstream_name in reversed(upstream_groups): + outputs[upstream_name][ + surfaced_output_name] = make_new_channel_for_collected_outputs( + channel_name=channel.name, + starting_channel=channel.output, + task_name=producer_task_name, + ) + + # on each iteration, mutate the channel being consumed so + # that it references the last parent group surfacer + channel.name = surfaced_output_name + channel.task_name = upstream_name + + # for the next iteration, set the consumer to the current + # surfacer (parent group) + producer_task_name = upstream_name - # producer_task's 
immediate parent group and the name by which - # to surface the channel + parent_of_current_surfacer = group_name_to_parent_groups[ + upstream_name][-2] + if parent_consumer in group_name_to_children[ + parent_of_current_surfacer]: + break + + elif isinstance(channel, pipeline_channel.OneOfMixin): + for inner_channel in channel.channels: + producer_task = pipeline.tasks[inner_channel.task_name] + consumer_task = task + upstream_groups, downstream_groups = ( + _get_uncommon_ancestors( + task_name_to_parent_groups=task_name_to_parent_groups, + group_name_to_parent_groups=group_name_to_parent_groups, + task1=producer_task, + task2=consumer_task, + )) + surfaced_output_name = additional_input_name_for_pipeline_channel( + inner_channel) + + # 1. get the oneof + # 2. find the task group that surfaced it + # 3. find the inner tasks reponsible + + for upstream_name in reversed(upstream_groups): + # skip the first task processed, since we don't need to add new outputs for the innermost task + if upstream_name == inner_channel.task.name: + continue + # # once we've hit the outermost condition-branches group, we're done + if upstream_name == channel.condition_branches_group.name: + outputs[upstream_name][channel.name] = channel + break + + # copy so we can update the inner channel for the next iteration + # use copy not deepcopy, since deepcopy will needlessly copy the entire pipeline + # this uses more memory than needed and some objects are uncopiable + outputs[upstream_name][ + surfaced_output_name] = copy.copy(inner_channel) + + inner_channel.name = surfaced_output_name + inner_channel.task_name = upstream_name + + processed_oneofs.add(channel) + + # handle dsl.Collected returned from pipeline + # TODO: consider migrating dsl.Collected returns to pattern used by dsl.OneOf, where the OneOf constructor returns a parameter/artifact channel, which fits in more cleanly into the existing compiler abtractions + for output_key, channel in pipeline_outputs_dict.items(): + if isinstance(channel, for_loop.Collected): surfaced_output_name = additional_input_name_for_pipeline_channel( channel) - - # the highest-level task group that "consumes" the - # collected output - parent_consumer = downstream_groups[0] + upstream_groups = task_name_to_parent_groups[channel.task_name][1:] producer_task_name = upstream_groups.pop() - - # process from the upstream groups from the inside out + # process upstream groups from the inside out, until getting to the pipeline level for upstream_name in reversed(upstream_groups): - outputs[upstream_name][ - surfaced_output_name] = make_new_channel_for_collected_outputs( - channel_name=channel.name, - starting_channel=channel.output, - task_name=producer_task_name, - ) + new_channel = make_new_channel_for_collected_outputs( + channel_name=channel.name, + starting_channel=channel.output, + task_name=producer_task_name, + ) # on each iteration, mutate the channel being consumed so # that it references the last parent group surfacer @@ -511,46 +581,46 @@ def get_outputs_for_all_groups( # for the next iteration, set the consumer to the current # surfacer (parent group) producer_task_name = upstream_name - - parent_of_current_surfacer = group_name_to_parent_groups[ - upstream_name][-2] - if parent_consumer in group_name_to_children[ - parent_of_current_surfacer]: - break - - # handle dsl.Collected returned from pipeline - for output_key, channel in pipeline_outputs_dict.items(): - if isinstance(channel, for_loop.Collected): - surfaced_output_name = 
additional_input_name_for_pipeline_channel( - channel) + outputs[upstream_name][surfaced_output_name] = new_channel + + # after surfacing from all inner TasksGroup, change the PipelineChannel output to also return from the correct TasksGroup + pipeline_outputs_dict[ + output_key] = make_new_channel_for_collected_outputs( + channel_name=surfaced_output_name, + starting_channel=channel.output, + task_name=upstream_name, + ) + elif isinstance(channel, pipeline_channel.OneOfMixin): + # if the output has already been consumed by a task before it is returned, we don't need to reprocess it + if channel in processed_oneofs: + continue + for inner_channel in channel.channels: + producer_task = pipeline.tasks[inner_channel.task_name] upstream_groups = task_name_to_parent_groups[ - channel.task_name][1:] - producer_task_name = upstream_groups.pop() - # process upstream groups from the inside out, until getting to the pipeline level - for upstream_name in reversed(upstream_groups): - new_channel = make_new_channel_for_collected_outputs( - channel_name=channel.name, - starting_channel=channel.output, - task_name=producer_task_name, - ) - - # on each iteration, mutate the channel being consumed so - # that it references the last parent group surfacer - channel.name = surfaced_output_name - channel.task_name = upstream_name + inner_channel.task_name][1:] + surfaced_output_name = additional_input_name_for_pipeline_channel( + inner_channel) - # for the next iteration, set the consumer to the current - # surfacer (parent group) - producer_task_name = upstream_name - outputs[upstream_name][surfaced_output_name] = new_channel - - # after surfacing from all inner TasksGroup, change the PipelineChannel output to also return from the correct TasksGroup - pipeline_outputs_dict[ - output_key] = make_new_channel_for_collected_outputs( - channel_name=surfaced_output_name, - starting_channel=channel.output, - task_name=upstream_name, - ) + # 1. get the oneof + # 2. find the task group that surfaced it + # 3. 
find the inner tasks responsible + for upstream_name in reversed(upstream_groups): + # skip the first task processed, since we don't need to add new outputs for the innermost task + if upstream_name == inner_channel.task.name: + continue + # once we've hit the outermost condition-branches group, we're done + if upstream_name == channel.condition_branches_group.name: + outputs[upstream_name][channel.name] = channel + break + + # copy so we can update the inner channel for the next iteration + # use copy not deepcopy, since deepcopy will needlessly copy the entire pipeline + # this uses more memory than needed and some objects are uncopiable + outputs[upstream_name][surfaced_output_name] = copy.copy( + inner_channel) + + inner_channel.name = surfaced_output_name + inner_channel.task_name = upstream_name return outputs, pipeline_outputs_dict @@ -633,22 +703,17 @@ def get_dependencies( """ dependencies = collections.defaultdict(set) for task in pipeline.tasks.values(): - upstream_task_names = set() + upstream_task_names: Set[Union[pipeline_task.PipelineTask, + tasks_group.TasksGroup]] = set() task_condition_inputs = list(condition_channels[task.name]) - for channel in task.channel_inputs + task_condition_inputs: - if channel.task_name: - upstream_task_names.add(channel.task_name) - upstream_task_names |= set(task.dependent_tasks) - - for upstream_task_name in upstream_task_names: - # the dependent op could be either a BaseOp or an opsgroup - if upstream_task_name in pipeline.tasks: - upstream_task = pipeline.tasks[upstream_task_name] - elif upstream_task_name in group_name_to_group: - upstream_task = group_name_to_group[upstream_task_name] - else: - raise ValueError( - f'Compiler cannot find task: {upstream_task_name}.') + all_channels = task.channel_inputs + task_condition_inputs + upstream_task_names.update( + {channel.task for channel in all_channels if channel.task}) + # dependent tasks are tasks on which .after was called and can only be the names of PipelineTasks, not TasksGroups + upstream_task_names.update( + {pipeline.tasks[after_task] for after_task in task.dependent_tasks}) + + for upstream_task in upstream_task_names: upstream_groups, downstream_groups = _get_uncommon_ancestors( task_name_to_parent_groups=task_name_to_parent_groups, @@ -658,7 +723,7 @@ def get_dependencies( ) # uncommon upstream ancestor check - uncommon_upstream_groups = deepcopy(upstream_groups) + uncommon_upstream_groups = copy.deepcopy(upstream_groups) uncommon_upstream_groups.remove( upstream_task.name ) # because a task's `upstream_groups` contains the task's name @@ -675,6 +740,8 @@ def get_dependencies( raise InvalidTopologyException( f'{ILLEGAL_CROSS_DAG_ERROR_PREFIX} A downstream task cannot depend on an upstream task within a dsl.{dependent_group.__class__.__name__} context unless the downstream is within that context too. Found task {task.name} which depends on upstream task {upstream_task.name} within an uncommon dsl.{dependent_group.__class__.__name__} context.' ) + # TODO: migrate Collected to OneOfMixin style implementation, + # then make this validation dsl.Collected-aware elif isinstance(dependent_group, tasks_group.ParallelFor): raise InvalidTopologyException( f'{ILLEGAL_CROSS_DAG_ERROR_PREFIX} A downstream task cannot depend on an upstream task within a dsl.{dependent_group.__class__.__name__} context unless the downstream is within that context too or the outputs are begin fanned-in to a list using dsl.{for_loop.Collected.__name__}. 
Found task {task.name} which depends on upstream task {upstream_task.name} within an uncommon dsl.{dependent_group.__class__.__name__} context.' diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 1c0b7aa463..1f972133c7 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -417,12 +417,13 @@ def _build_component_spec_from_component_spec_structure( return component_spec -def _connect_dag_outputs( +def connect_single_dag_output( component_spec: pipeline_spec_pb2.ComponentSpec, output_name: str, output_channel: pipeline_channel.PipelineChannel, ) -> None: - """Connects dag output to a subtask output. + """Connects a DAG output to a subtask output when the subtask output + contains only one channel (i.e., not OneOfMixin). Args: component_spec: The component spec to modify its dag outputs. @@ -451,14 +452,71 @@ def _connect_dag_outputs( output_name].value_from_parameter.output_parameter_key = output_channel.name +def connect_oneof_dag_output( + component_spec: pipeline_spec_pb2.ComponentSpec, + output_name: str, + oneof_output: pipeline_channel.OneOfMixin, +) -> None: + """Connects a output to the OneOf output returned by the DAG's internal + condition-branches group. + + Args: + component_spec: The component spec to modify its DAG outputs. + output_name: The name of the DAG output. + oneof_output: The OneOfMixin object returned by the pipeline (OneOf in user code). + """ + if isinstance(oneof_output, pipeline_channel.OneOfArtifact): + if output_name not in component_spec.output_definitions.artifacts: + raise ValueError( + f'Pipeline or component output not defined: {output_name}. You may be missing a type annotation.' + ) + for channel in oneof_output.channels: + component_spec.dag.outputs.artifacts[ + output_name].artifact_selectors.append( + pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec( + producer_subtask=channel.task_name, + output_artifact_key=channel.name, + )) + if isinstance(oneof_output, pipeline_channel.OneOfParameter): + if output_name not in component_spec.output_definitions.parameters: + raise ValueError( + f'Pipeline or component output not defined: {output_name}. You may be missing a type annotation.' + ) + for channel in oneof_output.channels: + component_spec.dag.outputs.parameters[ + output_name].value_from_oneof.parameter_selectors.append( + pipeline_spec_pb2.DagOutputsSpec.ParameterSelectorSpec( + producer_subtask=channel.task_name, + output_parameter_key=channel.name, + )) + + def _build_dag_outputs( component_spec: pipeline_spec_pb2.ComponentSpec, dag_outputs: Dict[str, pipeline_channel.PipelineChannel], ) -> None: - """Builds DAG output spec.""" + """Connects the DAG's outputs to a TaskGroup's ComponentSpec and validates + it is present in the component interface. + + Args: + component_spec: The ComponentSpec. + dag_outputs: Dictionary of output key to output channel. + """ for output_name, output_channel in dag_outputs.items(): - _connect_dag_outputs(component_spec, output_name, output_channel) - # Valid dag outputs covers all outptus in component definition. + if not isinstance(output_channel, pipeline_channel.PipelineChannel): + raise ValueError( + f"Got unknown pipeline output '{output_name}' of type {output_channel}." 
+ ) + connect_single_dag_output(component_spec, output_name, output_channel) + + validate_dag_outputs(component_spec) + + +def validate_dag_outputs( + component_spec: pipeline_spec_pb2.ComponentSpec) -> None: + """Validates the DAG's ComponentSpec specifies the source task for all of + its ComponentSpec inputs (input_definitions) and outputs + (output_definitions).""" for output_name in component_spec.output_definitions.artifacts: if output_name not in component_spec.dag.outputs.artifacts: raise ValueError(f'Missing pipeline output: {output_name}.') @@ -467,6 +525,31 @@ def _build_dag_outputs( raise ValueError(f'Missing pipeline output: {output_name}.') +def build_oneof_dag_outputs( + component_spec: pipeline_spec_pb2.ComponentSpec, + oneof_outputs: Dict[str, pipeline_channel.OneOfMixin], +) -> None: + """Connects the DAG's OneOf outputs to a TaskGroup's ComponentSpec and + validates it is present in the component interface. + + Args: + component_spec: The ComponentSpec. + oneof_outputs: Dictionary of output key to OneOf output channel. + """ + for output_name, oneof_output in oneof_outputs.items(): + for channel in oneof_output.channels: + if not isinstance(channel, pipeline_channel.PipelineChannel): + raise ValueError( + f"Got unknown pipeline output '{output_name}' of type {type(channel)}." + ) + connect_oneof_dag_output( + component_spec, + output_name, + oneof_output, + ) + validate_dag_outputs(component_spec) + + def build_importer_spec_for_task( task: pipeline_task.PipelineTask ) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec: @@ -1290,7 +1373,7 @@ def build_spec_by_group( elif isinstance(subgroup, tasks_group.ConditionBranches): subgroup_component_spec = build_component_spec_for_group( input_pipeline_channels=subgroup_input_channels, - output_pipeline_channels={}, + output_pipeline_channels=subgroup_output_channels, ) subgroup_task_spec = build_task_spec_for_group( @@ -1299,6 +1382,9 @@ def build_spec_by_group( tasks_in_current_dag=tasks_in_current_dag, is_parent_component_root=is_parent_component_root, ) + # oneof is the only type of output a ConditionBranches group can have + build_oneof_dag_outputs(subgroup_component_spec, + subgroup_output_channels) else: raise RuntimeError( @@ -1694,6 +1780,28 @@ def _rename_component_refs( old_name_to_new_name[old_component_name]].CopyFrom(component_spec) +def validate_pipeline_outputs_dict( + pipeline_outputs_dict: Dict[str, pipeline_channel.PipelineChannel]): + for channel in pipeline_outputs_dict.values(): + if isinstance(channel, for_loop.Collected): + # this validation doesn't apply to Collected + continue + + elif isinstance(channel, pipeline_channel.OneOfMixin): + if channel.condition_branches_group.parent_task_group.group_type != tasks_group.TasksGroupType.PIPELINE: + raise compiler_utils.InvalidTopologyException( + f'Pipeline outputs may only be returned from the top level of the pipeline function scope. Got pipeline output dsl.{pipeline_channel.OneOf.__name__} from within the control flow group dsl.{channel.condition_branches_group.parent_task_group.__class__.__name__}.' + ) + + elif isinstance(channel, pipeline_channel.PipelineChannel): + if channel.task.parent_task_group.group_type != tasks_group.TasksGroupType.PIPELINE: + raise compiler_utils.InvalidTopologyException( + f'Pipeline outputs may only be returned from the top level of the pipeline function scope. Got pipeline output from within the control flow group dsl.{channel.task.parent_task_group.__class__.__name__}.' 
+ ) + else: + raise ValueError(f'Got unknown pipeline output: {channel}.') + + def create_pipeline_spec( pipeline: pipeline_context.Pipeline, component_spec: structures.ComponentSpec, @@ -1729,6 +1837,8 @@ def create_pipeline_spec( # an output from a task in a condition group, for example, which isn't # caught until submission time using Vertex SDK client pipeline_outputs_dict = convert_pipeline_outputs_to_dict(pipeline_outputs) + validate_pipeline_outputs_dict(pipeline_outputs_dict) + root_group = pipeline.groups[0] all_groups = compiler_utils.get_all_groups(root_group) diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index 001226b02c..c2c70c847d 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -229,8 +229,10 @@ def my_pipeline(): if os.environ.get('_KFP_RUNTIME', 'false') != 'true': from kfp.dsl.component_decorator import component from kfp.dsl.container_component_decorator import container_component + # TODO: Collected should be moved to pipeline_channel.py, consistent with OneOf from kfp.dsl.for_loop import Collected from kfp.dsl.importer_node import importer + from kfp.dsl.pipeline_channel import OneOf from kfp.dsl.pipeline_context import pipeline from kfp.dsl.pipeline_task import PipelineTask from kfp.dsl.placeholders import ConcatPlaceholder @@ -252,6 +254,7 @@ def my_pipeline(): 'If', 'Elif', 'Else', + 'OneOf', 'ExitHandler', 'ParallelFor', 'Collected', diff --git a/sdk/python/kfp/dsl/for_loop.py b/sdk/python/kfp/dsl/for_loop.py index 5381576631..e49c9a2951 100644 --- a/sdk/python/kfp/dsl/for_loop.py +++ b/sdk/python/kfp/dsl/for_loop.py @@ -274,6 +274,7 @@ def _get_name_override(self, loop_arg_name: str, subvar_name: str) -> str: return f'{loop_arg_name}{self.SUBVAR_NAME_DELIMITER}{subvar_name}' +# TODO: migrate Collected to OneOfMixin style implementation class Collected(pipeline_channel.PipelineChannel): """For collecting into a list the output from a task in dsl.ParallelFor loops. @@ -313,3 +314,13 @@ def __init__( channel_type=channel_type, task_name=output.task_name, ) + self._validate_no_oneof_channel(self.output) + + def _validate_no_oneof_channel( + self, channel: Union[pipeline_channel.PipelineParameterChannel, + pipeline_channel.PipelineArtifactChannel] + ) -> None: + if isinstance(channel, pipeline_channel.OneOfMixin): + raise ValueError( + f'dsl.{pipeline_channel.OneOf.__name__} cannot be used inside of dsl.{Collected.__name__}.' 
+ ) diff --git a/sdk/python/kfp/dsl/pipeline_channel.py b/sdk/python/kfp/dsl/pipeline_channel.py index 4841928bbf..6adb52525c 100644 --- a/sdk/python/kfp/dsl/pipeline_channel.py +++ b/sdk/python/kfp/dsl/pipeline_channel.py @@ -102,12 +102,31 @@ def __init__( self.task_name = task_name or None from kfp.dsl import pipeline_context - default_pipeline = pipeline_context.Pipeline.get_default_pipeline() - if self.task_name is not None and default_pipeline is not None and default_pipeline.tasks: - self.task = pipeline_context.Pipeline.get_default_pipeline().tasks[ - self.task_name] - else: - self.task = None + self.pipeline = pipeline_context.Pipeline.get_default_pipeline() + + @property + def task(self) -> Union['PipelineTask', 'TasksGroup']: + # TODO: migrate Collected to OneOfMixin style implementation, + # then move this out of a property + if self.task_name is None or self.pipeline is None: + return None + + if self.task_name in self.pipeline.tasks: + return self.pipeline.tasks[self.task_name] + + from kfp.compiler import compiler_utils + all_groups = compiler_utils.get_all_groups(self.pipeline.groups[0]) + # pipeline hasn't exited, so it doesn't have a name + all_groups_no_pipeline = all_groups[1:] + group_name_to_group = { + group.name: group for group in all_groups_no_pipeline + } + if self.task_name in group_name_to_group: + return group_name_to_group[self.task_name] + + raise ValueError( + f"PipelineChannel task name '{self.task_name}' not found in pipeline." + ) @property def full_name(self) -> str: @@ -265,6 +284,228 @@ def __init__( ) +class OneOfMixin(PipelineChannel): + """Shared functionality for OneOfParameter and OneOfAritfact.""" + + def _set_condition_branches_group( + self, channels: List[Union[PipelineParameterChannel, + PipelineArtifactChannel]] + ) -> None: + # avoid circular import + from kfp.dsl import tasks_group + + # .condition_branches_group could really be collapsed into just .task, + # but we prefer keeping both for clarity in the rest of the compiler + # code. When the code is logically related to a + # condition_branches_group, it aids understanding to reference this + # attribute name. When the code is trying to treat the OneOfMixin like + # a typical PipelineChannel, it aids to reference task. + self.condition_branches_group: tasks_group.ConditionBranches = channels[ + 0].task.parent_task_group.parent_task_group + + def _make_oneof_name(self) -> str: + # avoid circular imports + from kfp.compiler import compiler_utils + + # This is a different type of "injected channel". + # We know that this output will _always_ be a pipeline channel, so we + # set the pipeline-channel-- prefix immediately (here). + # In the downstream compiler logic, we get to treat this output like a + # normal task output. 
+ return compiler_utils.additional_input_name_for_pipeline_channel( + f'{self.condition_branches_group.name}-oneof-{self.condition_branches_group.get_oneof_id()}' + ) + + def _validate_channels( + self, + channels: List[Union[PipelineParameterChannel, + PipelineArtifactChannel]], + ): + self._validate_no_collected_channel(channels) + self._validate_no_oneof_channel(channels) + self._validate_no_mix_of_parameters_and_artifacts(channels) + self._validate_has_else_group(self.condition_branches_group) + + def _validate_no_collected_channel( + self, channels: List[Union[PipelineParameterChannel, + PipelineArtifactChannel]] + ) -> None: + # avoid circular imports + from kfp.dsl import for_loop + if any(isinstance(channel, for_loop.Collected) for channel in channels): + raise ValueError( + f'dsl.{for_loop.Collected.__name__} cannot be used inside of dsl.{OneOf.__name__}.' + ) + + def _validate_no_oneof_channel( + self, channels: List[Union[PipelineParameterChannel, + PipelineArtifactChannel]] + ) -> None: + if any(isinstance(channel, OneOfMixin) for channel in channels): + raise ValueError( + f'dsl.{OneOf.__name__} cannot be used inside of another dsl.{OneOf.__name__}.' + ) + + def _validate_no_mix_of_parameters_and_artifacts( + self, channels: List[Union[PipelineParameterChannel, + PipelineArtifactChannel]] + ) -> None: + + first_channel = channels[0] + if isinstance(first_channel, PipelineParameterChannel): + first_channel_type = PipelineParameterChannel + else: + first_channel_type = PipelineArtifactChannel + + for channel in channels: + # if not all channels match the first channel's type, then there + # is a mix of parameter and artifact channels + if not isinstance(channel, first_channel_type): + raise TypeError( + f'Task outputs passed to dsl.{OneOf.__name__} must be the same type. Found a mix of parameters and artifacts passed to dsl.{OneOf.__name__}.' + ) + + def _validate_has_else_group( + self, + parent_group: 'tasks_group.ConditionBranches', + ) -> None: + # avoid circular imports + from kfp.dsl import tasks_group + if not isinstance(parent_group.groups[-1], tasks_group.Else): + raise ValueError( + f'dsl.{OneOf.__name__} must include an output from a task in a dsl.{tasks_group.Else.__name__} group to ensure at least one output is available at runtime.' 
+ ) + + def __str__(self): + # supporting oneof in f-strings is technically feasible, but would + # require somehow encoding all of the oneof channels into the + # f-string + # another way to do this would be to maintain a pipeline-level + # map of PipelineChannels and encode a lookup key in the f-string + # the combination of OneOf and an f-string is not common, so prefer + # deferring implementation + raise NotImplementedError( + f'dsl.{OneOf.__name__} does not support string interpolation.') + + @property + def pattern(self) -> str: + # override self.pattern to avoid calling __str__, allowing us to block f-strings for now + # this makes it OneOfMixin hashable for use in sets/dicts + task_name = self.task_name or '' + name = self.name + channel_type = self.channel_type or '' + if isinstance(channel_type, dict): + channel_type = json.dumps(channel_type) + return _PIPELINE_CHANNEL_PLACEHOLDER_TEMPLATE % (task_name, name, + channel_type) + + +# splitting out OneOf into subclasses significantly decreases the amount of +# branching in downstream compiler logic, since the +# isinstance(, PipelineParameterChannel/PipelineArtifactChannel) +# checks continue to behave in desirable ways +class OneOfParameter(PipelineParameterChannel, OneOfMixin): + """OneOf that results in a parameter channel for all downstream tasks.""" + + def __init__(self, channels: List[PipelineParameterChannel]) -> None: + self.channels = channels + self._set_condition_branches_group(channels) + super().__init__( + name=self._make_oneof_name(), + channel_type=channels[0].channel_type, + task_name=None, + ) + self.task_name = self.condition_branches_group.name + self.channels = channels + self._validate_channels(channels) + self._validate_same_kfp_type(channels) + + def _validate_same_kfp_type( + self, channels: List[PipelineParameterChannel]) -> None: + expected_type = channels[0].channel_type + for i, channel in enumerate(channels[1:], start=1): + if channel.channel_type != expected_type: + raise TypeError( + f'Task outputs passed to dsl.{OneOf.__name__} must be the same type. Got two channels with different types: {expected_type} at index 0 and {channel.channel_type} at index {i}.' + ) + + +class OneOfArtifact(PipelineArtifactChannel, OneOfMixin): + """OneOf that results in an artifact channel for all downstream tasks.""" + + def __init__(self, channels: List[PipelineArtifactChannel]) -> None: + self.channels = channels + self._set_condition_branches_group(channels) + super().__init__( + name=self._make_oneof_name(), + channel_type=channels[0].channel_type, + task_name=None, + is_artifact_list=channels[0].is_artifact_list, + ) + self.task_name = self.condition_branches_group.name + self._validate_channels(channels) + self._validate_same_kfp_type(channels) + + def _validate_same_kfp_type( + self, channels: List[PipelineArtifactChannel]) -> None: + # Unlike for component interface type checking where anything is + # passable to Artifact, we should require the output artifacts for a + # OneOf to be the same. This reduces the complexity/ambiguity for the + # user of the actual type checking logic. What should the type checking + # behavior be if the OneOf surfaces an Artifact and a Dataset? We can + # always loosen backward compatibly in the future, so prefer starting + # conservatively. 
+ expected_type = channels[0].channel_type + expected_is_list = channels[0].is_artifact_list + for i, channel in enumerate(channels[1:], start=1): + if channel.channel_type != expected_type or channel.is_artifact_list != expected_is_list: + raise TypeError( + f'Task outputs passed to dsl.{OneOf.__name__} must be the same type. Got two channels with different types: {expected_type} at index 0 and {channel.channel_type} at index {i}.' + ) + + +class OneOf: + """For collecting mutually exclusive outputs from conditional branches into + a single pipeline channel. + + Args: + channels: The channels to collect into a OneOf. Must be of the same type. + + Example: + :: + + @dsl.pipeline + def flip_coin_pipeline() -> str: + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + print_task_1 = print_and_return(text='Got heads!') + with dsl.Else(): + print_task_2 = print_and_return(text='Got tails!') + + # use the output from the branch that gets executed + oneof = dsl.OneOf(print_task_1.output, print_task_2.output) + + # consume it + print_and_return(text=oneof) + + # return it + return oneof + """ + + def __new__( + cls, *channels: Union[PipelineParameterChannel, PipelineArtifactChannel] + ) -> Union[OneOfParameter, OneOfArtifact]: + first_channel = channels[0] + if isinstance(first_channel, PipelineParameterChannel): + return OneOfParameter(channels=list(channels)) + elif isinstance(first_channel, PipelineArtifactChannel): + return OneOfArtifact(channels=list(channels)) + else: + raise ValueError( + f'Got unknown input to dsl.{OneOf.__name__} with type {type(first_channel)}.' + ) + + def create_pipeline_channel( name: str, channel_type: Union[str, Dict], diff --git a/sdk/python/kfp/dsl/pipeline_channel_test.py b/sdk/python/kfp/dsl/pipeline_channel_test.py index 4de0e84a25..b0b72be083 100644 --- a/sdk/python/kfp/dsl/pipeline_channel_test.py +++ b/sdk/python/kfp/dsl/pipeline_channel_test.py @@ -13,10 +13,14 @@ # limitations under the License. 
"""Tests for kfp.dsl.pipeline_channel.""" +from typing import List import unittest from absl.testing import parameterized from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset +from kfp.dsl import Output from kfp.dsl import pipeline_channel @@ -156,19 +160,229 @@ def test_extract_pipeline_channels(self): self.assertListEqual([p1, p2, p3], params) +@dsl.component +def string_comp() -> str: + return 'text' + + +@dsl.component +def list_comp() -> List[str]: + return ['text'] + + +@dsl.component +def roll_three_sided_die() -> str: + import random + val = random.randint(0, 2) + + if val == 0: + return 'heads' + elif val == 1: + return 'tails' + else: + return 'draw' + + +@dsl.component +def print_and_return(text: str) -> str: + print(text) + return text + + class TestCanAccessTask(unittest.TestCase): def test(self): - @dsl.component - def comp() -> str: - return 'text' - @dsl.pipeline def my_pipeline(): - op1 = comp() + op1 = string_comp() self.assertEqual(op1.output.task, op1) +class TestOneOfAndCollectedNotComposable(unittest.TestCase): + + def test_collected_in_oneof(self): + with self.assertRaisesRegex( + ValueError, + 'dsl.Collected cannot be used inside of dsl.OneOf.'): + + @dsl.pipeline + def my_pipeline(x: str): + with dsl.If(x == 'foo'): + t1 = list_comp() + with dsl.Else(): + with dsl.ParallelFor([1, 2, 3]): + t2 = string_comp() + collected = dsl.Collected(t2.output) + # test cases doesn't return or pass to task to ensure validation is in the OneOf + dsl.OneOf(t1.output, collected) + + def test_oneof_in_collected(self): + with self.assertRaisesRegex( + ValueError, + 'dsl.OneOf cannot be used inside of dsl.Collected.'): + + @dsl.pipeline + def my_pipeline(x: str): + with dsl.ParallelFor([1, 2, 3]): + with dsl.If(x == 'foo'): + t1 = string_comp() + with dsl.Else(): + t2 = string_comp() + oneof = dsl.OneOf(t1.output, t2.output) + # test cases doesn't return or pass to task to ensure validation is in the Collected constructor + dsl.Collected(oneof) + + +class TestOneOfRequiresSameType(unittest.TestCase): + + def test_same_parameter_type(self): + + @dsl.pipeline + def my_pipeline(x: str) -> str: + with dsl.If(x == 'foo'): + t1 = string_comp() + with dsl.Else(): + t2 = string_comp() + return dsl.OneOf(t1.output, t2.output) + + self.assertEqual( + my_pipeline.pipeline_spec.components['comp-condition-branches-1'] + .output_definitions.parameters[ + 'pipelinechannel--condition-branches-1-oneof-1'].parameter_type, + 3) + + def test_different_parameter_types(self): + + with self.assertRaisesRegex( + TypeError, + r'Task outputs passed to dsl\.OneOf must be the same type. Got two channels with different types: String at index 0 and typing\.List\[str\] at index 1\.' 
+ ): + + @dsl.pipeline + def my_pipeline(x: str) -> str: + with dsl.If(x == 'foo'): + t1 = string_comp() + with dsl.Else(): + t2 = list_comp() + return dsl.OneOf(t1.output, t2.output) + + def test_same_artifact_type(self): + + @dsl.component + def artifact_comp(out: Output[Artifact]): + with open(out.path, 'w') as f: + f.write('foo') + + @dsl.pipeline + def my_pipeline(x: str) -> Artifact: + with dsl.If(x == 'foo'): + t1 = artifact_comp() + with dsl.Else(): + t2 = artifact_comp() + return dsl.OneOf(t1.outputs['out'], t2.outputs['out']) + + self.assertEqual( + my_pipeline.pipeline_spec.components['comp-condition-branches-1'] + .output_definitions + .artifacts['pipelinechannel--condition-branches-1-oneof-1'] + .artifact_type.schema_title, + 'system.Artifact', + ) + self.assertEqual( + my_pipeline.pipeline_spec.components['comp-condition-branches-1'] + .output_definitions + .artifacts['pipelinechannel--condition-branches-1-oneof-1'] + .artifact_type.schema_version, + '0.0.1', + ) + + def test_different_artifact_type(self): + + @dsl.component + def artifact_comp_one(out: Output[Artifact]): + with open(out.path, 'w') as f: + f.write('foo') + + @dsl.component + def artifact_comp_two(out: Output[Dataset]): + with open(out.path, 'w') as f: + f.write('foo') + + with self.assertRaisesRegex( + TypeError, + r'Task outputs passed to dsl\.OneOf must be the same type. Got two channels with different types: system.Artifact@0.0.1 at index 0 and system.Dataset@0.0.1 at index 1\.' + ): + + @dsl.pipeline + def my_pipeline(x: str) -> Artifact: + with dsl.If(x == 'foo'): + t1 = artifact_comp_one() + with dsl.Else(): + t2 = artifact_comp_two() + return dsl.OneOf(t1.outputs['out'], t2.outputs['out']) + + def test_different_artifact_type_due_to_list(self): + # if we ever support list of artifact outputs from components, this test will fail, which is good because it needs to be changed + + with self.assertRaisesRegex( + ValueError, + r"Output lists of artifacts are only supported for pipelines\. Got output list of artifacts for output parameter 'out' of component 'artifact-comp-two'\." + ): + + @dsl.component + def artifact_comp_one(out: Output[Artifact]): + with open(out.path, 'w') as f: + f.write('foo') + + @dsl.component + def artifact_comp_two(out: Output[List[Artifact]]): + with open(out.path, 'w') as f: + f.write('foo') + + @dsl.pipeline + def my_pipeline(x: str) -> Artifact: + with dsl.If(x == 'foo'): + t1 = artifact_comp_one() + with dsl.Else(): + t2 = artifact_comp_two() + return dsl.OneOf(t1.outputs['out'], t2.outputs['out']) + + def test_parameters_mixed_with_artifacts(self): + + @dsl.component + def artifact_comp(out: Output[Artifact]): + with open(out.path, 'w') as f: + f.write('foo') + + with self.assertRaisesRegex( + TypeError, + r'Task outputs passed to dsl\.OneOf must be the same type\. Found a mix of parameters and artifacts passed to dsl\.OneOf\.' + ): + + @dsl.pipeline + def my_pipeline(x: str) -> str: + with dsl.If(x == 'foo'): + t1 = artifact_comp() + with dsl.Else(): + t2 = string_comp() + return dsl.OneOf(t1.output, t2.output) + + def test_no_else_raises(self): + with self.assertRaisesRegex( + ValueError, + r'dsl\.OneOf must include an output from a task in a dsl\.Else group to ensure at least one output is available at runtime\.' 
+ ): + + @dsl.pipeline + def roll_die_pipeline(): + flip_coin_task = roll_three_sided_die() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = print_and_return(text='Got heads!') + with dsl.Elif(flip_coin_task.output == 'tails'): + t2 = print_and_return(text='Got tails!') + print_and_return(text=dsl.OneOf(t1.output, t2.output)) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/pipeline_context.py b/sdk/python/kfp/dsl/pipeline_context.py index 72ada197ae..4881bc5680 100644 --- a/sdk/python/kfp/dsl/pipeline_context.py +++ b/sdk/python/kfp/dsl/pipeline_context.py @@ -182,6 +182,7 @@ def push_tasks_group(self, group: 'tasks_group.TasksGroup'): group: A TasksGroup. Typically it is one of ExitHandler, Condition, and ParallelFor. """ + group.parent_task_group = self.get_parent_group() self.groups[-1].groups.append(group) self.groups.append(group) @@ -195,6 +196,9 @@ def get_last_tasks_group(self) -> Optional['tasks_group.TasksGroup']: groups = self.groups[-1].groups return groups[-1] if groups else None + def get_parent_group(self) -> 'tasks_group.TasksGroup': + return self.groups[-1] + def remove_task_from_groups(self, task: pipeline_task.PipelineTask): """Removes a task from the pipeline. diff --git a/sdk/python/kfp/dsl/tasks_group.py b/sdk/python/kfp/dsl/tasks_group.py index 2d4bb8d693..3f0f758bbd 100644 --- a/sdk/python/kfp/dsl/tasks_group.py +++ b/sdk/python/kfp/dsl/tasks_group.py @@ -68,6 +68,8 @@ def __init__( self.display_name = name self.dependencies = [] self.is_root = is_root + # backref to parent, set when the pipeline is called in pipeline_context + self.parent_task_group: Optional[TasksGroup] = None def __enter__(self): if not pipeline_context.Pipeline.get_default_pipeline(): @@ -142,6 +144,7 @@ def __init__( class ConditionBranches(TasksGroup): + _oneof_id = 0 def __init__(self) -> None: super().__init__( @@ -150,6 +153,16 @@ def __init__(self) -> None: is_root=False, ) + def get_oneof_id(self) -> int: + """Incrementor for uniquely identifying a OneOf for the parent + ConditionBranches group. + + This is analogous to incrementing a unique identifier for tasks + groups belonging to a pipeline. 
+ """ + self._oneof_id += 1 + return self._oneof_id + class _ConditionBase(TasksGroup): """Parent class for condition control flow context managers (Condition, If, diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.py b/sdk/python/test_data/pipelines/if_elif_else_complex.py index 45efe58cd2..ea616d9bdf 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.py +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.py @@ -59,18 +59,24 @@ def lucky_number_pipeline(add_drumroll: bool = True, even_or_odd_task = is_even_or_odd(num=int_task.output) with dsl.If(even_or_odd_task.output == 'even'): - print_and_return(text='Got a low even number!') + t1 = print_and_return(text='Got a low even number!') with dsl.Else(): - print_and_return(text='Got a low odd number!') + t2 = print_and_return(text='Got a low odd number!') + + repeater_task = print_and_return( + text=dsl.OneOf(t1.output, t2.output)) with dsl.Elif(int_task.output > 5000): even_or_odd_task = is_even_or_odd(num=int_task.output) with dsl.If(even_or_odd_task.output == 'even'): - print_and_return(text='Got a high even number!') + t3 = print_and_return(text='Got a high even number!') with dsl.Else(): - print_and_return(text='Got a high odd number!') + t4 = print_and_return(text='Got a high odd number!') + + repeater_task = print_and_return( + text=dsl.OneOf(t3.output, t4.output)) with dsl.Else(): print_and_return( diff --git a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml index 9f14ee8b69..b1f5520ba1 100644 --- a/sdk/python/test_data/pipelines/if_elif_else_complex.yaml +++ b/sdk/python/test_data/pipelines/if_elif_else_complex.yaml @@ -7,46 +7,66 @@ components: comp-condition-11: dag: + outputs: + parameters: + pipelinechannel--print-and-return-5-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-5 tasks: - print-and-return-4: + print-and-return-5: cachingOptions: enableCache: true componentRef: - name: comp-print-and-return-4 + name: comp-print-and-return-5 inputs: parameters: text: runtimeValue: constant: Got a high even number! taskInfo: - name: print-and-return-4 + name: print-and-return-5 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-2-Output: parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-5-Output: + parameterType: STRING comp-condition-12: dag: + outputs: + parameters: + pipelinechannel--print-and-return-6-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-6 tasks: - print-and-return-5: + print-and-return-6: cachingOptions: enableCache: true componentRef: - name: comp-print-and-return-5 + name: comp-print-and-return-6 inputs: parameters: text: runtimeValue: constant: Got a high odd number! 
taskInfo: - name: print-and-return-5 + name: print-and-return-6 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-2-Output: parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-6-Output: + parameterType: STRING comp-condition-13: dag: tasks: @@ -64,11 +84,11 @@ components: triggerPolicy: condition: inputs.parameter_values['pipelinechannel--repeat_if_lucky_number'] == true - print-and-return-6: + print-and-return-8: cachingOptions: enableCache: true componentRef: - name: comp-print-and-return-6 + name: comp-print-and-return-8 inputs: parameters: text: @@ -76,7 +96,7 @@ components: constant: 'Announcing: Got the lucky number 5000! A one in 10,000 chance.' taskInfo: - name: print-and-return-6 + name: print-and-return-8 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: @@ -153,6 +173,12 @@ components: parameterType: NUMBER_INTEGER comp-condition-6: dag: + outputs: + parameters: + pipelinechannel--print-and-return-2-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-2 tasks: print-and-return-2: cachingOptions: @@ -172,8 +198,18 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-Output: parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-2-Output: + parameterType: STRING comp-condition-7: dag: + outputs: + parameters: + pipelinechannel--print-and-return-3-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-3 tasks: print-and-return-3: cachingOptions: @@ -193,6 +229,10 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-Output: parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-3-Output: + parameterType: STRING comp-condition-8: dag: tasks: @@ -222,6 +262,21 @@ components: componentInputParameter: pipelinechannel--int-0-to-9999-Output taskInfo: name: is-even-or-odd + print-and-return-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-4 + dependentTasks: + - condition-branches-5 + inputs: + parameters: + text: + taskOutputParameter: + outputParameterKey: pipelinechannel--condition-branches-5-oneof-1 + producerTask: condition-branches-5 + taskInfo: + name: print-and-return-4 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: @@ -255,12 +310,36 @@ components: componentInputParameter: pipelinechannel--int-0-to-9999-Output taskInfo: name: is-even-or-odd-2 + print-and-return-7: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-7 + dependentTasks: + - condition-branches-10 + inputs: + parameters: + text: + taskOutputParameter: + outputParameterKey: pipelinechannel--condition-branches-10-oneof-1 + producerTask: condition-branches-10 + taskInfo: + name: print-and-return-7 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: parameterType: NUMBER_INTEGER comp-condition-branches-10: dag: + outputs: + parameters: + pipelinechannel--condition-branches-10-oneof-1: + valueFromOneof: + parameterSelectors: + - outputParameterKey: pipelinechannel--print-and-return-5-Output + producerSubtask: condition-11 + - outputParameterKey: pipelinechannel--print-and-return-6-Output + producerSubtask: condition-12 tasks: condition-11: componentRef: @@ -296,6 +375,10 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-2-Output: parameterType: 
STRING + outputDefinitions: + parameters: + pipelinechannel--condition-branches-10-oneof-1: + parameterType: STRING comp-condition-branches-4: dag: tasks: @@ -347,6 +430,15 @@ components: parameterType: BOOLEAN comp-condition-branches-5: dag: + outputs: + parameters: + pipelinechannel--condition-branches-5-oneof-1: + valueFromOneof: + parameterSelectors: + - outputParameterKey: pipelinechannel--print-and-return-2-Output + producerSubtask: condition-6 + - outputParameterKey: pipelinechannel--print-and-return-3-Output + producerSubtask: condition-7 tasks: condition-6: componentRef: @@ -382,6 +474,10 @@ components: parameterType: NUMBER_INTEGER pipelinechannel--is-even-or-odd-Output: parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--condition-branches-5-oneof-1: + parameterType: STRING comp-for-loop-1: dag: outputs: @@ -443,11 +539,11 @@ components: comp-for-loop-16: dag: tasks: - print-and-return-7: + print-and-return-9: cachingOptions: enableCache: true componentRef: - name: comp-print-and-return-7 + name: comp-print-and-return-9 inputs: parameters: text: @@ -455,7 +551,7 @@ components: constant: 'Announcing again: Got the lucky number 5000! A one in 10,000 chance.' taskInfo: - name: print-and-return-7 + name: print-and-return-9 inputDefinitions: parameters: pipelinechannel--int-0-to-9999-Output: @@ -560,6 +656,26 @@ components: parameters: Output: parameterType: STRING + comp-print-and-return-8: + executorLabel: exec-print-and-return-8 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-9: + executorLabel: exec-print-and-return-9 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING comp-print-ints: executorLabel: exec-print-ints inputDefinitions: @@ -849,6 +965,64 @@ deploymentSpec: - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-8: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-9: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" diff --git a/sdk/python/test_data/pipelines/if_elif_else.py b/sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.py similarity index 66% rename from sdk/python/test_data/pipelines/if_elif_else.py rename to sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.py index fdaa3428f6..7e0dc1b57f 100644 --- a/sdk/python/test_data/pipelines/if_elif_else.py +++ b/sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.py @@ -34,18 +34,32 @@ def print_and_return(text: str) -> str: return text +@dsl.component +def special_print_and_return(text: str, output_key: dsl.OutputPath(str)): + print('Got the special state:', text) + with open(output_key, 'w') as f: + f.write(text) + + @dsl.pipeline -def roll_die_pipeline(): +def roll_die_pipeline() -> str: flip_coin_task = flip_three_sided_die() with dsl.If(flip_coin_task.output == 'heads'): - print_and_return(text='Got heads!') + t1 = print_and_return(text='Got heads!') with dsl.Elif(flip_coin_task.output == 'tails'): - print_and_return(text='Got tails!') + t2 = print_and_return(text='Got tails!') with dsl.Else(): - print_and_return(text='Draw!') + t3 = special_print_and_return(text='Draw!') + return dsl.OneOf(t1.output, t2.output, t3.outputs['output_key']) + + +@dsl.pipeline +def outer_pipeline() -> str: + flip_coin_task = roll_die_pipeline() + return print_and_return(text=flip_coin_task.output).output if __name__ == '__main__': compiler.Compiler().compile( - pipeline_func=roll_die_pipeline, + pipeline_func=outer_pipeline, package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.yaml b/sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.yaml new file mode 100644 index 0000000000..0915994760 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_elif_else_with_oneof_parameters.yaml @@ -0,0 +1,420 @@ +# PIPELINE DEFINITION +# Name: outer-pipeline +# Outputs: +# Output: str +components: + comp-condition-2: + dag: + outputs: + parameters: + pipelinechannel--print-and-return-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return + tasks: + print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return + inputs: + parameters: + text: + runtimeValue: + constant: Got heads! + taskInfo: + name: print-and-return + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-Output: + parameterType: STRING + comp-condition-3: + dag: + outputs: + parameters: + pipelinechannel--print-and-return-2-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-2 + tasks: + print-and-return-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-2 + inputs: + parameters: + text: + runtimeValue: + constant: Got tails! 
+ taskInfo: + name: print-and-return-2 + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-2-Output: + parameterType: STRING + comp-condition-4: + dag: + outputs: + parameters: + pipelinechannel--special-print-and-return-output_key: + valueFromParameter: + outputParameterKey: output_key + producerSubtask: special-print-and-return + tasks: + special-print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-special-print-and-return + inputs: + parameters: + text: + runtimeValue: + constant: Draw! + taskInfo: + name: special-print-and-return + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--special-print-and-return-output_key: + parameterType: STRING + comp-condition-branches-1: + dag: + outputs: + parameters: + pipelinechannel--condition-branches-1-oneof-1: + valueFromOneof: + parameterSelectors: + - outputParameterKey: pipelinechannel--print-and-return-Output + producerSubtask: condition-2 + - outputParameterKey: pipelinechannel--print-and-return-2-Output + producerSubtask: condition-3 + - outputParameterKey: pipelinechannel--special-print-and-return-output_key + producerSubtask: condition-4 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-2 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-three-sided-die-Output'] + == 'heads' + condition-3: + componentRef: + name: comp-condition-3 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-3 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails''' + condition-4: + componentRef: + name: comp-condition-4 + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + componentInputParameter: pipelinechannel--flip-three-sided-die-Output + taskInfo: + name: condition-4 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''heads'') && !(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] + == ''tails'')' + inputDefinitions: + parameters: + pipelinechannel--flip-three-sided-die-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--condition-branches-1-oneof-1: + parameterType: STRING + comp-flip-three-sided-die: + executorLabel: exec-flip-three-sided-die + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return: + executorLabel: exec-print-and-return + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-2: + executorLabel: exec-print-and-return-2 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-print-and-return-3: + executorLabel: exec-print-and-return-3 + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + 
Output: + parameterType: STRING + comp-roll-die-pipeline: + dag: + outputs: + parameters: + Output: + valueFromParameter: + outputParameterKey: pipelinechannel--condition-branches-1-oneof-1 + producerSubtask: condition-branches-1 + tasks: + condition-branches-1: + componentRef: + name: comp-condition-branches-1 + dependentTasks: + - flip-three-sided-die + inputs: + parameters: + pipelinechannel--flip-three-sided-die-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-three-sided-die + taskInfo: + name: condition-branches-1 + flip-three-sided-die: + cachingOptions: + enableCache: true + componentRef: + name: comp-flip-three-sided-die + taskInfo: + name: flip-three-sided-die + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-special-print-and-return: + executorLabel: exec-special-print-and-return + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + parameters: + output_key: + parameterType: STRING +deploymentSpec: + executors: + exec-flip-three-sided-die: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - flip_three_sided_die + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef flip_three_sided_die() -> str:\n import random\n val =\ + \ random.randint(0, 2)\n\n if val == 0:\n return 'heads'\n \ + \ elif val == 1:\n return 'tails'\n else:\n return 'draw'\n\ + \n" + image: python:3.7 + exec-print-and-return: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-print-and-return-3: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_and_return + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ + \ text\n\n" + image: python:3.7 + exec-special-print-and-return: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - special_print_and_return + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef special_print_and_return(text: str, output_key: dsl.OutputPath(str)):\n\ + \ print('Got the special state:', text)\n with open(output_key, 'w')\ + \ as f:\n f.write(text)\n\n" + image: python:3.7 +pipelineInfo: + name: outer-pipeline +root: + dag: + outputs: + parameters: + Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return + tasks: + print-and-return: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-3 + dependentTasks: + - roll-die-pipeline + inputs: + parameters: + text: + taskOutputParameter: + outputParameterKey: Output + producerTask: roll-die-pipeline + taskInfo: + name: print-and-return + roll-die-pipeline: + cachingOptions: + enableCache: true + componentRef: + name: comp-roll-die-pipeline + taskInfo: + name: roll-die-pipeline + outputDefinitions: + parameters: + Output: + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/if_else.yaml b/sdk/python/test_data/pipelines/if_else.yaml deleted file mode 100644 index bdd9a8d0cb..0000000000 --- a/sdk/python/test_data/pipelines/if_else.yaml +++ /dev/null @@ -1,214 +0,0 @@ -# PIPELINE DEFINITION -# Name: flip-coin-pipeline -components: - comp-condition-2: - dag: - tasks: - print-and-return: - cachingOptions: - enableCache: true - componentRef: - name: comp-print-and-return - inputs: - parameters: - text: - runtimeValue: - constant: Got heads! - taskInfo: - name: print-and-return - inputDefinitions: - parameters: - pipelinechannel--flip-coin-Output: - parameterType: STRING - comp-condition-3: - dag: - tasks: - print-and-return-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-print-and-return-2 - inputs: - parameters: - text: - runtimeValue: - constant: Got tails! 
- taskInfo: - name: print-and-return-2 - inputDefinitions: - parameters: - pipelinechannel--flip-coin-Output: - parameterType: STRING - comp-condition-branches-1: - dag: - tasks: - condition-2: - componentRef: - name: comp-condition-2 - inputs: - parameters: - pipelinechannel--flip-coin-Output: - componentInputParameter: pipelinechannel--flip-coin-Output - taskInfo: - name: condition-2 - triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] - == 'heads' - condition-3: - componentRef: - name: comp-condition-3 - inputs: - parameters: - pipelinechannel--flip-coin-Output: - componentInputParameter: pipelinechannel--flip-coin-Output - taskInfo: - name: condition-3 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] - == ''heads'')' - inputDefinitions: - parameters: - pipelinechannel--flip-coin-Output: - parameterType: STRING - comp-flip-coin: - executorLabel: exec-flip-coin - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-print-and-return: - executorLabel: exec-print-and-return - inputDefinitions: - parameters: - text: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING - comp-print-and-return-2: - executorLabel: exec-print-and-return-2 - inputDefinitions: - parameters: - text: - parameterType: STRING - outputDefinitions: - parameters: - Output: - parameterType: STRING -deploymentSpec: - executors: - exec-flip-coin: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - flip_coin - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ - \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ - $0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef flip_coin() -> str:\n import random\n return 'heads' if\ - \ random.randint(0, 1) == 0 else 'tails'\n\n" - image: python:3.7 - exec-print-and-return: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - print_and_return - command: - - sh - - -c - - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ - \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ - $0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ - \ text\n\n" - image: python:3.7 - exec-print-and-return-2: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - print_and_return - command: - - sh - - -c - - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ - \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ - \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ - $0\" \"$@\"\n" - - sh - - -ec - - 'program_path=$(mktemp -d) - - - printf "%s" "$0" > "$program_path/ephemeral_component.py" - - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" - - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef print_and_return(text: str) -> str:\n print(text)\n return\ - \ text\n\n" - image: python:3.7 -pipelineInfo: - name: flip-coin-pipeline -root: - dag: - tasks: - condition-branches-1: - componentRef: - name: comp-condition-branches-1 - dependentTasks: - - flip-coin - inputs: - parameters: - pipelinechannel--flip-coin-Output: - taskOutputParameter: - outputParameterKey: Output - producerTask: flip-coin - taskInfo: - name: condition-branches-1 - flip-coin: - cachingOptions: - enableCache: true - componentRef: - name: comp-flip-coin - taskInfo: - name: flip-coin -schemaVersion: 2.1.0 -sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.py b/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.py new file mode 100644 index 0000000000..4dc549c173 --- /dev/null +++ b/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.py @@ -0,0 +1,60 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Input +from kfp.dsl import Output + + +@dsl.component +def flip_coin() -> str: + import random + return 'heads' if random.randint(0, 1) == 0 else 'tails' + + +@dsl.component +def param_to_artifact(val: str, a: Output[Artifact]): + with open(a.path, 'w') as f: + f.write(val) + + +@dsl.component +def print_artifact(a: Input[Artifact]): + with open(a.path) as f: + print(f.read()) + + +@dsl.pipeline +def flip_coin_pipeline() -> Artifact: + flip_coin_task = flip_coin() + with dsl.If(flip_coin_task.output == 'heads'): + t1 = param_to_artifact(val=flip_coin_task.output) + with dsl.Else(): + t2 = param_to_artifact(val=flip_coin_task.output) + oneof = dsl.OneOf(t1.outputs['a'], t2.outputs['a']) + print_artifact(a=oneof) + return oneof + + +@dsl.pipeline +def outer_pipeline(): + flip_coin_task = flip_coin_pipeline() + print_artifact(a=flip_coin_task.output) + + +if __name__ == '__main__': + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=outer_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.yaml b/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.yaml new file mode 100644 index 0000000000..89e2a659fa --- /dev/null +++ b/sdk/python/test_data/pipelines/if_else_with_oneof_artifacts.yaml @@ -0,0 +1,380 @@ +# PIPELINE DEFINITION +# Name: outer-pipeline +components: + comp-condition-2: + dag: + outputs: + artifacts: + pipelinechannel--param-to-artifact-a: + artifactSelectors: + - outputArtifactKey: a + producerSubtask: param-to-artifact + tasks: + param-to-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-param-to-artifact + inputs: + parameters: + val: + componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: param-to-artifact + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING + outputDefinitions: + artifacts: + pipelinechannel--param-to-artifact-a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-condition-3: + dag: + outputs: + artifacts: + pipelinechannel--param-to-artifact-2-a: + artifactSelectors: + - outputArtifactKey: a + producerSubtask: param-to-artifact-2 + tasks: + param-to-artifact-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-param-to-artifact-2 + inputs: + parameters: + val: + componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: param-to-artifact-2 + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING + outputDefinitions: + artifacts: + pipelinechannel--param-to-artifact-2-a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-condition-branches-1: + dag: + outputs: + artifacts: + pipelinechannel--condition-branches-1-oneof-1: + artifactSelectors: + - outputArtifactKey: pipelinechannel--param-to-artifact-a + producerSubtask: condition-2 + - outputArtifactKey: pipelinechannel--param-to-artifact-2-a + producerSubtask: condition-3 + tasks: + condition-2: + componentRef: + name: comp-condition-2 + inputs: + parameters: + pipelinechannel--flip-coin-Output: + componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: condition-2 + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] + == 'heads' + condition-3: + componentRef: + name: comp-condition-3 + inputs: + parameters: + pipelinechannel--flip-coin-Output: + 
componentInputParameter: pipelinechannel--flip-coin-Output + taskInfo: + name: condition-3 + triggerPolicy: + condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] + == ''heads'')' + inputDefinitions: + parameters: + pipelinechannel--flip-coin-Output: + parameterType: STRING + outputDefinitions: + artifacts: + pipelinechannel--condition-branches-1-oneof-1: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-flip-coin: + executorLabel: exec-flip-coin + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-flip-coin-pipeline: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: pipelinechannel--condition-branches-1-oneof-1 + producerSubtask: condition-branches-1 + tasks: + condition-branches-1: + componentRef: + name: comp-condition-branches-1 + dependentTasks: + - flip-coin + inputs: + parameters: + pipelinechannel--flip-coin-Output: + taskOutputParameter: + outputParameterKey: Output + producerTask: flip-coin + taskInfo: + name: condition-branches-1 + flip-coin: + cachingOptions: + enableCache: true + componentRef: + name: comp-flip-coin + taskInfo: + name: flip-coin + print-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-artifact + dependentTasks: + - condition-branches-1 + inputs: + artifacts: + a: + taskOutputArtifact: + outputArtifactKey: pipelinechannel--condition-branches-1-oneof-1 + producerTask: condition-branches-1 + taskInfo: + name: print-artifact + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-param-to-artifact: + executorLabel: exec-param-to-artifact + inputDefinitions: + parameters: + val: + parameterType: STRING + outputDefinitions: + artifacts: + a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-param-to-artifact-2: + executorLabel: exec-param-to-artifact-2 + inputDefinitions: + parameters: + val: + parameterType: STRING + outputDefinitions: + artifacts: + a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-print-artifact: + executorLabel: exec-print-artifact + inputDefinitions: + artifacts: + a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-print-artifact-2: + executorLabel: exec-print-artifact-2 + inputDefinitions: + artifacts: + a: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-flip-coin: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - flip_coin + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef flip_coin() -> str:\n import random\n return 'heads' if\ + \ random.randint(0, 1) == 0 else 'tails'\n\n" + image: python:3.7 + exec-param-to-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - param_to_artifact + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef param_to_artifact(val: str, a: Output[Artifact]):\n with open(a.path,\ + \ 'w') as f:\n f.write(val)\n\n" + image: python:3.7 + exec-param-to-artifact-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - param_to_artifact + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef param_to_artifact(val: str, a: Output[Artifact]):\n with open(a.path,\ + \ 'w') as f:\n f.write(val)\n\n" + image: python:3.7 + exec-print-artifact: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_artifact + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_artifact(a: Input[Artifact]):\n with open(a.path) as\ + \ f:\n print(f.read())\n\n" + image: python:3.7 + exec-print-artifact-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - print_artifact + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef print_artifact(a: Input[Artifact]):\n with open(a.path) as\ + \ f:\n print(f.read())\n\n" + image: python:3.7 +pipelineInfo: + name: outer-pipeline +root: + dag: + tasks: + flip-coin-pipeline: + cachingOptions: + enableCache: true + componentRef: + name: comp-flip-coin-pipeline + taskInfo: + name: flip-coin-pipeline + print-artifact: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-artifact-2 + dependentTasks: + - flip-coin-pipeline + inputs: + artifacts: + a: + taskOutputArtifact: + outputArtifactKey: Output + producerTask: flip-coin-pipeline + taskInfo: + name: print-artifact +schemaVersion: 2.1.0 +sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/pipelines/if_else.py b/sdk/python/test_data/pipelines/if_else_with_oneof_parameters.py similarity index 79% rename from sdk/python/test_data/pipelines/if_else.py rename to sdk/python/test_data/pipelines/if_else_with_oneof_parameters.py index 1da8a074ac..05f7f93403 100644 --- a/sdk/python/test_data/pipelines/if_else.py +++ b/sdk/python/test_data/pipelines/if_else_with_oneof_parameters.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from kfp import compiler from kfp import dsl @@ -28,15 +27,19 @@ def print_and_return(text: str) -> str: @dsl.pipeline -def flip_coin_pipeline(): +def flip_coin_pipeline() -> str: flip_coin_task = flip_coin() with dsl.If(flip_coin_task.output == 'heads'): - print_and_return(text='Got heads!') + print_task_1 = print_and_return(text='Got heads!') with dsl.Else(): - print_and_return(text='Got tails!') + print_task_2 = print_and_return(text='Got tails!') + x = dsl.OneOf(print_task_1.output, print_task_2.output) + print_and_return(text=x) + return x if __name__ == '__main__': + from kfp import compiler compiler.Compiler().compile( pipeline_func=flip_coin_pipeline, package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/if_elif_else.yaml b/sdk/python/test_data/pipelines/if_else_with_oneof_parameters.yaml similarity index 72% rename from sdk/python/test_data/pipelines/if_elif_else.yaml rename to sdk/python/test_data/pipelines/if_else_with_oneof_parameters.yaml index 3887ce09a9..873288dd7e 100644 --- a/sdk/python/test_data/pipelines/if_elif_else.yaml +++ b/sdk/python/test_data/pipelines/if_else_with_oneof_parameters.yaml @@ -1,8 +1,16 @@ # PIPELINE DEFINITION -# Name: roll-die-pipeline +# Name: flip-coin-pipeline +# Outputs: +# Output: str components: comp-condition-2: dag: + outputs: + parameters: + pipelinechannel--print-and-return-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return tasks: print-and-return: cachingOptions: @@ -18,10 +26,20 @@ components: name: print-and-return inputDefinitions: parameters: - pipelinechannel--flip-three-sided-die-Output: + pipelinechannel--flip-coin-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--print-and-return-Output: parameterType: STRING comp-condition-3: dag: + outputs: + parameters: + pipelinechannel--print-and-return-2-Output: + valueFromParameter: + outputParameterKey: Output + producerSubtask: print-and-return-2 tasks: print-and-return-2: cachingOptions: @@ -37,74 +55,58 @@ components: name: print-and-return-2 inputDefinitions: parameters: - pipelinechannel--flip-three-sided-die-Output: + pipelinechannel--flip-coin-Output: parameterType: STRING - comp-condition-4: - dag: - tasks: - print-and-return-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-print-and-return-3 - inputs: - parameters: - text: - runtimeValue: - constant: Draw! 
- taskInfo: - name: print-and-return-3 - inputDefinitions: + outputDefinitions: parameters: - pipelinechannel--flip-three-sided-die-Output: + pipelinechannel--print-and-return-2-Output: parameterType: STRING comp-condition-branches-1: dag: + outputs: + parameters: + pipelinechannel--condition-branches-1-oneof-1: + valueFromOneof: + parameterSelectors: + - outputParameterKey: pipelinechannel--print-and-return-Output + producerSubtask: condition-2 + - outputParameterKey: pipelinechannel--print-and-return-2-Output + producerSubtask: condition-3 tasks: condition-2: componentRef: name: comp-condition-2 inputs: parameters: - pipelinechannel--flip-three-sided-die-Output: - componentInputParameter: pipelinechannel--flip-three-sided-die-Output + pipelinechannel--flip-coin-Output: + componentInputParameter: pipelinechannel--flip-coin-Output taskInfo: name: condition-2 triggerPolicy: - condition: inputs.parameter_values['pipelinechannel--flip-three-sided-die-Output'] + condition: inputs.parameter_values['pipelinechannel--flip-coin-Output'] == 'heads' condition-3: componentRef: name: comp-condition-3 inputs: parameters: - pipelinechannel--flip-three-sided-die-Output: - componentInputParameter: pipelinechannel--flip-three-sided-die-Output + pipelinechannel--flip-coin-Output: + componentInputParameter: pipelinechannel--flip-coin-Output taskInfo: name: condition-3 triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''heads'') && inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''tails''' - condition-4: - componentRef: - name: comp-condition-4 - inputs: - parameters: - pipelinechannel--flip-three-sided-die-Output: - componentInputParameter: pipelinechannel--flip-three-sided-die-Output - taskInfo: - name: condition-4 - triggerPolicy: - condition: '!(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''heads'') && !(inputs.parameter_values[''pipelinechannel--flip-three-sided-die-Output''] - == ''tails'')' + condition: '!(inputs.parameter_values[''pipelinechannel--flip-coin-Output''] + == ''heads'')' inputDefinitions: parameters: - pipelinechannel--flip-three-sided-die-Output: + pipelinechannel--flip-coin-Output: + parameterType: STRING + outputDefinitions: + parameters: + pipelinechannel--condition-branches-1-oneof-1: parameterType: STRING - comp-flip-three-sided-die: - executorLabel: exec-flip-three-sided-die + comp-flip-coin: + executorLabel: exec-flip-coin outputDefinitions: parameters: Output: @@ -141,13 +143,13 @@ components: parameterType: STRING deploymentSpec: executors: - exec-flip-three-sided-die: + exec-flip-coin: container: args: - --executor_input - '{{$}}' - --function_to_execute - - flip_three_sided_die + - flip_coin command: - sh - -c @@ -167,10 +169,8 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef flip_three_sided_die() -> str:\n import random\n val =\ - \ random.randint(0, 2)\n\n if val == 0:\n return 'heads'\n \ - \ elif val == 1:\n return 'tails'\n else:\n return 'draw'\n\ - \n" + \ *\n\ndef flip_coin() -> str:\n import random\n return 'heads' if\ + \ random.randint(0, 1) == 0 else 'tails'\n\n" image: python:3.7 exec-print-and-return: container: @@ -260,29 +260,54 @@ deploymentSpec: \ text\n\n" image: python:3.7 pipelineInfo: - name: roll-die-pipeline + name: flip-coin-pipeline root: dag: + outputs: + parameters: + Output: + valueFromParameter: + outputParameterKey: 
pipelinechannel--condition-branches-1-oneof-1 + producerSubtask: condition-branches-1 tasks: condition-branches-1: componentRef: name: comp-condition-branches-1 dependentTasks: - - flip-three-sided-die + - flip-coin inputs: parameters: - pipelinechannel--flip-three-sided-die-Output: + pipelinechannel--flip-coin-Output: taskOutputParameter: outputParameterKey: Output - producerTask: flip-three-sided-die + producerTask: flip-coin taskInfo: name: condition-branches-1 - flip-three-sided-die: + flip-coin: cachingOptions: enableCache: true componentRef: - name: comp-flip-three-sided-die + name: comp-flip-coin + taskInfo: + name: flip-coin + print-and-return-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-print-and-return-3 + dependentTasks: + - condition-branches-1 + inputs: + parameters: + text: + taskOutputParameter: + outputParameterKey: pipelinechannel--condition-branches-1-oneof-1 + producerTask: condition-branches-1 taskInfo: - name: flip-three-sided-die + name: print-and-return-3 + outputDefinitions: + parameters: + Output: + parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.3.0 diff --git a/sdk/python/test_data/test_data_config.yaml b/sdk/python/test_data/test_data_config.yaml index 42e12c7c79..d64d7a1aea 100644 --- a/sdk/python/test_data/test_data_config.yaml +++ b/sdk/python/test_data/test_data_config.yaml @@ -168,15 +168,18 @@ pipelines: - module: pipeline_with_metadata_fields name: dataset_concatenator execute: false - - module: if_else - name: flip_coin_pipeline - execute: false - - module: if_elif_else - name: roll_die_pipeline + - module: if_else_with_oneof_artifacts + name: outer_pipeline execute: false - module: if_elif_else_complex name: lucky_number_pipeline execute: false + - module: if_else_with_oneof_parameters + name: flip_coin_pipeline + execute: false + - module: if_elif_else_with_oneof_parameters + name: outer_pipeline + execute: false components: test_data_dir: sdk/python/test_data/components read: true From faba9223ee846d459f7bb497a6faa3c153dcf430 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 18 Oct 2023 09:24:47 -0700 Subject: [PATCH 220/253] feat(components): migrate `DataflowFlexTemplateJobOp` to GA namespace (now `v1.dataflow.DataflowFlexTemplateJobOp`) PiperOrigin-RevId: 574498920 --- components/google-cloud/RELEASE.md | 1 + .../preview/dataflow/__init__.py | 2 +- .../google_cloud_pipeline_components/v1/dataflow/__init__.py | 2 ++ .../{preview => v1}/dataflow/flex_template/__init__.py | 0 .../{preview => v1}/dataflow/flex_template/component.py | 0 5 files changed, 4 insertions(+), 1 deletion(-) rename components/google-cloud/google_cloud_pipeline_components/{preview => v1}/dataflow/flex_template/__init__.py (100%) rename components/google-cloud/google_cloud_pipeline_components/{preview => v1}/dataflow/flex_template/component.py (100%) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index d1a672a587..d33cf8a9e4 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -5,6 +5,7 @@ * Set display names for `preview.llm` pipelines. * Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline. * Support `service_account` in `ModelBatchPredictOp`. +* Release `DataflowFlexTemplateJobOp` to GA namespace (`v1.dataflow.DataflowFlexTemplateJobOp`). ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. 
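For readers following the namespace move in this commit, here is a minimal import sketch. It is not part of the patch; it only illustrates the GA versus preview import paths implied by the `__init__.py` changes below, it omits the component's actual arguments, and it assumes a google-cloud-pipeline-components build that includes this change. The name `PreviewAlias` is introduced here purely for illustration.

    # Minimal sketch (assumes a GCPC release containing this commit is installed).
    # GA path introduced by this commit:
    from google_cloud_pipeline_components.v1.dataflow import DataflowFlexTemplateJobOp
    # The preview path keeps working because preview.dataflow now re-exports the v1 symbol:
    from google_cloud_pipeline_components.preview.dataflow import DataflowFlexTemplateJobOp as PreviewAlias

    # Both names resolve to the same component; new pipelines should prefer the v1 import.
    assert DataflowFlexTemplateJobOp is PreviewAlias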
diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/__init__.py b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/__init__.py index 90f0263272..7a63f52c6c 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. """Dataflow preview components.""" -from google_cloud_pipeline_components.preview.dataflow.flex_template.component import dataflow_flex_template as DataflowFlexTemplateJobOp +from google_cloud_pipeline_components.v1.dataflow import DataflowFlexTemplateJobOp __all__ = [ 'DataflowFlexTemplateJobOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py index 9a57623884..110b890585 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/__init__.py @@ -15,8 +15,10 @@ """Create [Google Cloud Dataflow](https://cloud.google.com/dataflow) jobs from within Vertex AI Pipelines.""" # fmt: on +from google_cloud_pipeline_components.v1.dataflow.flex_template.component import dataflow_flex_template as DataflowFlexTemplateJobOp from google_cloud_pipeline_components.v1.dataflow.python_job.component import dataflow_python as DataflowPythonJobOp __all__ = [ 'DataflowPythonJobOp', + 'DataflowFlexTemplateJobOp', ] diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/flex_template/__init__.py similarity index 100% rename from components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/__init__.py rename to components/google-cloud/google_cloud_pipeline_components/v1/dataflow/flex_template/__init__.py diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py b/components/google-cloud/google_cloud_pipeline_components/v1/dataflow/flex_template/component.py similarity index 100% rename from components/google-cloud/google_cloud_pipeline_components/preview/dataflow/flex_template/component.py rename to components/google-cloud/google_cloud_pipeline_components/v1/dataflow/flex_template/component.py From 73d51c8a23afad97efb6d7e7436c081fa22ce24d Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 18 Oct 2023 13:37:56 -0700 Subject: [PATCH 221/253] fix(sdk): fix bug when `dsl.importer` argument is provided by loop variable (#10116) --- sdk/RELEASE.md | 1 + .../kfp/compiler/pipeline_spec_builder.py | 59 +++++++++---------- sdk/python/kfp/dsl/for_loop.py | 27 +++++---- sdk/python/kfp/dsl/importer_node_test.py | 36 ++++++++++- 4 files changed, 80 insertions(+), 43 deletions(-) diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 502f530072..f710382ccb 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -9,6 +9,7 @@ ## Bug fixes and other changes * Fix type on `dsl.ParallelFor` sub-DAG output when a `dsl.Collected` is used. Non-functional fix. [\#10069](https://github.com/kubeflow/pipelines/pull/10069) +* Fix bug when `dsl.importer` argument is provided by a `dsl.ParallelFor` loop variable. 
[\#10116](https://github.com/kubeflow/pipelines/pull/10116) ## Documentation updates diff --git a/sdk/python/kfp/compiler/pipeline_spec_builder.py b/sdk/python/kfp/compiler/pipeline_spec_builder.py index 1f972133c7..a7e5546c10 100644 --- a/sdk/python/kfp/compiler/pipeline_spec_builder.py +++ b/sdk/python/kfp/compiler/pipeline_spec_builder.py @@ -128,11 +128,35 @@ def build_task_spec_for_task( task._task_spec.retry_policy.to_proto()) for input_name, input_value in task.inputs.items(): + # since LoopArgument and LoopArgumentVariable are narrower types than PipelineParameterChannel, start with it + if isinstance(input_value, for_loop.LoopArgument): - if isinstance(input_value, - pipeline_channel.PipelineArtifactChannel) or ( - isinstance(input_value, for_loop.Collected) and - input_value.is_artifact_channel): + component_input_parameter = ( + compiler_utils.additional_input_name_for_pipeline_channel( + input_value)) + assert component_input_parameter in parent_component_inputs.parameters, \ + f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}' + pipeline_task_spec.inputs.parameters[ + input_name].component_input_parameter = ( + component_input_parameter) + + elif isinstance(input_value, for_loop.LoopArgumentVariable): + + component_input_parameter = ( + compiler_utils.additional_input_name_for_pipeline_channel( + input_value.loop_argument)) + assert component_input_parameter in parent_component_inputs.parameters, \ + f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}' + pipeline_task_spec.inputs.parameters[ + input_name].component_input_parameter = ( + component_input_parameter) + pipeline_task_spec.inputs.parameters[ + input_name].parameter_expression_selector = ( + f'parseJson(string_value)["{input_value.subvar_name}"]') + elif isinstance(input_value, + pipeline_channel.PipelineArtifactChannel) or ( + isinstance(input_value, for_loop.Collected) and + input_value.is_artifact_channel): if input_value.task_name: # Value is produced by an upstream task. @@ -200,31 +224,6 @@ def build_task_spec_for_task( input_name].component_input_parameter = ( component_input_parameter) - elif isinstance(input_value, for_loop.LoopArgument): - - component_input_parameter = ( - compiler_utils.additional_input_name_for_pipeline_channel( - input_value)) - assert component_input_parameter in parent_component_inputs.parameters, \ - f'component_input_parameter: {component_input_parameter} not found. All inputs: {parent_component_inputs}' - pipeline_task_spec.inputs.parameters[ - input_name].component_input_parameter = ( - component_input_parameter) - - elif isinstance(input_value, for_loop.LoopArgumentVariable): - - component_input_parameter = ( - compiler_utils.additional_input_name_for_pipeline_channel( - input_value.loop_argument)) - assert component_input_parameter in parent_component_inputs.parameters, \ - f'component_input_parameter: {component_input_parameter} not found. 
All inputs: {parent_component_inputs}' - pipeline_task_spec.inputs.parameters[ - input_name].component_input_parameter = ( - component_input_parameter) - pipeline_task_spec.inputs.parameters[ - input_name].parameter_expression_selector = ( - f'parseJson(string_value)["{input_value.subvar_name}"]') - elif isinstance(input_value, str): # Handle extra input due to string concat pipeline_channels = ( @@ -572,7 +571,7 @@ def build_importer_spec_for_task( importer_spec.metadata.CopyFrom(metadata_protobuf_struct) if isinstance(task.importer_spec.artifact_uri, - pipeline_channel.PipelineParameterChannel): + pipeline_channel.PipelineChannel): importer_spec.artifact_uri.runtime_parameter = 'uri' elif isinstance(task.importer_spec.artifact_uri, str): importer_spec.artifact_uri.constant.string_value = task.importer_spec.artifact_uri diff --git a/sdk/python/kfp/dsl/for_loop.py b/sdk/python/kfp/dsl/for_loop.py index e49c9a2951..14e0a42773 100644 --- a/sdk/python/kfp/dsl/for_loop.py +++ b/sdk/python/kfp/dsl/for_loop.py @@ -77,7 +77,7 @@ class LoopArgument(pipeline_channel.PipelineParameterChannel): Attributes: - items_or_pipeline_channel: The raw items or the PipelineChannel object + items_or_pipeline_channel: The raw items or the PipelineParameterChannel object this LoopArgument is associated to. """ LOOP_ITEM_NAME_BASE = 'loop-item' @@ -85,7 +85,7 @@ class LoopArgument(pipeline_channel.PipelineParameterChannel): def __init__( self, - items: Union[ItemList, pipeline_channel.PipelineChannel], + items: Union[ItemList, pipeline_channel.PipelineParameterChannel], name_code: Optional[str] = None, name_override: Optional[str] = None, **kwargs, @@ -99,8 +99,8 @@ def __init__( name_code: A unique code used to identify these loop arguments. Should match the code for the ParallelFor ops_group which created these LoopArguments. This prevents parameter name collisions. - name_override: The override name for PipelineChannel. - **kwargs: Any other keyword arguments passed down to PipelineChannel. + name_override: The override name for PipelineParameterChannel. + **kwargs: Any other keyword arguments passed down to PipelineParameterChannel. """ if (name_code is None) == (name_override is None): raise ValueError( @@ -112,17 +112,19 @@ def __init__( else: super().__init__(name=name_override, **kwargs) - if not isinstance(items, - (list, tuple, pipeline_channel.PipelineChannel)): + if not isinstance( + items, + (list, tuple, pipeline_channel.PipelineParameterChannel)): raise TypeError( - f'Expected list, tuple, or PipelineChannel, got {items}.') + f'Expected list, tuple, or PipelineParameterChannel, got {items}.' 
+ ) if isinstance(items, tuple): items = list(items) self.items_or_pipeline_channel = items self.is_with_items_loop_argument = not isinstance( - items, pipeline_channel.PipelineChannel) + items, pipeline_channel.PipelineParameterChannel) self._referenced_subvars: Dict[str, LoopArgumentVariable] = {} if isinstance(items, list) and isinstance(items[0], dict): @@ -154,9 +156,10 @@ def _make_name(self, code: str): @classmethod def from_pipeline_channel( cls, - channel: pipeline_channel.PipelineChannel, + channel: pipeline_channel.PipelineParameterChannel, ) -> 'LoopArgument': - """Creates a LoopArgument object from a PipelineChannel object.""" + """Creates a LoopArgument object from a PipelineParameterChannel + object.""" return LoopArgument( items=channel, name_override=channel.name + '-' + cls.LOOP_ITEM_NAME_BASE, @@ -191,7 +194,7 @@ def name_is_loop_argument(cls, name: str) -> bool: or (cls.LOOP_ITEM_PARAM_NAME_BASE + '-') in name -class LoopArgumentVariable(pipeline_channel.PipelineChannel): +class LoopArgumentVariable(pipeline_channel.PipelineParameterChannel): """Represents a subvariable for a loop argument. This is used for cases where we're looping over maps, each of which contains @@ -246,7 +249,7 @@ def __init__( @property def items_or_pipeline_channel( - self) -> Union[ItemList, pipeline_channel.PipelineChannel]: + self) -> Union[ItemList, pipeline_channel.PipelineParameterChannel]: """Returns the loop argument items.""" return self.loop_argument.items_or_pipeline_chanenl diff --git a/sdk/python/kfp/dsl/importer_node_test.py b/sdk/python/kfp/dsl/importer_node_test.py index 0351382b10..4b3493bfc8 100644 --- a/sdk/python/kfp/dsl/importer_node_test.py +++ b/sdk/python/kfp/dsl/importer_node_test.py @@ -14,8 +14,8 @@ import unittest from kfp import dsl +from kfp.dsl import Dataset from kfp.dsl import importer_node -from kfp.dsl.types.artifact_types import Dataset class TestImporterSupportsDynamicMetadata(unittest.TestCase): @@ -184,3 +184,37 @@ def my_pipeline(integer: int = 1): "prefix2-{{$.inputs.parameters[\'metadata-2\']}}") self.assertEqual(metadata.struct_value.fields['key'].string_value, 'value') + + def test_uri_from_loop(self): + + @dsl.component + def make_args() -> list: + return [{'uri': 'gs://foo', 'key': 'foo'}] + + @dsl.pipeline + def my_pipeline(): + with dsl.ParallelFor(make_args().output) as data: + dsl.importer( + artifact_uri=data.uri, + artifact_class=Dataset, + metadata={'metadata_key': data.key}) + + self.assertEqual( + my_pipeline.pipeline_spec.deployment_spec['executors'] + ['exec-importer']['importer']['artifactUri']['runtimeParameter'], + 'uri') + self.assertEqual( + my_pipeline.pipeline_spec.deployment_spec['executors'] + ['exec-importer']['importer']['metadata']['metadata_key'], + "{{$.inputs.parameters[\'metadata\']}}") + self.assertEqual( + my_pipeline.pipeline_spec.components['comp-for-loop-1'].dag + .tasks['importer'].inputs.parameters['metadata'] + .component_input_parameter, + 'pipelinechannel--make-args-Output-loop-item') + self.assertEqual( + my_pipeline.pipeline_spec.components['comp-for-loop-1'].dag + .tasks['importer'].inputs.parameters['metadata'] + .parameter_expression_selector, + 'parseJson(string_value)["key"]', + ) From d8a0660df525f5695015e507e981bceff836dd3d Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Wed, 18 Oct 2023 16:54:25 -0700 Subject: [PATCH 222/253] feat(components): Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation feature attribution pipeline PiperOrigin-RevId: 574649081 --- 
components/google-cloud/RELEASE.md | 2 +- .../evaluation_feature_attribution_pipeline.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index d33cf8a9e4..38b84d88bf 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -3,7 +3,7 @@ * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. * Add `preview.automl.vision` and `DataConverterJobOp`. * Set display names for `preview.llm` pipelines. -* Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline. +* Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline and evaluation pipeline with feature attribution. * Support `service_account` in `ModelBatchPredictOp`. * Release `DataflowFlexTemplateJobOp` to GA namespace (`v1.dataflow.DataflowFlexTemplateJobOp`). diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py index 4e5a34b58b..27c34f43b2 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_feature_attribution_pipeline.py @@ -46,6 +46,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d batch_predict_explanation_data_sample_size: int = 10000, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic @@ -87,6 +88,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See [sample code](https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component) and more details on [configuring slices](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice). evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. 
evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. @@ -213,6 +215,7 @@ def evaluation_feature_attribution_classification_pipeline( # pylint: disable=d encryption_spec_key_name=encryption_spec_key_name, force_runner_mode=force_runner_mode, model=get_model_task.outputs['model'], + slicing_specs=slicing_specs, ) # Import the evaluation result to Vertex AI. @@ -465,6 +468,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul batch_predict_explanation_data_sample_size: int = 10000, batch_predict_accelerator_type: str = '', batch_predict_accelerator_count: int = 0, + slicing_specs: List[Any] = [], # pylint: disable=g-bare-generic evaluation_prediction_label_column: str = '', evaluation_prediction_score_column: str = '', evaluation_class_labels: List[str] = [], # pylint: disable=g-bare-generic @@ -505,6 +509,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul batch_predict_explanation_data_sample_size: Desired size to downsample the input dataset that will then be used for batch explanation. batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set. + slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See [sample code](https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component) and more details on [configuring slices](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice). evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"]. 
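For reference, a rough sketch (not part of this patch) of how a caller might construct the new `slicing_specs` argument. The nested message names follow the `google.cloud.aiplatform_v1` types referenced in the docstring above and should be treated as an assumption to verify; the feature name and value are placeholders.

# Hedged sketch: one slice over a hypothetical 'country' feature column.
# The message nesting (ModelEvaluationSlice.Slice.SliceSpec, SliceConfig, Value)
# is assumed from the aiplatform_v1 types and should be verified.
from google.cloud.aiplatform_v1 import types

SliceSpec = types.ModelEvaluationSlice.Slice.SliceSpec

slicing_specs = [
    SliceSpec(
        configs={
            'country': SliceSpec.SliceConfig(
                value=SliceSpec.Value(string_value='USA'))  # placeholder values
        })
]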
@@ -541,6 +546,7 @@ def evaluation_feature_attribution_pipeline( # pylint: disable=dangerous-defaul batch_predict_explanation_data_sample_size=batch_predict_explanation_data_sample_size, batch_predict_accelerator_type=batch_predict_accelerator_type, batch_predict_accelerator_count=batch_predict_accelerator_count, + slicing_specs=slicing_specs, evaluation_prediction_label_column=evaluation_prediction_label_column, evaluation_prediction_score_column=evaluation_prediction_score_column, evaluation_class_labels=evaluation_class_labels, From e8fb6990dfdf036c941c522f9b384ff679b38ca6 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 19 Oct 2023 08:14:21 -0700 Subject: [PATCH 223/253] feat(components): Make `model_checkpoint` optional for `preview.llm.infer_pipeline` PiperOrigin-RevId: 574876480 --- components/google-cloud/RELEASE.md | 1 + .../preview/llm/infer/component.py | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index 38b84d88bf..fc40e16bf9 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -6,6 +6,7 @@ * Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline and evaluation pipeline with feature attribution. * Support `service_account` in `ModelBatchPredictOp`. * Release `DataflowFlexTemplateJobOp` to GA namespace (`v1.dataflow.DataflowFlexTemplateJobOp`). +* Make `model_checkpoint` optional for `preview.llm.infer_pipeline`. If not provided, the base model associated with the `large_model_reference` will be used. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py index cfa0f71556..429916e434 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/llm/infer/component.py @@ -33,8 +33,8 @@ ) def infer_pipeline( large_model_reference: str, - model_checkpoint: str, prompt_dataset: str, + model_checkpoint: Optional[str] = None, prompt_sequence_length: int = 512, target_sequence_length: int = 64, sampling_strategy: str = 'greedy', @@ -47,7 +47,7 @@ def infer_pipeline( Args: large_model_reference: Name of the base model. Supported values are `text-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001` and `t5-small` are supported in `us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`. - model_checkpoint: Cloud storage path to the model checkpoint. + model_checkpoint: Optional Cloud storage path to the model checkpoint. If not provided, the default checkpoint for the `large_model_reference` will be used. prompt_dataset: Cloud storage path to an unlabled prompt dataset used for reinforcement learning. The dataset format is jsonl. Each example in the dataset must have an `input_text` field that contains the prompt. prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512. target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64. 
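A usage sketch, not part of the patch, of what this change enables: submitting `infer_pipeline` without a `model_checkpoint`, so the checkpoint is resolved from `large_model_reference`. The `preview.llm` import path, project, and Cloud Storage paths below are assumptions or placeholders.

# Hedged sketch: run the inference pipeline with model_checkpoint omitted.
from google.cloud import aiplatform
from google_cloud_pipeline_components.preview import llm
from kfp import compiler

compiler.Compiler().compile(
    pipeline_func=llm.infer_pipeline,
    package_path='infer_pipeline.yaml',
)

aiplatform.init(project='my-project', location='us-central1')  # placeholders

job = aiplatform.PipelineJob(
    display_name='infer-without-checkpoint',
    template_path='infer_pipeline.yaml',
    pipeline_root='gs://my-bucket/pipeline-root',  # placeholder
    parameter_values={
        'large_model_reference': 't5-small',
        'prompt_dataset': 'gs://my-bucket/prompts/prompt_dataset.jsonl',  # placeholder
        # 'model_checkpoint' is intentionally omitted; the default checkpoint for
        # the base model is resolved from large_model_reference.
    },
)
job.submit()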
@@ -66,7 +66,8 @@ def infer_pipeline( use_test_spec=env.get_use_test_machine_spec(), ).set_display_name('Resolve Machine Spec') reference_model_metadata = function_based.resolve_reference_model_metadata( - large_model_reference=large_model_reference + large_model_reference=large_model_reference, + reference_model_path=model_checkpoint, ).set_display_name('Resolve Model Metadata') prompt_dataset_image_uri = function_based.resolve_private_image_uri( @@ -98,7 +99,7 @@ def infer_pipeline( bulk_inference = bulk_inferrer.BulkInferrer( project=project, location=location, - input_model=model_checkpoint, + input_model=reference_model_metadata.outputs['reference_model_path'], input_dataset_path=prompt_dataset_importer.outputs['imported_data_path'], dataset_split=env.TRAIN_SPLIT, inputs_sequence_length=prompt_sequence_length, From 8d00d0eb9a1442ed994b6a90acea88604efc6423 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Thu, 19 Oct 2023 11:49:58 -0700 Subject: [PATCH 224/253] feat(sdk): support a Pythonic artifact authoring style (#9932) * chore(sdk): test observability, refactorings, and cleanup * implement unified i/o syntax * clarify container component error message * address review feedback * add dill to packages_to_install * update get_uri logic for KFP and tests * update execution tests & cleanup --- sdk/RELEASE.md | 1 + sdk/python/kfp/compiler/compiler_test.py | 411 +++++++++++++++++ sdk/python/kfp/dsl/__init__.py | 2 + sdk/python/kfp/dsl/component_factory.py | 367 +++++++-------- sdk/python/kfp/dsl/executor.py | 86 +++- sdk/python/kfp/dsl/executor_test.py | 422 +++++++++++++++++- sdk/python/kfp/dsl/types/artifact_types.py | 57 ++- .../kfp/dsl/types/artifact_types_test.py | 95 +++- sdk/python/kfp/dsl/types/type_annotations.py | 13 +- .../kfp/dsl/types/type_annotations_test.py | 28 +- .../pythonic_artifact_with_single_return.py | 58 +++ .../pythonic_artifact_with_single_return.yaml | 123 +++++ ...thonic_artifacts_with_list_of_artifacts.py | 52 +++ ...onic_artifacts_with_list_of_artifacts.yaml | 187 ++++++++ ...ythonic_artifacts_with_multiple_returns.py | 93 ++++ ...honic_artifacts_with_multiple_returns.yaml | 184 ++++++++ sdk/python/test_data/test_data_config.yaml | 9 + test/sdk-execution-tests/requirements.txt | 1 - .../sdk_execution_tests.py | 14 +- 19 files changed, 1964 insertions(+), 239 deletions(-) create mode 100644 sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.py create mode 100644 sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.yaml create mode 100644 sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.py create mode 100644 sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.yaml create mode 100644 sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.py create mode 100644 sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.yaml diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index f710382ccb..3e43e2633f 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -16,6 +16,7 @@ # 2.3.0 ## Features * Support `PipelineTaskFinalStatus` in tasks that use `.ignore_upstream_failure()` [\#10010](https://github.com/kubeflow/pipelines/pull/10010) +* Add support for a Pythonic artifact authoring style [\#9932](https://github.com/kubeflow/pipelines/pull/9932) ## Breaking changes diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py index b5d7a5267d..b98d5624d6 100644 --- a/sdk/python/kfp/compiler/compiler_test.py +++ 
b/sdk/python/kfp/compiler/compiler_test.py @@ -33,6 +33,7 @@ from kfp.compiler import compiler_utils from kfp.dsl import Artifact from kfp.dsl import ContainerSpec +from kfp.dsl import Dataset from kfp.dsl import graph_component from kfp.dsl import Input from kfp.dsl import Model @@ -5279,5 +5280,415 @@ def roll_die_pipeline() -> str: return dsl.OneOf(t3, t4.output) +class TestPythonicArtifactAuthoring(unittest.TestCase): + # python component + def test_pythonic_input_artifact(self): + + @dsl.component + def pythonic_style(in_artifact: Artifact): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + 'system.Artifact', + ) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.parameters) + + @dsl.component + def standard_style(in_artifact: Input[Artifact]): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + ) + + def test_pythonic_input_artifact_optional(self): + + @dsl.component + def pythonic_style(in_artifact: Optional[Artifact] = None): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + 'system.Artifact', + ) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.parameters) + + @dsl.component + def standard_style(in_artifact: Optional[Input[Artifact]] = None): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + ) + + def test_pythonic_input_list_of_artifacts(self): + + @dsl.component + def pythonic_style(in_artifact: List[Artifact]): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + 'system.Artifact', + ) + self.assertTrue( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].is_artifact_list) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.parameters) + + @dsl.component + def standard_style(in_artifact: Input[List[Artifact]]): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + ) + + def test_pythonic_input_list_of_artifacts_optional(self): + + @dsl.component + def pythonic_style(in_artifact: Optional[List[Artifact]] = None): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + 'system.Artifact', + ) + self.assertTrue( 
+ pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].is_artifact_list) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.parameters) + + @dsl.component + def standard_style(in_artifact: Optional[Input[List[Artifact]]] = None): + print(in_artifact) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .input_definitions.artifacts['in_artifact'].artifact_type + .schema_title, + ) + + def test_pythonic_output_artifact(self): + + @dsl.component + def pythonic_style() -> Artifact: + return Artifact(uri='gs://my_bucket/foo') + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['Output'].artifact_type.schema_title, + 'system.Artifact', + ) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.parameters) + + @dsl.component + def standard_style(named_output: Output[Artifact]): + return Artifact(uri='gs://my_bucket/foo') + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['Output'].artifact_type.schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .output_definitions.artifacts['named_output'].artifact_type + .schema_title, + ) + + def test_pythonic_output_artifact_multiple_returns(self): + + @dsl.component + def pythonic_style() -> NamedTuple('outputs', a=Artifact, d=Dataset): + a = Artifact(uri='gs://my_bucket/foo/artifact') + d = Artifact(uri='gs://my_bucket/foo/dataset') + outputs = NamedTuple('outputs', a=Artifact, d=Dataset) + return outputs(a=a, d=d) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['a'].artifact_type.schema_title, + 'system.Artifact', + ) + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['d'].artifact_type.schema_title, + 'system.Dataset', + ) + + self.assertFalse( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.parameters) + + @dsl.component + def standard_style(a: Output[Artifact], d: Output[Dataset]): + a.uri = 'gs://my_bucket/foo/artifact' + d.uri = 'gs://my_bucket/foo/dataset' + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['a'].artifact_type.schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .output_definitions.artifacts['a'].artifact_type.schema_title, + ) + + self.assertEqual( + pythonic_style.pipeline_spec.components['comp-pythonic-style'] + .output_definitions.artifacts['d'].artifact_type.schema_title, + standard_style.pipeline_spec.components['comp-standard-style'] + .output_definitions.artifacts['d'].artifact_type.schema_title, + ) + + def test_pythonic_output_list_artifacts(self): + + with self.assertRaisesRegex( + ValueError, + r"Output lists of artifacts are only supported for pipelines\. Got output list of artifacts for output parameter 'Output' of component 'pythonic-style'\." 
+ ): + + @dsl.component + def pythonic_style() -> List[Artifact]: + pass + + def test_mixed_component_authoring_styles(self): + # can be permitted, since the expected behavior is unambiguous + + # in traditional; out pythonic + @dsl.component + def back_compat_style(in_artifact: Input[Artifact]) -> Artifact: + print(in_artifact) + return Artifact(uri='gs://my_bucket/foo') + + self.assertTrue(back_compat_style.pipeline_spec) + + # out traditional; in pythonic + @dsl.component + def mixed_style(in_artifact: Artifact, out_artifact: Output[Artifact]): + print(in_artifact) + out_artifact.uri = 'gs://my_bucket/foo' + + self.assertTrue(mixed_style.pipeline_spec) + + # pipeline + def test_pipeline_input_artifact(self): + + @dsl.component + def pythonic_style(in_artifact: Artifact): + print(in_artifact) + + @dsl.pipeline + def my_pipeline(in_artifact: Artifact): + pythonic_style(in_artifact=in_artifact) + + self.assertEqual( + my_pipeline.pipeline_spec.root.input_definitions + .artifacts['in_artifact'].artifact_type.schema_title, + 'system.Artifact', + ) + + self.assertFalse( + my_pipeline.pipeline_spec.root.input_definitions.parameters) + + def test_pipeline_input_artifact_optional(self): + + @dsl.component + def pythonic_style(in_artifact: Optional[Artifact] = None): + print(in_artifact) + + @dsl.pipeline + def my_pipeline(in_artifact: Optional[Artifact] = None): + pythonic_style(in_artifact=in_artifact) + + self.assertEqual( + my_pipeline.pipeline_spec.root.input_definitions + .artifacts['in_artifact'].artifact_type.schema_title, + 'system.Artifact', + ) + + self.assertFalse( + my_pipeline.pipeline_spec.root.input_definitions.parameters) + + def test_pipeline_input_list_of_artifacts(self): + + @dsl.component + def pythonic_style(in_artifact: List[Artifact]): + print(in_artifact) + + @dsl.pipeline + def my_pipeline(in_artifact: List[Artifact]): + pythonic_style(in_artifact=in_artifact) + + self.assertEqual( + my_pipeline.pipeline_spec.root.input_definitions + .artifacts['in_artifact'].artifact_type.schema_title, + 'system.Artifact', + ) + self.assertTrue(my_pipeline.pipeline_spec.root.input_definitions + .artifacts['in_artifact'].is_artifact_list) + + self.assertFalse( + my_pipeline.pipeline_spec.root.input_definitions.parameters) + + def test_pipeline_input_list_of_artifacts_optional(self): + + @dsl.component + def pythonic_style(in_artifact: Optional[List[Artifact]] = None): + print(in_artifact) + + @dsl.pipeline + def my_pipeline(in_artifact: Optional[List[Artifact]] = None): + pythonic_style(in_artifact=in_artifact) + + self.assertEqual( + my_pipeline.pipeline_spec.root.input_definitions + .artifacts['in_artifact'].artifact_type.schema_title, + 'system.Artifact', + ) + + self.assertFalse( + my_pipeline.pipeline_spec.root.input_definitions.parameters) + + def test_pipeline_output_artifact(self): + + @dsl.component + def pythonic_style() -> Artifact: + return Artifact(uri='gs://my_bucket/foo') + + @dsl.pipeline + def my_pipeline() -> Artifact: + return pythonic_style().output + + self.assertEqual( + my_pipeline.pipeline_spec.root.output_definitions + .artifacts['Output'].artifact_type.schema_title, 'system.Artifact') + + self.assertFalse( + my_pipeline.pipeline_spec.root.output_definitions.parameters) + + def test_pipeline_output_list_of_artifacts(self): + + @dsl.component + def noop() -> Artifact: + # write artifact + return Artifact(uri='gs://my_bucket/foo/bar') + + @dsl.pipeline + def my_pipeline() -> List[Artifact]: + with dsl.ParallelFor([1, 2, 3]): + t = noop() + + return 
dsl.Collected(t.output) + + self.assertEqual( + my_pipeline.pipeline_spec.root.output_definitions + .artifacts['Output'].artifact_type.schema_title, 'system.Artifact') + self.assertTrue(my_pipeline.pipeline_spec.root.output_definitions + .artifacts['Output'].is_artifact_list) + + self.assertFalse( + my_pipeline.pipeline_spec.root.output_definitions.parameters) + + # container + def test_container_input_artifact(self): + with self.assertRaisesRegex( + TypeError, + r"Container Components must wrap input and output artifact annotations with Input/Output type markers \(Input\[\] or Output\[\]\)\. Got function input 'in_artifact' with annotation \." + ): + + @dsl.container_component + def comp(in_artifact: Artifact): + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + def test_container_input_artifact_optional(self): + with self.assertRaisesRegex( + TypeError, + r"Container Components must wrap input and output artifact annotations with Input/Output type markers \(Input\[\] or Output\[\]\)\. Got function input 'in_artifact' with annotation \." + ): + + @dsl.container_component + def comp(in_artifact: Optional[Artifact] = None): + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + def test_container_input_list_of_artifacts(self): + with self.assertRaisesRegex( + TypeError, + r"Container Components must wrap input and output artifact annotations with Input/Output type markers \(Input\[\] or Output\[\]\)\. Got function input 'in_artifact' with annotation typing\.List\[kfp\.dsl\.types\.artifact_types\.Artifact\]\." + ): + + @dsl.container_component + def comp(in_artifact: List[Artifact]): + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + def test_container_input_list_of_artifacts_optional(self): + with self.assertRaisesRegex( + TypeError, + r"Container Components must wrap input and output artifact annotations with Input/Output type markers \(Input\[\] or Output\[\]\)\. Got function input 'in_artifact' with annotation typing\.List\[kfp\.dsl\.types\.artifact_types\.Artifact\]\." + ): + + @dsl.container_component + def comp(in_artifact: Optional[List[Artifact]] = None): + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + def test_container_output_artifact(self): + with self.assertRaisesRegex( + TypeError, + r'Return annotation should be either ContainerSpec or omitted for container components\.' + ): + + @dsl.container_component + def comp() -> Artifact: + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + def test_container_output_list_of_artifact(self): + with self.assertRaisesRegex( + TypeError, + r'Return annotation should be either ContainerSpec or omitted for container components\.' 
+ ): + + @dsl.container_component + def comp() -> List[Artifact]: + return dsl.ContainerSpec(image='alpine', command=['pwd']) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index c2c70c847d..d2372156c7 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -29,6 +29,7 @@ 'Metrics', 'Model', 'SlicedClassificationMetrics', + 'get_uri', 'PIPELINE_JOB_NAME_PLACEHOLDER', 'PIPELINE_JOB_RESOURCE_NAME_PLACEHOLDER', 'PIPELINE_JOB_ID_PLACEHOLDER', @@ -44,6 +45,7 @@ from kfp.dsl.types.artifact_types import Artifact from kfp.dsl.types.artifact_types import ClassificationMetrics from kfp.dsl.types.artifact_types import Dataset +from kfp.dsl.types.artifact_types import get_uri from kfp.dsl.types.artifact_types import HTML from kfp.dsl.types.artifact_types import Markdown from kfp.dsl.types.artifact_types import Metrics diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index f34dd33fe0..29402dc131 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -17,7 +17,8 @@ import pathlib import re import textwrap -from typing import Callable, List, Mapping, Optional, Tuple, Type, Union +from typing import (Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, + Union) import warnings import docstring_parser @@ -192,7 +193,7 @@ def _get_function_source_definition(func: Callable) -> str: return '\n'.join(func_code_lines) -def _maybe_make_unique(name: str, names: List[str]): +def maybe_make_unique(name: str, names: List[str]): if name not in names: return name @@ -204,188 +205,187 @@ def _maybe_make_unique(name: str, names: List[str]): raise RuntimeError(f'Too many arguments with the name {name}') -def extract_component_interface( - func: Callable, +def get_name_to_specs( + signature: inspect.Signature, containerized: bool = False, - description: Optional[str] = None, - name: Optional[str] = None, -) -> structures.ComponentSpec: - - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - - original_docstring = inspect.getdoc(func) - parsed_docstring = docstring_parser.parse(original_docstring) - - inputs = {} - outputs = {} +) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """Returns two dictionaries. - input_names = set() - output_names = set() - for parameter in parameters: - parameter_type = type_annotations.maybe_strip_optional_from_annotation( - parameter.annotation) - passing_style = None - io_name = parameter.name - is_artifact_list = False - - if type_annotations.is_Input_Output_artifact_annotation(parameter_type): - # passing_style is either type_annotations.InputAnnotation or - # type_annotations.OutputAnnotation. - passing_style = type_annotations.get_io_artifact_annotation( - parameter_type) - - # parameter_type is a type like typing_extensions.Annotated[kfp.dsl.types.artifact_types.Artifact, ] OR typing_extensions.Annotated[typing.List[kfp.dsl.types.artifact_types.Artifact], ] - - is_artifact_list = type_annotations.is_list_of_artifacts( - parameter_type.__origin__) - - parameter_type = type_annotations.get_io_artifact_class( - parameter_type) - if not type_annotations.is_artifact_class(parameter_type): - raise ValueError( - f'Input[T] and Output[T] are only supported when T is an artifact or list of artifacts. Found `{io_name} with type {parameter_type}`' + The first is a mapping of input name to input annotation. 
The second + is a mapping of output name to output annotation. + """ + func_params = list(signature.parameters.values()) + + name_to_input_specs = {} + name_to_output_specs = {} + + ### handle function parameter annotations ### + for func_param in func_params: + name = func_param.name + if name == SINGLE_OUTPUT_NAME: + raise ValueError( + f'"{SINGLE_OUTPUT_NAME}" is an invalid parameter name.') + # Stripping Optional from Optional[] is the only processing done + # on annotations in this flow. Other than that, we extract the raw + # annotation and process later. + annotation = type_annotations.maybe_strip_optional_from_annotation( + func_param.annotation) + + # no annotation + if annotation == inspect._empty: + raise TypeError(f'Missing type annotation for argument: {name}') + + # is Input[Artifact], Input[List[]], (e.g., str), or InputPath() + elif (type_annotations.is_artifact_wrapped_in_Input(annotation) or + isinstance( + annotation, + type_annotations.InputPath, + ) or type_utils.is_parameter_type(annotation)): + name_to_input_specs[maybe_make_unique( + name, list(name_to_input_specs))] = make_input_spec( + annotation, func_param) + # is Artifact annotation (e.g., Artifact, Dataset, etc.) + # or List[] + elif type_annotations.issubclass_of_artifact( + annotation) or type_annotations.is_list_of_artifacts( + annotation): + if containerized: + raise TypeError( + f"Container Components must wrap input and output artifact annotations with Input/Output type markers (Input[] or Output[]). Got function input '{name}' with annotation {annotation}." ) + name_to_input_specs[maybe_make_unique( + name, list(name_to_input_specs))] = make_input_spec( + annotation, func_param) + + # is Output[Artifact] or OutputPath() + elif type_annotations.is_artifact_wrapped_in_Output( + annotation) or isinstance(annotation, + type_annotations.OutputPath): + name_to_output_specs[maybe_make_unique( + name, + list(name_to_output_specs))] = make_output_spec(annotation) + + # parameter type + else: + type_string = type_utils._annotation_to_type_struct(annotation) + name_to_input_specs[maybe_make_unique( + name, list(name_to_input_specs))] = make_input_spec( + type_string, func_param) - if parameter.default is not inspect.Parameter.empty: - if passing_style in [ - type_annotations.OutputAnnotation, - type_annotations.OutputPath, - ]: - raise ValueError( - 'Default values for Output artifacts are not supported.' - ) - elif parameter.default is not None: - raise ValueError( - f'Optional Input artifacts may only have default value None. Got: {parameter.default}.' - ) - - elif isinstance( - parameter_type, - (type_annotations.InputPath, type_annotations.OutputPath)): - passing_style = type(parameter_type) - parameter_type = parameter_type.type - if parameter.default is not inspect.Parameter.empty and not ( - passing_style == type_annotations.InputPath and - parameter.default is None): - raise ValueError( - 'Path inputs only support default values of None. 
Default' - ' values for outputs are not supported.') - - type_struct = type_utils._annotation_to_type_struct(parameter_type) - if type_struct is None: - raise TypeError( - f'Missing type annotation for argument: {parameter.name}') + ### handle return annotations ### + return_ann = signature.return_annotation - if passing_style in [ - type_annotations.OutputAnnotation, type_annotations.OutputPath + # validate container component returns + if containerized: + if return_ann not in [ + inspect.Parameter.empty, + structures.ContainerSpec, ]: - if io_name == SINGLE_OUTPUT_NAME: - raise ValueError( - f'"{SINGLE_OUTPUT_NAME}" is an invalid parameter name.') - io_name = _maybe_make_unique(io_name, output_names) - output_names.add(io_name) - if type_annotations.is_artifact_class(parameter_type): - schema_version = parameter_type.schema_version - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - type_struct, schema_version), - is_artifact_list=is_artifact_list) - else: - output_spec = structures.OutputSpec(type=type_struct) - outputs[io_name] = output_spec - else: - io_name = _maybe_make_unique(io_name, input_names) - input_names.add(io_name) - type_ = type_utils.create_bundled_artifact_type( - type_struct, parameter_type.schema_version - ) if type_annotations.is_artifact_class( - parameter_type) else type_struct - default = None if parameter.default == inspect.Parameter.empty or type_annotations.is_artifact_class( - parameter_type) else parameter.default - optional = parameter.default is not inspect.Parameter.empty or type_utils.is_task_final_status_type( - type_struct) - input_spec = structures.InputSpec( - type=type_, - default=default, - optional=optional, - is_artifact_list=is_artifact_list, + raise TypeError( + 'Return annotation should be either ContainerSpec or omitted for container components.' ) + # ignore omitted returns + elif return_ann is None or return_ann == inspect.Parameter.empty: + pass + # is NamedTuple + elif hasattr(return_ann, '_fields'): + # Getting field type annotations. + # __annotations__ does not exist in python 3.5 and earlier + # _field_types does not exist in python 3.9 and later + field_annotations = getattr(return_ann, '__annotations__', + None) or getattr(return_ann, '_field_types') + for name in return_ann._fields: + annotation = field_annotations[name] + if not type_annotations.is_list_of_artifacts( + annotation) and not type_annotations.is_artifact_class( + annotation): + annotation = type_utils._annotation_to_type_struct(annotation) + name_to_output_specs[maybe_make_unique( + name, + list(name_to_output_specs))] = make_output_spec(annotation) + # is deprecated dict returns style + elif isinstance(return_ann, dict): + warnings.warn( + 'The ability to specify multiple outputs using the dict syntax' + ' has been deprecated. It will be removed soon after release' + ' 0.1.32. 
Please use typing.NamedTuple to declare multiple' + ' outputs.', DeprecationWarning) + for output_name, output_type_annotation in return_ann.items(): + output_type = type_utils._annotation_to_type_struct( + output_type_annotation) + name_to_output_specs[maybe_make_unique( + output_name, list(name_to_output_specs))] = output_type + # is the simple single return case (can be `-> ` or `-> Artifact`) + # treated the same way, since processing is done in inner functions + else: + name_to_output_specs[maybe_make_unique( + SINGLE_OUTPUT_NAME, + list(name_to_output_specs))] = make_output_spec(return_ann) + return name_to_input_specs, name_to_output_specs + + +def canonicalize_annotation(annotation: Any): + """Does cleaning on annotations that are common between input and output + annotations.""" + if type_annotations.is_Input_Output_artifact_annotation(annotation): + annotation = type_annotations.strip_Input_or_Output_marker(annotation) + if isinstance(annotation, + (type_annotations.InputPath, type_annotations.OutputPath)): + annotation = annotation.type + return annotation + + +def make_input_output_spec_args(annotation: Any) -> Dict[str, Any]: + """Gets a dict of kwargs shared between InputSpec and OutputSpec.""" + is_artifact_list = type_annotations.is_list_of_artifacts(annotation) + if is_artifact_list: + annotation = type_annotations.get_inner_type(annotation) + + if type_annotations.issubclass_of_artifact(annotation): + typ = type_utils.create_bundled_artifact_type(annotation.schema_title, + annotation.schema_version) + else: + typ = type_utils._annotation_to_type_struct(annotation) + return {'type': typ, 'is_artifact_list': is_artifact_list} - inputs[io_name] = input_spec - #Analyzing the return type annotations. - return_ann = signature.return_annotation - if not containerized: - if hasattr(return_ann, '_fields'): #NamedTuple - # Getting field type annotations. - # __annotations__ does not exist in python 3.5 and earlier - # _field_types does not exist in python 3.9 and later - field_annotations = getattr(return_ann, '__annotations__', - None) or getattr( - return_ann, '_field_types', None) - for field_name in return_ann._fields: - output_name = _maybe_make_unique(field_name, output_names) - output_names.add(output_name) - type_var = field_annotations.get(field_name) - if type_annotations.is_list_of_artifacts(type_var): - artifact_cls = type_var.__args__[0] - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - artifact_cls.schema_title, - artifact_cls.schema_version), - is_artifact_list=True) - elif type_annotations.is_artifact_class(type_var): - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - type_var.schema_title, type_var.schema_version)) - else: - type_struct = type_utils._annotation_to_type_struct( - type_var) - output_spec = structures.OutputSpec(type=type_struct) - outputs[output_name] = output_spec - # Deprecated dict-based way of declaring multiple outputs. Was only used by - # the @component decorator - elif isinstance(return_ann, dict): - warnings.warn( - 'The ability to specify multiple outputs using the dict syntax' - ' has been deprecated. It will be removed soon after release' - ' 0.1.32. 
Please use typing.NamedTuple to declare multiple' - ' outputs.') - for output_name, output_type_annotation in return_ann.items(): - output_type_struct = type_utils._annotation_to_type_struct( - output_type_annotation) - output_spec = structures.OutputSpec(type=output_type_struct) - outputs[name] = output_spec - elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty: - output_name = _maybe_make_unique(SINGLE_OUTPUT_NAME, output_names) - # Fixes exotic, but possible collision: - # `def func(output_path: OutputPath()) -> str: ...` - output_names.add(output_name) - return_ann = signature.return_annotation - if type_annotations.is_list_of_artifacts(return_ann): - artifact_cls = return_ann.__args__[0] - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - artifact_cls.schema_title, artifact_cls.schema_version), - is_artifact_list=True) - elif type_annotations.is_artifact_class(return_ann): - output_spec = structures.OutputSpec( - type=type_utils.create_bundled_artifact_type( - return_ann.schema_title, return_ann.schema_version), - is_artifact_list=False) - else: - type_struct = type_utils._annotation_to_type_struct(return_ann) - output_spec = structures.OutputSpec(type=type_struct) - - outputs[output_name] = output_spec - elif return_ann != inspect.Parameter.empty and return_ann != structures.ContainerSpec: - raise TypeError( - 'Return annotation should be either ContainerSpec or omitted for container components.' +def make_output_spec(annotation: Any) -> structures.OutputSpec: + annotation = canonicalize_annotation(annotation) + args = make_input_output_spec_args(annotation) + return structures.OutputSpec(**args) + + +def make_input_spec(annotation: Any, + inspect_param: inspect.Parameter) -> structures.InputSpec: + """Makes an InputSpec from a cleaned output annotation.""" + annotation = canonicalize_annotation(annotation) + input_output_spec_args = make_input_output_spec_args(annotation) + + if (type_annotations.issubclass_of_artifact(annotation) or + input_output_spec_args['is_artifact_list'] + ) and inspect_param.default not in {None, inspect._empty}: + raise ValueError( + f'Optional Input artifacts may only have default value None. Got: {inspect_param.default}.' 
) - component_name = name or _python_function_name_to_component_name( - func.__name__) + default = None if inspect_param.default == inspect.Parameter.empty or type_annotations.issubclass_of_artifact( + annotation) else inspect_param.default + + optional = inspect_param.default is not inspect.Parameter.empty or type_utils.is_task_final_status_type( + getattr(inspect_param.annotation, '__name__', '')) + return structures.InputSpec( + **input_output_spec_args, + default=default, + optional=optional, + ) + + +def extract_component_interface( + func: Callable, + containerized: bool = False, + description: Optional[str] = None, + name: Optional[str] = None, +) -> structures.ComponentSpec: def assign_descriptions( inputs_or_outputs: Mapping[str, Union[structures.InputSpec, @@ -417,23 +417,32 @@ def parse_docstring_with_return_as_args( return None - assign_descriptions(inputs, parsed_docstring.params) + signature = inspect.signature(func) + name_to_input_spec, name_to_output_spec = get_name_to_specs( + signature, containerized) + original_docstring = inspect.getdoc(func) + parsed_docstring = docstring_parser.parse(original_docstring) + + assign_descriptions(name_to_input_spec, parsed_docstring.params) modified_parsed_docstring = parse_docstring_with_return_as_args( original_docstring) if modified_parsed_docstring is not None: - assign_descriptions(outputs, modified_parsed_docstring.params) + assign_descriptions(name_to_output_spec, + modified_parsed_docstring.params) description = get_pipeline_description( decorator_description=description, docstring=parsed_docstring, ) + component_name = name or _python_function_name_to_component_name( + func.__name__) return structures.ComponentSpec( name=component_name, description=description, - inputs=inputs or None, - outputs=outputs or None, + inputs=name_to_input_spec or None, + outputs=name_to_output_spec or None, implementation=structures.Implementation(), ) @@ -573,7 +582,7 @@ def make_input_for_parameterized_container_component_function( Type[artifact_types.Artifact]] ) -> Union[placeholders.Placeholder, container_component_artifact_channel .ContainerComponentArtifactChannel]: - if type_annotations.is_input_artifact(annotation): + if type_annotations.is_artifact_wrapped_in_Input(annotation): if type_annotations.is_list_of_artifacts(annotation.__origin__): return placeholders.InputListOfArtifactsPlaceholder(name) @@ -581,7 +590,7 @@ def make_input_for_parameterized_container_component_function( return container_component_artifact_channel.ContainerComponentArtifactChannel( io_type='input', var_name=name) - elif type_annotations.is_output_artifact(annotation): + elif type_annotations.is_artifact_wrapped_in_Output(annotation): if type_annotations.is_list_of_artifacts(annotation.__origin__): return placeholders.OutputListOfArtifactsPlaceholder(name) diff --git a/sdk/python/kfp/dsl/executor.py b/sdk/python/kfp/dsl/executor.py index 7429c0de2b..87d20e43c8 100644 --- a/sdk/python/kfp/dsl/executor.py +++ b/sdk/python/kfp/dsl/executor.py @@ -16,6 +16,7 @@ import os import re from typing import Any, Callable, Dict, List, Optional, Union +import warnings from kfp import dsl from kfp.dsl import task_final_status @@ -39,6 +40,12 @@ def __init__( self.func = function_to_execute self.executor_input = executor_input + self.executor_output_path = self.executor_input['outputs']['outputFile'] + + # drop executor_output.json part from the outputFile path + artifact_types.CONTAINER_TASK_ROOT = os.path.split( + self.executor_output_path)[0] + self.input_artifacts: Dict[str, 
Union[dsl.Artifact, List[dsl.Artifact]]] = {} self.output_artifacts: Dict[str, dsl.Artifact] = {} @@ -55,9 +62,14 @@ def assign_input_and_output_artifacts(self) -> None: if list_of_artifact_proto_structs: annotation = self.func.__annotations__[name] # InputPath has no attribute __origin__ and also should be handled as a single artifact - if type_annotations.is_Input_Output_artifact_annotation( - annotation) and type_annotations.is_list_of_artifacts( - annotation.__origin__): + annotation = type_annotations.maybe_strip_optional_from_annotation( + annotation) + is_list_of_artifacts = ( + type_annotations.is_Input_Output_artifact_annotation( + annotation) and + type_annotations.is_list_of_artifacts(annotation.__origin__) + ) or type_annotations.is_list_of_artifacts(annotation) + if is_list_of_artifacts: self.input_artifacts[name] = [ self.make_artifact( msg, @@ -129,7 +141,7 @@ def get_output_parameter_path(self, parameter_name: str) -> Optional[str]: path = parameter.get('outputFile', None) if path: - os.makedirs(os.path.dirname(path), exist_ok=True) + makedirs_recursively(path) return path def get_output_artifact_path(self, artifact_name: str) -> str: @@ -189,8 +201,29 @@ def handle_single_return_value(self, output_name: str, annotation_type: Any, f'Function `{self.func.__name__}` returned value of type {type(return_value)}; want type {origin_type}' ) self.write_output_parameter_value(output_name, return_value) + elif is_artifact(annotation_type): - self.write_output_artifact_payload(output_name, return_value) + if isinstance(return_value, artifact_types.Artifact): + # for -> Artifact annotations, where the user returns an artifact + artifact_name = self.executor_input['outputs']['artifacts'][ + output_name]['artifacts'][0]['name'] + # users should not override the name for Vertex Pipelines + # if empty string, replace + # else provide descriptive warning and prefer letting backend throw exception + running_on_vertex = 'VERTEX_AI_PIPELINES_RUN_LABELS' in os.environ + if running_on_vertex: + if return_value.name == '': + return_value.name = artifact_name + else: + # prefer letting the backend throw the runtime exception + warnings.warn( + f'If you are running your pipeline Vertex AI Pipelines, you should not provide a name for your artifact. It will be set to the Vertex artifact resource name {artifact_name} by default. Got value for name: {return_value.name}.', + RuntimeWarning, + stacklevel=2) + self.output_artifacts[output_name] = return_value + else: + # for -> Artifact annotations, where the user returns some data that the executor should serialize + self.write_output_artifact_payload(output_name, return_value) else: raise RuntimeError( f'Unknown return type: {annotation_type}. Must be one of the supported data types: https://www.kubeflow.org/docs/components/pipelines/v2/data-types/' @@ -209,18 +242,6 @@ def write_executor_output(self, Returns: Optional[str]: Returns the location of the executor_output file as a string if the file is written. Else, None. """ - if self.output_artifacts: - self.excutor_output['artifacts'] = {} - - for name, artifact in self.output_artifacts.items(): - runtime_artifact = { - 'name': artifact.name, - 'uri': artifact.uri, - 'metadata': artifact.metadata, - } - artifacts_list = {'artifacts': [runtime_artifact]} - - self.excutor_output['artifacts'][name] = artifacts_list if func_output is not None: if is_parameter(self.return_annotation) or is_artifact( @@ -248,6 +269,19 @@ def write_executor_output(self, f'Unknown return type: {self.return_annotation}. 
Must be one of `str`, `int`, `float`, a subclass of `Artifact`, or a NamedTuple collection of these types.' ) + if self.output_artifacts: + self.excutor_output['artifacts'] = {} + + for name, artifact in self.output_artifacts.items(): + runtime_artifact = { + 'name': artifact.name, + 'uri': artifact.uri, + 'metadata': artifact.metadata, + } + artifacts_list = {'artifacts': [runtime_artifact]} + + self.excutor_output['artifacts'][name] = artifacts_list + # This check is to ensure only one worker (in a mirrored, distributed training/compute strategy) attempts to write to the same executor output file at the same time using gcsfuse, which enforces immutability of files. write_file = True @@ -259,12 +293,10 @@ def write_executor_output(self, write_file = cluster_spec['task']['type'] in CHIEF_NODE_LABELS if write_file: - executor_output_path = self.executor_input['outputs']['outputFile'] - os.makedirs(os.path.dirname(executor_output_path), exist_ok=True) - with open(executor_output_path, 'w') as f: + makedirs_recursively(self.executor_output_path) + with open(self.executor_output_path, 'w') as f: f.write(json.dumps(self.excutor_output)) - return executor_output_path - + return self.executor_output_path return None def execute(self) -> Optional[str]: @@ -300,17 +332,23 @@ def execute(self) -> Optional[str]: error_message=value.get('error').get('message', None), ) + elif type_annotations.is_list_of_artifacts(v): + func_kwargs[k] = self.get_input_artifact(k) + elif is_parameter(v): value = self.get_input_parameter_value(k) if value is not None: func_kwargs[k] = value elif type_annotations.is_Input_Output_artifact_annotation(v): - if type_annotations.is_input_artifact(v): + if type_annotations.is_artifact_wrapped_in_Input(v): func_kwargs[k] = self.get_input_artifact(k) - if type_annotations.is_output_artifact(v): + if type_annotations.is_artifact_wrapped_in_Output(v): func_kwargs[k] = self.get_output_artifact(k) + elif is_artifact(v): + func_kwargs[k] = self.get_input_artifact(k) + elif isinstance(v, type_annotations.OutputPath): if is_parameter(v.type): func_kwargs[k] = self.get_output_parameter_path(k) diff --git a/sdk/python/kfp/dsl/executor_test.py b/sdk/python/kfp/dsl/executor_test.py index 4cc5969344..8b799d2c5a 100644 --- a/sdk/python/kfp/dsl/executor_test.py +++ b/sdk/python/kfp/dsl/executor_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
"""Tests for kfp.dsl.executor.""" +import contextlib import json import os import tempfile @@ -41,10 +42,21 @@ class ExecutorTest(parameterized.TestCase): def setUp(cls): cls.maxDiff = None cls._test_dir = tempfile.mkdtemp() + + cls.prev_gcs_prefix = artifact_types._GCS_LOCAL_MOUNT_PREFIX + cls.prev_minio_prefix = artifact_types._MINIO_LOCAL_MOUNT_PREFIX + cls.prev_s3_prefix = artifact_types._S3_LOCAL_MOUNT_PREFIX + artifact_types._GCS_LOCAL_MOUNT_PREFIX = cls._test_dir + '/' artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls._test_dir + '/minio/' artifact_types._S3_LOCAL_MOUNT_PREFIX = cls._test_dir + '/s3/' + @classmethod + def tearDown(cls): + artifact_types._GCS_LOCAL_MOUNT_PREFIX = cls.prev_gcs_prefix + artifact_types._MINIO_LOCAL_MOUNT_PREFIX = cls.prev_minio_prefix + artifact_types._S3_LOCAL_MOUNT_PREFIX = cls.prev_s3_prefix + def execute(self, func: Callable, executor_input: str) -> None: executor_input_dict = json.loads(executor_input % {'test_dir': self._test_dir}) @@ -52,12 +64,12 @@ def execute(self, func: Callable, executor_input: str) -> None: executor.Executor( executor_input=executor_input_dict, function_to_execute=func).execute() + return executor_input_dict['outputs']['outputFile'] def execute_and_load_output_metadata(self, func: Callable, executor_input: str) -> dict: - self.execute(func, executor_input) - with open(os.path.join(self._test_dir, 'output_metadata.json'), - 'r') as f: + output_file = self.execute(func, executor_input) + with open(output_file) as f: return json.loads(f.read()) def test_input_and_output_parameters(self): @@ -1153,6 +1165,76 @@ def test_func(input_artifact: Input[Artifact]): input_artifact.name, 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + + def test_single_artifact_input_pythonic(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_artifact" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_artifact: Artifact): + self.assertIsInstance(input_artifact, Artifact) + self.assertEqual( + input_artifact.name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' + ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + + def test_single_artifact_input_pythonic_with_optional(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_artifact": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_artifact" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_artifact: Optional[Artifact] = None): + self.assertIsInstance(input_artifact, Artifact) self.assertEqual( input_artifact.name, 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_artifact' @@ -1163,6 +1245,224 @@ def test_func(input_artifact: Input[Artifact]): 
self.assertDictEqual(output_metadata, {}) + def test_single_artifact_output_pythonic(self): + executor_input = """\ + { + "inputs": {}, + "outputs": { + "artifacts": { + "Output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func() -> Artifact: + return Artifact( + uri='gs://manually_specified_bucket/foo', + metadata={'data': 123}, + ) + + with temporary_envvar('VERTEX_AI_PIPELINES_RUN_LABELS', '12325'): + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'Output': { + 'artifacts': [{ + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://manually_specified_bucket/foo', + 'metadata': { + 'data': 123 + } + }] + } + }, + }) + + def test_single_artifact_output_pythonic_with_get_uri(self): + executor_input = """\ + { + "inputs": {}, + "outputs": { + "artifacts": { + "Output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/another_bucket/output_metadata.json" + } + } + """ + + def test_func() -> Artifact: + return Artifact( + uri=dsl.get_uri(suffix='my_artifact'), + metadata={'data': 123}, + ) + + with temporary_envvar('VERTEX_AI_PIPELINES_RUN_LABELS', '12325'): + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'Output': { + 'artifacts': [{ + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://another_bucket/my_artifact', + 'metadata': { + 'data': 123 + } + }] + } + }, + }) + + def test_multiple_artifact_output_pythonic_with_get_uri(self): + executor_input = """\ + { + "inputs": {}, + "outputs": { + "artifacts": { + "a": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + }, + "d": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/321", + "type": { + "schemaTitle": "system.Dataset" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/another_bucket/output_metadata.json" + } + } + """ + + def test_func() -> NamedTuple('outputs', a=Artifact, d=Dataset): + outputs = NamedTuple('outputs', a=Artifact, d=Dataset) + return outputs( + a=Artifact( + uri=dsl.get_uri(suffix='artifact'), + metadata={'data': 123}, + ), + d=Dataset( + uri=dsl.get_uri(suffix='dataset'), + metadata={}, + )) + + with temporary_envvar('VERTEX_AI_PIPELINES_RUN_LABELS', '12325'): + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual( + output_metadata, { + 'artifacts': { + 'a': { + 'artifacts': [{ + 'name': + 'projects/123/locations/us-central1/metadataStores/default/artifacts/123', + 'uri': + 'gs://another_bucket/artifact', + 'metadata': { + 'data': 123 + } + }] + }, + 'd': { + 'artifacts': [{ + 'name': + 
'projects/123/locations/us-central1/metadataStores/default/artifacts/321', + 'uri': + 'gs://another_bucket/dataset', + 'metadata': {} + }] + } + }, + }) + + def test_warns_if_artifact_name_for_vertex(self): + executor_input = """\ + { + "inputs": {}, + "outputs": { + "artifacts": { + "Output": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/123", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output" + } + ] + } + }, + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func() -> Artifact: + return Artifact( + name='illegal_custom_name', + uri='gs://manually_specified_bucket/foo', + metadata={'data': 123}, + ) + + with temporary_envvar('VERTEX_AI_PIPELINES_RUN_LABELS', '12325'): + with self.assertWarnsRegex( + RuntimeWarning, + r'If you are running your pipeline Vertex AI Pipelines, you should not provide a name for your artifact\. It will be set to the Vertex artifact resource name projects/123/locations/us-central1/metadataStores/default/artifacts/123 by default\. Got value for name: illegal_custom_name\.' + ): + self.execute_and_load_output_metadata(test_func, executor_input) + def test_list_of_artifacts_input(self): executor_input = """\ { @@ -1212,6 +1512,104 @@ def test_func(input_list: Input[List[Artifact]]): self.assertDictEqual(output_metadata, {}) + def test_list_of_artifacts_input_pythonic(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_list": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/0" + }, + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/1" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_list: List[Artifact]): + self.assertEqual(len(input_list), 2) + self.assertEqual( + input_list[0].name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0' + ) + self.assertEqual( + input_list[1].name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1' + ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + + def test_list_of_artifacts_input_pythonic_with_optional(self): + executor_input = """\ + { + "inputs": { + "artifacts": { + "input_list": { + "artifacts": [ + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/0" + }, + { + "metadata": {}, + "name": "projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1", + "type": { + "schemaTitle": "system.Artifact" + }, + "uri": "gs://some-bucket/output/input_list/1" + } + ] + } + } + }, + "outputs": { + "outputFile": "%(test_dir)s/output_metadata.json" + } + } + """ + + def test_func(input_list: List[Artifact] = None): + self.assertEqual(len(input_list), 2) + self.assertEqual( + input_list[0].name, + 'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/0' + ) + self.assertEqual( + input_list[1].name, + 
'projects/123/locations/us-central1/metadataStores/default/artifacts/input_list/1' + ) + + output_metadata = self.execute_and_load_output_metadata( + test_func, executor_input) + + self.assertDictEqual(output_metadata, {}) + class TestDictToArtifact(parameterized.TestCase): @@ -1329,5 +1727,23 @@ def test_dict_to_artifact_kfp_artifact( executor.create_artifact_instance(runtime_artifact), expected_type) +@contextlib.contextmanager +def temporary_envvar(key: str, value: str) -> None: + # Save the old value if it exists + old_value = os.environ.get(key, None) + + # Set the new value + os.environ[key] = value + + try: + yield + finally: + # Restore the old value or delete the key if it didn't exist before + if old_value is not None: + os.environ[key] = old_value + else: + del os.environ[key] + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/types/artifact_types.py b/sdk/python/kfp/dsl/types/artifact_types.py index 2c6999c2d8..f7a676573d 100644 --- a/sdk/python/kfp/dsl/types/artifact_types.py +++ b/sdk/python/kfp/dsl/types/artifact_types.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Classes for input/output Artifacts in KFP SDK.""" +"""Classes and utilities for using and creating artifacts in components.""" +import os from typing import Dict, List, Optional, Type +import warnings _GCS_LOCAL_MOUNT_PREFIX = '/gcs/' _MINIO_LOCAL_MOUNT_PREFIX = '/minio/' @@ -90,13 +92,17 @@ def _get_path(self) -> Optional[str]: return None def _set_path(self, path: str) -> None: - if path.startswith(_GCS_LOCAL_MOUNT_PREFIX): - path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):] - elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX): - path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):] - elif path.startswith(_S3_LOCAL_MOUNT_PREFIX): - path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):] - self.uri = path + self.uri = convert_local_path_to_remote_path(path) + + +def convert_local_path_to_remote_path(path: str) -> str: + if path.startswith(_GCS_LOCAL_MOUNT_PREFIX): + return 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):] + elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX): + return 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):] + elif path.startswith(_S3_LOCAL_MOUNT_PREFIX): + return 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):] + return path class Model(Artifact): @@ -470,3 +476,38 @@ class Markdown(Artifact): Markdown, ] } + +CONTAINER_TASK_ROOT: Optional[str] = None + + +# suffix default of 'Output' should be the same key as the default key for a +# single output component, but use value not variable for reference docs +def get_uri(suffix: str = 'Output') -> str: + """Gets the task root URI, a unique object storage URI associated with the + current task. This function may only be called at task runtime. + + Returns an empty string if the task root cannot be inferred from the runtime environment. + + Args: + suffix: A suffix to append to the URI. This is a helpful for creating unique subdirectories when the component has multiple outputs. + + Returns: + The URI or empty string. + """ + if CONTAINER_TASK_ROOT is None: + raise RuntimeError( + f"'dsl.{get_uri.__name__}' can only be called at task runtime. The task root is unknown in the current environment." 
+ ) + UNSUPPORTED_KFP_PATH = '/tmp/kfp_outputs' + if CONTAINER_TASK_ROOT == UNSUPPORTED_KFP_PATH: + warnings.warn( + f'dsl.{get_uri.__name__} is not yet supported by the KFP backend. Please specify a URI explicitly.', + RuntimeWarning, + stacklevel=2, + ) + # return empty string, not None, to conform with logic in artifact + # constructor which immediately converts uri=None to uri='' + # this way the .path property can worry about handling fewer input types + return '' + remote_task_root = convert_local_path_to_remote_path(CONTAINER_TASK_ROOT) + return os.path.join(remote_task_root, suffix) diff --git a/sdk/python/kfp/dsl/types/artifact_types_test.py b/sdk/python/kfp/dsl/types/artifact_types_test.py index 917ad95a45..c34f4a6bba 100644 --- a/sdk/python/kfp/dsl/types/artifact_types_test.py +++ b/sdk/python/kfp/dsl/types/artifact_types_test.py @@ -13,18 +13,20 @@ # limitations under the License. """Tests for kfp.components.types.artifact_types.""" +import contextlib import json import os import unittest from absl.testing import parameterized +from kfp import dsl from kfp.dsl.types import artifact_types -class ArtifactsTest(parameterized.TestCase): +class ArtifactsTest(unittest.TestCase): def test_complex_metrics(self): - metrics = artifact_types.ClassificationMetrics() + metrics = dsl.ClassificationMetrics() metrics.log_roc_data_point(threshold=0.1, tpr=98.2, fpr=96.2) metrics.log_roc_data_point(threshold=24.3, tpr=24.5, fpr=98.4) metrics.set_confusion_matrix_categories(['dog', 'cat', 'horses']) @@ -41,7 +43,7 @@ def test_complex_metrics(self): self.assertEqual(expected_json, metrics.metadata) def test_complex_metrics_bulk_loading(self): - metrics = artifact_types.ClassificationMetrics() + metrics = dsl.ClassificationMetrics() metrics.log_roc_curve( fpr=[85.1, 85.1, 85.1], tpr=[52.6, 52.6, 52.6], @@ -57,5 +59,92 @@ def test_complex_metrics_bulk_loading(self): self.assertEqual(expected_json, metrics.metadata) +@contextlib.contextmanager +def set_temporary_task_root(task_root: str): + artifact_types.CONTAINER_TASK_ROOT = task_root + try: + yield + finally: + artifact_types.CONTAINER_TASK_ROOT = None + + +class TestGetUri(unittest.TestCase): + + def test_raise_if_no_env_var(self): + + with self.assertRaisesRegex( + RuntimeError, + r"'dsl\.get_uri' can only be called at task runtime\. The task root is unknown in the current environment\." 
+ ): + dsl.get_uri() + + def test_default_gcs(self): + with set_temporary_task_root( + '/gcs/my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789' + ): + self.assertEqual( + 'gs://my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789/Output', + dsl.get_uri()) + + def test_default_s3(self): + with set_temporary_task_root( + '/s3/my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789' + ): + self.assertEqual( + 's3://my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789/Output', + dsl.get_uri()) + + def test_default_minio(self): + with set_temporary_task_root( + '/minio/my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789' + ): + self.assertEqual( + 'minio://my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789/Output', + dsl.get_uri()) + + def test_suffix_arg_gcs(self): + with set_temporary_task_root( + '/gcs/my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789' + ): + self.assertEqual( + 'gs://my_bucket/123456789/abc-09-14-2023-14-21-53/foo_123456789/model', + dsl.get_uri('model')) + + def test_suffix_arg_tmp_no_suffix(self): + with set_temporary_task_root('/tmp/kfp_outputs'): + with self.assertWarnsRegex( + RuntimeWarning, + r'dsl\.get_uri is not yet supported by the KFP backend\. Please specify a URI explicitly\.' + ): + actual = dsl.get_uri('model') + self.assertEqual('', actual) + + def test_suffix_arg_tmp_with_suffix(self): + with set_temporary_task_root('/tmp/kfp_outputs'): + with self.assertWarnsRegex( + RuntimeWarning, + r'dsl\.get_uri is not yet supported by the KFP backend\. Please specify a URI explicitly\.' + ): + actual = dsl.get_uri('model') + self.assertEqual('', actual) + + +class TestConvertLocalPathToRemotePath(parameterized.TestCase): + + @parameterized.parameters([{ + 'local_path': local_path, + 'expected': expected + } for local_path, expected in [ + ('/gcs/foo/bar', 'gs://foo/bar'), + ('/minio/foo/bar', 'minio://foo/bar'), + ('/s3/foo/bar', 's3://foo/bar'), + ('/tmp/kfp_outputs', '/tmp/kfp_outputs'), + ('/some/random/path', '/some/random/path'), + ]]) + def test_gcs(self, local_path, expected): + actual = artifact_types.convert_local_path_to_remote_path(local_path) + self.assertEqual(actual, expected) + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/dsl/types/type_annotations.py b/sdk/python/kfp/dsl/types/type_annotations.py index 1d9e2f2b0e..cd6adb89d8 100644 --- a/sdk/python/kfp/dsl/types/type_annotations.py +++ b/sdk/python/kfp/dsl/types/type_annotations.py @@ -135,7 +135,7 @@ def is_Input_Output_artifact_annotation(typ) -> bool: return True -def is_input_artifact(typ) -> bool: +def is_artifact_wrapped_in_Input(typ: Any) -> bool: """Returns True if typ is of type Input[T].""" if not is_Input_Output_artifact_annotation(typ): return False @@ -143,7 +143,7 @@ def is_input_artifact(typ) -> bool: return typ.__metadata__[0] == InputAnnotation -def is_output_artifact(typ) -> bool: +def is_artifact_wrapped_in_Output(typ: Any) -> bool: """Returns True if typ is of type Output[T].""" if not is_Input_Output_artifact_annotation(typ): return False @@ -160,14 +160,19 @@ def get_io_artifact_class(typ): return None # extract inner type from list of artifacts - inner = typ.__args__[0] + inner = strip_Input_or_Output_marker(typ) if hasattr(inner, '__origin__') and inner.__origin__ == list: return inner.__args__[0] return inner -def get_io_artifact_annotation(typ): +def strip_Input_or_Output_marker(typ: Any) -> artifact_types.Artifact: + return typ.__args__[0] + + +def get_input_or_output_marker( + typ) -> 
Optional[Union[InputAnnotation, OutputAnnotation]]: if not is_Input_Output_artifact_annotation(typ): return None diff --git a/sdk/python/kfp/dsl/types/type_annotations_test.py b/sdk/python/kfp/dsl/types/type_annotations_test.py index b57e254082..df34682ce0 100644 --- a/sdk/python/kfp/dsl/types/type_annotations_test.py +++ b/sdk/python/kfp/dsl/types/type_annotations_test.py @@ -58,21 +58,24 @@ def test_is_not_artifact_annotation(self, annotation): Input, ]) def test_is_input_artifact(self, annotation): - self.assertTrue(type_annotations.is_input_artifact(annotation)) + self.assertTrue( + type_annotations.is_artifact_wrapped_in_Input(annotation)) @parameterized.parameters([ Output[Model], Output, ]) def test_is_not_input_artifact(self, annotation): - self.assertFalse(type_annotations.is_input_artifact(annotation)) + self.assertFalse( + type_annotations.is_artifact_wrapped_in_Input(annotation)) @parameterized.parameters([ Output[Model], Output[List[Model]], ]) def test_is_output_artifact(self, annotation): - self.assertTrue(type_annotations.is_output_artifact(annotation)) + self.assertTrue( + type_annotations.is_artifact_wrapped_in_Output(annotation)) @parameterized.parameters([ Input[Model], @@ -80,7 +83,8 @@ def test_is_output_artifact(self, annotation): Input, ]) def test_is_not_output_artifact(self, annotation): - self.assertFalse(type_annotations.is_output_artifact(annotation)) + self.assertFalse( + type_annotations.is_artifact_wrapped_in_Output(annotation)) def test_get_io_artifact_class(self): self.assertEqual( @@ -97,26 +101,26 @@ def test_get_io_artifact_class(self): def test_get_io_artifact_annotation(self): self.assertEqual( - type_annotations.get_io_artifact_annotation(Output[Model]), + type_annotations.get_input_or_output_marker(Output[Model]), OutputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Output[List[Model]]), + type_annotations.get_input_or_output_marker(Output[List[Model]]), OutputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Input[Model]), + type_annotations.get_input_or_output_marker(Input[Model]), InputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Input[List[Model]]), + type_annotations.get_input_or_output_marker(Input[List[Model]]), InputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Input), InputAnnotation) + type_annotations.get_input_or_output_marker(Input), InputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Output), + type_annotations.get_input_or_output_marker(Output), OutputAnnotation) self.assertEqual( - type_annotations.get_io_artifact_annotation(Model), None) - self.assertEqual(type_annotations.get_io_artifact_annotation(str), None) + type_annotations.get_input_or_output_marker(Model), None) + self.assertEqual(type_annotations.get_input_or_output_marker(str), None) @parameterized.parameters( { diff --git a/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.py b/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.py new file mode 100644 index 0000000000..31353e852b --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.py @@ -0,0 +1,58 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from kfp import dsl +from kfp.dsl import Dataset +from kfp.dsl import Model + + +@dsl.component(packages_to_install=['dill==0.3.7']) +def make_language_model(text_dataset: Dataset) -> Model: + # dill allows pickling objects belonging to a function's local namespace + import dill + + with open(text_dataset.path) as f: + text = f.read() + + # insert train on text here # + + def dummy_model(x: str) -> str: + return x + + model = Model( + uri=dsl.get_uri(suffix='model'), + metadata={'data': text_dataset.name}, + ) + + with open(model.path, 'wb') as f: + dill.dump(dummy_model, f) + + return model + + +@dsl.pipeline +def make_language_model_pipeline() -> Model: + importer = dsl.importer( + artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt', + artifact_class=Dataset, + reimport=False, + metadata={'key': 'value'}) + return make_language_model(text_dataset=importer.output).output + + +if __name__ == '__main__': + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=make_language_model_pipeline, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.yaml b/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.yaml new file mode 100644 index 0000000000..d010d50a6b --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifact_with_single_return.yaml @@ -0,0 +1,123 @@ +# PIPELINE DEFINITION +# Name: make-language-model-pipeline +# Outputs: +# Output: system.Model +components: + comp-importer: + executorLabel: exec-importer + inputDefinitions: + parameters: + uri: + parameterType: STRING + outputDefinitions: + artifacts: + artifact: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-make-language-model: + executorLabel: exec-make-language-model + inputDefinitions: + artifacts: + text_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-importer: + importer: + artifactUri: + constant: gs://ml-pipeline-playground/shakespeare1.txt + metadata: + key: value + typeSchema: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + exec-make-language-model: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - make_language_model + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.2.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'dill==0.3.7'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef make_language_model(text_dataset: Dataset) -> Model:\n # dill\ + \ allows pickling objects belonging to a function's local namespace\n \ + \ import dill\n\n with open(text_dataset.path) as f:\n text =\ + \ f.read()\n\n # insert train on text here #\n\n def dummy_model(x:\ + \ str) -> str:\n return x\n\n model = Model(\n uri=dsl.get_uri(suffix='model'),\n\ + \ metadata={'data': text_dataset.name},\n )\n\n with open(model.path,\ + \ 'wb') as f:\n dill.dump(dummy_model, f)\n\n return model\n\n" + image: python:3.7 +pipelineInfo: + name: make-language-model-pipeline +root: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: Output + producerSubtask: make-language-model + tasks: + importer: + cachingOptions: + enableCache: true + componentRef: + name: comp-importer + inputs: + parameters: + uri: + runtimeValue: + constant: gs://ml-pipeline-playground/shakespeare1.txt + taskInfo: + name: importer + make-language-model: + cachingOptions: + enableCache: true + componentRef: + name: comp-make-language-model + dependentTasks: + - importer + inputs: + artifacts: + text_dataset: + taskOutputArtifact: + outputArtifactKey: artifact + producerTask: importer + taskInfo: + name: make-language-model + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.2.0 diff --git a/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.py b/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.py new file mode 100644 index 0000000000..899bc483df --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.py @@ -0,0 +1,52 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import List + +from kfp import dsl +from kfp.dsl import Dataset + + +@dsl.component +def make_dataset(text: str) -> Dataset: + dataset = Dataset(uri=dsl.get_uri(), metadata={'length': len(text)}) + with open(dataset.path, 'w') as f: + f.write(text) + return dataset + + +@dsl.component +def join_datasets(datasets: List[Dataset]) -> Dataset: + texts = [] + for dataset in datasets: + with open(dataset.path, 'r') as f: + texts.append(f.read()) + + return ''.join(texts) + + +@dsl.pipeline +def make_and_join_datasets( + texts: List[str] = ['Hello', ',', ' ', 'world!']) -> Dataset: + with dsl.ParallelFor(texts) as text: + t1 = make_dataset(text=text) + + return join_datasets(datasets=dsl.Collected(t1.output)).output + + +if __name__ == '__main__': + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=make_and_join_datasets, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.yaml b/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.yaml new file mode 100644 index 0000000000..53b5fb17f8 --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifacts_with_list_of_artifacts.yaml @@ -0,0 +1,187 @@ +# PIPELINE DEFINITION +# Name: make-and-join-datasets +# Inputs: +# texts: list [Default: ['Hello', ',', ' ', 'world!']] +# Outputs: +# Output: system.Dataset +components: + comp-for-loop-1: + dag: + outputs: + artifacts: + pipelinechannel--make-dataset-Output: + artifactSelectors: + - outputArtifactKey: Output + producerSubtask: make-dataset + tasks: + make-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-make-dataset + inputs: + parameters: + text: + componentInputParameter: pipelinechannel--texts-loop-item + taskInfo: + name: make-dataset + inputDefinitions: + parameters: + pipelinechannel--texts: + parameterType: LIST + pipelinechannel--texts-loop-item: + parameterType: STRING + outputDefinitions: + artifacts: + pipelinechannel--make-dataset-Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + isArtifactList: true + comp-join-datasets: + executorLabel: exec-join-datasets + inputDefinitions: + artifacts: + datasets: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + isArtifactList: true + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-make-dataset: + executorLabel: exec-make-dataset + inputDefinitions: + parameters: + text: + parameterType: STRING + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-join-datasets: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - join_datasets + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.2.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef join_datasets(datasets: List[Dataset]) -> Dataset:\n texts\ + \ = []\n for dataset in datasets:\n with open(dataset.path, 'r')\ + \ as f:\n texts.append(f.read())\n\n return ''.join(texts)\n\ + \n" + image: python:3.7 + exec-make-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - make_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.2.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef make_dataset(text: str) -> Dataset:\n dataset = Dataset(uri=dsl.get_uri(),\ + \ metadata={'length': len(text)})\n with open(dataset.path, 'w') as f:\n\ + \ f.write(text)\n return dataset\n\n" + image: python:3.7 +pipelineInfo: + name: make-and-join-datasets +root: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: Output + producerSubtask: join-datasets + tasks: + for-loop-1: + componentRef: + name: comp-for-loop-1 + inputs: + parameters: + pipelinechannel--texts: + componentInputParameter: texts + parameterIterator: + itemInput: pipelinechannel--texts-loop-item + items: + inputParameter: pipelinechannel--texts + taskInfo: + name: for-loop-1 + join-datasets: + cachingOptions: + enableCache: true + componentRef: + name: comp-join-datasets + dependentTasks: + - for-loop-1 + inputs: + artifacts: + datasets: + taskOutputArtifact: + outputArtifactKey: pipelinechannel--make-dataset-Output + producerTask: for-loop-1 + taskInfo: + name: join-datasets + inputDefinitions: + parameters: + texts: + defaultValue: + - Hello + - ',' + - ' ' + - world! + isOptional: true + parameterType: LIST + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.2.0 diff --git a/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.py b/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.py new file mode 100644 index 0000000000..845ebb7472 --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.py @@ -0,0 +1,93 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import NamedTuple + +from kfp import dsl +from kfp.dsl import Artifact +from kfp.dsl import Dataset + + +@dsl.component +def dataset_splitter( + in_dataset: Dataset +) -> NamedTuple( + 'outputs', + dataset1=Dataset, + dataset2=Dataset, +): + + with open(in_dataset.path) as f: + in_data = f.read() + + out_data1, out_data2 = in_data[:len(in_data) // 2], in_data[len(in_data) // + 2:] + + dataset1 = Dataset( + uri=dsl.get_uri(suffix='dataset1'), + metadata={'original_data': in_dataset.name}, + ) + with open(dataset1.path, 'w') as f: + f.write(out_data1) + + dataset2 = Dataset( + uri=dsl.get_uri(suffix='dataset2'), + metadata={'original_data': in_dataset.name}, + ) + with open(dataset2.path, 'w') as f: + f.write(out_data2) + + outputs = NamedTuple( + 'outputs', + dataset1=Dataset, + dataset2=Dataset, + ) + return outputs(dataset1=dataset1, dataset2=dataset2) + + +outputs = NamedTuple( + 'outputs', + dataset1=Dataset, + dataset2=Dataset, +) + + +@dsl.pipeline +def splitter_pipeline(in_dataset: Dataset) -> outputs: + task = dataset_splitter(in_dataset=in_dataset) + return outputs( + task.outputs['dataset1'], + task.outputs['dataset1'], + ) + + +@dsl.component +def make_dataset() -> Artifact: + artifact = Artifact(uri=dsl.get_uri('dataset')) + with open(artifact.path, 'w') as f: + f.write('Hello, world') + return artifact + + +@dsl.pipeline +def split_datasets_and_return_first() -> Dataset: + t1 = make_dataset() + return splitter_pipeline(in_dataset=t1.output).outputs['dataset1'] + + +if __name__ == '__main__': + from kfp import compiler + compiler.Compiler().compile( + pipeline_func=split_datasets_and_return_first, + package_path=__file__.replace('.py', '.yaml')) diff --git a/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.yaml b/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.yaml new file mode 100644 index 0000000000..2f655b097c --- /dev/null +++ b/sdk/python/test_data/pipelines/pythonic_artifacts_with_multiple_returns.yaml @@ -0,0 +1,184 @@ +# PIPELINE DEFINITION +# Name: split-datasets-and-return-first +# Outputs: +# Output: system.Dataset +components: + comp-dataset-splitter: + executorLabel: exec-dataset-splitter + inputDefinitions: + artifacts: + in_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + dataset1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + dataset2: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-make-dataset: + executorLabel: exec-make-dataset + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-splitter-pipeline: + dag: + outputs: + artifacts: + dataset1: + artifactSelectors: + - outputArtifactKey: dataset1 + producerSubtask: dataset-splitter + dataset2: + artifactSelectors: + - outputArtifactKey: dataset1 + producerSubtask: dataset-splitter + tasks: + dataset-splitter: + cachingOptions: + enableCache: true + componentRef: + name: comp-dataset-splitter + inputs: + artifacts: + in_dataset: + componentInputArtifact: 
in_dataset + taskInfo: + name: dataset-splitter + inputDefinitions: + artifacts: + in_dataset: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + outputDefinitions: + artifacts: + dataset1: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + dataset2: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-dataset-splitter: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - dataset_splitter + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.2.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef dataset_splitter(\n in_dataset: Dataset\n) -> NamedTuple(\n\ + \ 'outputs',\n dataset1=Dataset,\n dataset2=Dataset,\n\ + ):\n\n with open(in_dataset.path) as f:\n in_data = f.read()\n\ + \n out_data1, out_data2 = in_data[:len(in_data) // 2], in_data[len(in_data)\ + \ //\n 2:]\n\ + \n dataset1 = Dataset(\n uri=dsl.get_uri(suffix='dataset1'),\n\ + \ metadata={'original_data': in_dataset.name},\n )\n with open(dataset1.path,\ + \ 'w') as f:\n f.write(out_data1)\n\n dataset2 = Dataset(\n \ + \ uri=dsl.get_uri(suffix='dataset2'),\n metadata={'original_data':\ + \ in_dataset.name},\n )\n with open(dataset2.path, 'w') as f:\n \ + \ f.write(out_data2)\n\n outputs = NamedTuple(\n 'outputs',\n\ + \ dataset1=Dataset,\n dataset2=Dataset,\n )\n return\ + \ outputs(dataset1=dataset1, dataset2=dataset2)\n\n" + image: python:3.7 + exec-make-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - make_dataset + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.2.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef make_dataset() -> Artifact:\n artifact = Artifact(uri=dsl.get_uri('dataset'))\n\ + \ with open(artifact.path, 'w') as f:\n f.write('Hello, world')\n\ + \ return artifact\n\n" + image: python:3.7 +pipelineInfo: + name: split-datasets-and-return-first +root: + dag: + outputs: + artifacts: + Output: + artifactSelectors: + - outputArtifactKey: dataset1 + producerSubtask: splitter-pipeline + tasks: + make-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-make-dataset + taskInfo: + name: make-dataset + splitter-pipeline: + cachingOptions: + enableCache: true + componentRef: + name: comp-splitter-pipeline + dependentTasks: + - make-dataset + inputs: + artifacts: + in_dataset: + taskOutputArtifact: + outputArtifactKey: Output + producerTask: make-dataset + taskInfo: + name: splitter-pipeline + outputDefinitions: + artifacts: + Output: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +schemaVersion: 2.1.0 +sdkVersion: kfp-2.2.0 diff --git a/sdk/python/test_data/test_data_config.yaml b/sdk/python/test_data/test_data_config.yaml index d64d7a1aea..7421c83303 100644 --- a/sdk/python/test_data/test_data_config.yaml +++ b/sdk/python/test_data/test_data_config.yaml @@ -180,6 +180,15 @@ pipelines: - module: if_elif_else_with_oneof_parameters name: outer_pipeline execute: false + - module: pythonic_artifact_with_single_return + name: make_language_model_pipeline + execute: false + - module: pythonic_artifacts_with_multiple_returns + name: split_datasets_and_return_first + execute: false + - module: pythonic_artifacts_with_list_of_artifacts + name: make_and_join_datasets + execute: false components: test_data_dir: sdk/python/test_data/components read: true diff --git a/test/sdk-execution-tests/requirements.txt b/test/sdk-execution-tests/requirements.txt index d9df3d9858..bf44f12049 100644 --- a/test/sdk-execution-tests/requirements.txt +++ b/test/sdk-execution-tests/requirements.txt @@ -1,4 +1,3 @@ sdk/python pytest==7.1.3 pytest-asyncio-cooperative==0.28.0 -pytest-mock==3.8.2 diff --git a/test/sdk-execution-tests/sdk_execution_tests.py b/test/sdk-execution-tests/sdk_execution_tests.py index 1613cb627b..b05f185acb 100644 --- a/test/sdk-execution-tests/sdk_execution_tests.py +++ b/test/sdk-execution-tests/sdk_execution_tests.py @@ -65,7 +65,9 @@ def create_test_case_parameters() -> List[TestCase]: return parameters -def wait(run_result: client.client.RunPipelineResult) -> kfp_server_api.V2beta1Run: +def wait( + run_result: client.client.RunPipelineResult +) -> kfp_server_api.V2beta1Run: return kfp_client.wait_for_run_completion( run_id=run_result.run_id, timeout=int(TIMEOUT_SECONDS)) @@ -104,16 +106,14 @@ def get_kfp_package_path() -> str: return path -partial_component_decorator = functools.partial( +dsl.component = functools.partial( dsl.component, kfp_package_path=get_kfp_package_path()) 
@pytest.mark.asyncio_cooperative @pytest.mark.parametrize('test_case', create_test_case_parameters()) -async def test(test_case: TestCase, mocker) -> None: +async def test(test_case: TestCase) -> None: """Asynchronously runs all samples and test that they succeed.""" - mocker.patch.object(dsl, 'component', partial_component_decorator) - event_loop = asyncio.get_running_loop() try: run_url, run_result = run(test_case) @@ -123,3 +123,7 @@ async def test(test_case: TestCase, mocker) -> None: api_run = await event_loop.run_in_executor(None, wait, run_result) assert api_run.state == 'SUCCEEDED', f'Pipeline {test_case.name} ended with incorrect status: {api_run.state}. More info: {run_url}' + + +if __name__ == '__main__': + pytest.main() From 0e240db39799cb0afbd8c7f982ffdd4f9eb58121 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 19 Oct 2023 12:37:40 -0700 Subject: [PATCH 225/253] No public description PiperOrigin-RevId: 574969883 --- .../_implementation/llm/function_based.py | 28 +++ .../llm/preprocess_chat_dataset.py | 201 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py index 122b67201c..67b914811f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/function_based.py @@ -268,6 +268,15 @@ def resolve_reference_model_metadata( reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', is_supported=True, ), + 'chat-bison@001': reference_model_metadata( + large_model_reference='BISON', + reference_model_path=( + 'gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_bison/' + ), + reward_model_reference='OTTER', + reward_model_path='gs://vertex-rlhf-restricted/pretrained_models/palm/t5x_otter_pretrain/', + is_supported=True, + ), 'elephant': reference_model_metadata( large_model_reference='ELEPHANT', reference_model_path=( @@ -461,3 +470,22 @@ def resolve_upload_model(large_model_reference: str) -> bool: if large_model_reference in supported_models: return True return False + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def resolve_instruction( + large_model_reference: str, instruction: Optional[str] = None +) -> str: + """Resolves the instruction to use for a given reference model. + + Args: + large_model_reference: Base model tuned by the pipeline. + instruction: Instruction provided at runtime. + + Returns: + Instruction to use during tokenization based on model type. Returns an empty + string for chat models because the instruction is prepended as the default + context. Otherwise the original instruction is returned. 
+ """ + instruction = instruction or '' + return instruction if 'chat' not in large_model_reference.lower() else '' diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py new file mode 100644 index 0000000000..d84e75dd63 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py @@ -0,0 +1,201 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""KFP Component the preprocesses chat dataset before tokenization.""" + +from google_cloud_pipeline_components import _image +from kfp import dsl + + +@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False) +def preprocess_chat_dataset( + large_model_reference: str, + input_dataset_uri: str, + processed_dataset: dsl.OutputPath(dsl.Artifact), # pytype: disable=invalid-annotation + processed_dataset_uri: dsl.OutputPath(str), # pytype: disable=invalid-annotation + default_context: str = '', + allow_local_files: bool = False, +): # pylint: disable=g-doc-args + # fmt: off + """Preprocesses datasets before tokenization. + + For text datasets, this is a no-op. + + Args: + large_model_reference: Name of the base model. Supported values are `text-bison@001`, `chat-bison@001`, `t5-small`, `t5-large`, `t5-xl` and `t5-xxl`. `text-bison@001`, `chat-bison@001` and `t5-small` are supported in ``us-central1` and `europe-west4`. `t5-large`, `t5-xl` and `t5-xxl` are only supported in `europe-west4`. + input_dataset_uri: Path to an unprocessed JSONL dataset. + default_context: Default context to apply to each example if a chat model is specified. + allow_local_files: Whether input URIs can specify local file paths. + + Returns: + processed_dataset: Processed chat dataset. Each example will contain fields `input_text` and `output_text`. + processed_dataset_uri: String pattern that can be used to find the processed dataset in downstream components. 
+ + """ + # fmt: on + # pylint: disable=g-import-not-at-top + import json + import os + from typing import List, Mapping, Any + import apache_beam as beam + # pylint: enable=g-import-not-at-top + + # [ Define helper methods and classes for preprocessing + # pylint: disable=invalid-name + INPUT_TEXT_KEY = 'input_text' + OUTPUT_TEXT_KEY = 'output_text' + CONTEXT_KEY = 'context' + MESSAGES_KEY = 'messages' + AUTHOR_KEY = 'author' + CONTENT_KEY = 'content' + GLOBAL_PREFIX = 'Only answer after [assistant] and never reply as [user]:' + CONTEXT_PREFIX = '[SYSTEM]:' + AUTHOR_USER = 'user' + AUTHOR_ASSISTANT = 'assistant' + USER_PREFIX = '[user]:' + ASSISTANT_PREFIX = '[assistant]:' + AUTHOR_ENCODING_PREFIX_MAPPING = { + AUTHOR_USER: USER_PREFIX, + AUTHOR_ASSISTANT: ASSISTANT_PREFIX, + } + VALID_AUTHORS = {AUTHOR_USER, AUTHOR_ASSISTANT} + # pylint: enable=invalid-name + + def get_gcs_path(input_path: str, allow_local_files: bool) -> str: + """Gets the /gcs/ path for a given URI.""" + if input_path.startswith('gs://'): + return input_path.replace('gs://', '/gcs/', 1) + elif input_path.startswith('/gcs/') or allow_local_files: + return input_path + else: + raise ValueError( + f'Invalid Cloud storage URI {input_path}. ' + 'Must start with `gs://` or `/gcs/`.' + ) + + def get_gs_path(input_path: str, allow_local_files: bool) -> str: + """Gets the gs:// path for a given URI.""" + if input_path.startswith('/gcs/'): + return input_path.replace('/gcs/', 'gs://', 1) + elif input_path.startswith('gs://') or allow_local_files: + return input_path + else: + raise ValueError( + f'Invalid Cloud storage URI {input_path}. ' + 'Must start with `gs://` or `/gcs/`.' + ) + + class JsonCoder(beam.coders.Coder): + """A coder that encodes/decodes lines as JSON strings.""" + + def encode(self, x): + return json.dumps(x).encode('utf-8') + + def decode(self, x): + return json.loads(x) + + class ChatDatasetProcessor(beam.DoFn): + """Converts chat data from input format to the format expected by the model.""" + + def __init__(self, default_context: str = ''): + self._default_context = default_context + + def _get_messages_or_fail( + self, element: Mapping[str, Any] + ) -> List[Mapping[str, str]]: + messages = element.get(MESSAGES_KEY) + if not messages or len(messages) <= 1: + raise ValueError( + 'Chat messages length should be greater than 1. Please include a ' + f'`messages` field in each line of dataset: {element}.' + ) + return messages + + def _get_author_or_fail(self, message: Mapping[str, str]) -> str: + author = message.get(AUTHOR_KEY) + if not author or author not in VALID_AUTHORS: + raise ValueError( + 'The `author` of each message needs to be from one of' + f' {VALID_AUTHORS}. Got author = {author}.' + ) + return author + + def _get_content_or_fail(self, message: Mapping[str, str]) -> str: + content = message.get(CONTENT_KEY) + if not content: + raise ValueError( + 'The `content` of each message needs to be non-empty. 
' + f'Invalid message: {message}' + ) + return content + + def process(self, element): + context = element.get(CONTEXT_KEY, self._default_context) + messages = self._get_messages_or_fail(element) + + per_conversation_context = ( + f'{CONTEXT_PREFIX}{context}\n\n' if context else '' + ) + message_prefix = f'{GLOBAL_PREFIX}\n{per_conversation_context}' + message_history = [] + for message in messages: + author = self._get_author_or_fail(message) + content = self._get_content_or_fail(message) + if author == AUTHOR_ASSISTANT: + joined_messages = '\n'.join(message_history) + input_text = f'{message_prefix}{joined_messages}\n{ASSISTANT_PREFIX}' + yield {INPUT_TEXT_KEY: input_text, OUTPUT_TEXT_KEY: content} + message_history.append( + f'{AUTHOR_ENCODING_PREFIX_MAPPING[author]}{content}' + ) + + # ] + + processed_dataset_uri = get_gcs_path(processed_dataset_uri, allow_local_files) + + # Reuse the input dataset if no preprocessing is needed. + if large_model_reference.lower() != 'chat-bison@001': + with open(processed_dataset_uri, 'w') as f: + f.write(input_dataset_uri) + return + + # Provide gs:// paths for datasets processed by Beam. + input_dataset_uri = get_gs_path(input_dataset_uri, allow_local_files) + processed_dataset = get_gs_path(processed_dataset, allow_local_files) + os.makedirs(processed_dataset, exist_ok=True) + processed_dataset_prefix = os.path.join(processed_dataset, 'shard') + + pipeline_options = ( + beam.options.pipeline_options.PipelineOptions.from_dictionary({ + 'runner': 'DirectRunner', + }) + ) + with beam.Pipeline(options=pipeline_options) as pipeline: + _ = ( + pipeline + | 'Read JSON from input dataset' + >> beam.io.ReadFromText(input_dataset_uri, coder=JsonCoder()) + | 'Process chat dataset' + >> beam.ParDo(ChatDatasetProcessor(default_context=default_context)) + | 'Write processed JSON to output file' + >> beam.io.WriteToText( + file_path_prefix=processed_dataset_prefix, + file_name_suffix='.jsonl', + coder=JsonCoder(), + ) + ) + + # Write file pattern that the tokenizer can use to find all processed files. 
+ with open(processed_dataset_uri, 'w') as f: + processed_dataset_pattern = os.path.join(processed_dataset, '*.jsonl') + f.write(processed_dataset_pattern) From 99fd2017a76660f30d0a04b71542cbef45783633 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 19 Oct 2023 14:35:08 -0700 Subject: [PATCH 226/253] feat(components): Add ability to preprocess chat llama datasets to `_implementation.llm.chat_dataset_preprocessor` PiperOrigin-RevId: 575004978 --- .../llm/preprocess_chat_dataset.py | 97 ++++++++++++++----- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py index d84e75dd63..e4e5697e37 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/llm/preprocess_chat_dataset.py @@ -44,9 +44,10 @@ def preprocess_chat_dataset( """ # fmt: on # pylint: disable=g-import-not-at-top + import dataclasses import json import os - from typing import List, Mapping, Any + from typing import Any, Callable, List, Mapping import apache_beam as beam # pylint: enable=g-import-not-at-top @@ -58,18 +59,51 @@ def preprocess_chat_dataset( MESSAGES_KEY = 'messages' AUTHOR_KEY = 'author' CONTENT_KEY = 'content' - GLOBAL_PREFIX = 'Only answer after [assistant] and never reply as [user]:' - CONTEXT_PREFIX = '[SYSTEM]:' AUTHOR_USER = 'user' AUTHOR_ASSISTANT = 'assistant' - USER_PREFIX = '[user]:' - ASSISTANT_PREFIX = '[assistant]:' - AUTHOR_ENCODING_PREFIX_MAPPING = { - AUTHOR_USER: USER_PREFIX, - AUTHOR_ASSISTANT: ASSISTANT_PREFIX, - } VALID_AUTHORS = {AUTHOR_USER, AUTHOR_ASSISTANT} + # pylint: enable=invalid-name + @dataclasses.dataclass + class PromptSchema: + global_prefix: str + user_prefix: str + user_postfix: str + assistant_prefix: str + assistant_postfix: str + get_system_message: Callable[[str], str] # pytype: disable=invalid-annotation + + def _get_chat_bison_001_system_message(context: str) -> str: + return f'[SYSTEM]:{context}\n\n' if context else '' + + chat_bison_001_schema = PromptSchema( + global_prefix=( + 'Only answer after [assistant] and never reply as [user]:\n' + ), + get_system_message=_get_chat_bison_001_system_message, + user_prefix='[user]:', + user_postfix='\n', + assistant_prefix='[assistant]:', + assistant_postfix='\n', + ) + + def _get_chat_llama_system_message(context: str) -> str: + return f'<>\n{context}\n<>\n\n' if context else '' + + chat_llama_schema = PromptSchema( + global_prefix='[INST] ', + get_system_message=_get_chat_llama_system_message, + user_prefix='', + user_postfix=' [/INST]', + assistant_prefix=' ', + assistant_postfix='[INST] ', + ) + + MODEL_TO_SCHEMA_MAPPING = { # pylint: disable=invalid-name + 'chat-bison@001': chat_bison_001_schema, + 'llama-2-7b-chat': chat_llama_schema, + 'llama-2-13b-chat': chat_llama_schema, + } def get_gcs_path(input_path: str, allow_local_files: bool) -> str: """Gets the /gcs/ path for a given URI.""" @@ -107,8 +141,9 @@ def decode(self, x): class ChatDatasetProcessor(beam.DoFn): """Converts chat data from input format to the format expected by the model.""" - def __init__(self, default_context: str = ''): + def __init__(self, default_context: str, prompt_schema: PromptSchema): self._default_context = default_context + self._schema = prompt_schema def _get_messages_or_fail( self, element: 
Mapping[str, Any]
@@ -143,32 +178,42 @@ def process(self, element):
       context = element.get(CONTEXT_KEY, self._default_context)
       messages = self._get_messages_or_fail(element)
 
-      per_conversation_context = (
-          f'{CONTEXT_PREFIX}{context}\n\n' if context else ''
-      )
-      message_prefix = f'{GLOBAL_PREFIX}\n{per_conversation_context}'
-      message_history = []
+      message_history = [
+          self._schema.global_prefix,
+          self._schema.get_system_message(context),
+      ]
       for message in messages:
         author = self._get_author_or_fail(message)
         content = self._get_content_or_fail(message)
-        if author == AUTHOR_ASSISTANT:
-          joined_messages = '\n'.join(message_history)
-          input_text = f'{message_prefix}{joined_messages}\n{ASSISTANT_PREFIX}'
-          yield {INPUT_TEXT_KEY: input_text, OUTPUT_TEXT_KEY: content}
-        message_history.append(
-            f'{AUTHOR_ENCODING_PREFIX_MAPPING[author]}{content}'
-        )
+        if author == AUTHOR_USER:
+          message_history.append(
+              f'{self._schema.user_prefix}{content}{self._schema.user_postfix}'
+          )
+        elif author == AUTHOR_ASSISTANT:
+          message_history.append(self._schema.assistant_prefix)
+          input_text = ''.join(message_history)
+          yield {INPUT_TEXT_KEY: input_text.rstrip(), OUTPUT_TEXT_KEY: content}
+          message_history = [
+              input_text,
+              f'{content}{self._schema.assistant_postfix}',
+          ]
+        else:
+          raise ValueError(
+              f'Unknown author {author}. Must be one of {VALID_AUTHORS}.'
+          )
 
   # ]
 
   processed_dataset_uri = get_gcs_path(processed_dataset_uri, allow_local_files)
 
   # Reuse the input dataset if no preprocessing is needed.
-  if large_model_reference.lower() != 'chat-bison@001':
+  if large_model_reference.lower() not in MODEL_TO_SCHEMA_MAPPING:
     with open(processed_dataset_uri, 'w') as f:
       f.write(input_dataset_uri)
     return
 
+  prompt_schema = MODEL_TO_SCHEMA_MAPPING[large_model_reference]
+
   # Provide gs:// paths for datasets processed by Beam.
   input_dataset_uri = get_gs_path(input_dataset_uri, allow_local_files)
   processed_dataset = get_gs_path(processed_dataset, allow_local_files)
@@ -186,7 +231,11 @@ def process(self, element):
         | 'Read JSON from input dataset'
         >> beam.io.ReadFromText(input_dataset_uri, coder=JsonCoder())
         | 'Process chat dataset'
-        >> beam.ParDo(ChatDatasetProcessor(default_context=default_context))
+        >> beam.ParDo(
+            ChatDatasetProcessor(
+                default_context=default_context, prompt_schema=prompt_schema
+            )
+        )
         | 'Write processed JSON to output file'
         >> beam.io.WriteToText(
             file_path_prefix=processed_dataset_prefix,

From 1ae72718044bd219b35fb3f17aba4e66354e3448 Mon Sep 17 00:00:00 2001
From: Connor McCarthy
Date: Thu, 19 Oct 2023 15:21:33 -0700
Subject: [PATCH 227/253] chore(components): pin `apache_beam[gcp]` to `2.50.0`

PiperOrigin-RevId: 575018545
---
 components/google-cloud/Dockerfile | 9 ++++++++-
 components/google-cloud/RELEASE.md | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile
index 36e5e5e913..c9297ac28e 100644
--- a/components/google-cloud/Dockerfile
+++ b/components/google-cloud/Dockerfile
@@ -28,7 +28,14 @@ RUN pip3 install -U google-cloud-storage
 RUN pip3 install -U google-api-python-client
 
 # Required by dataflow_launcher
-RUN pip3 install -U "apache_beam[gcp]"
+# Pin to `2.50.0` for compatibility with `google-cloud-aiplatform`, which
+# depends on `shapely<3.0.0dev`.
+# Prefer an exact pin, since GCPC's apache_beam version must match the
+# version in the custom Dataflow worker images for the Dataflow job to succeed.
+# Inexact pins risk that the apache_beam in GCPC drifts away from a
+# user-specified version in the image.
+# From docs: """When running your pipeline, launch the pipeline using the Apache Beam SDK with the same version and language version as the SDK on your custom container image. This step avoids unexpected errors from incompatible dependencies or SDKs.""" https://cloud.google.com/dataflow/docs/guides/using-custom-containers#before_you_begin_2
+RUN pip3 install -U "apache_beam[gcp]==2.50.0"
 
 # Required for sklearn/train_test_split_jsonl
 RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn<=1.0.2"
diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md
index fc40e16bf9..ea87c0f659 100644
--- a/components/google-cloud/RELEASE.md
+++ b/components/google-cloud/RELEASE.md
@@ -7,6 +7,7 @@
 * Support `service_account` in `ModelBatchPredictOp`.
 * Release `DataflowFlexTemplateJobOp` to GA namespace (`v1.dataflow.DataflowFlexTemplateJobOp`).
 * Make `model_checkpoint` optional for `preview.llm.infer_pipeline`. If not provided, the base model associated with the `large_model_reference` will be used.
+* Bump `apache_beam[gcp]` version in GCPC container image from `<2.34.0` to `==2.50.0` for compatibility with `google-cloud-aiplatform`, which depends on `shapely<3.0.0dev`. Note: upgrades to `google-cloud-pipeline-components`>=2.5.0 and later may require using a Dataflow worker image with `apache_beam==2.50.0`.
 
 ## Release 2.4.1
 * Disable caching for LLM pipeline tasks that store temporary artifacts.

From 8d979e8be4233585896bb24653487a4dbcdc8e04 Mon Sep 17 00:00:00 2001
From: Connor McCarthy
Date: Thu, 19 Oct 2023 16:43:04 -0700
Subject: [PATCH 228/253] docs(sdk): add KFP SDK reference docs local build script (#10124)

---
 docs/build_docs_locally.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 docs/build_docs_locally.sh

diff --git a/docs/build_docs_locally.sh b/docs/build_docs_locally.sh
new file mode 100644
index 0000000000..74a31608e9
--- /dev/null
+++ b/docs/build_docs_locally.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright 2023 Kubeflow Pipelines contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# install requirements
+
+pushd ..
+pip install -r docs/requirements.txt
+popd
+
+# build docs
+make clean html
+
+# serve docs
+pushd _build/html
+python3 -m http.server
+popd

From c0adea9946e874fd6a120f0f7c3bc3576bed8317 Mon Sep 17 00:00:00 2001
From: Connor McCarthy
Date: Thu, 19 Oct 2023 16:49:03 -0700
Subject: [PATCH 229/253] chore(sdk): add test for key error bug resolved in #10067 (#10128)

---
 sdk/python/kfp/compiler/compiler_test.py | 29 ++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/sdk/python/kfp/compiler/compiler_test.py b/sdk/python/kfp/compiler/compiler_test.py
index b98d5624d6..049975f1c0 100644
--- a/sdk/python/kfp/compiler/compiler_test.py
+++ b/sdk/python/kfp/compiler/compiler_test.py
@@ -5690,5 +5690,34 @@ def comp() -> List[Artifact]:
         return dsl.ContainerSpec(image='alpine', command=['pwd'])
 
 
+class TestPipelineSpecAttributeUniqueError(unittest.TestCase):
+
+    def test_compiles(self):
+        # in a previous version of the KFP SDK there was an error when:
+        # - a component has a dsl.OutputPath parameter
+        # - the pipeline has an existing component by a different name
+        # - the user calls component.pipeline_spec inside their pipeline definition
+        # this was resolved coincidentally in
+        # https://github.com/kubeflow/pipelines/pull/10067, so test that it
+        # doesn't come back
+
+        @dsl.container_component
+        def existing_comp():
+            return dsl.ContainerSpec(
+                image='alpine', command=['echo'], args=['foo'])
+
+        @dsl.container_component
+        def issue_comp(v: dsl.OutputPath(str)):
+            return dsl.ContainerSpec(image='alpine', command=['echo'], args=[v])
+
+        @dsl.pipeline
+        def my_pipeline():
+            existing_comp()
+            issue_comp.pipeline_spec
+
+        # should compile without error
+        self.assertTrue(my_pipeline.pipeline_spec)
+
+
 if __name__ == '__main__':
     unittest.main()

From 5cb9ebf9647ae880d03ac0dbcf14f12ea75dfdf4 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 20 Oct 2023 04:21:04 +0000
Subject: [PATCH 230/253] chore(deps): bump word-wrap from 1.2.3 to 1.2.4 in /frontend/server (#9762)

Bumps [word-wrap](https://github.com/jonschlinkert/word-wrap) from 1.2.3 to 1.2.4.
- [Release notes](https://github.com/jonschlinkert/word-wrap/releases)
- [Commits](https://github.com/jonschlinkert/word-wrap/compare/1.2.3...1.2.4)

---
updated-dependencies:
- dependency-name: word-wrap
  dependency-type: indirect
...
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- frontend/server/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index 44be64ff5d..c5494c8ccf 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -9857,9 +9857,9 @@ "dev": true }, "word-wrap": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz", - "integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==" + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.4.tgz", + "integrity": "sha512-2V81OA4ugVo5pRo46hAoD2ivUJx8jXmWXfUkY4KFNw0hEptvN0QfH3K4nHiwzGeKl5rFKedV48QVoqYavy4YpA==" }, "wrap-ansi": { "version": "6.2.0", From c32cdc70d31dcb2c5f9d5a72ceb5669761eece7f Mon Sep 17 00:00:00 2001 From: David van der Spek <28541758+DavidSpek@users.noreply.github.com> Date: Fri, 20 Oct 2023 07:19:03 +0200 Subject: [PATCH 231/253] chore(frontend): update all references to python 3.9 (#10020) Signed-off-by: David van der Spek --- ...htweight_python_functions_v2_pipeline.json | 4 +-- ...ight_python_functions_v2_pipeline_rev.yaml | 4 +-- .../pipeline_with_loops_and_conditions.json | 26 +++++++++---------- .../pipeline_with_loops_and_conditions.yaml | 26 +++++++++---------- .../v2/pipeline/protobuf_value_params_v2.json | 2 +- .../v2/pipeline/xgboost_sample_pipeline.json | 14 +++++----- .../v2/pipeline/xgboost_sample_pipeline.yaml | 14 +++++----- .../tabs/StaticNodeDetailsV2.test.tsx | 6 ++--- .../test/create_mount_delete_dynamic_pvc.yaml | 4 +-- ...ight_python_functions_v2_pipeline_rev.yaml | 4 +-- .../pipeline_with_loops_and_conditions.yaml | 26 +++++++++---------- .../data/test/xgboost_sample_pipeline.yaml | 14 +++++----- frontend/src/lib/v2/WorkflowUtils.test.ts | 4 +-- 13 files changed, 74 insertions(+), 74 deletions(-) diff --git a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline.json b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline.json index 7a2f6abe75..2e5c699128 100644 --- a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline.json +++ b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline.json @@ -107,7 +107,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nfrom kfp.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # An input parameter of type dict.\n input_dict_parameter: Dict[str, int],\n # An input parameter of type list.\n input_list_parameter: List[str],\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: 
OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step.\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(\n str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps(input_dict_parameter))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(input_list_parameter))\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-train": { @@ -122,7 +122,7 @@ "program_path=$(mktemp)\nprintf \"%s\" \"$0\" > \"$program_path\"\npython3 -u \"$program_path\" \"$@\"\n", "\nimport json\nimport inspect\nfrom typing import *\n\n# Copyright 2021 The Kubeflow Authors\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\"\"\"Classes for input/output types in KFP SDK.\n\nThese are only compatible with v2 Pipelines.\n\"\"\"\n\nimport os\nfrom typing import Dict, Generic, List, Optional, Type, TypeVar, Union\n\n\n_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'\n_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'\n_S3_LOCAL_MOUNT_PREFIX = '/s3/'\n\n\nclass Artifact(object):\n \"\"\"Generic Artifact class.\n\n This class is meant to represent the metadata around an input or output\n machine-learning Artifact. 
Artifacts have URIs, which can either be a location\n on disk (or Cloud storage) or some other resource identifier such as\n an API resource name.\n\n Artifacts carry a `metadata` field, which is a dictionary for storing\n metadata related to this artifact.\n \"\"\"\n TYPE_NAME = 'system.Artifact'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n \"\"\"Initializes the Artifact with the given name, URI and metadata.\"\"\"\n self.uri = uri or ''\n self.name = name or ''\n self.metadata = metadata or {}\n\n @property\n def path(self):\n return self._get_path()\n\n @path.setter\n def path(self, path):\n self._set_path(path)\n\n def _get_path(self) -> Optional[str]:\n if self.uri.startswith('gs://'):\n return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]\n elif self.uri.startswith('minio://'):\n return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]\n elif self.uri.startswith('s3://'):\n return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]\n return None\n\n def _set_path(self, path):\n if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):\n path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):\n path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]\n elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):\n path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]\n self.uri = path\n\n\nclass Model(Artifact):\n \"\"\"An artifact representing an ML Model.\"\"\"\n TYPE_NAME = 'system.Model'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n @property\n def framework(self) -> str:\n return self._get_framework()\n\n def _get_framework(self) -> str:\n return self.metadata.get('framework', '')\n\n @framework.setter\n def framework(self, framework: str):\n self._set_framework(framework)\n\n def _set_framework(self, framework: str):\n self.metadata['framework'] = framework\n\n\nclass Dataset(Artifact):\n \"\"\"An artifact representing an ML Dataset.\"\"\"\n TYPE_NAME = 'system.Dataset'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n\nclass Metrics(Artifact):\n \"\"\"Represent a simple base Artifact type to store key-value scalar metrics.\"\"\"\n TYPE_NAME = 'system.Metrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_metric(self, metric: str, value: float):\n \"\"\"Sets a custom scalar metric.\n\n Args:\n metric: Metric key\n value: Value of the metric.\n \"\"\"\n self.metadata[metric] = value\n\n\nclass ClassificationMetrics(Artifact):\n \"\"\"Represents Artifact class to store Classification Metrics.\"\"\"\n TYPE_NAME = 'system.ClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):\n \"\"\"Logs a single data point in the ROC Curve.\n\n Args:\n fpr: False positive rate value of the data point.\n tpr: True positive rate value of the data point.\n threshold: Threshold value for the data point.\n \"\"\"\n\n roc_reading = {\n 'confidenceThreshold': threshold,\n 'recall': tpr,\n 
'falsePositiveRate': fpr\n }\n if 'confidenceMetrics' not in self.metadata.keys():\n self.metadata['confidenceMetrics'] = []\n\n self.metadata['confidenceMetrics'].append(roc_reading)\n\n def log_roc_curve(self, fpr: List[float], tpr: List[float],\n threshold: List[float]):\n \"\"\"Logs an ROC curve.\n\n The list length of fpr, tpr and threshold must be the same.\n\n Args:\n fpr: List of false positive rate values.\n tpr: List of true positive rate values.\n threshold: List of threshold values.\n \"\"\"\n if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(\n threshold):\n raise ValueError('Length of fpr, tpr and threshold must be the same. '\n 'Got lengths {}, {} and {} respectively.'.format(\n len(fpr), len(tpr), len(threshold)))\n\n for i in range(len(fpr)):\n self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])\n\n def set_confusion_matrix_categories(self, categories: List[str]):\n \"\"\"Stores confusion matrix categories.\n\n Args:\n categories: List of strings specifying the categories.\n \"\"\"\n\n self._categories = []\n annotation_specs = []\n for category in categories:\n annotation_spec = {'displayName': category}\n self._categories.append(category)\n annotation_specs.append(annotation_spec)\n\n self._matrix = []\n for row in range(len(self._categories)):\n self._matrix.append({'row': [0] * len(self._categories)})\n\n self._confusion_matrix = {}\n self._confusion_matrix['annotationSpecs'] = annotation_specs\n self._confusion_matrix['rows'] = self._matrix\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_row(self, row_category: str, row: List[float]):\n \"\"\"Logs a confusion matrix row.\n\n Args:\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n\n Raises:\n ValueError: If row_category is not in the list of categories\n set in set_categories call.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if len(row) != len(self._categories):\n raise ValueError('Invalid row. Expected size: {} got: {}'.\\\n format(len(self._categories), len(row)))\n\n self._matrix[self._categories.index(row_category)] = {'row': row}\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix_cell(self, row_category: str, col_category: str,\n value: int):\n \"\"\"Logs a cell in the confusion matrix.\n\n Args:\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n\n Raises:\n ValueError: If row_category or col_category is not in the list of\n categories set in set_categories.\n \"\"\"\n if row_category not in self._categories:\n raise ValueError('Invalid category: {} passed. Expected one of: {}'.\\\n format(row_category, self._categories))\n\n if col_category not in self._categories:\n raise ValueError('Invalid category: {} passed. 
Expected one of: {}'.\\\n format(row_category, self._categories))\n\n self._matrix[self._categories.index(row_category)]['row'][\n self._categories.index(col_category)] = value\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n def log_confusion_matrix(self, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Logs a confusion matrix.\n\n Args:\n categories: List of the category names.\n matrix: Complete confusion matrix.\n\n Raises:\n ValueError: Length of categories does not match number of rows or columns.\n \"\"\"\n self.set_confusion_matrix_categories(categories)\n\n if len(matrix) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n for index in range(len(categories)):\n if len(matrix[index]) != len(categories):\n raise ValueError('Invalid matrix: {} passed for categories: {}'.\\\n format(matrix, categories))\n\n self.log_confusion_matrix_row(categories[index], matrix[index])\n\n self.metadata['confusionMatrix'] = self._confusion_matrix\n\n\nclass SlicedClassificationMetrics(Artifact):\n \"\"\"Metrics class representing Sliced Classification Metrics.\n\n Similar to ClassificationMetrics clients using this class are expected to use\n log methods of the class to log metrics with the difference being each log\n method takes a slice to associate the ClassificationMetrics.\n\n \"\"\"\n\n TYPE_NAME = 'system.SlicedClassificationMetrics'\n\n def __init__(self,\n name: Optional[str] = None,\n uri: Optional[str] = None,\n metadata: Optional[Dict] = None):\n super().__init__(uri=uri, name=name, metadata=metadata)\n\n def _upsert_classification_metrics_for_slice(self, slice: str):\n \"\"\"Upserts the classification metrics instance for a slice.\"\"\"\n if slice not in self._sliced_metrics:\n self._sliced_metrics[slice] = ClassificationMetrics()\n\n def _update_metadata(self, slice: str):\n \"\"\"Updates metadata to adhere to the metrics schema.\"\"\"\n self.metadata = {}\n self.metadata['evaluationSlices'] = []\n for slice in self._sliced_metrics.keys():\n slice_metrics = {\n 'slice': slice,\n 'sliceClassificationMetrics': self._sliced_metrics[slice].metadata\n }\n self.metadata['evaluationSlices'].append(slice_metrics)\n\n def log_roc_reading(self, slice: str, threshold: float, tpr: float,\n fpr: float):\n \"\"\"Logs a single data point in the ROC Curve of a slice.\n\n Args:\n slice: String representing slice label.\n threshold: Thresold value for the data point.\n tpr: True positive rate value of the data point.\n fpr: False positive rate value of the data point.\n \"\"\"\n\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_roc_reading(threshold, tpr, fpr)\n self._update_metadata(slice)\n\n def load_roc_readings(self, slice: str, readings: List[List[float]]):\n \"\"\"Supports bulk loading ROC Curve readings for a slice.\n\n Args:\n slice: String representing slice label.\n readings: A 2-D list providing ROC Curve data points.\n The expected order of the data points is: threshold,\n true_positive_rate, false_positive_rate.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].load_roc_readings(readings)\n self._update_metadata(slice)\n\n def set_confusion_matrix_categories(self, slice: str, categories: List[str]):\n \"\"\"Stores confusion matrix categories for a slice..\n\n Categories are stored in the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n categories: 
List of strings specifying the categories.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].set_confusion_matrix_categories(categories)\n self._update_metadata(slice)\n\n def log_confusion_matrix_row(self, slice: str, row_category: str,\n row: List[int]):\n \"\"\"Logs a confusion matrix row for a slice.\n\n Row is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: Category to which the row belongs.\n row: List of integers specifying the values for the row.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)\n self._update_metadata(slice)\n\n def log_confusion_matrix_cell(self, slice: str, row_category: str,\n col_category: str, value: int):\n \"\"\"Logs a confusion matrix cell for a slice..\n\n Cell is updated on the internal metrics_utils.ConfusionMatrix\n instance of the slice.\n\n Args:\n slice: String representing slice label.\n row_category: String representing the name of the row category.\n col_category: String representing the name of the column category.\n value: Int value of the cell.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(\n row_category, col_category, value)\n self._update_metadata(slice)\n\n def load_confusion_matrix(self, slice: str, categories: List[str],\n matrix: List[List[int]]):\n \"\"\"Supports bulk loading the whole confusion matrix for a slice.\n\n Args:\n slice: String representing slice label.\n categories: List of the category names.\n matrix: Complete confusion matrix.\n \"\"\"\n self._upsert_classification_metrics_for_slice(slice)\n self._sliced_metrics[slice].log_confusion_matrix_cell(categories, matrix)\n self._update_metadata(slice)\n\n\nT = TypeVar('T')\n\n\nclass InputAnnotation():\n \"\"\"Marker type for input artifacts.\"\"\"\n pass\n\n\n\nclass OutputAnnotation():\n \"\"\"Marker type for output artifacts.\"\"\"\n pass\n\n\n# TODO: Use typing.Annotated instead of this hack.\n# With typing.Annotated (Python 3.9+ or typing_extensions package), the\n# following would look like:\n# Input = typing.Annotated[T, InputAnnotation]\n# Output = typing.Annotated[T, OutputAnnotation]\n\n\n# Input represents an Input artifact of type T.\nInput = Union[T, InputAnnotation]\n\n# Output represents an Output artifact of type T.\nOutput = Union[T, OutputAnnotation]\n\n\ndef is_artifact_annotation(typ) -> bool:\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [InputAnnotation, OutputAnnotation]\n\n if not hasattr(typ, '__origin__'):\n return False\n\n\n if typ.__origin__ != Union and type(typ.__origin__) != type(Union):\n return False\n\n\n if not hasattr(typ, '__args__') or len(typ.__args__) != 2:\n return False\n\n if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:\n return False\n\n return True\n\ndef is_input_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Input[T].\"\"\"\n if not is_artifact_annotation(typ):\n return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation\n\n return typ.__args__[1] == InputAnnotation\n\ndef is_output_artifact(typ) -> bool:\n \"\"\"Returns True if typ is of type Output[T].\"\"\"\n if not is_artifact_annotation(typ):\n 
return False\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation\n\n return typ.__args__[1] == OutputAnnotation\n\ndef get_io_artifact_class(typ):\n if not is_artifact_annotation(typ):\n return None\n if typ == Input or typ == Output:\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[1]\n\n return typ.__args__[0]\n\ndef get_io_artifact_annotation(typ):\n if not is_artifact_annotation(typ):\n return None\n\n if hasattr(typ, '_subs_tree'): # Python 3.6\n subs_tree = typ._subs_tree()\n if len(subs_tree) != 3:\n return None\n return subs_tree[2]\n\n return typ.__args__[1]\n\n\n\n_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {\n x.TYPE_NAME: x\n for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]\n}\n\n\ndef create_runtime_artifact(runtime_artifact: Dict) -> Artifact:\n \"\"\"Creates an Artifact instance from the specified RuntimeArtifact.\n\n Args:\n runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.\n \"\"\"\n schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')\n\n artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)\n if not artifact_type:\n artifact_type = Artifact\n return artifact_type(\n uri=runtime_artifact.get('uri', ''),\n name=runtime_artifact.get('name', ''),\n metadata=runtime_artifact.get('metadata', {}),\n )\n\nclass InputPath:\n '''When creating component from function, :class:`.InputPath` should be used as function parameter annotation to tell the system to pass the *data file path* to the function instead of passing the actual data.'''\n def __init__(self, type=None):\n self.type = type\n\nclass OutputPath:\n '''When creating component from function, :class:`.OutputPath` should be used as function parameter annotation to tell the system that the function wants to output data by writing it into a file with the given path instead of returning the data from the function.'''\n def __init__(self, type=None):\n self.type = type\n\nclass Executor():\n \"\"\"Executor executes v2-based Python function components.\"\"\"\n\n def __init__(self, executor_input: Dict, function_to_execute: Callable):\n self._func = function_to_execute\n self._input = executor_input\n self._input_artifacts: Dict[str, Artifact] = {}\n self._output_artifacts: Dict[str, Artifact] = {}\n\n for name, artifacts in self._input.get('inputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._input_artifacts[name] = self._make_input_artifact(\n artifacts_list[0])\n\n for name, artifacts in self._input.get('outputs', {}).get('artifacts',\n {}).items():\n artifacts_list = artifacts.get('artifacts')\n if artifacts_list:\n self._output_artifacts[name] = self._make_output_artifact(\n artifacts_list[0])\n\n self._return_annotation = inspect.signature(self._func).return_annotation\n self._executor_output = {}\n\n @classmethod\n def _make_input_artifact(cls, runtime_artifact: Dict):\n return create_runtime_artifact(runtime_artifact)\n\n @classmethod\n def _make_output_artifact(cls, runtime_artifact: Dict):\n import os\n artifact = create_runtime_artifact(runtime_artifact)\n os.makedirs(os.path.dirname(artifact.path), exist_ok=True)\n return artifact\n\n def _get_input_artifact(self, name: str):\n return self._input_artifacts.get(name)\n\n def _get_output_artifact(self, name: str):\n return 
self._output_artifacts.get(name)\n\n def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):\n parameter = self._input.get('inputs', {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n if parameter.get('stringValue'):\n if parameter_type == str:\n return parameter['stringValue']\n elif parameter_type == bool:\n # Use `.lower()` so it can also handle 'True' and 'False' (resulted from\n # `str(True)` and `str(False)`, respectively.\n return json.loads(parameter['stringValue'].lower())\n else:\n return json.loads(parameter['stringValue'])\n elif parameter.get('intValue'):\n return int(parameter['intValue'])\n elif parameter.get('doubleValue'):\n return float(parameter['doubleValue'])\n\n def _get_output_parameter_path(self, parameter_name: str):\n parameter_name = self._maybe_strip_path_suffix(parameter_name)\n parameter = self._input.get('outputs',\n {}).get('parameters',\n {}).get(parameter_name, None)\n if parameter is None:\n return None\n\n import os\n path = parameter.get('outputFile', None)\n if path:\n os.makedirs(os.path.dirname(path), exist_ok=True)\n return path\n\n def _get_output_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n output_artifact = self._output_artifacts.get(artifact_name)\n if not output_artifact:\n raise ValueError(\n 'Failed to get output artifact path for artifact name {}'.format(\n artifact_name))\n return output_artifact.path\n\n def _get_input_artifact_path(self, artifact_name: str):\n artifact_name = self._maybe_strip_path_suffix(artifact_name)\n input_artifact = self._input_artifacts.get(artifact_name)\n if not input_artifact:\n raise ValueError(\n 'Failed to get input artifact path for artifact name {}'.format(\n artifact_name))\n return input_artifact.path\n\n def _write_output_parameter_value(self, name: str,\n value: Union[str, int, float, bool, dict,\n list, Dict, List]):\n if type(value) == str:\n output = {'stringValue': value}\n elif type(value) == int:\n output = {'intValue': value}\n elif type(value) == float:\n output = {'doubleValue': value}\n else:\n # For bool, list, dict, List, Dict, json serialize the value.\n output = {'stringValue': json.dumps(value)}\n\n if not self._executor_output.get('parameters'):\n self._executor_output['parameters'] = {}\n\n self._executor_output['parameters'][name] = output\n\n def _write_output_artifact_payload(self, name: str, value: Any):\n path = self._get_output_artifact_path(name)\n with open(path, 'w') as f:\n f.write(str(value))\n\n # TODO: extract to a util\n @classmethod\n def _get_short_type_name(cls, type_name: str) -> str:\n \"\"\"Extracts the short form type name.\n\n This method is used for looking up serializer for a given type.\n\n For example:\n typing.List -> List\n typing.List[int] -> List\n typing.Dict[str, str] -> Dict\n List -> List\n str -> str\n\n Args:\n type_name: The original type name.\n\n Returns:\n The short form type name or the original name if pattern doesn't match.\n \"\"\"\n import re\n match = re.match('(typing\\.)?(?P\\w+)(?:\\[.+\\])?', type_name)\n if match:\n return match.group('type')\n else:\n return type_name\n\n # TODO: merge with type_utils.is_parameter_type\n @classmethod\n def _is_parameter(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return annotation in [str, int, float, bool, dict, list]\n\n # Annotation could be, for instance `typing.Dict[str, str]`, etc.\n return cls._get_short_type_name(str(annotation)) in ['Dict', 
'List']\n\n @classmethod\n def _is_artifact(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, Artifact)\n return False\n\n @classmethod\n def _is_named_tuple(cls, annotation: Any) -> bool:\n if type(annotation) == type:\n return issubclass(annotation, tuple) and hasattr(\n annotation, '_fields') and hasattr(annotation, '__annotations__')\n return False\n\n def _handle_single_return_value(self, output_name: str, annotation_type: Any,\n return_value: Any):\n if self._is_parameter(annotation_type):\n if type(return_value) != annotation_type:\n raise ValueError(\n 'Function `{}` returned value of type {}; want type {}'.format(\n self._func.__name__, type(return_value), annotation_type))\n self._write_output_parameter_value(output_name, return_value)\n elif self._is_artifact(annotation_type):\n self._write_output_artifact_payload(output_name, return_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'\n ' subclass of `Artifact`'.format(annotation_type))\n\n def _write_executor_output(self, func_output: Optional[Any] = None):\n if self._output_artifacts:\n self._executor_output['artifacts'] = {}\n\n for name, artifact in self._output_artifacts.items():\n runtime_artifact = {\n 'name': artifact.name,\n 'uri': artifact.uri,\n 'metadata': artifact.metadata,\n }\n artifacts_list = {'artifacts': [runtime_artifact]}\n\n self._executor_output['artifacts'][name] = artifacts_list\n\n if func_output is not None:\n if self._is_parameter(self._return_annotation) or self._is_artifact(\n self._return_annotation):\n # Note: single output is named `Output` in component.yaml.\n self._handle_single_return_value('Output', self._return_annotation,\n func_output)\n elif self._is_named_tuple(self._return_annotation):\n if len(self._return_annotation._fields) != len(func_output):\n raise RuntimeError(\n 'Expected {} return values from function `{}`, got {}'.format(\n len(self._return_annotation._fields), self._func.__name__,\n len(func_output)))\n for i in range(len(self._return_annotation._fields)):\n field = self._return_annotation._fields[i]\n field_type = self._return_annotation.__annotations__[field]\n if type(func_output) == tuple:\n field_value = func_output[i]\n else:\n field_value = getattr(func_output, field)\n self._handle_single_return_value(field, field_type, field_value)\n else:\n raise RuntimeError(\n 'Unknown return type: {}. 
Must be one of `str`, `int`, `float`, a'\n ' subclass of `Artifact`, or a NamedTuple collection of these types.'\n .format(self._return_annotation))\n\n import os\n os.makedirs(\n os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)\n with open(self._input['outputs']['outputFile'], 'w') as f:\n f.write(json.dumps(self._executor_output))\n\n def _maybe_strip_path_suffix(self, name) -> str:\n if name.endswith('_path'):\n name = name[0:-len('_path')]\n if name.endswith('_file'):\n name = name[0:-len('_file')]\n return name\n\n def execute(self):\n annotations = inspect.getfullargspec(self._func).annotations\n\n # Function arguments.\n func_kwargs = {}\n\n for k, v in annotations.items():\n if k == 'return':\n continue\n\n if self._is_parameter(v):\n func_kwargs[k] = self._get_input_parameter_value(k, v)\n\n if is_artifact_annotation(v):\n if is_input_artifact(v):\n func_kwargs[k] = self._get_input_artifact(k)\n if is_output_artifact(v):\n func_kwargs[k] = self._get_output_artifact(k)\n\n elif isinstance(v, OutputPath):\n if self._is_parameter(v.type):\n func_kwargs[k] = self._get_output_parameter_path(k)\n else:\n func_kwargs[k] = self._get_output_artifact_path(k)\n elif isinstance(v, InputPath):\n func_kwargs[k] = self._get_input_artifact_path(k)\n\n result = self._func(**func_kwargs)\n self._write_executor_output(result)\n\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # model is an instance of Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n\ndef executor_main():\n import argparse\n import json\n\n parser = argparse.ArgumentParser(description='Process some integers.')\n parser.add_argument('--executor_input', type=str)\n parser.add_argument('--function_to_execute', type=str)\n\n args, _ = parser.parse_known_args()\n executor_input = json.loads(args.executor_input)\n function_to_execute = globals()[args.function_to_execute]\n\n executor = Executor(executor_input=executor_input,\n function_to_execute=function_to_execute)\n\n executor.execute()\n\n\nif __name__ == '__main__':\n executor_main()\n" ], - "image": "python:3.7" + "image": 
"python:3.9" } } } diff --git a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml index 886f6141c4..58b6df51e7 100644 --- a/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml +++ b/frontend/mock-backend/data/v2/pipeline/lightweight_python_functions_v2_pipeline_rev.yaml @@ -6,7 +6,7 @@ deploymentSpec: executors: exec-preprocess: container: - image: python:3.7 + image: python:3.9 args: - --executor_input - '{{$}}' @@ -105,7 +105,7 @@ deploymentSpec: \ Model artifact, which has a .metadata dictionary\n # to store arbitrary\ \ metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\ \n" - image: python:3.7 + image: python:3.9 components: comp-preprocess: inputDefinitions: diff --git a/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.json b/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.json index f2364e2af3..6f4f6f7a7e 100644 --- a/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.json +++ b/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.json @@ -929,7 +929,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef args_generator_op() -> list:\n return [\n {\n 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n },\n ]\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-args-generator-op-2": { @@ -949,7 +949,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef args_generator_op() -> list:\n return [\n {\n 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n },\n ]\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-flip-coin-op": { @@ -969,7 +969,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef flip_coin_op() -> str:\n \"\"\"Flip a coin and output heads or tails randomly.\"\"\"\n import random\n result = 'heads' if random.randint(0, 1) == 0 else 'tails'\n return result\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-struct": { @@ -989,7 +989,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_struct(struct: dict):\n print(struct)\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text": { @@ -1009,7 +1009,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path 
\"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-2": { @@ -1029,7 +1029,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-3": { @@ -1049,7 +1049,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-4": { @@ -1069,7 +1069,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-5": { @@ -1089,7 +1089,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-6": { @@ -1109,7 +1109,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-7": { @@ -1129,7 +1129,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-8": { @@ -1149,7 +1149,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import 
*\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-print-text-9": { @@ -1169,7 +1169,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg: {msg}, msg2: {msg2}')\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } } } diff --git a/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.yaml b/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.yaml index 1d79405a07..f12bc94a80 100644 --- a/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.yaml +++ b/frontend/mock-backend/data/v2/pipeline/pipeline_with_loops_and_conditions.yaml @@ -588,7 +588,7 @@ deploymentSpec: \ 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n \ \ {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n \ \ },\n ]\n\n" - image: python:3.7 + image: python:3.9 exec-args-generator-op-2: container: args: @@ -617,7 +617,7 @@ deploymentSpec: \ 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n \ \ {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n \ \ },\n ]\n\n" - image: python:3.7 + image: python:3.9 exec-flip-coin-op: container: args: @@ -645,7 +645,7 @@ deploymentSpec: \ *\n\ndef flip_coin_op() -> str:\n \"\"\"Flip a coin and output heads\ \ or tails randomly.\"\"\"\n import random\n result = 'heads' if random.randint(0,\ \ 1) == 0 else 'tails'\n return result\n\n" - image: python:3.7 + image: python:3.9 exec-print-struct: container: args: @@ -671,7 +671,7 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_struct(struct: dict):\n print(struct)\n\n" - image: python:3.7 + image: python:3.9 exec-print-text: container: args: @@ -698,7 +698,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-2: container: args: @@ -725,7 +725,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-3: container: args: @@ -752,7 +752,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-4: container: args: @@ -779,7 +779,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-5: container: args: @@ -806,7 +806,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-6: container: args: @@ -833,7 +833,7 @@ 
deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-7: container: args: @@ -860,7 +860,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-8: container: args: @@ -887,7 +887,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-9: container: args: @@ -914,7 +914,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 pipelineInfo: name: pipeline-with-loops-and-conditions-multi-layers root: diff --git a/frontend/mock-backend/data/v2/pipeline/protobuf_value_params_v2.json b/frontend/mock-backend/data/v2/pipeline/protobuf_value_params_v2.json index f75a60e1bc..4bbf2bcb0d 100644 --- a/frontend/mock-backend/data/v2/pipeline/protobuf_value_params_v2.json +++ b/frontend/mock-backend/data/v2/pipeline/protobuf_value_params_v2.json @@ -48,7 +48,7 @@ "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_params(\n int_param: int = 1234, \n double_param: float = 56.78, \n string_param: str = 'lorem ipsum', \n bool_param: bool = True, \n list_string_param: List[str] = ['lorem', 'ipsum'], \n list_int_param: List[int] = [123, 456, 789], \n struct_param: Dict[str, int] = { 'key_1': 12345, 'key_2': 6789 }):\n print(\"int_param: \", int_param)\n print(\"double_param: \", double_param)\n print(\"string_param: \", string_param)\n print(\"bool_param: \", bool_param)\n print(\"list_string_param: \", list_string_param)\n print(\"list_int_param: \", list_int_param)\n print(\"struct_param: \", struct_param)\n\n" ], - "image": "python:3.7" + "image": "python:3.9" } } } diff --git a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.json b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.json index 15e5f4d424..00e235989e 100644 --- a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.json +++ b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.json @@ -327,7 +327,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef convert_csv_to_apache_parquet(\n data_path,\n output_data_path,\n):\n '''Converts CSV table to Apache Parquet.\n\n [Apache Parquet](https://parquet.apache.org/)\n\n Annotations:\n author: Alexey Volkov \n '''\n from pyarrow import csv, parquet\n\n table = csv.read_csv(data_path)\n parquet.write_table(table, output_data_path)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Convert csv to apache parquet', description='Converts CSV table to Apache Parquet.\\n\\n [Apache 
Parquet](https://parquet.apache.org/)\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--output-data\", dest=\"output_data_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n_output_files = _parsed_args.pop(\"_output_paths\", [])\n\n_outputs = convert_csv_to_apache_parquet(**_parsed_args)\n\n_output_serializers = [\n\n]\n\nimport os\nfor idx, output_file in enumerate(_output_files):\n try:\n os.makedirs(os.path.dirname(output_file))\n except OSError:\n pass\n with open(output_file, 'w') as f:\n f.write(_output_serializers[idx](_outputs[idx]))\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-predict": { @@ -351,7 +351,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_predict(\n data_path, # Also supports LibSVM\n model_path,\n predictions_path,\n label_column = None,\n):\n '''Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in CSV format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column: Column containing the label data.\n\n Annotations:\n author: Alexey Volkov \n '''\n from pathlib import Path\n\n import numpy\n import pandas\n import xgboost\n\n df = pandas.read_csv(\n data_path,\n )\n\n if label_column is not None:\n df = df.drop(columns=[df.columns[label_column]])\n\n testing_data = xgboost.DMatrix(\n data=df,\n )\n\n model = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(testing_data)\n\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\\n\\n Args:\\n data_path: Path for the feature data in CSV format.\\n model_path: Path for the trained model in binary XGBoost format.\\n predictions_path: Output path for the predictions.\\n label_column: Column containing the label data.\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column\", dest=\"label_column\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_predict(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-predict-2": { @@ -375,7 +375,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n predictions_path,\n label_column_name = None,\n):\n '''Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in Apache Parquet format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n 
label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n Annotations:\n author: Alexey Volkov \n '''\n from pathlib import Path\n\n import numpy\n import pandas\n import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n if label_column_name:\n df = df.drop(columns=[label_column_name])\n\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n # Training\n model = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\\n\\n Args:\\n data_path: Path for the feature data in Apache Parquet format.\\n model_path: Path for the trained model in binary XGBoost format.\\n predictions_path: Output path for the predictions.\\n label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column-name\", dest=\"label_column_name\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_predict(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-predict-3": { @@ -399,7 +399,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_predict(\n data_path,\n model_path,\n predictions_path,\n label_column_name = None,\n):\n '''Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in Apache Parquet format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column_name: Optional. Name of the column containing the label data that is excluded during the prediction.\n\n Annotations:\n author: Alexey Volkov \n '''\n from pathlib import Path\n\n import numpy\n import pandas\n import xgboost\n\n # Loading data\n df = pandas.read_parquet(data_path)\n if label_column_name:\n df = df.drop(columns=[label_column_name])\n\n evaluation_data = xgboost.DMatrix(\n data=df,\n )\n\n # Training\n model = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(evaluation_data)\n\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\\n\\n Args:\\n data_path: Path for the feature data in Apache Parquet format.\\n model_path: Path for the trained model in binary XGBoost format.\\n predictions_path: Output path for the predictions.\\n label_column_name: Optional. 
Name of the column containing the label data that is excluded during the prediction.\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column-name\", dest=\"label_column_name\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_predict(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-predict-4": { @@ -423,7 +423,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_predict(\n data_path, # Also supports LibSVM\n model_path,\n predictions_path,\n label_column = None,\n):\n '''Make predictions using a trained XGBoost model.\n\n Args:\n data_path: Path for the feature data in CSV format.\n model_path: Path for the trained model in binary XGBoost format.\n predictions_path: Output path for the predictions.\n label_column: Column containing the label data.\n\n Annotations:\n author: Alexey Volkov \n '''\n from pathlib import Path\n\n import numpy\n import pandas\n import xgboost\n\n df = pandas.read_csv(\n data_path,\n )\n\n if label_column is not None:\n df = df.drop(columns=[df.columns[label_column]])\n\n testing_data = xgboost.DMatrix(\n data=df,\n )\n\n model = xgboost.Booster(model_file=model_path)\n\n predictions = model.predict(testing_data)\n\n Path(predictions_path).parent.mkdir(parents=True, exist_ok=True)\n numpy.savetxt(predictions_path, predictions)\n\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost predict', description='Make predictions using a trained XGBoost model.\\n\\n Args:\\n data_path: Path for the feature data in CSV format.\\n model_path: Path for the trained model in binary XGBoost format.\\n predictions_path: Output path for the predictions.\\n label_column: Column containing the label data.\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--data\", dest=\"data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column\", dest=\"label_column\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_predict(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-train": { @@ -459,7 +459,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_train(\n training_data_path, # Also supports LibSVM\n model_path,\n model_config_path,\n starting_model_path = None,\n\n label_column = 0,\n num_iterations = 10,\n booster_params = None,\n\n # Booster parameters\n objective = 'reg:squarederror',\n booster = 'gbtree',\n learning_rate = 0.3,\n min_split_loss = 0,\n max_depth = 6,\n):\n '''Train an XGBoost model.\n\n Args:\n training_data_path: 
Path for the training data in CSV format.\n model_path: Output path for the trained model in binary XGBoost format.\n model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n starting_model_path: Path for the existing trained model to start from.\n label_column: Column containing the label data.\n num_boost_rounds: Number of boosting iterations.\n booster_params: Parameters for the booster. See https://xgboost.readthedocs.io/en/latest/parameter.html\n objective: The learning task and the corresponding learning objective.\n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n The most common values are:\n \"reg:squarederror\" - Regression with squared loss (default).\n \"reg:logistic\" - Logistic regression.\n \"binary:logistic\" - Logistic regression for binary classification, output probability.\n \"binary:logitraw\" - Logistic regression for binary classification, output score before logistic transformation\n \"rank:pairwise\" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n \"rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n Annotations:\n author: Alexey Volkov \n '''\n import pandas\n import xgboost\n\n df = pandas.read_csv(\n training_data_path,\n )\n\n training_data = xgboost.DMatrix(\n data=df.drop(columns=[df.columns[label_column]]),\n label=df[df.columns[label_column]],\n )\n\n booster_params = booster_params or {}\n booster_params.setdefault('objective', objective)\n booster_params.setdefault('booster', booster)\n booster_params.setdefault('learning_rate', learning_rate)\n booster_params.setdefault('min_split_loss', min_split_loss)\n booster_params.setdefault('max_depth', max_depth)\n\n starting_model = None\n if starting_model_path:\n starting_model = xgboost.Booster(model_file=starting_model_path)\n\n model = xgboost.train(\n params=booster_params,\n dtrain=training_data,\n num_boost_round=num_iterations,\n xgb_model=starting_model\n )\n\n # Saving the model in binary format\n model.save_model(model_path)\n\n model_config_str = model.save_config()\n with open(model_config_path, 'w') as model_config_file:\n model_config_file.write(model_config_str)\n\nimport json\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\\n\\n Args:\\n training_data_path: Path for the training data in CSV format.\\n model_path: Output path for the trained model in binary XGBoost format.\\n model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\\n starting_model_path: Path for the existing trained model to start from.\\n label_column: Column containing the label data.\\n num_boost_rounds: Number of boosting iterations.\\n booster_params: Parameters for the booster. 
See https://xgboost.readthedocs.io/en/latest/parameter.html\\n objective: The learning task and the corresponding learning objective.\\n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\\n The most common values are:\\n \"reg:squarederror\" - Regression with squared loss (default).\\n \"reg:logistic\" - Logistic regression.\\n \"binary:logistic\" - Logistic regression for binary classification, output probability.\\n \"binary:logitraw\" - Logistic regression for binary classification, output score before logistic transformation\\n \"rank:pairwise\" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\\n \"rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--training-data\", dest=\"training_data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--starting-model\", dest=\"starting_model_path\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column\", dest=\"label_column\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--num-iterations\", dest=\"num_iterations\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--booster-params\", dest=\"booster_params\", type=json.loads, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--objective\", dest=\"objective\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--booster\", dest=\"booster\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--learning-rate\", dest=\"learning_rate\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--min-split-loss\", dest=\"min_split_loss\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--max-depth\", dest=\"max_depth\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-config\", dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_train(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } }, "exec-xgboost-train-2": { @@ -495,7 +495,7 @@ "-c", "def _make_parent_dirs_and_return_path(file_path: str):\n import os\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n return file_path\n\ndef xgboost_train(\n training_data_path,\n model_path,\n model_config_path,\n label_column_name,\n\n starting_model_path = None,\n\n num_iterations = 10,\n booster_params = None,\n\n # Booster parameters\n objective = 'reg:squarederror',\n booster = 'gbtree',\n learning_rate = 0.3,\n min_split_loss = 0,\n max_depth = 6,\n):\n '''Train an XGBoost model.\n\n Args:\n training_data_path: Path for the training data in Apache Parquet format.\n model_path: Output path for the trained model in binary XGBoost format.\n model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\n starting_model_path: Path for the existing trained model to start from.\n label_column_name: Name of the column containing the label data.\n num_boost_rounds: Number of boosting iterations.\n booster_params: Parameters for the booster. 
See https://xgboost.readthedocs.io/en/latest/parameter.html\n objective: The learning task and the corresponding learning objective.\n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\n The most common values are:\n \"reg:squarederror\" - Regression with squared loss (default).\n \"reg:logistic\" - Logistic regression.\n \"binary:logistic\" - Logistic regression for binary classification, output probability.\n \"binary:logitraw\" - Logistic regression for binary classification, output score before logistic transformation\n \"rank:pairwise\" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\n \"rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\n\n Annotations:\n author: Alexey Volkov \n '''\n import pandas\n import xgboost\n\n # Loading data\n df = pandas.read_parquet(training_data_path)\n training_data = xgboost.DMatrix(\n data=df.drop(columns=[label_column_name]),\n label=df[[label_column_name]],\n )\n # Training\n booster_params = booster_params or {}\n booster_params.setdefault('objective', objective)\n booster_params.setdefault('booster', booster)\n booster_params.setdefault('learning_rate', learning_rate)\n booster_params.setdefault('min_split_loss', min_split_loss)\n booster_params.setdefault('max_depth', max_depth)\n\n starting_model = None\n if starting_model_path:\n starting_model = xgboost.Booster(model_file=starting_model_path)\n\n model = xgboost.train(\n params=booster_params,\n dtrain=training_data,\n num_boost_round=num_iterations,\n xgb_model=starting_model\n )\n\n # Saving the model in binary format\n model.save_model(model_path)\n\n model_config_str = model.save_config()\n with open(model_config_path, 'w') as model_config_file:\n model_config_file.write(model_config_str)\n\nimport json\nimport argparse\n_parser = argparse.ArgumentParser(prog='Xgboost train', description='Train an XGBoost model.\\n\\n Args:\\n training_data_path: Path for the training data in Apache Parquet format.\\n model_path: Output path for the trained model in binary XGBoost format.\\n model_config_path: Output path for the internal parameter configuration of Booster as a JSON string.\\n starting_model_path: Path for the existing trained model to start from.\\n label_column_name: Name of the column containing the label data.\\n num_boost_rounds: Number of boosting iterations.\\n booster_params: Parameters for the booster. 
See https://xgboost.readthedocs.io/en/latest/parameter.html\\n objective: The learning task and the corresponding learning objective.\\n See https://xgboost.readthedocs.io/en/latest/parameter.html#learning-task-parameters\\n The most common values are:\\n \"reg:squarederror\" - Regression with squared loss (default).\\n \"reg:logistic\" - Logistic regression.\\n \"binary:logistic\" - Logistic regression for binary classification, output probability.\\n \"binary:logitraw\" - Logistic regression for binary classification, output score before logistic transformation\\n \"rank:pairwise\" - Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized\\n \"rank:ndcg\" - Use LambdaMART to perform list-wise ranking where Normalized Discounted Cumulative Gain (NDCG) is maximized\\n\\n Annotations:\\n author: Alexey Volkov ')\n_parser.add_argument(\"--training-data\", dest=\"training_data_path\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--label-column-name\", dest=\"label_column_name\", type=str, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--starting-model\", dest=\"starting_model_path\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--num-iterations\", dest=\"num_iterations\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--booster-params\", dest=\"booster_params\", type=json.loads, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--objective\", dest=\"objective\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--booster\", dest=\"booster\", type=str, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--learning-rate\", dest=\"learning_rate\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--min-split-loss\", dest=\"min_split_loss\", type=float, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--max-depth\", dest=\"max_depth\", type=int, required=False, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model\", dest=\"model_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parser.add_argument(\"--model-config\", dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\n_outputs = xgboost_train(**_parsed_args)\n" ], - "image": "python:3.7" + "image": "python:3.9" } } } diff --git a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml index 2c3dbf0be6..c5612c190d 100644 --- a/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml +++ b/frontend/mock-backend/data/v2/pipeline/xgboost_sample_pipeline.yaml @@ -253,7 +253,7 @@ deploymentSpec: \ try:\n os.makedirs(os.path.dirname(output_file))\n except\ \ OSError:\n pass\n with open(output_file, 'w') as f:\n \ \ f.write(_output_serializers[idx](_outputs[idx]))\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict: container: args: @@ -304,7 +304,7 @@ deploymentSpec: _parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\ \ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-2: container: args: @@ -358,7 +358,7 @@ deploymentSpec: predictions_path\", 
type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-3: container: args: @@ -412,7 +412,7 @@ deploymentSpec: predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-4: container: args: @@ -463,7 +463,7 @@ deploymentSpec: _parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\ \ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-train: container: args: @@ -571,7 +571,7 @@ deploymentSpec: , dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_train(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-train-2: container: args: @@ -679,7 +679,7 @@ deploymentSpec: , dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_train(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 pipelineInfo: name: xgboost-sample-pipeline root: diff --git a/frontend/src/components/tabs/StaticNodeDetailsV2.test.tsx b/frontend/src/components/tabs/StaticNodeDetailsV2.test.tsx index 4bc5b84b11..d0948760c7 100644 --- a/frontend/src/components/tabs/StaticNodeDetailsV2.test.tsx +++ b/frontend/src/components/tabs/StaticNodeDetailsV2.test.tsx @@ -76,12 +76,12 @@ describe('StaticNodeDetailsV2', () => { expect(screen.getAllByText('STRING').length).toEqual(2); screen.getByText('Image'); - screen.getByText('python:3.7'); + screen.getByText('python:3.9'); screen.getByText('Command'); expect(screen.getAllByText('sh').length).toEqual(2); // The yaml file we used in this test has command as follow: - /* + /* sh -c @@ -131,7 +131,7 @@ describe('StaticNodeDetailsV2', () => { expect(screen.getAllByText('STRING').length).toEqual(1); screen.getByText('Image'); - screen.getByText('python:3.7'); + screen.getByText('python:3.9'); screen.getByText('Command'); expect(screen.getAllByText('sh').length).toEqual(2); diff --git a/frontend/src/data/test/create_mount_delete_dynamic_pvc.yaml b/frontend/src/data/test/create_mount_delete_dynamic_pvc.yaml index f92719a2f5..fc07ad77ad 100644 --- a/frontend/src/data/test/create_mount_delete_dynamic_pvc.yaml +++ b/frontend/src/data/test/create_mount_delete_dynamic_pvc.yaml @@ -76,7 +76,7 @@ deploymentSpec: \ *\n\ndef consumer() -> str:\n with open('/data/file.txt', 'r') as file:\n\ \ content = file.read()\n print(content)\n return content\n\ \n" - image: python:3.7 + image: python:3.9 exec-createpvc: container: image: argostub/createpvc @@ -111,7 +111,7 @@ deploymentSpec: \ file.write('Hello world')\n with open('/data/file.txt', 'r')\ \ as file:\n content = file.read()\n print(content)\n return\ \ content\n\n" - image: python:3.7 + image: python:3.9 pipelineInfo: name: my-pipeline root: diff --git a/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml b/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml index 
5e62958d6e..c32ada3c00 100644 --- a/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml +++ b/frontend/src/data/test/lightweight_python_functions_v2_pipeline_rev.yaml @@ -6,7 +6,7 @@ deploymentSpec: executors: exec-preprocess: container: - image: python:3.7 + image: python:3.9 args: - --executor_input - '{{$}}' @@ -105,7 +105,7 @@ deploymentSpec: \ Model artifact, which has a .metadata dictionary\n # to store arbitrary\ \ metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\ \n" - image: python:3.7 + image: python:3.9 components: comp-preprocess: inputDefinitions: diff --git a/frontend/src/data/test/pipeline_with_loops_and_conditions.yaml b/frontend/src/data/test/pipeline_with_loops_and_conditions.yaml index 1d79405a07..f12bc94a80 100644 --- a/frontend/src/data/test/pipeline_with_loops_and_conditions.yaml +++ b/frontend/src/data/test/pipeline_with_loops_and_conditions.yaml @@ -588,7 +588,7 @@ deploymentSpec: \ 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n \ \ {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n \ \ },\n ]\n\n" - image: python:3.7 + image: python:3.9 exec-args-generator-op-2: container: args: @@ -617,7 +617,7 @@ deploymentSpec: \ 'A_a': '1',\n 'B_b': ['2', '20'],\n },\n \ \ {\n 'A_a': '10',\n 'B_b': ['22', '222'],\n \ \ },\n ]\n\n" - image: python:3.7 + image: python:3.9 exec-flip-coin-op: container: args: @@ -645,7 +645,7 @@ deploymentSpec: \ *\n\ndef flip_coin_op() -> str:\n \"\"\"Flip a coin and output heads\ \ or tails randomly.\"\"\"\n import random\n result = 'heads' if random.randint(0,\ \ 1) == 0 else 'tails'\n return result\n\n" - image: python:3.7 + image: python:3.9 exec-print-struct: container: args: @@ -671,7 +671,7 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_struct(struct: dict):\n print(struct)\n\n" - image: python:3.7 + image: python:3.9 exec-print-text: container: args: @@ -698,7 +698,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-2: container: args: @@ -725,7 +725,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-3: container: args: @@ -752,7 +752,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-4: container: args: @@ -779,7 +779,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-5: container: args: @@ -806,7 +806,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-6: container: args: @@ -833,7 +833,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ 
*\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-7: container: args: @@ -860,7 +860,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-8: container: args: @@ -887,7 +887,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 exec-print-text-9: container: args: @@ -914,7 +914,7 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef print_text(msg: str, msg2: Optional[str] = None):\n print(f'msg:\ \ {msg}, msg2: {msg2}')\n\n" - image: python:3.7 + image: python:3.9 pipelineInfo: name: pipeline-with-loops-and-conditions-multi-layers root: diff --git a/frontend/src/data/test/xgboost_sample_pipeline.yaml b/frontend/src/data/test/xgboost_sample_pipeline.yaml index bd9c668a10..e7a5410417 100644 --- a/frontend/src/data/test/xgboost_sample_pipeline.yaml +++ b/frontend/src/data/test/xgboost_sample_pipeline.yaml @@ -253,7 +253,7 @@ deploymentSpec: \ try:\n os.makedirs(os.path.dirname(output_file))\n except\ \ OSError:\n pass\n with open(output_file, 'w') as f:\n \ \ f.write(_output_serializers[idx](_outputs[idx]))\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict: container: args: @@ -304,7 +304,7 @@ deploymentSpec: _parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\ \ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-2: container: args: @@ -358,7 +358,7 @@ deploymentSpec: predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-3: container: args: @@ -412,7 +412,7 @@ deploymentSpec: predictions_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-predict-4: container: args: @@ -463,7 +463,7 @@ deploymentSpec: _parser.add_argument(\"--predictions\", dest=\"predictions_path\", type=_make_parent_dirs_and_return_path,\ \ required=True, default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_predict(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-train: container: args: @@ -571,7 +571,7 @@ deploymentSpec: , dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ \n_outputs = xgboost_train(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 exec-xgboost-train-2: container: args: @@ -679,7 +679,7 @@ deploymentSpec: , dest=\"model_config_path\", type=_make_parent_dirs_and_return_path, required=True,\ \ default=argparse.SUPPRESS)\n_parsed_args = vars(_parser.parse_args())\n\ 
\n_outputs = xgboost_train(**_parsed_args)\n" - image: python:3.7 + image: python:3.9 pipelineInfo: name: xgboost-sample-pipeline root: diff --git a/frontend/src/lib/v2/WorkflowUtils.test.ts b/frontend/src/lib/v2/WorkflowUtils.test.ts index ee50388f34..d3d5f38263 100644 --- a/frontend/src/lib/v2/WorkflowUtils.test.ts +++ b/frontend/src/lib/v2/WorkflowUtils.test.ts @@ -143,7 +143,7 @@ PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-scr "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # An input parameter of type dict.\n input_dict_parameter: Dict[str, int],\n # An input parameter of type list.\n input_list_parameter: List[str],\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step.\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(\n str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps(input_dict_parameter))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(input_list_parameter))\n\n", ], env: [], - image: 'python:3.7', + image: 'python:3.9', lifecycle: undefined, resources: undefined, }); @@ -174,7 +174,7 @@ PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-scr "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef producer() -> str:\n with open('/data/file.txt', 'w') as file:\n file.write('Hello world')\n with open('/data/file.txt', 'r') as file:\n content = file.read()\n print(content)\n return content\n\n", ], env: [], - image: 'python:3.7', + image: 'python:3.9', lifecycle: undefined, resources: undefined, }); From a69000948650324559942047892d6f2399d25138 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Fri, 20 Oct 2023 10:52:38 -0700 Subject: [PATCH 232/253] chore(components): release GCPC 2.5.0 PiperOrigin-RevId: 575263464 --- components/google-cloud/Dockerfile | 2 +- components/google-cloud/RELEASE.md | 3 +++ components/google-cloud/docs/source/versions.json | 5 +++++ .../google-cloud/google_cloud_pipeline_components/version.py | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/Dockerfile b/components/google-cloud/Dockerfile index c9297ac28e..c4fba596e8 
100644 --- a/components/google-cloud/Dockerfile +++ b/components/google-cloud/Dockerfile @@ -44,7 +44,7 @@ RUN pip3 install -U "fsspec>=0.7.4" "gcsfs>=0.6.0" "pandas<=1.3.5" "scikit-learn RUN pip3 install -U google-cloud-notebooks # Install main package -RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.4.1#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" +RUN pip3 install "git+https://github.com/kubeflow/pipelines.git@google-cloud-pipeline-components-2.5.0#egg=google-cloud-pipeline-components&subdirectory=components/google-cloud" # Note that components can override the container entry ponint. ENTRYPOINT ["python3","-m","google_cloud_pipeline_components.container.v1.aiplatform.remote_runner"] diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index ea87c0f659..ea01a911a3 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -1,4 +1,6 @@ ## Upcoming release + +## Release 2.5.0 * Upload tensorboard metrics from `preview.llm.rlhf_pipeline` if a `tensorboard_resource_id` is provided at runtime. * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`. * Add `preview.automl.vision` and `DataConverterJobOp`. @@ -8,6 +10,7 @@ * Release `DataflowFlexTemplateJobOp` to GA namespace (`v1.dataflow.DataflowFlexTemplateJobOp`). * Make `model_checkpoint` optional for `preview.llm.infer_pipeline`. If not provided, the base model associated with the `large_model_reference` will be used. * Bump `apache_beam[gcp]` version in GCPC container image from `<2.34.0` to `==2.50.0` for compatibility with `google-cloud-aiplatform`, which depends on `shapely<3.0.0dev`. Note: upgrades to `google-cloud-pipeline-components`>=2.5.0 and later may require using a Dataflow worker image with `apache_beam==2.50.0`. +* Apply latest GCPC image vulnerability resolutions (base OS and software updates) ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/docs/source/versions.json b/components/google-cloud/docs/source/versions.json index 7e03867497..3be6717cf2 100644 --- a/components/google-cloud/docs/source/versions.json +++ b/components/google-cloud/docs/source/versions.json @@ -1,4 +1,9 @@ [ + { + "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.5.0", + "title": "2.5.0", + "aliases": [] + }, { "version": "https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.4.1", "title": "2.4.1", diff --git a/components/google-cloud/google_cloud_pipeline_components/version.py b/components/google-cloud/google_cloud_pipeline_components/version.py index 737994ce38..ee5ff1441f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/version.py +++ b/components/google-cloud/google_cloud_pipeline_components/version.py @@ -13,4 +13,4 @@ # limitations under the License. 
"""Google Cloud Pipeline Components version.""" -__version__ = "2.4.1" +__version__ = "2.5.0" From d53dddab1c8a042e58e06ff6eb38be82fefddb0a Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 20 Oct 2023 13:19:04 -0700 Subject: [PATCH 233/253] feat(components): add support for customizing model_parameters in LLM eval text generation and LLM eval text classification pipelines PiperOrigin-RevId: 575302065 --- components/google-cloud/RELEASE.md | 3 +++ .../evaluation_llm_classification_pipeline.py | 7 +++++-- .../evaluation_llm_text_generation_pipeline.py | 12 +++++++----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md index ea01a911a3..6fb3b383f0 100644 --- a/components/google-cloud/RELEASE.md +++ b/components/google-cloud/RELEASE.md @@ -11,6 +11,9 @@ * Make `model_checkpoint` optional for `preview.llm.infer_pipeline`. If not provided, the base model associated with the `large_model_reference` will be used. * Bump `apache_beam[gcp]` version in GCPC container image from `<2.34.0` to `==2.50.0` for compatibility with `google-cloud-aiplatform`, which depends on `shapely<3.0.0dev`. Note: upgrades to `google-cloud-pipeline-components`>=2.5.0 and later may require using a Dataflow worker image with `apache_beam==2.50.0`. * Apply latest GCPC image vulnerability resolutions (base OS and software updates) +* Add support for customizing model_parameters (maxOutputTokens, topK, topP, and + temperature) in LLM eval text generation and LLM eval text classification + pipelines. ## Release 2.4.1 * Disable caching for LLM pipeline tasks that store temporary artifacts. diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py index d32ebde7e1..e013db0f1e 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_classification_pipeline.py @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Vertex LLM standalone Evaluation for text classification task.""" +"""Vertex Gen AI Evaluation for text classification task.""" -from typing import List, NamedTuple +from typing import Dict, List, NamedTuple from google_cloud_pipeline_components._implementation.model_evaluation import LLMEvaluationClassificationPredictionsPostprocessorOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp @@ -38,6 +38,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default evaluation_class_labels: List[str] = [], batch_predict_instances_format: str = 'jsonl', batch_predict_predictions_format: str = 'jsonl', + batch_predict_model_parameters: Dict[str, str] = {}, machine_type: str = 'e2-highmem-16', service_account: str = '', network: str = '', @@ -68,6 +69,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default evaluation_class_labels: The JSON array of class names for the target_field, in the same order they appear in the batch predictions input file. 
batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_model_parameters: A map of parameters that govern the predictions. Some acceptable parameters include: maxOutputTokens, topK, topP, and temperature. machine_type: The machine type of the custom jobs in this pipeline. If not set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code Service Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name, as in `myVPC`. To specify this field, you must have already configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. @@ -109,6 +111,7 @@ def evaluation_llm_classification_pipeline( # pylint: disable=dangerous-default instances_format=batch_predict_instances_format, predictions_format=batch_predict_predictions_format, gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + model_parameters=batch_predict_model_parameters, encryption_spec_key_name=encryption_spec_key_name, ) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py index ee8f5ceaf9..cae0f06a13 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/model_evaluation/evaluation_llm_text_generation_pipeline.py @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Vertex LLM standalone Evaluation for text generation task.""" +"""Vertex Gen AI Evaluation for Text Generation/QA/Summarization tasks.""" -from typing import List, NamedTuple +from typing import Dict, List, NamedTuple from google_cloud_pipeline_components._implementation.model_evaluation import LLMEvaluationTextGenerationOp from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp from google_cloud_pipeline_components.types.artifact_types import VertexModel from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp from kfp import dsl -from kfp.dsl import Metrics _PIPELINE_NAME = 'evaluation-llm-text-generation-pipeline' @@ -31,6 +30,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul location: str, batch_predict_gcs_source_uris: List[str], batch_predict_gcs_destination_output_uri: str, + batch_predict_model_parameters: Dict[str, str] = {}, model_name: str = 'publishers/google/models/text-bison@001', evaluation_task: str = 'text-generation', batch_predict_instances_format: str = 'jsonl', @@ -41,7 +41,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul encryption_spec_key_name: str = '', evaluation_display_name: str = 'evaluation-llm-text-generation-pipeline-{{$.pipeline_job_uuid}}', ) -> NamedTuple( - 'outputs', evaluation_metrics=Metrics, evaluation_resource_name=str + 'outputs', evaluation_metrics=dsl.Metrics, evaluation_resource_name=str ): # fmt: off """LLM Text Generation Evaluation pipeline. @@ -58,6 +58,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul evaluation_task: The task that the large language model will be evaluated on. The evaluation component computes a set of metrics relevant to that specific task. Currently supported tasks are: `summarization`, `question-answering`, `text-generation`. batch_predict_instances_format: The format in which instances are given, must be one of the Model's supportedInputStorageFormats. Only "jsonl" is currently supported. For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. batch_predict_predictions_format: The format in which Vertex AI gives the predictions. Must be one of the Model's supportedOutputStorageFormats. Only "jsonl" is currently supported. For more details about this output config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig. + batch_predict_model_parameters: A map of parameters that govern the predictions. Some acceptable parameters include: maxOutputTokens, topK, topP, and temperature. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource service_account: Sets the default service account for workload run-as account. The service account running the pipeline (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code Service Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. 
Where `{project}` is a project number, as in `12345`, and `{network}` is a network name, as in `myVPC`. To specify this field, you must have already configured VPC Network Peering for Vertex AI (https://cloud.google.com/vertex-ai/docs/general/vpc-peering). If left unspecified, the job is not peered with any network. @@ -71,7 +72,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul # fmt: on outputs = NamedTuple( 'outputs', - evaluation_metrics=Metrics, + evaluation_metrics=dsl.Metrics, evaluation_resource_name=str, ) @@ -93,6 +94,7 @@ def evaluation_llm_text_generation_pipeline( # pylint: disable=dangerous-defaul instances_format=batch_predict_instances_format, predictions_format=batch_predict_predictions_format, gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri, + model_parameters=batch_predict_model_parameters, encryption_spec_key_name=encryption_spec_key_name, ) From 3eafa176265d997617628805b117d02ab8208173 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 21 Oct 2023 06:31:04 +0000 Subject: [PATCH 234/253] chore(deps-dev): bump @babel/traverse from 7.9.5 to 7.23.2 in /frontend/server (#10106) Bumps [@babel/traverse](https://github.com/babel/babel/tree/HEAD/packages/babel-traverse) from 7.9.5 to 7.23.2. - [Release notes](https://github.com/babel/babel/releases) - [Changelog](https://github.com/babel/babel/blob/main/CHANGELOG.md) - [Commits](https://github.com/babel/babel/commits/v7.23.2/packages/babel-traverse) --- updated-dependencies: - dependency-name: "@babel/traverse" dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- frontend/server/package-lock.json | 210 +++++++++++++++++++++++++----- 1 file changed, 178 insertions(+), 32 deletions(-) diff --git a/frontend/server/package-lock.json b/frontend/server/package-lock.json index c5494c8ccf..a28bda218e 100644 --- a/frontend/server/package-lock.json +++ b/frontend/server/package-lock.json @@ -1381,24 +1381,38 @@ } } }, - "@babel/helper-function-name": { - "version": "7.9.5", - "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.9.5.tgz", - "integrity": "sha512-JVcQZeXM59Cd1qanDUxv9fgJpt3NeKUaqBqUEvfmQ+BCOKq2xUgaWZW2hr0dkbyJgezYuplEoh5knmrnS68efw==", - "dev": true, - "requires": { - "@babel/helper-get-function-arity": "^7.8.3", - "@babel/template": "^7.8.3", - "@babel/types": "^7.9.5" - } + "@babel/helper-environment-visitor": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", + "integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", + "dev": true }, - "@babel/helper-get-function-arity": { - "version": "7.8.3", - "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.8.3.tgz", - "integrity": "sha512-FVDR+Gd9iLjUMY1fzE2SR0IuaJToR4RkCDARVfsBBPSP53GEqSFjD8gNyxg246VUyc/ALRxFaAK8rVG7UT7xRA==", + "@babel/helper-hoist-variables": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", + "integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", "dev": true, "requires": { - "@babel/types": "^7.8.3" + "@babel/types": "^7.22.5" + }, + "dependencies": { + 
"@babel/helper-validator-identifier": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "dev": true + }, + "@babel/types": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.0.tgz", + "integrity": "sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==", + "dev": true, + "requires": { + "@babel/helper-string-parser": "^7.22.5", + "@babel/helper-validator-identifier": "^7.22.20", + "to-fast-properties": "^2.0.0" + } + } } }, "@babel/helper-member-expression-to-functions": { @@ -1480,6 +1494,12 @@ "@babel/types": "^7.8.3" } }, + "@babel/helper-string-parser": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.22.5.tgz", + "integrity": "sha512-mM4COjgZox8U+JcXQwPijIZLElkgEpO5rsERVDJTc2qfCDfERyob6k5WegS14SX18IIjv+XD+GrqNumY5JRCDw==", + "dev": true + }, "@babel/helper-validator-identifier": { "version": "7.9.5", "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.9.5.tgz", @@ -1616,29 +1636,116 @@ } }, "@babel/traverse": { - "version": "7.9.5", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.9.5.tgz", - "integrity": "sha512-c4gH3jsvSuGUezlP6rzSJ6jf8fYjLj3hsMZRx/nX0h+fmHN0w+ekubRrHPqnMec0meycA2nwCsJ7dC8IPem2FQ==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.8.3", - "@babel/generator": "^7.9.5", - "@babel/helper-function-name": "^7.9.5", - "@babel/helper-split-export-declaration": "^7.8.3", - "@babel/parser": "^7.9.0", - "@babel/types": "^7.9.5", + "version": "7.23.2", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.2.tgz", + "integrity": "sha512-azpe59SQ48qG6nu2CzcMLbxUudtN+dOM9kDbUqGq3HXUJRlo7i8fvPoxQUzYgLZ4cMVmuZgm8vvBpNeRhd6XSw==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.22.13", + "@babel/generator": "^7.23.0", + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-function-name": "^7.23.0", + "@babel/helper-hoist-variables": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/parser": "^7.23.0", + "@babel/types": "^7.23.0", "debug": "^4.1.0", - "globals": "^11.1.0", - "lodash": "^4.17.13" + "globals": "^11.1.0" }, "dependencies": { + "@babel/code-frame": { + "version": "7.22.13", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.22.13.tgz", + "integrity": "sha512-XktuhWlJ5g+3TJXc5upd9Ks1HutSArik6jf2eAjYFyIOf4ej3RN+184cZbzDvbPnuTJIUhPKKJE3cIsYTiAT3w==", + "dev": true, + "requires": { + "@babel/highlight": "^7.22.13", + "chalk": "^2.4.2" + } + }, + "@babel/generator": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.0.tgz", + "integrity": "sha512-lN85QRR+5IbYrMWM6Y4pE/noaQtg4pNiqeNGX60eqOfo6gtEj6uw/JagelB8vVztSd7R6M5n1+PQkDbHbBRU4g==", + "dev": true, + "requires": { + "@babel/types": "^7.23.0", + "@jridgewell/gen-mapping": "^0.3.2", + "@jridgewell/trace-mapping": "^0.3.17", + "jsesc": "^2.5.1" + } + }, + "@babel/helper-function-name": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", + "integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", + "dev": true, 
+ "requires": { + "@babel/template": "^7.22.15", + "@babel/types": "^7.23.0" + } + }, + "@babel/helper-split-export-declaration": { + "version": "7.22.6", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", + "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", + "dev": true, + "requires": { + "@babel/types": "^7.22.5" + } + }, + "@babel/helper-validator-identifier": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "dev": true + }, + "@babel/highlight": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.22.20.tgz", + "integrity": "sha512-dkdMCN3py0+ksCgYmGG8jKeGA/8Tk+gJwSYYlFGxG5lmhfKNoAy004YpLxpS1W2J8m/EK2Ew+yOs9pVRwO89mg==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.22.20", + "chalk": "^2.4.2", + "js-tokens": "^4.0.0" + } + }, + "@babel/parser": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.0.tgz", + "integrity": "sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw==", + "dev": true + }, + "@babel/template": { + "version": "7.22.15", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.22.15.tgz", + "integrity": "sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.22.13", + "@babel/parser": "^7.22.15", + "@babel/types": "^7.22.15" + } + }, + "@babel/types": { + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.0.tgz", + "integrity": "sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==", + "dev": true, + "requires": { + "@babel/helper-string-parser": "^7.22.5", + "@babel/helper-validator-identifier": "^7.22.20", + "to-fast-properties": "^2.0.0" + } + }, "debug": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", - "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", "dev": true, "requires": { - "ms": "^2.1.1" + "ms": "2.1.2" } } } @@ -2524,6 +2631,45 @@ "@types/yargs": "^13.0.0" } }, + "@jridgewell/gen-mapping": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz", + "integrity": "sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ==", + "dev": true, + "requires": { + "@jridgewell/set-array": "^1.0.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.9" + } + }, + "@jridgewell/resolve-uri": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz", + "integrity": "sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==", + "dev": true + }, + "@jridgewell/set-array": { + "version": "1.1.2", + "resolved": 
"https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz", + "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==", + "dev": true + }, + "@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==", + "dev": true + }, + "@jridgewell/trace-mapping": { + "version": "0.3.19", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.19.tgz", + "integrity": "sha512-kf37QtfW+Hwx/buWGMPcR60iF9ziHa6r/CZJIHbmcm4+0qrXiVdxegAH0F6yddEVQ7zdkjcGCgCzUu+BcbhQxw==", + "dev": true, + "requires": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, "@kubernetes/client-node": { "version": "0.8.2", "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-0.8.2.tgz", From 0cb22179348ab9da952381980487cb8d75914d52 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Mon, 23 Oct 2023 09:04:43 -0700 Subject: [PATCH 235/253] chore(components): update GCPC custom job docstrings PiperOrigin-RevId: 575835146 --- .../v1/custom_job/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index 4faba30c68..e83d374175 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -72,27 +72,27 @@ def create_custom_training_job_from_component( # fmt: off """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. + This utility converts a [KFP component](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. 
Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: component_spec: A KFP component. display_name: The name of the CustomJob. If not provided the component's name will be used instead. - replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) - machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). - accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. 
If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. - enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. - nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). - base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). 
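For reference, a minimal sketch of how the utility documented above might be invoked from the v1 namespace; this is illustrative only and not part of the commit, and the component body, project, and machine settings are placeholder assumptions:

# Illustrative sketch only -- not part of this patch. Assumes the
# google_cloud_pipeline_components.v1.custom_job.utils module changed above
# and placeholder project/location values.
from google_cloud_pipeline_components.v1.custom_job import utils
from kfp import dsl


@dsl.component
def train_model(epochs: int) -> str:
    # Stand-in training step; real training logic would go here.
    return f'trained for {epochs} epochs'


# Convert the component into a CustomJob-backed component; omitted arguments
# fall back to the defaults described in the docstring above.
custom_train_op = utils.create_custom_training_job_from_component(
    train_model,
    display_name='train-model-custom-job',
    machine_type='n1-standard-4',
    replica_count=1,
)


@dsl.pipeline(name='custom-job-demo')
def pipeline():
    custom_train_op(epochs=10, project='my-project', location='us-central1')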
Returns: From 2054b7c45d4831c787115563c8be0048abcb9be1 Mon Sep 17 00:00:00 2001 From: Magdalena Kuhn <139039524+magdalenakuhn17@users.noreply.github.com> Date: Tue, 24 Oct 2023 00:06:06 +0200 Subject: [PATCH 236/253] feat(components) Extend kserve component (#10136) * add runtime version, resource requests and resource limits * adjust kservedeployer * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li * Update components/kserve/src/kservedeployer.py Co-authored-by: Tommy Li --------- Co-authored-by: Tommy Li --- components/kserve/README.md | 4 +- components/kserve/component.yaml | 40 +++++++++++-------- components/kserve/src/kservedeployer.py | 52 ++++++++++++++++++++----- 3 files changed, 69 insertions(+), 27 deletions(-) diff --git a/components/kserve/README.md b/components/kserve/README.md index 66f0e59b9f..c6a42842ef 100644 --- a/components/kserve/README.md +++ b/components/kserve/README.md @@ -39,6 +39,9 @@ kserve_op = components.load_component_from_url('https://raw.githubusercontent.co | canary_traffic_percent | `100` | The traffic split percentage between the candidate model and the last ready model | | namespace | | Kubernetes namespace where the KServe service is deployed. If no namespace is provided, `anonymous` will be used unless a namespace is provided in the `inferenceservice_yaml` argument. | | framework | | Machine learning framework for model serving. Currently the supported frameworks are `tensorflow`, `pytorch`, `sklearn`, `xgboost`, `onnx`, `triton`, `pmml`, and `lightgbm`. | +| runtime_version | `latest` | Runtime Version of Machine Learning Framework | +| resource_requests | `{"cpu": "0.5", "memory": "512Mi"}` | CPU and Memory requests for Model Serving | +| resource_limits | `{"cpu": "1", "memory": "1Gi"}` | CPU and Memory limits for Model Serving | | custom_model_spec | `{}` | Custom model runtime container spec in JSON. Sample spec: `{"image": "codait/max-object-detector", "port":5000, "name": "test-container"}` | | inferenceservice_yaml | `{}` | Raw InferenceService serialized YAML for deployment. Use this if you need additional configurations for your InferenceService. | | autoscaling_target | `0` | Autoscaling Target Number. 
If not 0, sets the following annotation on the InferenceService: `autoscaling.knative.dev/target` | @@ -185,4 +188,3 @@ kserve_op( inferenceservice_yaml=isvc_yaml ) ``` - diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml index 4bdcaac7b5..9d7b97e3e2 100644 --- a/components/kserve/component.yaml +++ b/components/kserve/component.yaml @@ -1,25 +1,28 @@ name: Serve a model with KServe description: Serve Models using KServe inputs: - - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} - - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'} - - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} - - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} - - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} - - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} - - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} - - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} - - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} - - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} - - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} - - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} - - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} - - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} - - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."} - - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} + - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} + - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'} + - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} + - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} + - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} + - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} + - {name: Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'} + - {name: Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for Model Serving'} + - {name: Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', 
description: 'CPU and Memory limits for Model Serving'} + - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} + - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} + - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} + - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} + - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} + - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} + - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} + - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} + - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."} + - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} outputs: - - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} + - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} implementation: container: image: quay.io/aipipeline/kserve-component:v0.11.1 @@ -32,6 +35,9 @@ implementation: --canary-traffic-percent, {inputValue: Canary Traffic Percent}, --namespace, {inputValue: Namespace}, --framework, {inputValue: Framework}, + --runtime-version, {inputValue: Runtime Version}, + --resource-requests, {inputValue: Resource Requests}, + --resource-limits, {inputValue: Resource Limits}, --custom-model-spec, {inputValue: Custom Model Spec}, --autoscaling-target, {inputValue: Autoscaling Target}, --service-account, {inputValue: Service Account}, diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py index db84e41727..c8799332f7 100644 --- a/components/kserve/src/kservedeployer.py +++ b/components/kserve/src/kservedeployer.py @@ -21,6 +21,7 @@ import yaml from kubernetes import client +from kubernetes.client.models import V1ResourceRequirements from kserve import constants from kserve import KServeClient @@ -50,8 +51,9 @@ } -def create_predictor_spec(framework, storage_uri, canary_traffic_percent, - service_account, min_replicas, max_replicas, containers, request_timeout): +def create_predictor_spec(framework, runtime_version, resource_requests, resource_limits, + storage_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout): """ Create and return V1beta1PredictorSpec to be used in a V1beta1InferenceServiceSpec object. 
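To show how the new inputs surface to pipeline authors, here is a minimal sketch of loading the updated component and passing the added runtime/resource arguments; the component URL, model URI, and resource values are illustrative assumptions, not part of this change:

# Minimal sketch, not part of this patch. Parameter names mirror the
# component.yaml inputs added above; URL and model URI are placeholders.
from kfp import components, dsl

kserve_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kserve/component.yaml')


@dsl.pipeline(name='kserve-resource-demo')
def kserve_pipeline():
    kserve_op(
        action='create',
        model_name='sklearn-iris',
        model_uri='gs://my-bucket/models/sklearn/iris',
        framework='sklearn',
        runtime_version='latest',
        resource_requests='{"cpu": "0.5", "memory": "512Mi"}',
        resource_limits='{"cpu": "1", "memory": "1Gi"}',
    )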
@@ -81,7 +83,14 @@ def create_predictor_spec(framework, storage_uri, canary_traffic_percent, setattr( predictor_spec, framework, - AVAILABLE_FRAMEWORKS[framework](storage_uri=storage_uri) + AVAILABLE_FRAMEWORKS[framework]( + storage_uri=storage_uri, + resources=V1ResourceRequirements( + requests=resource_requests, + limits=resource_limits + ), + runtime_version=runtime_version + ) ) return predictor_spec @@ -178,10 +187,10 @@ def submit_api_request(kserve_client, action, name, isvc, namespace=None, return outputs -def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, - framework, custom_model_spec, service_account, inferenceservice_yaml, - request_timeout, autoscaling_target=0, enable_istio_sidecar=True, - watch_timeout=300, min_replicas=0, max_replicas=0): +def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, framework, + runtime_version, resource_requests, resource_limits, custom_model_spec, + service_account, inferenceservice_yaml, request_timeout, autoscaling_target=0, + enable_istio_sidecar=True, watch_timeout=300, min_replicas=0, max_replicas=0): """ Perform the specified action. If the action is not 'delete' and `inferenceService_yaml` was provided, the dict representation of the YAML will be sent directly to the @@ -224,8 +233,9 @@ def perform_action(action, model_name, model_uri, canary_traffic_percent, namesp # Build the V1beta1PredictorSpec. predictor_spec = create_predictor_spec( - framework, model_uri, canary_traffic_percent, service_account, - min_replicas, max_replicas, containers, request_timeout + framework, runtime_version, resource_requests, resource_limits, + model_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout ) isvc = create_inference_service(metadata, predictor_spec) @@ -287,6 +297,24 @@ def main(): str(list(AVAILABLE_FRAMEWORKS.keys())), default="" ) + parser.add_argument( + "--runtime-version", + type=str, + help="Runtime Version of Machine Learning Framework", + default="latest" + ) + parser.add_argument( + "--resource-requests", + type=json.loads, + help="CPU and Memory requests for Model Serving", + default='{"cpu": "0.5", "memory": "512Mi"}', + ) + parser.add_argument( + "--resource-limits", + type=json.loads, + help="CPU and Memory limits for Model Serving", + default='{"cpu": "1", "memory": "1Gi"}', + ) parser.add_argument( "--custom-model-spec", type=json.loads, @@ -342,6 +370,9 @@ def main(): canary_traffic_percent = int(args.canary_traffic_percent) namespace = args.namespace framework = args.framework.lower() + runtime_version = args.runtime_version.lower() + resource_requests = args.resource_requests + resource_limits = args.resource_limits output_path = args.output_path custom_model_spec = args.custom_model_spec autoscaling_target = int(args.autoscaling_target) @@ -381,6 +412,9 @@ def main(): canary_traffic_percent=canary_traffic_percent, namespace=namespace, framework=framework, + runtime_version=runtime_version, + resource_requests=resource_requests, + resource_limits=resource_limits, custom_model_spec=custom_model_spec, autoscaling_target=autoscaling_target, service_account=service_account, From 21079b5910e597a38b67853f3ecfb3929344371e Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 23 Oct 2023 17:32:20 -0700 Subject: [PATCH 237/253] feat(components): [text2sql] Implement preprocess component logic PiperOrigin-RevId: 575976269 --- .../model_evaluation/text2sql_preprocess/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py index 4f9aa155d3..583da4c23b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_preprocess/component.py @@ -24,7 +24,7 @@ @container_component def text2sql_evaluation_preprocess( gcp_resources: OutputPath(str), - model_inference_input_path: OutputPath(str), + model_inference_input_path: OutputPath(list), project: str, location: str, evaluation_data_source_path: str, @@ -72,7 +72,7 @@ def text2sql_evaluation_preprocess( Returns: gcp_resources (str): Serialized gcp_resources proto tracking the custom job. - model_inference_input_path (str): + model_inference_input_path (list): The GCS path to save preprocessed data to run batch prediction to get table names. """ From ebb42450d0b07eaa8de35a3f6b70eacb5f26f0d8 Mon Sep 17 00:00:00 2001 From: Googler Date: Mon, 23 Oct 2023 17:41:48 -0700 Subject: [PATCH 238/253] feat(components): [text2sql] Generate table names by model batch prediction PiperOrigin-RevId: 575978329 --- .../evaluation_llm_text2sql_pipeline.py | 40 +++++++++++++++++-- .../component.py | 17 +++++--- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index f9e59493b4..e106efa698 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Text2SQL evaluation pipeline.""" +from typing import Dict from google_cloud_pipeline_components import _placeholders from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_evaluation.component import text2sql_evaluation as Text2SQLEvaluationOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_preprocess.component import text2sql_evaluation_preprocess as Text2SQLEvaluationPreprocessOp from google_cloud_pipeline_components._implementation.model_evaluation.text2sql_validate_and_process.component import text2sql_evaluation_validate_and_process as Text2SQLEvaluationValidateAndProcessOp from google_cloud_pipeline_components.types import artifact_types +from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp import kfp +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER _PIPELINE_NAME = 'evaluation_llm_text2sql_pipeline' @@ -34,6 +37,9 @@ def evaluation_llm_text2sql_pipeline( evaluation_method: str = 'parser', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, location: str = _placeholders.LOCATION_PLACEHOLDER, + model_parameters: Dict[str, str] = {}, + batch_predict_instances_format: str = 'jsonl', + batch_predict_predictions_format: str = 'jsonl', machine_type: str = 'e2-highmem-16', service_account: str = '', network: str = '', @@ -61,6 +67,16 @@ def evaluation_llm_text2sql_pipeline( Default value is the same project used to run the pipeline. location: Optional. The GCP region that runs the pipeline components. Default value is the same location used to run the pipeline. + model_parameters: Optional. The parameters that govern the predictions, e.g. + temperature, + batch_predict_instances_format: The format in which instances are given, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. + batch_predict_instances_format: The format in which perdictions are made, + must be one of the Model's supportedInputStorageFormats. If not set, + default to "jsonl". For more details about this input config, see + https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. 
More details: https://cloud.google.com/compute/docs/machine-resource @@ -85,7 +101,7 @@ def evaluation_llm_text2sql_pipeline( ) get_vertex_model_task.set_display_name('get-vertex-model') - _ = Text2SQLEvaluationPreprocessOp( + preprocess_task = Text2SQLEvaluationPreprocessOp( project=project, location=location, evaluation_data_source_path=evaluation_data_source_path, @@ -97,12 +113,28 @@ def evaluation_llm_text2sql_pipeline( encryption_spec_key_name=encryption_spec_key_name, ) + batch_predict_table_names_task = ModelBatchPredictOp( + job_display_name='text2sql-batch-predict-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + model=get_vertex_model_task.outputs['artifact'], + location=location, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_source_uris=preprocess_task.outputs['model_inference_input_path'], + model_parameters=model_parameters, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_table_names_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + project=project, + ) + _ = Text2SQLEvaluationValidateAndProcessOp( project=project, location=location, - # TODO(bozhengbz) Add value to model_inference_results_path - # when model batch prediction component is added. - model_inference_results_path='gs://test/model_inference_results.json', + model_inference_type='table_name_case', + model_inference_results_directory=batch_predict_table_names_task.outputs[ + 'gcs_output_directory' + ], tables_metadata_path=tables_metadata_path, prompt_template_path=prompt_template_path, machine_type=machine_type, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py index bc2deb06d4..3f1b097262 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_validate_and_process/component.py @@ -16,7 +16,9 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import Artifact from kfp.dsl import container_component +from kfp.dsl import Input from kfp.dsl import OutputPath from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER @@ -24,10 +26,11 @@ @container_component def text2sql_evaluation_validate_and_process( gcp_resources: OutputPath(str), - model_inference_input_path: OutputPath(str), + model_inference_input_path: OutputPath(list), project: str, location: str, - model_inference_results_path: str, + model_inference_type: str, + model_inference_results_directory: Input[Artifact], tables_metadata_path: str, prompt_template_path: str = '', display_name: str = 'text2sql-evaluation-validate-and-process', @@ -41,8 +44,11 @@ def text2sql_evaluation_validate_and_process( Args: project: Required. The GCP project that runs the pipeline component. location: Required. The GCP region that runs the pipeline component. - model_inference_results_path: Required. The path for json file containing - text2sql model inference results from the last step. + model_inference_type: Required. 
Model inference type to differentiate + model inference results validataion steps, values can be table_name_case + or column_name_case. + model_inference_results_directory: Required. The directory to store all of + files containing text2sql model inference results from the last step. tables_metadata_path: Required. The path for json file containing database metadata, including table names, schema fields. prompt_template_path: Required. The path for json file containing prompt @@ -86,7 +92,8 @@ def text2sql_evaluation_validate_and_process( f'--text2sql_validate_and_process={True}', f'--project={project}', f'--location={location}', - f'--model_inference_results_path={model_inference_results_path}', + f'--model_inference_type={model_inference_type}', + f'--model_inference_results_directory={model_inference_results_directory.path}', f'--tables_metadata_path={tables_metadata_path}', f'--prompt_template_path={prompt_template_path}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', From c0ef67cb8602dd1b9bb80721fe910e9019a399b4 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 09:25:54 -0700 Subject: [PATCH 239/253] chore(components): update GCPC docstrings PiperOrigin-RevId: 576164819 --- .../preview/custom_job/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py index 9651cc8467..93bc3221a3 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/custom_job/utils.py @@ -73,27 +73,27 @@ def create_custom_training_job_from_component( # fmt: off """Convert a KFP component into Vertex AI [custom training job](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) using the [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.customJobs) API. - This utility converts a [KFP component ](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy ](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs ](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. + This utility converts a [KFP component](https://www.kubeflow.org/docs/components/pipelines/v2/components/) provided to `component_spec` into `CustomTrainingJobOp` component. Your components inputs, outputs, and logic are carried over, with additional [CustomJob](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec) parameters exposed. 
Note that this utility constructs a ClusterSpec where the master and all the workers use the same spec, meaning all disk/machine spec related parameters will apply to all replicas. This is suitable for uses cases such as executing a training component over multiple replicas with [MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) or [MirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). See [Create custom training jobs](https://cloud.google.com/vertex-ai/docs/training/create-custom-job) for more information. Args: component_spec: A KFP component. display_name: The name of the CustomJob. If not provided the component's name will be used instead. - replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information. ](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) - machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information ](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). - accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). + replica_count: The count of instances in the cluster. One replica always counts towards the master in worker_pool_spec[0] and the remaining replicas will be allocated in worker_pool_spec[1]. See [more information.](https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job) + machine_type: The type of the machine to run the CustomJob. The default value is "n1-standard-4". See [more information](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types). + accelerator_type: The type of accelerator(s) that may be attached to the machine per `accelerator_count`. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype). accelerator_count: The number of accelerators to attach to the machine. Defaults to 1 if `accelerator_type` is set. boot_disk_type: Type of the boot disk (default is "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk Drive). boot_disk_type is set as a static value and cannot be changed as a pipeline parameter. boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). `boot_disk_size_gb` is set as a static value and cannot be changed as a pipeline parameter. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". restart_job_on_worker_restart: Restarts the entire CustomJob if a worker gets restarted. This feature can be used by distributed training jobs that are not resilient to workers leaving and joining a job. - service_account: Sets the default service account for workload run-as account. The [service account ](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. 
If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. + service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. network: The full name of the Compute Engine network to which the job should be peered. For example, `projects/12345/global/networks/myVPC`. Format is of the form `projects/{project}/global/networks/{network}`. Where `{project}` is a project number, as in `12345`, and `{network}` is a network name. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. tensorboard: The name of a Vertex AI TensorBoard resource to which this CustomJob will upload TensorBoard logs. - enable_web_access: Whether you want Vertex AI to enable [interactive shell access ](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. + enable_web_access: Whether you want Vertex AI to enable [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell) to training containers. If `True`, you can access interactive shells at the URIs given by [CustomJob.web_access_uris][]. reserved_ip_ranges: A list of names for the reserved IP ranges under the VPC network that can be used for this job. If set, we will deploy the job within the provided IP ranges. Otherwise, the job will be deployed to any IP ranges under the provided VPC network. - nfs_mounts: A list of [NfsMount ](https://cloud.devsite.corp.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training ](https://cloud.devsite.corp.google.com/vertex-ai/docs/training/train-nfs-share). - base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information ](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). + nfs_mounts: A list of [NfsMount](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/CustomJobSpec#NfsMount) resource specs in Json dict format. For more details about mounting NFS for CustomJob, see [Mount an NFS share for custom training](https://cloud.google.com/vertex-ai/docs/training/train-nfs-share). + base_output_directory: The Cloud Storage location to store the output of this CustomJob or HyperparameterTuningJob. See [more information](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination). labels: The labels with user-defined metadata to organize the CustomJob. See [more information](https://goo.gl/xmQnxf). persistent_resource_id: The ID of the PersistentResource in the same Project and Location which to run. 
If this is specified, the job will be run on existing machines held by the PersistentResource instead of on-demand short-live machines. The network and CMEK configs on the job should be consistent with those on the PersistentResource, otherwise, the job will be rejected. (This is a Preview feature not yet recommended for production workloads.) From 04aac259a4d860eab1195654c02906a20643e6f0 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 11:49:48 -0700 Subject: [PATCH 240/253] chore(components): add GCPC Python 3.7 EOL warning PiperOrigin-RevId: 576214273 --- .../google_cloud_pipeline_components/__init__.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/__init__.py b/components/google-cloud/google_cloud_pipeline_components/__init__.py index 8489662cf6..42ec791d4b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/__init__.py @@ -12,4 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. """Google Cloud Pipeline Components.""" -from google_cloud_pipeline_components.version import __version__ +import sys +import warnings + +if sys.version_info < (3, 8): + warnings.warn( + ( + 'Python 3.7 has reached end-of-life. Google Cloud Pipeline Components' + ' will drop support for Python 3.7 on April 23, 2024. To use new' + ' versions of the KFP SDK after that date, you will need to upgrade' + ' to Python >= 3.8. See https://devguide.python.org/versions/ for' + ' more details.' + ), + FutureWarning, + stacklevel=2, + ) From c383eb64cd0819a412a4535a25e68b2467bc53f1 Mon Sep 17 00:00:00 2001 From: Changyu Zhu Date: Tue, 24 Oct 2023 13:57:24 -0700 Subject: [PATCH 241/253] chore(components): Update AutoML Vision data converter component PiperOrigin-RevId: 576269528 --- .../preview/automl/vision/data_converter.py | 47 +++---------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py index 6e6b108aa9..6ccc98dd40 100644 --- a/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py +++ b/components/google-cloud/google_cloud_pipeline_components/preview/automl/vision/data_converter.py @@ -27,7 +27,7 @@ def data_converter( input_file_path: str, input_file_type: str, objective: str, - output_dir: str, + output_dir: dsl.OutputPath(str), gcp_resources: dsl.OutputPath(str), location: str = 'us-central1', timeout: str = '604800s', @@ -36,10 +36,6 @@ def data_converter( output_shape: Optional[str] = None, split_ratio: Optional[str] = None, num_shard: Optional[str] = None, - output_fps: Optional[int] = None, - num_frames: Optional[int] = None, - min_duration_sec: Optional[float] = None, - pos_neg_ratio: Optional[float] = None, encryption_spec_key_name: str = '', project: str = _placeholders.PROJECT_ID_PLACEHOLDER, ): @@ -51,21 +47,17 @@ def data_converter( input_file_path: Input file path. Please refer to different input formats in Vertex AI Documentation. For example, [image classification prepare data](https://cloud.google.com/vertex-ai/docs/image-data/classification/prepare-data) page. input_file_type: 'csv', 'jsonl', or 'coco_json'. Must be one of the input file types supported by the objective. 
objective: One of 'icn', 'iod', 'isg', 'vcn', or 'var'. - output_dir: Cloud Storage directory for storing converted data and pipeline information. location: Location for creating the custom training job. If not set, default to us-central1. timeout: The maximum job running time. The default is 7 days. A duration in seconds with up to nine fractional digits, terminated by 's', for example: "3.5s". service_account: Sets the default service account for workload run-as account. The [service account](https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) running the pipeline submitting jobs must have act-as permission on this run-as account. If unspecified, the Vertex AI Custom Code [Service Agent ](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) for the CustomJob's project. machine_type: [Machine type](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types) for the CustomJob. If conversion failed, consider using a machine type with more RAM or splitting dataset into smaller pieces. - output_shape: Video only. Output shape (height,width) for video frames. + output_shape: Output shape (height,width) for images. split_ratio: Proportion of data to split into train/validation/test, separated by comma. num_shard: Number of train/validation/test shards, separated by comma. - output_fps: Video only. Output frames per second. - num_frames: VAR only. Number of frames inside a single video clip window. - min_duration_sec: VAR only. Minimum duration of a video clip annotation in seconds. - pos_neg_ratio: VAR only. Sampling ratio between positive and negative segments. encryption_spec_key_name: Customer-managed encryption key options for the CustomJob. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. project: Project to create the custom training job in. Defaults to the project in which the PipelineJob is run. Returns: + output_dir: Cloud Storage directory storing converted data and pipeline information. gcp_resources: Serialized JSON of `gcp_resources` [proto](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components/proto) which tracks the CustomJob. 
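For context, a minimal sketch of calling the updated component from a pipeline; the import path simply mirrors the file location in this diff, and the GCS input, objective, and split settings are illustrative assumptions:

# Minimal sketch, not part of this patch; inputs below are placeholders.
from google_cloud_pipeline_components.preview.automl.vision.data_converter import data_converter
from kfp import dsl


@dsl.pipeline(name='vision-data-converter-demo')
def convert_pipeline(project: str, input_csv: str):
    # Convert an image-classification (icn) CSV import file into the
    # training format, with an 80/10/10 train/validation/test split.
    data_converter(
        display_name='icn-data-conversion',
        input_file_path=input_csv,
        input_file_type='csv',
        objective='icn',
        project=project,
        location='us-central1',
        split_ratio='0.8,0.1,0.1',
        num_shard='8,1,1',
    )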
""" # fmt: on @@ -99,6 +91,7 @@ def data_converter( ' "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter",' ), '"args": [', + '"--enable_input_validation","true",', '"--input_file_path", "', input_file_path, '",', @@ -129,30 +122,6 @@ def data_converter( [',"--num_shard","', num_shard, '"'] ), ), - dsl.IfPresentPlaceholder( - input_name='output_fps', - then=dsl.ConcatPlaceholder( - [',"--output_fps","', output_fps, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='num_frames', - then=dsl.ConcatPlaceholder( - [',"--num_frames","', num_frames, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='min_duration_sec', - then=dsl.ConcatPlaceholder( - [',"--min_duration_sec","', min_duration_sec, '"'] - ), - ), - dsl.IfPresentPlaceholder( - input_name='pos_neg_ratio', - then=dsl.ConcatPlaceholder( - [',"--pos_neg_ratio","', pos_neg_ratio, '"'] - ), - ), ']}}],', '"scheduling": {', '"timeout": "', @@ -165,12 +134,8 @@ def data_converter( ['"service_account": "', service_account, '",'] ), ), - '"enable_web_access": false,', - '"base_output_directory": {', - '"output_uri_prefix": "', - output_dir, - '"', - '}},', + '"enable_web_access": false', + '},', '"encryption_spec": {', '"kms_key_name": "', encryption_spec_key_name, From e3b186379186d771316f39257e27d315c2c10a77 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Tue, 24 Oct 2023 17:06:08 -0500 Subject: [PATCH 242/253] chore(sdk): add Python 3.7 EOL notice (#10139) --- sdk/RELEASE.md | 1 + sdk/python/kfp/__init__.py | 11 ++++++ sdk/python/kfp/dsl/component_factory.py | 6 ++++ sdk/python/kfp/dsl/component_factory_test.py | 14 ++++++++ sdk/python/kfp/init_test.py | 36 ++++++++++++++++++++ 5 files changed, 68 insertions(+) create mode 100644 sdk/python/kfp/init_test.py diff --git a/sdk/RELEASE.md b/sdk/RELEASE.md index 3e43e2633f..61dfc86d14 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -6,6 +6,7 @@ * Support collecting outputs from conditional branches using `dsl.OneOf` [\#10067](https://github.com/kubeflow/pipelines/pull/10067) ## Deprecations +* Add notice of Python 3.7 support removal on April 23, 2024 [\#10139](https://github.com/kubeflow/pipelines/pull/10139) ## Bug fixes and other changes * Fix type on `dsl.ParallelFor` sub-DAG output when a `dsl.Collected` is used. Non-functional fix. [\#10069](https://github.com/kubeflow/pipelines/pull/10069) diff --git a/sdk/python/kfp/__init__.py b/sdk/python/kfp/__init__.py index 74d0332f3b..eb1fce1d7e 100644 --- a/sdk/python/kfp/__init__.py +++ b/sdk/python/kfp/__init__.py @@ -18,6 +18,17 @@ __version__ = '2.3.0' +import sys +import warnings + +if sys.version_info < (3, 8): + warnings.warn( + ('Python 3.7 has reached end-of-life. KFP will drop support for Python 3.7 on April 23, 2024. To use new versions of the KFP SDK after that date, you will need to upgrade to Python >= 3.8. See https://devguide.python.org/versions/ for more details.' + ), + FutureWarning, + stacklevel=2, + ) + TYPE_CHECK = True import os diff --git a/sdk/python/kfp/dsl/component_factory.py b/sdk/python/kfp/dsl/component_factory.py index 29402dc131..5df3824e33 100644 --- a/sdk/python/kfp/dsl/component_factory.py +++ b/sdk/python/kfp/dsl/component_factory.py @@ -532,6 +532,12 @@ def create_component_from_func( args = [] if base_image is None: base_image = _DEFAULT_BASE_IMAGE + warnings.warn( + ("Python 3.7 has reached end-of-life. The default base_image used by the @dsl.component decorator will switch from 'python:3.7' to 'python:3.8' on April 23, 2024. 
To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3.8." + ), + FutureWarning, + stacklevel=2, + ) component_image = base_image diff --git a/sdk/python/kfp/dsl/component_factory_test.py b/sdk/python/kfp/dsl/component_factory_test.py index 1b3f388e7f..0def6344d6 100644 --- a/sdk/python/kfp/dsl/component_factory_test.py +++ b/sdk/python/kfp/dsl/component_factory_test.py @@ -287,5 +287,19 @@ def comp(output_list: Output[List[Artifact]]): return dsl.ContainerSpec(image='alpine') +class TestPythonEOLWarning(unittest.TestCase): + + def test_default_base_image(self): + + with self.assertWarnsRegex( + FutureWarning, + r"Python 3\.7 has reached end-of-life\. The default base_image used by the @dsl\.component decorator will switch from 'python:3\.7' to 'python:3\.8' on April 23, 2024\. To ensure your existing components work with versions of the KFP SDK released after that date, you should provide an explicit base_image argument and ensure your component works as intended on Python 3\.8\." + ): + + @dsl.component + def foo(): + pass + + if __name__ == '__main__': unittest.main() diff --git a/sdk/python/kfp/init_test.py b/sdk/python/kfp/init_test.py new file mode 100644 index 0000000000..9e6a86598a --- /dev/null +++ b/sdk/python/kfp/init_test.py @@ -0,0 +1,36 @@ +# Copyright 2023 The Kubeflow Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import sys +import unittest +from unittest import mock + + +@mock.patch.object(sys, 'version_info', new=(3, 7, 12, 'final', 0)) +class TestPythonEOLWarning(unittest.TestCase): + + def test(self): + mod = importlib.import_module('kfp') + + with self.assertWarnsRegex( + FutureWarning, + r'Python 3\.7 has reached end-of-life\. KFP will drop support for Python 3\.7 on April 23, 2024\. To use new versions of the KFP SDK after that date, you will need to upgrade to Python >= 3\.8\. See https:\/\/devguide\.python\.org\/versions\/ for more details\.' 
+ ): + # simulate first import from kfp + importlib.reload(mod) + + +if __name__ == '__main__': + unittest.main() From 0d7561199751e83b4d7e1603c3d32d4088a7e208 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 24 Oct 2023 15:17:52 -0700 Subject: [PATCH 243/253] feat(components): [endpoint_batch_predict] Initialize component PiperOrigin-RevId: 576300455 --- .../model_evaluation/__init__.py | 2 + .../endpoint_batch_predict/__init__.py | 14 ++ .../endpoint_batch_predict/component.py | 229 ++++++++++++++++++ 3 files changed, 245 insertions(+) create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py index e41a453603..07520b6f22 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/__init__.py @@ -18,6 +18,7 @@ from google_cloud_pipeline_components._implementation.model_evaluation.chunking.component import chunking as ChunkingOp from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp from google_cloud_pipeline_components._implementation.model_evaluation.dataset_preprocessor.component import dataset_preprocessor_error_analysis as EvaluationDatasetPreprocessorOp +from google_cloud_pipeline_components._implementation.model_evaluation.endpoint_batch_predict.component import evaluation_llm_endpoint_batch_predict_pipeline_graph_component from google_cloud_pipeline_components._implementation.model_evaluation.error_analysis_annotation.component import error_analysis_annotation as ErrorAnalysisAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.evaluated_annotation.component import evaluated_annotation as EvaluatedAnnotationOp from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp @@ -41,6 +42,7 @@ 'evaluation_llm_safety_bias_pipeline', 'evaluation_llm_embedding_pipeline', 'evaluation_llm_text2sql_pipeline', + 'evaluation_llm_endpoint_batch_predict_pipeline_graph_component', 'ChunkingOp', 'EvaluationDataSamplerOp', 'EvaluationDatasetPreprocessorOp', diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py new file mode 100644 index 0000000000..7edc3ee88e --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Google Cloud Pipeline Evaluation Endpoint Batch Predict Component.""" diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py new file mode 100644 index 0000000000..acb8048b9c --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/endpoint_batch_predict/component.py @@ -0,0 +1,229 @@ +# Copyright 2023 The Kubeflow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Endpoint batch predict component used in KFP pipelines.""" + +from typing import Dict, NamedTuple, Optional, Union +from google_cloud_pipeline_components import utils as gcpc_utils +from google_cloud_pipeline_components._implementation.model_evaluation import utils +from kfp import dsl +from kfp.dsl import container_component +from kfp.dsl import OutputPath +from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER + +_IMAGE_URI = 'gcr.io/model-evaluation-dev/llm_eval:wjess-test' + + +@dsl.component +def add_json_escape_parameters(parameters: dict) -> str: + import json + + json_escaped_parameters = json.dumps(parameters).replace('"', '\\"') + return json_escaped_parameters + + +@container_component +def endpoint_batch_predict( + gcp_resources: OutputPath(str), + gcs_output_directory: OutputPath(str), + project: str, + location: str, + source_gcs_uri: str, + model_parameters: Optional[str] = None, + gcs_destination_output_uri_prefix: Optional[str] = '', + endpoint_id: Optional[str] = None, + publisher_model: Optional[str] = None, + qms_override: Optional[str] = None, + display_name: str = 'endpoint_batch_predict', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +): + """Returns the batch prediction results for a given batch of instances. + + Args: + project: Required. The GCP project that runs the pipeline component. + location: Required. The GCP region that runs the pipeline component. + source_gcs_uri: Google Cloud Storage URI to your instances to run + prediction on. The stored file format should be jsonl and each line + contains one Prediction instance. Instance should match Deployed model's + instance schema + gcs_destination_output_uri_prefix: The Google Cloud Storage location of + the directory where the output is to be written to. In the given + directory a new directory is created. 
Its name is + `prediction-model-`, where timestamp is in + YYYY-MM-DD-hh:mm:ss.sss format. Inside of it is file results.jsonl + endpoint_id: Required if no publisher_model is provided. The Endpoint ID + of the deployed the LLM to serve the prediction. When endpoint_id and + publisher_model are both provided, publisher_model will be used. + model_parameters: The parameters that govern the prediction. + publisher_model: Required if no endpoint_id is provided. Name of the + Publisher model. + location: Project the LLM Model is in. + qms_override: Manual control of a large language model's qms. Write up + when there's an approved quota increase for a LLM. Write down when + limiting qms of a LLM for this pipeline. Should be provided as a + dictionary, for example {'text-bison': 20}. For deployed model which + doesn't have google-vertex-llm-tuning-base-model-id label, override the + default here. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted + to `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Format is of the form projects/{project}/global/networks/{network}. + Where {project} is a project number, as in 12345, and {network} is a + network name. Private services access must already be configured for the + network. If left unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + gcp_resources (str): + Serialized gcp_resources proto tracking the custom job. + gcs_output_directory (str): + GCS directory where endpoint batch prediction results are stored. 
+ """ + return gcpc_utils.build_serverless_customjob_container_spec( + project=project, + location=location, + custom_job_payload=utils.build_custom_job_payload( + display_name=display_name, + machine_type=machine_type, + image_uri=_IMAGE_URI, + args=[ + f'--endpoint_batch_predict={True}', + f'--project={project}', + f'--location={location}', + f'--source_gcs_uri={source_gcs_uri}', + f'--model_parameters={model_parameters}', + f'--gcs_destination_output_uri_prefix={gcs_destination_output_uri_prefix}', + f'--endpoint_id={endpoint_id}', + f'--publisher_model={publisher_model}', + f'--qms_override={qms_override}', + f'--gcs_output_directory={gcs_output_directory}', + f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', + f'--gcp_resources={gcp_resources}', + '--executor_input={{$.json_escape[1]}}', + ], + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ), + gcp_resources=gcp_resources, + ) + + +@dsl.pipeline(name='EvaludationLLMEndpointBatchPredictOp') +def evaluation_llm_endpoint_batch_predict_pipeline_graph_component( + project: str, + location: str, + source_gcs_uri: str, + model_parameters: Optional[Dict[str, Union[int, float]]] = {}, + gcs_destination_output_uri_prefix: Optional[str] = '', + endpoint_id: Optional[str] = None, + publisher_model: Optional[str] = None, + qms_override: Optional[str] = None, + display_name: str = 'endpoint_batch_predict', + machine_type: str = 'e2-highmem-16', + service_account: str = '', + network: str = '', + encryption_spec_key_name: str = '', +) -> NamedTuple('outputs', gcs_output_directory=str): + """The LLM Evaluation Text2SQL Pipeline. + + Args: + project: Required. The GCP project that runs the pipeline components. + location: Required. The GCP region that runs the pipeline components. + source_gcs_uri: Google Cloud Storage URI to your instances to run prediction + on. The stored file format should be jsonl and each line contains one + Prediction instance. Instance should match Deployed model's instance + schema + gcs_destination_output_uri_prefix: The Google Cloud Storage location of the + directory where the output is to be written to. In the given directory a + new directory is created. Its name is + `prediction-model-`, where timestamp is in + YYYY-MM-DD-hh:mm:ss.sss format. Inside of it is file results.jsonl + endpoint_id: Required if no publisher_model is provided. The Endpoint ID of + the deployed the LLM to serve the prediction. When endpoint_id and + publisher_model are both provided, publisher_model will be used. + model_parameters: The parameters that govern the prediction. + publisher_model: Required if no endpoint_id is provided. Name of the + Publisher model. + location: Project the LLM Model is in. + qms_override: Manual control of a large language model's qms. Write up when + there's an approved quota increase for a LLM. Write down when limiting qms + of a LLM for this pipeline. Should be provided as a dictionary, for + example {'text-bison': 20}. For deployed model which doesn't have + google-vertex-llm-tuning-base-model-id label, override the default here. + display_name: The name of the Evaluation job. + machine_type: The machine type of this custom job. If not set, defaulted to + `e2-highmem-16`. More details: + https://cloud.google.com/compute/docs/machine-resource + service_account: Sets the default service account for workload run-as + account. 
The service account running the pipeline + (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account) + submitting jobs must have act-as permission on this run-as account. If + unspecified, the Vertex AI Custom Code Service + Agent(https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents) + for the CustomJob's project. + network: The full name of the Compute Engine network to which the job should + be peered. For example, projects/12345/global/networks/myVPC. Format is of + the form projects/{project}/global/networks/{network}. Where {project} is + a project number, as in 12345, and {network} is a network name. Private + services access must already be configured for the network. If left + unspecified, the job is not peered with any network. + encryption_spec_key_name: Customer-managed encryption key options for the + CustomJob. If this is set, then all resources created by the CustomJob + will be encrypted with the provided encryption key. + + Returns: + NamedTuple: + gcs_output_directory (str): + GCS directory where endpoint batch prediction results are stored. + """ + outputs = NamedTuple('outputs', gcs_output_directory=str) + + endpoint_batch_predict_task = endpoint_batch_predict( + project=project, + location=location, + source_gcs_uri=source_gcs_uri, + model_parameters=add_json_escape_parameters( + parameters=model_parameters + ).output, + gcs_destination_output_uri_prefix=gcs_destination_output_uri_prefix, + endpoint_id=endpoint_id, + publisher_model=publisher_model, + qms_override=qms_override, + display_name=display_name, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) + + return outputs( + gcs_output_directory=endpoint_batch_predict_task.outputs[ + 'gcs_output_directory' + ] + ) From 570e56dd09af32e173cf041eed7497e4533ec186 Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 25 Oct 2023 10:42:41 -0700 Subject: [PATCH 244/253] fix(components): [text2sql] Turn model_inference_results_path to model_inference_results_directory and remove duplicate comment PiperOrigin-RevId: 576576299 --- .../text2sql/evaluation_llm_text2sql_pipeline.py | 12 +++++------- .../text2sql_evaluation/component.py | 10 ++++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index e106efa698..6f0af29e52 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -73,10 +73,6 @@ def evaluation_llm_text2sql_pipeline( must be one of the Model's supportedInputStorageFormats. If not set, default to "jsonl". For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. - batch_predict_instances_format: The format in which perdictions are made, - must be one of the Model's supportedInputStorageFormats. If not set, - default to "jsonl". 
For more details about this input config, see - https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig. machine_type: The machine type of this custom job. If not set, defaulted to `e2-highmem-16`. More details: https://cloud.google.com/compute/docs/machine-resource @@ -148,9 +144,11 @@ def evaluation_llm_text2sql_pipeline( location=location, sql_dialect=sql_dialect, evaluation_method=evaluation_method, - # TODO(bozhengbz) Add value to model_inference_results_path - # when model batch prediction component is added. - model_inference_results_path='gs://test/model_inference_results.json', + # TODO(bozhengbz) Change value to model_inference_results_directory + # when sql query model batch prediction component is added. + model_inference_results_directory=batch_predict_table_names_task.outputs[ + 'gcs_output_directory' + ], tables_metadata_path=tables_metadata_path, machine_type=machine_type, service_account=service_account, diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py index 063172067a..a084de02d4 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql_evaluation/component.py @@ -16,7 +16,9 @@ from google_cloud_pipeline_components import utils as gcpc_utils from google_cloud_pipeline_components._implementation.model_evaluation import utils from google_cloud_pipeline_components._implementation.model_evaluation import version +from kfp.dsl import Artifact from kfp.dsl import container_component +from kfp.dsl import Input from kfp.dsl import Metrics from kfp.dsl import Output from kfp.dsl import OutputPath @@ -33,7 +35,7 @@ def text2sql_evaluation( location: str, sql_dialect: str, evaluation_method: str, - model_inference_results_path: str, + model_inference_results_directory: Input[Artifact], tables_metadata_path: str, display_name: str = 'text2sql-evaluation', machine_type: str = 'e2-highmem-16', @@ -49,8 +51,8 @@ def text2sql_evaluation( sql_dialect: Required. SQL dialect type, e.g. bigquery, mysql, etc. evaluation_method: Required. Text2SQL evaluation method, value can be 'parser', 'execution', 'all'. - model_inference_results_path: Required. The path for json file containing - text2sql model inference results from the last step. + model_inference_results_directory: Required. The path for json file + containing text2sql model inference results from the last step. tables_metadata_path: Required. The path for json file containing database metadata, including table names, schema fields. display_name: The name of the Evaluation job. 
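Aside on the pattern this diff adopts: the hunks above and below switch text2sql_evaluation from a plain string path to a KFP Input[Artifact], so the container receives the upstream batch-prediction output through the artifact's .path placeholder instead of a hand-threaded GCS string. Below is a minimal, self-contained sketch of that hand-off written against the KFP v2 DSL; the component and pipeline names are illustrative only and are not part of this patch.

from kfp import dsl
from kfp.dsl import Artifact, Input, Output


@dsl.component
def produce_directory(out_dir: Output[Artifact]):
    # Writes a small results file into the output artifact's directory.
    import os
    os.makedirs(out_dir.path, exist_ok=True)
    with open(os.path.join(out_dir.path, 'results.jsonl'), 'w') as f:
        f.write('{"prediction": "ok"}\n')


@dsl.container_component
def consume_directory(results_dir: Input[Artifact]):
    # At runtime, results_dir.path resolves to a local mount of the artifact's
    # URI, mirroring how --model_inference_results_directory consumes .path
    # elsewhere in this diff.
    return dsl.ContainerSpec(
        image='alpine',
        command=['ls', '-R'],
        args=[results_dir.path],
    )


@dsl.pipeline(name='artifact-handoff-sketch')
def artifact_handoff():
    producer_task = produce_directory()
    consume_directory(results_dir=producer_task.outputs['out_dir'])
    # Compile with: kfp.compiler.Compiler().compile(artifact_handoff, 'sketch.yaml')

Passing an artifact rather than a raw URI also lets the backend record lineage between the producing batch-prediction task and the evaluation task.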
@@ -98,7 +100,7 @@ def text2sql_evaluation( f'--location={location}', f'--sql_dialect={sql_dialect}', f'--evaluation_method={evaluation_method}', - f'--model_inference_results_path={model_inference_results_path}', + f'--model_inference_results_directory={model_inference_results_directory.path}', f'--tables_metadata_path={tables_metadata_path}', f'--root_dir={PIPELINE_ROOT_PLACEHOLDER}', f'--gcp_resources={gcp_resources}', From c83329f69c57cf7ecd03703e192878522c4d23fa Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 25 Oct 2023 11:18:57 -0700 Subject: [PATCH 245/253] chore(components): fix GCPC markdown docstrings rendering PiperOrigin-RevId: 576588522 --- .../v1/automl/training_job/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py index 1f5612bcc4..fa2f7099f1 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/automl/training_job/__init__.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex- - -ai/docs/beginner/beginners-guide) for image, text, video, and forecasting. -""" - +# fmt: off +"""Create [Vertex AI AutoML training jobs](https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide) for image, text, video, and forecasting.""" +# fmt: on from google_cloud_pipeline_components.v1.automl.training_job.automl_forecasting_training_job.component import automl_forecasting_training_job as AutoMLForecastingTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_image_training_job.component import automl_image_training_job as AutoMLImageTrainingJobRunOp from google_cloud_pipeline_components.v1.automl.training_job.automl_tabular_training_job.component import automl_tabular_training_job as AutoMLTabularTrainingJobRunOp From 7ab05d8a84fd295bb1b37285f831bda5bbf55cd2 Mon Sep 17 00:00:00 2001 From: rickyxie0929 <148598858+rickyxie0929@users.noreply.github.com> Date: Wed, 25 Oct 2023 17:29:09 -0700 Subject: [PATCH 246/253] chore(sdk): Remove the ` ()`from docstring args. (#10159) * chore(sdk): Remove the ` ()`from docstring args. Remote the ` ()` from the docstring args, which are redundant since there are type annotations. * Trim the unnecessary leading space. --- sdk/python/kfp/compiler/read_write_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/python/kfp/compiler/read_write_test.py b/sdk/python/kfp/compiler/read_write_test.py index 7f33d73394..9be9d4ca2c 100644 --- a/sdk/python/kfp/compiler/read_write_test.py +++ b/sdk/python/kfp/compiler/read_write_test.py @@ -175,12 +175,12 @@ def test( """Tests serialization and deserialization consistency and correctness. Args: - name (str): '{test_group_name}-{test_case_name}'. Useful for print statements/debugging. - test_case (str): Test case name (without file extension). - test_data_dir (str): The directory containing the test case files. - function (str, optional): The function name to compile. 
- read (bool): Whether the pipeline/component supports deserialization from YAML (IR, except for V1 component YAML back compatability tests). - write (bool): Whether the pipeline/component supports compilation from a Python file. + name: '{test_group_name}-{test_case_name}'. Useful for print statements/debugging. + test_case: Test case name (without file extension). + test_data_dir: The directory containing the test case files. + function: The function name to compile. + read: Whether the pipeline/component supports deserialization from YAML (IR, except for V1 component YAML back compatability tests). + write: Whether the pipeline/component supports compilation from a Python file. """ yaml_file = os.path.join(test_data_dir, f'{test_case}.yaml') py_file = os.path.join(test_data_dir, f'{test_case}.py') From 2882fcf025dd1dae0a5fdd3ba02965ad34d2f326 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 25 Oct 2023 20:26:09 -0500 Subject: [PATCH 247/253] chore(sdk): add pytest.ini file (#10160) * add pytest ini file * Update pytest.ini --- pytest.ini | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..a079fdd1c7 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +addopts = --ignore=sdk/python/kfp/deprecated --ignore=sdk/python/kfp/tests +testpaths = sdk/python/kfp From 02e00e8439e9753dbf82856ac9c5a7cec8ce3243 Mon Sep 17 00:00:00 2001 From: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com> Date: Wed, 25 Oct 2023 18:39:10 -0700 Subject: [PATCH 248/253] fix(sdk): type annotation for client credentials (#10158) --- sdk/python/kfp/client/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/python/kfp/client/client.py b/sdk/python/kfp/client/client.py index 448433ed9d..bdf9cbdf20 100644 --- a/sdk/python/kfp/client/client.py +++ b/sdk/python/kfp/client/client.py @@ -32,6 +32,7 @@ from kfp import compiler from kfp.client import auth from kfp.client import set_volume_credentials +from kfp.client.token_credentials_base import TokenCredentialsBase from kfp.dsl import base_component from kfp.pipeline_spec import pipeline_spec_pb2 import kfp_server_api @@ -150,7 +151,7 @@ def __init__( proxy: Optional[str] = None, ssl_ca_cert: Optional[str] = None, kube_context: Optional[str] = None, - credentials: Optional[str] = None, + credentials: Optional[TokenCredentialsBase] = None, ui_host: Optional[str] = None, verify_ssl: Optional[bool] = None, ) -> None: @@ -221,7 +222,7 @@ def _load_config( proxy: Optional[str], ssl_ca_cert: Optional[str], kube_context: Optional[str], - credentials: Optional[str], + credentials: Optional[TokenCredentialsBase], verify_ssl: Optional[bool], ) -> kfp_server_api.Configuration: config = kfp_server_api.Configuration() From 03df9df68c9def59813075dacfa2328d92d008e5 Mon Sep 17 00:00:00 2001 From: Connor McCarthy Date: Wed, 25 Oct 2023 20:39:16 -0500 Subject: [PATCH 249/253] chore(sdk): fix local test failure (#10161) --- sdk/python/kfp/cli/component_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/python/kfp/cli/component_test.py b/sdk/python/kfp/cli/component_test.py index 4d256afaf2..a818aafb80 100644 --- a/sdk/python/kfp/cli/component_test.py +++ b/sdk/python/kfp/cli/component_test.py @@ -86,6 +86,7 @@ def setUp(self) -> None: }] self._docker_client.images.push.return_value = [{'status': 'Pushed'}] self.addCleanup(patcher.stop) + self.current_dir = os.path.dirname(os.path.abspath(__file__)) with 
contextlib.ExitStack() as stack: stack.enter_context(self.runner.isolated_filesystem()) @@ -579,8 +580,7 @@ def test_dockerfile_can_contain_custom_kfp_package(self): component = _make_component( func_name='train', target_image='custom-image') _write_components('components.py', component) - current_dir = os.path.dirname(os.path.abspath(__file__)) - package_dir = os.path.dirname(os.path.dirname(current_dir)) + package_dir = os.path.dirname(os.path.dirname(self.current_dir)) # suppresses large stdout from subprocess that builds kfp package with mock.patch.object( From 52f5cf51c4a6c233aae57125561c0fc95c4fd20f Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 26 Oct 2023 09:16:09 -0700 Subject: [PATCH 250/253] feat(backend): Support consuming parent DAG input artifact (#10162) --- backend/src/v2/driver/driver.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/backend/src/v2/driver/driver.go b/backend/src/v2/driver/driver.go index eda53baad5..d227855ca3 100644 --- a/backend/src/v2/driver/driver.go +++ b/backend/src/v2/driver/driver.go @@ -768,7 +768,11 @@ func resolveInputs(ctx context.Context, dag *metadata.DAG, iterationIndex *int, if err != nil { return nil, err } - glog.Infof("parent DAG input parameters %+v", inputParams) + inputArtifacts, err := mlmd.GetInputArtifactsByExecutionID(ctx, dag.Execution.GetID()) + if err != nil { + return nil, err + } + glog.Infof("parent DAG input parameters: %+v, artifacts: %+v", inputParams, inputArtifacts) inputs = &pipelinespec.ExecutorInput_Inputs{ ParameterValues: make(map[string]*structpb.Value), Artifacts: make(map[string]*pipelinespec.ArtifactList), @@ -998,7 +1002,15 @@ func resolveInputs(ctx context.Context, dag *metadata.DAG, iterationIndex *int, } switch t := artifactSpec.Kind.(type) { case *pipelinespec.TaskInputsSpec_InputArtifactSpec_ComponentInputArtifact: - return nil, artifactError(fmt.Errorf("component input artifact not implemented yet")) + inputArtifactName := artifactSpec.GetComponentInputArtifact() + if inputArtifactName == "" { + return nil, artifactError(fmt.Errorf("component input artifact key is empty")) + } + v, ok := inputArtifacts[inputArtifactName] + if !ok { + return nil, artifactError(fmt.Errorf("parent DAG does not have input artifact %s", inputArtifactName)) + } + inputs.Artifacts[name] = v case *pipelinespec.TaskInputsSpec_InputArtifactSpec_TaskOutputArtifact: taskOutput := artifactSpec.GetTaskOutputArtifact() From 1bee8be071a91f44c0129837c381863327cb337d Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 26 Oct 2023 11:44:41 -0700 Subject: [PATCH 251/253] feat(components): [text2sql] Generate column names by model batch predict PiperOrigin-RevId: 576941675 --- .../evaluation_llm_text2sql_pipeline.py | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py index 6f0af29e52..f1f591d681 100644 --- a/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py +++ b/components/google-cloud/google_cloud_pipeline_components/_implementation/model_evaluation/text2sql/evaluation_llm_text2sql_pipeline.py @@ -110,7 +110,7 @@ def evaluation_llm_text2sql_pipeline( ) batch_predict_table_names_task = ModelBatchPredictOp( - 
job_display_name='text2sql-batch-predict-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + job_display_name='text2sql-batch-prediction-table-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', model=get_vertex_model_task.outputs['artifact'], location=location, instances_format=batch_predict_instances_format, @@ -124,7 +124,7 @@ def evaluation_llm_text2sql_pipeline( project=project, ) - _ = Text2SQLEvaluationValidateAndProcessOp( + validate_table_names_and_process_task = Text2SQLEvaluationValidateAndProcessOp( project=project, location=location, model_inference_type='table_name_case', @@ -139,6 +139,38 @@ def evaluation_llm_text2sql_pipeline( encryption_spec_key_name=encryption_spec_key_name, ) + batch_predict_column_names_task = ModelBatchPredictOp( + job_display_name='text2sql-batch-prediction-column-names-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}', + model=get_vertex_model_task.outputs['artifact'], + location=location, + instances_format=batch_predict_instances_format, + predictions_format=batch_predict_predictions_format, + gcs_source_uris=validate_table_names_and_process_task.outputs[ + 'model_inference_input_path' + ], + model_parameters=model_parameters, + gcs_destination_output_uri_prefix=( + f'{PIPELINE_ROOT_PLACEHOLDER}/batch_predict_column_names_output' + ), + encryption_spec_key_name=encryption_spec_key_name, + project=project, + ) + + _ = Text2SQLEvaluationValidateAndProcessOp( + project=project, + location=location, + model_inference_type='column_name_case', + model_inference_results_directory=batch_predict_column_names_task.outputs[ + 'gcs_output_directory' + ], + tables_metadata_path=tables_metadata_path, + prompt_template_path=prompt_template_path, + machine_type=machine_type, + service_account=service_account, + network=network, + encryption_spec_key_name=encryption_spec_key_name, + ) + _ = Text2SQLEvaluationOp( project=project, location=location, From c0093ecef6bc5f056efa135d019267327115d79d Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Thu, 26 Oct 2023 21:02:10 -0700 Subject: [PATCH 252/253] feat(backend): Update driver and launcher images (#10164) --- backend/src/v2/compiler/argocompiler/argo.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/v2/compiler/argocompiler/argo.go b/backend/src/v2/compiler/argocompiler/argo.go index dc9dcd6457..d7c488972a 100644 --- a/backend/src/v2/compiler/argocompiler/argo.go +++ b/backend/src/v2/compiler/argocompiler/argo.go @@ -116,8 +116,8 @@ func Compile(jobArg *pipelinespec.PipelineJob, kubernetesSpecArg *pipelinespec.S wf: wf, templates: make(map[string]*wfapi.Template), // TODO(chensun): release process and update the images. 
- driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:fa68f52639b4f4683c9f8f468502867c9663823af0fbcff1cbe7847d5374bf5c", - launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:6641bf94acaeec03ee7e231241800fce2f0ad92eee25371bd5248ca800a086d7", + driverImage: "gcr.io/ml-pipeline/kfp-driver@sha256:8e60086b04d92b657898a310ca9757631d58547e76bbbb8bfc376d654bef1707", + launcherImage: "gcr.io/ml-pipeline/kfp-launcher@sha256:50151a8615c8d6907aa627902dce50a2619fd231f25d1e5c2a72737a2ea4001e", job: job, spec: spec, executors: deploy.GetExecutors(), From 58ce09e07d031964905020c749e77bf0f37e83d4 Mon Sep 17 00:00:00 2001 From: Chen Sun Date: Fri, 27 Oct 2023 04:08:53 +0000 Subject: [PATCH 253/253] chore(release): bumped version to 2.0.3 --- CHANGELOG.md | 42 +++++++++++++++++++ VERSION | 2 +- .../api/v1beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v1beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../api/v2beta1/python_http_client/README.md | 4 +- .../kfp_server_api/__init__.py | 2 +- .../kfp_server_api/api_client.py | 2 +- .../kfp_server_api/configuration.py | 4 +- .../api/v2beta1/python_http_client/setup.py | 2 +- .../swagger/kfp_api_single_file.swagger.json | 2 +- .../templates/application.yaml | 2 +- manifests/gcp_marketplace/schema.yaml | 4 +- .../base/cache-deployer/kustomization.yaml | 2 +- .../kustomize/base/cache/kustomization.yaml | 2 +- .../generic/pipeline-install-config.yaml | 2 +- .../base/metadata/base/kustomization.yaml | 2 +- .../base/pipeline/kustomization.yaml | 12 +++--- .../metadata-writer/kustomization.yaml | 2 +- .../env/gcp/inverse-proxy/kustomization.yaml | 2 +- 23 files changed, 74 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index db4d260733..c65dcbe724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +### [2.0.3](https://github.com/kubeflow/pipelines/compare/2.0.2...2.0.3) (2023-10-27) + + +### Features + +* **backend:** Support consuming parent DAG input artifact ([\#10162](https://github.com/kubeflow/pipelines/issues/10162)) ([52f5cf5](https://github.com/kubeflow/pipelines/commit/52f5cf51c4a6c233aae57125561c0fc95c4fd20f)) +* **backend:** Update driver and launcher images ([\#10164](https://github.com/kubeflow/pipelines/issues/10164)) ([c0093ec](https://github.com/kubeflow/pipelines/commit/c0093ecef6bc5f056efa135d019267327115d79d)) +* **components:** [endpoint_batch_predict] Initialize component ([0d75611](https://github.com/kubeflow/pipelines/commit/0d7561199751e83b4d7e1603c3d32d4088a7e208)) +* **components:** [text2sql] Generate column names by model batch predict ([1bee8be](https://github.com/kubeflow/pipelines/commit/1bee8be071a91f44c0129837c381863327cb337d)) +* **components:** [text2sql] Generate table names by model batch prediction ([ebb4245](https://github.com/kubeflow/pipelines/commit/ebb42450d0b07eaa8de35a3f6b70eacb5f26f0d8)) +* **components:** [text2sql] Implement preprocess component logic ([21079b5](https://github.com/kubeflow/pipelines/commit/21079b5910e597a38b67853f3ecfb3929344371e)) +* **components:** [text2sql] Initialize preprocess component and integrate with text2sql pipeline ([9aa750e](https://github.com/kubeflow/pipelines/commit/9aa750e62f6e225d037ecdda9bf7cab95f05675d)) +* **components:** [text2sql] Initialize evaluation component ([ea93979](https://github.com/kubeflow/pipelines/commit/ea93979eed02e131bd20180da149b9465670dfe1)) +* 
**components:** [text2sql] Initialize validate and process component ([633ddeb](https://github.com/kubeflow/pipelines/commit/633ddeb07e9212d2e373dba8d20a0f6d67ab037d)) +* **components:** Add ability to preprocess chat llama datasets to `_implementation.llm.chat_dataset_preprocessor` ([99fd201](https://github.com/kubeflow/pipelines/commit/99fd2017a76660f30d0a04b71542cbef45783633)) +* **components:** Add question_answer support for AutoSxS default instructions ([412216f](https://github.com/kubeflow/pipelines/commit/412216f832a848bfc61ce289aed819d7f2860fdd)) +* **components:** Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation feature attribution pipeline ([d8a0660](https://github.com/kubeflow/pipelines/commit/d8a0660df525f5695015e507e981bceff836dd3d)) +* **components:** Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline ([0487f9a](https://github.com/kubeflow/pipelines/commit/0487f9a8b1d8ab0d96d757bd4b598ffd353ecc81)) +* **components:** add support for customizing model_parameters in LLM eval text generation and LLM eval text classification pipelines ([d53ddda](https://github.com/kubeflow/pipelines/commit/d53dddab1c8a042e58e06ff6eb38be82fefddb0a)) +* **components:** Make `model_checkpoint` optional for `preview.llm.infer_pipeline` ([e8fb699](https://github.com/kubeflow/pipelines/commit/e8fb6990dfdf036c941c522f9b384ff679b38ca6)) +* **components:** migrate `DataflowFlexTemplateJobOp` to GA namespace (now `v1.dataflow.DataflowFlexTemplateJobOp`) ([faba922](https://github.com/kubeflow/pipelines/commit/faba9223ee846d459f7bb497a6faa3c153dcf430)) +* **components:** Set display names for SFT, RLHF and LLM inference pipelines ([1386a82](https://github.com/kubeflow/pipelines/commit/1386a826ba2bcdbc19eb2007ca43f6acd1031e4d)) +* **components:** Support service account in kubeflow model_batch_predict component ([1682ce8](https://github.com/kubeflow/pipelines/commit/1682ce8adeb2c55a155588eae7492b2f0a8b783a)) +* **components:** Update image tag used by llm pipelines ([4d71fda](https://github.com/kubeflow/pipelines/commit/4d71fdac3fc92dd4d54c6be3a28725667b8f3c5e)) +* **sdk:** support a Pythonic artifact authoring style ([\#9932](https://github.com/kubeflow/pipelines/issues/9932)) ([8d00d0e](https://github.com/kubeflow/pipelines/commit/8d00d0eb9a1442ed994b6a90acea88604efc6423)) +* **sdk:** support collecting outputs from conditional branches using `dsl.OneOf` ([\#10067](https://github.com/kubeflow/pipelines/issues/10067)) ([2d3171c](https://github.com/kubeflow/pipelines/commit/2d3171cbfec626055e59b8a58ce83fb54ecad113)) + + +### Bug Fixes + +* **components:** [text2sql] Turn model_inference_results_path to model_inference_results_directory and remove duplicate comment ([570e56d](https://github.com/kubeflow/pipelines/commit/570e56dd09af32e173cf041eed7497e4533ec186)) +* **frontend:** Replace twitter artifactory endpoint with npm endpoint. ([\#10099](https://github.com/kubeflow/pipelines/issues/10099)) ([da6a360](https://github.com/kubeflow/pipelines/commit/da6a3601468282c0592eae8e89a3d97b982e2d43)) +* **sdk:** fix bug when `dsl.importer` argument is provided by loop variable ([\#10116](https://github.com/kubeflow/pipelines/issues/10116)) ([73d51c8](https://github.com/kubeflow/pipelines/commit/73d51c8a23afad97efb6d7e7436c081fa22ce24d)) +* **sdk:** Fix OOB for IPython and refactor. Closes [\#10075](https://github.com/kubeflow/pipelines/issues/10075). 
([\#10094](https://github.com/kubeflow/pipelines/issues/10094)) ([c903271](https://github.com/kubeflow/pipelines/commit/c9032716ab2013df56cb1078a703d48ed8e36fb4)) +* **sdk:** type annotation for client credentials ([\#10158](https://github.com/kubeflow/pipelines/issues/10158)) ([02e00e8](https://github.com/kubeflow/pipelines/commit/02e00e8439e9753dbf82856ac9c5a7cec8ce3243)) + + +### Other Pull Requests + +* feat(components) Extend kserve component ([\#10136](https://github.com/kubeflow/pipelines/issues/10136)) ([2054b7c](https://github.com/kubeflow/pipelines/commit/2054b7c45d4831c787115563c8be0048abcb9be1)) +* No public description ([0e240db](https://github.com/kubeflow/pipelines/commit/0e240db39799cb0afbd8c7f982ffdd4f9eb58121)) + ### [2.0.2](https://github.com/kubeflow/pipelines/compare/2.0.0...2.0.2) (2023-10-11) diff --git a/VERSION b/VERSION index f93ea0ca33..6acdb44289 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.2 \ No newline at end of file +2.0.3 \ No newline at end of file diff --git a/backend/api/v1beta1/python_http_client/README.md b/backend/api/v1beta1/python_http_client/README.md index 12742f284f..1de30811ee 100644 --- a/backend/api/v1beta1/python_http_client/README.md +++ b/backend/api/v1beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.2 -- Package version: 2.0.2 +- API version: 2.0.3 +- Package version: 2.0.3 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py index 8d3f7b1a35..86713cb581 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.2" +__version__ = "2.0.3" # import apis into sdk package from kfp_server_api.api.experiment_service_api import ExperimentServiceApi diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py index e5afaf6b98..8a2be9ffd9 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. 
- self.user_agent = 'OpenAPI-Generator/2.0.2/python' + self.user_agent = 'OpenAPI-Generator/2.0.3/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py index 578dcda2dc..fb157f5025 100644 --- a/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v1beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.2\n"\ - "SDK Package Version: 2.0.2".\ + "Version of the API: 2.0.3\n"\ + "SDK Package Version: 2.0.3".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v1beta1/python_http_client/setup.py b/backend/api/v1beta1/python_http_client/setup.py index aa45f1e52d..9c9464f5ae 100644 --- a/backend/api/v1beta1/python_http_client/setup.py +++ b/backend/api/v1beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.2" +VERSION = "2.0.3" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json index 233d7a0e88..9414f4cdcd 100644 --- a/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v1beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.2", + "version": "2.0.3", "description": "This file contains REST API specification for Kubeflow Pipelines. The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/backend/api/v2beta1/python_http_client/README.md b/backend/api/v2beta1/python_http_client/README.md index 7b2ec51e9e..be20533cf9 100644 --- a/backend/api/v2beta1/python_http_client/README.md +++ b/backend/api/v2beta1/python_http_client/README.md @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. 
The file is au This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project: -- API version: 2.0.2 -- Package version: 2.0.2 +- API version: 2.0.3 +- Package version: 2.0.3 - Build package: org.openapitools.codegen.languages.PythonClientCodegen For more information, please visit [https://www.google.com](https://www.google.com) diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py index 3f33d9f4fa..f7a521107d 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/__init__.py @@ -14,7 +14,7 @@ from __future__ import absolute_import -__version__ = "2.0.2" +__version__ = "2.0.3" # import apis into sdk package from kfp_server_api.api.auth_service_api import AuthServiceApi diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py index e5afaf6b98..8a2be9ffd9 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/api_client.py @@ -78,7 +78,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None, self.default_headers[header_name] = header_value self.cookie = cookie # Set default User-Agent. - self.user_agent = 'OpenAPI-Generator/2.0.2/python' + self.user_agent = 'OpenAPI-Generator/2.0.3/python' self.client_side_validation = configuration.client_side_validation def __enter__(self): diff --git a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py index 578dcda2dc..fb157f5025 100644 --- a/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py +++ b/backend/api/v2beta1/python_http_client/kfp_server_api/configuration.py @@ -351,8 +351,8 @@ def to_debug_report(self): return "Python SDK Debug Report:\n"\ "OS: {env}\n"\ "Python Version: {pyversion}\n"\ - "Version of the API: 2.0.2\n"\ - "SDK Package Version: 2.0.2".\ + "Version of the API: 2.0.3\n"\ + "SDK Package Version: 2.0.3".\ format(env=sys.platform, pyversion=sys.version) def get_host_settings(self): diff --git a/backend/api/v2beta1/python_http_client/setup.py b/backend/api/v2beta1/python_http_client/setup.py index aa45f1e52d..9c9464f5ae 100644 --- a/backend/api/v2beta1/python_http_client/setup.py +++ b/backend/api/v2beta1/python_http_client/setup.py @@ -13,7 +13,7 @@ from setuptools import setup, find_packages # noqa: H301 NAME = "kfp-server-api" -VERSION = "2.0.2" +VERSION = "2.0.3" # To install the library, run the following # # python setup.py install diff --git a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json index 60d0004a14..2a63b01dd6 100644 --- a/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json +++ b/backend/api/v2beta1/swagger/kfp_api_single_file.swagger.json @@ -2,7 +2,7 @@ "swagger": "2.0", "info": { "title": "Kubeflow Pipelines API", - "version": "2.0.2", + "version": "2.0.3", "description": "This file contains REST API specification for Kubeflow Pipelines. 
The file is autogenerated from the swagger definition.", "contact": { "name": "google", diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml index 77728a31db..a563a4844f 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/application.yaml @@ -12,7 +12,7 @@ metadata: spec: descriptor: type: Kubeflow Pipelines - version: 2.0.2 + version: 2.0.3 description: |- Reusable end-to-end ML workflow maintainers: diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index bc2c3fda7f..fa50b0207d 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -1,9 +1,9 @@ x-google-marketplace: schemaVersion: v2 applicationApiVersion: v1beta1 - publishedVersion: 2.0.2 + publishedVersion: 2.0.3 publishedVersionMetadata: - releaseNote: Based on 2.0.2 version. + releaseNote: Based on 2.0.3 version. releaseTypes: - Feature recommended: false diff --git a/manifests/kustomize/base/cache-deployer/kustomization.yaml b/manifests/kustomize/base/cache-deployer/kustomization.yaml index 1e82e5ef34..de44a30c52 100644 --- a/manifests/kustomize/base/cache-deployer/kustomization.yaml +++ b/manifests/kustomize/base/cache-deployer/kustomization.yaml @@ -8,4 +8,4 @@ commonLabels: app: cache-deployer images: - name: gcr.io/ml-pipeline/cache-deployer - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml index 2f2ca2f4b4..24fa04023a 100644 --- a/manifests/kustomize/base/cache/kustomization.yaml +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -10,4 +10,4 @@ commonLabels: app: cache-server images: - name: gcr.io/ml-pipeline/cache-server - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml index b8cfddd1ad..cd3b48e8ee 100644 --- a/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml +++ b/manifests/kustomize/base/installs/generic/pipeline-install-config.yaml @@ -11,7 +11,7 @@ data: until the changes take effect. A quick way to restart all deployments in a namespace: `kubectl rollout restart deployment -n `. 
appName: pipeline - appVersion: 2.0.2 + appVersion: 2.0.3 dbHost: mysql # relic to be removed after release dbPort: "3306" # relic to be removed after release dbType: mysql diff --git a/manifests/kustomize/base/metadata/base/kustomization.yaml b/manifests/kustomize/base/metadata/base/kustomization.yaml index b25f43b46a..1988d30e0d 100644 --- a/manifests/kustomize/base/metadata/base/kustomization.yaml +++ b/manifests/kustomize/base/metadata/base/kustomization.yaml @@ -9,4 +9,4 @@ resources: - metadata-grpc-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-envoy - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/pipeline/kustomization.yaml b/manifests/kustomize/base/pipeline/kustomization.yaml index 492b72b00d..90b2d713c9 100644 --- a/manifests/kustomize/base/pipeline/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/kustomization.yaml @@ -37,14 +37,14 @@ resources: - kfp-launcher-configmap.yaml images: - name: gcr.io/ml-pipeline/api-server - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/persistenceagent - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/scheduledworkflow - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/frontend - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/viewer-crd-controller - newTag: 2.0.2 + newTag: 2.0.3 - name: gcr.io/ml-pipeline/visualization-server - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml index f27ba77689..b503511088 100644 --- a/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml +++ b/manifests/kustomize/base/pipeline/metadata-writer/kustomization.yaml @@ -7,4 +7,4 @@ resources: - metadata-writer-sa.yaml images: - name: gcr.io/ml-pipeline/metadata-writer - newTag: 2.0.2 + newTag: 2.0.3 diff --git a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml index 064b195182..827f961978 100644 --- a/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml +++ b/manifests/kustomize/env/gcp/inverse-proxy/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: gcr.io/ml-pipeline/inverse-proxy-agent - newTag: 2.0.2 + newTag: 2.0.3 resources: - proxy-configmap.yaml - proxy-deployment.yaml
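As an illustrative sanity check (not part of the release tooling above), the regenerated Python client produced by a release bump like this one reports the new version directly, since kfp_server_api/__init__.py now sets __version__ to 2.0.3:

import kfp_server_api

# The regenerated client package embeds the bumped release version string.
assert kfp_server_api.__version__ == "2.0.3"
print(f"kfp-server-api client version: {kfp_server_api.__version__}")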